69 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			69 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python3
 | |
| 
 | |
| import pickle
 | |
| from pathlib import Path
 | |
| 
 | |
def verify_alignment_fix():
    """Check the newest CTC results file for segments that end before they start.

    Looks in ``phoneme_segmented_data`` (relative to the current working
    directory) for ``ctc_results_*.pkl`` files, picks the one with the most
    recent modification time, and reads every pickled batch it contains.
    The ``alignment_info`` entries of the first 100 trials are then scanned
    for segments whose end time is smaller than their start time.

    Returns:
        bool: ``True`` when no inverted segment was found, ``False`` when at
        least one was found or when the dataset could not be located or
        loaded. Note that ``True`` is also returned when zero segments were
        available to check.
    """
    max_trials = 100   # only the leading trials are inspected
    max_reported = 5   # cap on individually printed errors

    data_dir = Path("phoneme_segmented_data")

    # max() on an empty glob raises ValueError, so handle a missing
    # directory / no matching files explicitly and report it as a failure.
    candidates = list(data_dir.glob("ctc_results_*.pkl")) if data_dir.is_dir() else []
    if not candidates:
        print(f"Error loading dataset: no ctc_results_*.pkl files found in {data_dir}")
        return False
    latest_file = max(candidates, key=lambda path: path.stat().st_mtime)

    print(f"Loading dataset: {latest_file}")

    # The results file holds several pickled batches written back to back;
    # keep reading until the stream is exhausted.
    # SECURITY: pickle.load executes arbitrary code from the file. Only run
    # this against result files produced by a trusted pipeline.
    all_trials = []
    try:
        with open(latest_file, 'rb') as f:
            while True:
                try:
                    batch = pickle.load(f)
                except EOFError:
                    break
                all_trials.extend(batch)
    except Exception as e:
        # Top-level boundary of a diagnostic script: any load failure
        # (corrupt pickle, unreadable file, non-iterable batch) is reported
        # and turned into a failed verification.
        print(f"Error loading dataset: {e}")
        return False

    print(f"Loaded {len(all_trials)} trials")

    total_segments = 0
    error_segments = 0

    print("\n=== Checking alignment in first 100 trials ===")

    for trial_idx, trial_data in enumerate(all_trials[:max_trials]):
        # Each alignment entry is unpacked as
        # (phoneme, start_time, end_time, confidence).
        alignment_info = trial_data.get('alignment_info', [])

        for segment_idx, (phoneme, start_time, end_time, confidence) in enumerate(alignment_info):
            total_segments += 1

            if end_time < start_time:
                error_segments += 1
                if error_segments <= max_reported:  # Show first 5 errors
                    print(f"❌ ERROR Trial {trial_idx}, Segment {segment_idx}: '{phoneme}' has end={end_time} < start={start_time}")

    print("\n=== Results ===")
    print(f"Total segments checked: {total_segments}")
    print(f"Error segments: {error_segments}")

    if error_segments == 0:
        print("✅ SUCCESS: No temporal ordering bugs found!")
        error_rate = 0.0
    else:
        # error_segments > 0 implies total_segments > 0, so this is safe.
        error_rate = (error_segments / total_segments) * 100
        print(f"❌ FAILED: {error_rate:.1f}% of segments have temporal ordering bugs")

    print(f"Error rate: {error_rate:.1f}%")

    return error_segments == 0
 | |
| 
 | |
if __name__ == "__main__":
    # Script entry point: run the verification once and print the verdict.
    success = verify_alignment_fix()
    print(
        "\n🎉 ALIGNMENT FIX SUCCESSFUL! All temporal ordering issues resolved."
        if success
        else "\n⚠️  Still has temporal ordering issues. Need further debugging."
    )
