# Listing metadata from the source viewer: 39 lines, 1.4 KiB, Python
"""Sanity-check a CTC phoneme-segmentation results pickle.

Prints whether the file exists and its size, the type and entry count of the
unpickled object, and the first few alignment segments of one sampled trial.
Each segment is marked OK or BUG depending on whether its end precedes its
start.
"""
import pickle
from itertools import islice
from pathlib import Path
from typing import Any, Iterator

# Simple check of the latest dataset
dataset_path = Path("phoneme_segmented_data/ctc_results_20251009_000024.pkl")

# Number of leading keys scanned for a trial, and of segments shown for it.
SAMPLE_SIZE = 3


def describe_dataset(data: Any) -> Iterator[str]:
    """Yield the report lines describing an unpickled dataset object.

    Lines are yielded lazily so a caller that prints them as they arrive
    still shows partial output when a malformed entry raises midway.

    Args:
        data: The object loaded from the pickle file. Only a ``dict`` is
            inspected beyond its type.

    Yields:
        The type line, the key-count line, and, for the first of the leading
        ``SAMPLE_SIZE`` keys that contains ``'trial_'`` and whose value has an
        ``'alignment_info'`` entry, a header plus up to ``SAMPLE_SIZE``
        segment lines.

    Raises:
        TypeError, ValueError: If a sampled trial or one of its segments does
            not have the expected container / 4-item shape.
    """
    yield f"Data type: {type(data)}"
    yield f"Keys: {len(data) if isinstance(data, dict) else 'N/A'}"

    if not isinstance(data, dict):
        return

    # Quick sample: islice avoids copying every key just to look at a few.
    for key in islice(data, SAMPLE_SIZE):
        if 'trial_' not in str(key):
            continue
        trial = data[key]
        if 'alignment_info' not in trial:
            continue

        segments = trial['alignment_info']
        yield f"Trial {key}: {len(segments)} segments"

        # Check first few segments for temporal ordering. The fourth field is
        # unpacked only so that a segment of the wrong length still raises.
        for phoneme, start, end, _conf in segments[:SAMPLE_SIZE]:
            status = "OK" if end >= start else "BUG"
            yield f"  {phoneme}: {start}-{end} [{status}]"

        # Only the first matching trial is reported.
        break


def check_dataset(path: Path) -> None:
    """Print a short health report for the pickle file at ``path``.

    Any exception raised while loading or describing the data is reported on
    stdout rather than propagated, so the script always finishes.

    Args:
        path: Location of the pickle file to inspect.
    """
    print(f"Checking: {path}")
    exists = path.exists()
    print(f"File exists: {exists}")

    if not exists:
        print("❌ Dataset file not found")
        return

    print(f"File size: {path.stat().st_size / (1024*1024):.1f} MB")

    try:
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only run this against result files you produced yourself.
        with open(path, 'rb') as f:
            data = pickle.load(f)

        for line in describe_dataset(data):
            print(line)

        print("✅ Dataset loaded successfully")

    except Exception as e:  # top-level boundary of a diagnostic script
        print(f"❌ Error: {e}")


# Runs on execution (and on import), matching the original flat script.
check_dataset(dataset_path)
