b2txt25/data_analyse/verify_fix.py

#!/usr/bin/env python3
import pickle
from pathlib import Path


def verify_alignment_fix():
    """Verify that the alignment fix worked by checking the latest dataset."""
    # Load the latest dataset
    data_dir = Path("phoneme_segmented_data")
    latest_file = max(data_dir.glob("ctc_results_*.pkl"), key=lambda x: x.stat().st_mtime)
    print(f"Loading dataset: {latest_file}")

    # Load all batches from the results file
    all_trials = []
    try:
        with open(latest_file, 'rb') as f:
            while True:
                try:
                    batch = pickle.load(f)
                    all_trials.extend(batch)
                except EOFError:
                    break
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return False
    print(f"Loaded {len(all_trials)} trials")

    # Check alignment in the first 100 trials
    total_segments = 0
    error_segments = 0
    print("\n=== Checking alignment in first 100 trials ===")
    for trial_idx in range(min(100, len(all_trials))):
        trial_data = all_trials[trial_idx]
        alignment_info = trial_data.get('alignment_info', [])
        for segment_idx, (phoneme, start_time, end_time, confidence) in enumerate(alignment_info):
            total_segments += 1
            if end_time < start_time:
                error_segments += 1
                if error_segments <= 5:  # Show the first 5 errors only
                    print(f"❌ ERROR Trial {trial_idx}, Segment {segment_idx}: "
                          f"'{phoneme}' has end={end_time} < start={start_time}")

    print("\n=== Results ===")
    print(f"Total segments checked: {total_segments}")
    print(f"Error segments: {error_segments}")
    if error_segments == 0:
        print("✅ SUCCESS: No temporal ordering bugs found!")
        error_rate = 0.0
    else:
        error_rate = (error_segments / total_segments) * 100
        print(f"❌ FAILED: {error_rate:.1f}% of segments have temporal ordering bugs")
    print(f"Error rate: {error_rate:.1f}%")
    return error_segments == 0


if __name__ == "__main__":
    success = verify_alignment_fix()
    if success:
        print("\n🎉 ALIGNMENT FIX SUCCESSFUL! All temporal ordering issues resolved.")
    else:
        print("\n⚠️ Still has temporal ordering issues. Need further debugging.")