Files
b2txt25/data_analyse/quick_check.py
2025-10-12 09:11:32 +08:00

52 lines
1.6 KiB
Python

#!/usr/bin/env python3
import pickle
import sys
from pathlib import Path
# Quick sanity check: load the most recently modified CTC results file and
# verify that, in the first 20 trials, no aligned segment ends before it starts.
#
# NOTE(review): the process exits with status 1 only when loading or
# processing raises; detected ordering errors are reported on stdout but the
# exit status stays 0.
try:
    # Pick the newest results file by modification time.
    data_dir = Path("phoneme_segmented_data")
    latest_file = max(
        data_dir.glob("ctc_results_*.pkl"),
        key=lambda path: path.stat().st_mtime,
        default=None,
    )
    if latest_file is None:
        # Without this guard max() fails with an opaque "empty sequence"
        # message that says nothing about which files were expected.
        raise FileNotFoundError(
            f"no ctc_results_*.pkl files found in {data_dir}"
        )
    print(f"Loading dataset: {latest_file}")

    # The file holds several pickled batches written back to back; keep
    # reading until the stream is exhausted.
    # SECURITY: pickle.load can execute arbitrary code. Only run this script
    # on results files produced by a trusted pipeline.
    all_trials = []
    with open(latest_file, 'rb') as f:
        while True:
            try:
                batch = pickle.load(f)
            except EOFError:
                break
            all_trials.extend(batch)

    print(f"Loaded {len(all_trials)} trials")

    # Check alignment in the first 20 trials only.
    total_segments = 0
    error_segments = 0

    for trial_idx, trial_data in enumerate(all_trials[:20]):
        # Trials without an 'alignment_info' entry contribute no segments.
        alignment_info = trial_data.get('alignment_info', [])

        # Each alignment entry unpacks as
        # (phoneme, start_time, end_time, confidence).
        for segment_idx, (phoneme, start_time, end_time, _confidence) in enumerate(alignment_info):
            total_segments += 1
            if end_time < start_time:
                error_segments += 1
                print(f"ERROR: Trial {trial_idx}, Segment {segment_idx}: '{phoneme}' has end={end_time} < start={start_time}")

    print(f"Total segments checked: {total_segments}")
    print(f"Error segments: {error_segments}")

    if total_segments == 0:
        # Nothing was inspected, so reporting success would be vacuous.
        print("WARNING: No segments were checked; cannot verify temporal ordering")
    elif error_segments == 0:
        print("SUCCESS: No temporal ordering bugs found!")
    else:
        error_rate = (error_segments / total_segments) * 100
        print(f"FAILED: {error_rate:.1f}% of segments have temporal ordering bugs")

except Exception as e:
    # Top-level boundary for this script: report the failure and signal it
    # through a non-zero exit status.
    print(f"Error: {e}")
    sys.exit(1)