Files
b2txt25/data_analyse/simple_check.py
2025-10-12 09:11:32 +08:00

39 lines
1.4 KiB
Python

import pickle
from pathlib import Path
# Simple check of the latest dataset: report whether the pickle file exists,
# how large it is, what it contains, and whether the first trial's leading
# alignment segments are temporally ordered (end >= start).


def _print_first_trial(data, max_keys, max_segments):
    """Print the leading alignment segments of the first usable trial.

    Only the first ``max_keys`` keys of ``data`` are examined. A key counts as
    a trial when its string form contains ``'trial_'``; the trial is usable
    when it contains an ``'alignment_info'`` entry. Scanning stops after the
    first usable trial so the report stays short.
    """
    for key in list(data)[:max_keys]:
        if 'trial_' not in str(key):
            continue
        trial = data[key]
        if 'alignment_info' not in trial:
            continue
        segments = trial['alignment_info']
        print(f"Trial {key}: {len(segments)} segments")
        # Each segment starts with (phoneme, start, end); any trailing fields
        # (e.g. a confidence value) are not needed for the ordering check.
        for phoneme, start, end, *_ in segments[:max_segments]:
            status = "OK" if end >= start else "BUG"
            print(f" {phoneme}: {start}-{end} [{status}]")
        # One trial is enough for a quick sample.
        break


def check_dataset(path, max_keys=3, max_segments=3):
    """Print a quick sanity report for a pickled dataset file.

    Args:
        path: Location of the pickle file (``str`` or ``Path``).
        max_keys: How many leading dictionary keys to scan for a trial.
        max_segments: How many leading segments of that trial to print.

    Returns:
        ``True`` if the file exists and was loaded and inspected without an
        error, ``False`` if it is missing or any error occurred while loading
        or inspecting it. Errors are printed, not raised, so the script
        always finishes with a readable report.
    """
    path = Path(path)
    print(f"Checking: {path}")
    found = path.exists()
    print(f"File exists: {found}")
    if not found:
        print("❌ Dataset file not found")
        return False

    print(f"File size: {path.stat().st_size / (1024*1024):.1f} MB")
    try:
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. Only run this check on dataset files you trust.
        with open(path, 'rb') as f:
            data = pickle.load(f)
        print(f"Data type: {type(data)}")
        print(f"Keys: {len(data) if isinstance(data, dict) else 'N/A'}")
        # Quick sample
        if isinstance(data, dict):
            _print_first_trial(data, max_keys, max_segments)
        print("✅ Dataset loaded successfully")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on.
        print(f"❌ Error: {e}")
        return False
    return True


dataset_path = Path("phoneme_segmented_data/ctc_results_20251009_000024.pkl")
check_dataset(dataset_path)