56 lines
2.0 KiB
Python
56 lines
2.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Inspect the structure of the converted phoneme dataset.
|
|
"""
|
|
|
|
import pickle
|
|
|
|
def _print_segment_details(segment):
    """Print the type, keys and per-key contents of a single segment mapping."""
    print(f" 第一个segment类型: {type(segment)}")
    print(f" 第一个segment键: {list(segment.keys())}")

    # Dump every field of the segment.
    print(" 第一个segment内容:")
    for key, value in segment.items():
        if key != 'original_timestamps':
            print(f" {key}: {value}")
            continue

        # For 'original_timestamps' only the type is printed at this level;
        # when the value is a dict its entries are listed one per line.
        print(f" {key}: {type(value)}")
        if isinstance(value, dict):
            for ts_key, ts_value in value.items():
                print(f" {ts_key}: {ts_value}")


def check_data_structure(pkl_path):
    """Load a pickle file and print a summary of its structure.

    The report contains the type and top-level keys of the loaded object.
    When a ``'phoneme_data'`` key is present, its type, its number of
    entries and its first ten keys are printed, followed by details of the
    first entry and of that entry's first segment. When a
    ``'conversion_info'`` key is present, each of its key/value pairs is
    printed.

    Args:
        pkl_path: Path of the pickle file to open in binary mode.

    Returns:
        None. All results are written to standard output.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the file is not a valid pickle.
    """
    # NOTE(security): unpickling can execute arbitrary code; only run this
    # on files that come from a trusted source.
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)

    print("=== 数据结构分析 ===")
    print(f"数据类型: {type(data)}")
    print(f"顶层键: {list(data.keys())}")

    # Inspect phoneme_data.
    if 'phoneme_data' in data:
        phoneme_data = data['phoneme_data']
        print(f"\nphoneme_data类型: {type(phoneme_data)}")
        print(f"音素数量: {len(phoneme_data)}")
        print(f"音素列表: {list(phoneme_data.keys())[:10]}...")

        # Inspect the first phoneme only when there is one: indexing the key
        # list of an empty mapping would raise IndexError and abort the
        # report before conversion_info is shown.
        if len(phoneme_data) > 0:
            first_phoneme = next(iter(phoneme_data.keys()))
            segments = phoneme_data[first_phoneme]
            print(f"\n第一个音素 '{first_phoneme}':")
            print(f" segments类型: {type(segments)}")
            print(f" segments数量: {len(segments)}")

            if len(segments) > 0:
                _print_segment_details(segments[0])

    # Inspect conversion_info.
    if 'conversion_info' in data:
        conversion_info = data['conversion_info']
        print("\nconversion_info:")
        for key, value in conversion_info.items():
            print(f" {key}: {value}")
|
|
|
|
if __name__ == "__main__":
    import sys

    # Dataset inspected when no path is given; relative to the current
    # working directory.
    default_pkl_path = "../phoneme_segmented_data/phoneme_dataset_20251009_202457_with_original_timestamps.pkl"

    # An optional first command-line argument overrides the default path so
    # other dataset files can be inspected without editing this script.
    pkl_path = sys.argv[1] if len(sys.argv) > 1 else default_pkl_path
    check_data_structure(pkl_path)