56 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			56 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#!/usr/bin/env python3
"""
Inspect the structure of the converted phoneme dataset.
"""
 | |
| 
 | |
import pickle
import sys
 | |
| 
 | |
def check_data_structure(pkl_path):
    """Print a human-readable summary of a pickled phoneme dataset.

    The file at ``pkl_path`` is unpickled and its layout is written to
    standard output: the top-level type and keys, then (when present) a
    summary of ``data['phoneme_data']`` and the entries of
    ``data['conversion_info']``.

    Args:
        pkl_path: Path of the pickle file to inspect.

    Returns:
        None. All results are printed.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the file is not a valid pickle.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only run this on dataset files from a trusted source.
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)

    print("=== 数据结构分析 ===")
    print(f"数据类型: {type(data)}")
    # Assumes the top-level object is dict-like (has .keys()).
    print(f"顶层键: {list(data.keys())}")

    if 'phoneme_data' in data:
        _print_phoneme_data(data['phoneme_data'])

    if 'conversion_info' in data:
        print("\nconversion_info:")
        for key, value in data['conversion_info'].items():
            print(f"  {key}: {value}")


def _print_phoneme_data(phoneme_data):
    """Print the size of ``phoneme_data`` and details of its first phoneme."""
    print(f"\nphoneme_data类型: {type(phoneme_data)}")
    print(f"音素数量: {len(phoneme_data)}")
    # Only the first ten phoneme names are shown to keep the output short.
    print(f"音素列表: {list(phoneme_data)[:10]}...")

    # An empty mapping has no "first phoneme"; indexing into it used to raise
    # IndexError, so stop here instead.
    if not phoneme_data:
        return

    first_phoneme = next(iter(phoneme_data))
    segments = phoneme_data[first_phoneme]
    print(f"\n第一个音素 '{first_phoneme}':")
    print(f"  segments类型: {type(segments)}")
    print(f"  segments数量: {len(segments)}")

    # Explicit length check (rather than truthiness) so that sequence types
    # without an unambiguous truth value still work.
    if len(segments) > 0:
        _print_first_segment(segments[0])


def _print_first_segment(first_segment):
    """Print the keys and values of one segment (assumed to be a dict)."""
    print(f"  第一个segment类型: {type(first_segment)}")
    print(f"  第一个segment键: {list(first_segment.keys())}")

    print("  第一个segment内容:")
    for key, value in first_segment.items():
        if key != 'original_timestamps':
            print(f"    {key}: {value}")
            continue
        # 'original_timestamps' is shown by type, then expanded entry by
        # entry when it is a dict.
        print(f"    {key}: {type(value)}")
        if isinstance(value, dict):
            for ts_key, ts_value in value.items():
                print(f"      {ts_key}: {ts_value}")
 | |
| 
 | |
if __name__ == "__main__":
    # Default dataset location, resolved relative to the current working
    # directory. An optional first command-line argument overrides it so the
    # script can be run from anywhere and on any dataset file.
    default_pkl_path = "../phoneme_segmented_data/phoneme_dataset_20251009_202457_with_original_timestamps.pkl"
    pkl_path = sys.argv[1] if len(sys.argv) > 1 else default_pkl_path
    check_data_structure(pkl_path)
