73 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			73 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python3
 | |
| """
 | |
| 检查音素数据集文件的内容和统计信息
 | |
| """
 | |
| 
 | |
| import pickle
 | |
| import os
 | |
| 
 | |
def inspect_phoneme_dataset(file_path):
    """Print summary statistics and one sample segment of a phoneme dataset.

    The pickle file is expected to hold a mapping of phoneme -> sequence of
    segments, where each segment is itself a mapping (the code calls
    ``.items()`` on it). A segment's ``'neural_features'`` entry is reported
    by its ``shape`` and ``dtype`` attributes; every other entry is printed
    verbatim.

    Args:
        file_path: Path of the pickle file to inspect.

    Returns:
        None. All results are written to stdout. A missing file or any error
        raised while loading/inspecting is reported with a message instead
        of being propagated.
    """
    if not os.path.exists(file_path):
        print(f"文件不存在: {file_path}")
        return

    print(f"正在加载文件: {file_path}")

    try:
        # NOTE(security): pickle.load can execute arbitrary code while
        # deserializing; only run this on dataset files from a trusted source.
        with open(file_path, 'rb') as f:
            dataset = pickle.load(f)

        print(f"\n=== 数据集统计信息 ===")
        print(f"音素类型数量: {len(dataset)}")

        total_segments = 0
        print(f"\n各音素片段数量:")

        for phoneme, segments in dataset.items():
            segment_count = len(segments)
            total_segments += segment_count
            print(f"  {phoneme}: {segment_count} 个片段")

        print(f"\n总片段数: {total_segments}")

        # Show the first segment of the first phoneme as an example.
        if dataset:
            # Take the first key in iteration order. The previous code
            # indexed position 2, which displayed the third phoneme and
            # failed outright on datasets with fewer than three phonemes.
            first_phoneme = next(iter(dataset))
            first_segment = dataset[first_phoneme][0]

            print(f"\n=== 数据片段示例 (音素: {first_phoneme}) ===")
            for key, value in first_segment.items():
                if key == 'neural_features':
                    # Summarize instead of dumping the whole feature object.
                    print(f"  {key}: shape {value.shape}, dtype {value.dtype}")
                else:
                    print(f"  {key}: {value}")

    except Exception as e:
        # Deliberate best-effort boundary for this diagnostic script:
        # report the problem and return rather than crash.
        print(f"加载文件时出错: {e}")
 | |
| 
 | |
if __name__ == "__main__":
    # Inspect the hard-coded phoneme dataset file.
    file_path = "./data_analyse/phoneme_segmented_data/phoneme_dataset_20251008_233045.pkl"

    if not os.path.exists(file_path):
        print(f"文件不存在: {file_path}")

        # Fall back to listing the .pkl files in the first candidate
        # directory that exists, so the user can see what is available.
        candidate_dirs = (
            "./phoneme_segmented_data",
            "../phoneme_segmented_data",
            "../../phoneme_segmented_data",
        )
        found_dir = next(
            (d for d in candidate_dirs if os.path.exists(d)),
            None,
        )

        if found_dir is not None:
            print(f"\n在 {found_dir} 中找到以下文件:")
            for entry in os.listdir(found_dir):
                if entry.endswith('.pkl'):
                    print(f"  {entry}")
    else:
        inspect_phoneme_dataset(file_path)
