Adjust safety margin in dataset shape analysis to account for data augmentation effects

This commit is contained in:
Zchen
2025-10-22 10:23:22 +08:00
parent 0d9bf29d07
commit d92889d435

View File

@@ -934,15 +934,15 @@ def analyze_dataset_shapes(dataset_tf: BrainToTextDatasetTF, sample_size: int =
         'n_features': dataset_tf.feature_dim
     }
-    # 5. 添加适当的安全边际 - 基于分析范围调整
+    # 5. 添加适当的安全边际 - 基于分析范围和数据增强调整
     if sample_size == -1:
-        # 全数据分析:需要很小的边际应对可能的舍入误差
-        safety_margin = 1.02 # 2% buffer for rounding errors
-        margin_reason = "minimal buffer for full dataset analysis"
+        # 全数据分析:需要为数据增强预留空间特别是Gaussian平滑
+        safety_margin = 1.15 # 15% buffer for data augmentation effects
+        margin_reason = "buffer for full dataset analysis + data augmentation"
     else:
-        # 采样分析:需要更大的边际应对未采样到的极值
-        safety_margin = 1.3 # 30% buffer for sampling uncertainty
-        margin_reason = f"larger buffer due to sampling only {sample_size} trials"
+        # 采样分析:需要更大的边际应对未采样到的极值 + 数据增强
+        safety_margin = 1.35 # 35% buffer for sampling uncertainty + data augmentation
+        margin_reason = f"larger buffer due to sampling only {sample_size} trials + data augmentation"
     final_max_shapes = {
         'max_time_steps': int(original_max_shapes['max_time_steps'] * safety_margin),