This commit is contained in:
Zchen
2025-10-12 22:52:38 +08:00
parent 69e3892c27
commit 5c941d9efa
3 changed files with 47 additions and 12 deletions

View File

@@ -25,8 +25,9 @@ class NoiseModel(nn.Module):
# Day-specific input layers
self.day_layer_activation = nn.Softsign()
self.day_weights = nn.ParameterList([nn.Parameter(torch.eye(self.neural_dim, dtype=torch.bfloat16)) for _ in range(self.n_days)])
self.day_biases = nn.ParameterList([nn.Parameter(torch.zeros(1, self.neural_dim, dtype=torch.bfloat16)) for _ in range(self.n_days)])
# No explicit dtype: create parameters in torch's default dtype and let Accelerator manage precision (TPU compatibility)
self.day_weights = nn.ParameterList([nn.Parameter(torch.eye(self.neural_dim)) for _ in range(self.n_days)])
self.day_biases = nn.ParameterList([nn.Parameter(torch.zeros(1, self.neural_dim)) for _ in range(self.n_days)])
self.day_layer_dropout = nn.Dropout(input_dropout)
# Calculate input size after patching
@@ -51,8 +52,8 @@ class NoiseModel(nn.Module):
if "weight_ih" in name:
nn.init.xavier_uniform_(param)
# Learnable initial hidden state
self.h0 = nn.Parameter(nn.init.xavier_uniform_(torch.zeros(1, 1, self.input_size, dtype=torch.bfloat16)))
# Learnable initial hidden state; no explicit dtype so Accelerator can manage precision
self.h0 = nn.Parameter(nn.init.xavier_uniform_(torch.zeros(1, 1, self.input_size)))
def forward(self, x, day_idx, states=None):
# Apply day-specific transformation
@@ -110,8 +111,9 @@ class CleanSpeechModel(nn.Module):
# Day-specific input layers
self.day_layer_activation = nn.Softsign()
self.day_weights = nn.ParameterList([nn.Parameter(torch.eye(self.neural_dim, dtype=torch.bfloat16)) for _ in range(self.n_days)])
self.day_biases = nn.ParameterList([nn.Parameter(torch.zeros(1, self.neural_dim, dtype=torch.bfloat16)) for _ in range(self.n_days)])
# No explicit dtype: create parameters in torch's default dtype and let Accelerator manage precision (TPU compatibility)
self.day_weights = nn.ParameterList([nn.Parameter(torch.eye(self.neural_dim)) for _ in range(self.n_days)])
self.day_biases = nn.ParameterList([nn.Parameter(torch.zeros(1, self.neural_dim)) for _ in range(self.n_days)])
self.day_layer_dropout = nn.Dropout(input_dropout)
# Calculate input size after patching
@@ -141,7 +143,7 @@ class CleanSpeechModel(nn.Module):
nn.init.xavier_uniform_(self.out.weight)
# Learnable initial hidden state
self.h0 = nn.Parameter(nn.init.xavier_uniform_(torch.zeros(1, 1, self.n_units, dtype=torch.bfloat16)))
self.h0 = nn.Parameter(nn.init.xavier_uniform_(torch.zeros(1, 1, self.n_units)))
def forward(self, x, day_idx, states=None, return_state=False):
# Apply day-specific transformation
@@ -229,7 +231,7 @@ class NoisySpeechModel(nn.Module):
nn.init.xavier_uniform_(self.out.weight)
# Learnable initial hidden state
self.h0 = nn.Parameter(nn.init.xavier_uniform_(torch.zeros(1, 1, self.n_units, dtype=torch.bfloat16)))
self.h0 = nn.Parameter(nn.init.xavier_uniform_(torch.zeros(1, 1, self.n_units)))
def forward(self, x, states=None, return_state=False):
# Note: NoisySpeechModel doesn't need day-specific layers as it processes noise