From a5a3179ca66ddd7fc9459a59bba935f98699d68a Mon Sep 17 00:00:00 2001 From: Zchen <161216199+ZH-CEN@users.noreply.github.com> Date: Fri, 17 Oct 2025 01:49:03 +0800 Subject: [PATCH] f --- model_training_nnn_tpu/trainer_tf.py | 31 +++++++--------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/model_training_nnn_tpu/trainer_tf.py b/model_training_nnn_tpu/trainer_tf.py index 7fe6caa..bb44d44 100644 --- a/model_training_nnn_tpu/trainer_tf.py +++ b/model_training_nnn_tpu/trainer_tf.py @@ -90,31 +90,16 @@ class BrainToTextDecoderTrainerTF: with self.strategy.scope(): self.model = self._build_model() self.optimizer = self._create_optimizer() + print("🔧 Initializing optimizer for TPU training...") - # For TPU, we initialize the optimizer by accessing its basic properties - # The optimizer will be properly built when first used in training - try: - print("✅ Checking optimizer initialization...") + print(f"Optimizer type: {type(self.optimizer).__name__}") - # Access optimizer properties to ensure it's properly initialized - # This is safe and works with all TensorFlow/Keras optimizer versions - print(f"Optimizer type: {type(self.optimizer).__name__}") - print(f"Learning rate: {self.optimizer.learning_rate}") - - # Access iterations to ensure optimizer state tracking is ready - # This creates the iterations variable without building the full state - iterations = self.optimizer.iterations - print(f"Optimizer iterations initialized: {iterations}") - - print("✅ Optimizer ready for TPU training") - print("📝 Note: Optimizer state will be built automatically during first training step") - - except Exception as e: - print(f"❌ CRITICAL: Could not initialize optimizer: {e}") - print(f"Error type: {type(e).__name__}") - import traceback - print(f"Full traceback: {traceback.format_exc()}") - raise RuntimeError(f"Optimizer initialization failed: {e}") from e + # Initialize optimizer slot variables within strategy scope + # This prevents the "different scope" error + print("🔧 Creating optimizer slot variables within TPU strategy scope...") + dummy_gradients = [tf.zeros_like(var) for var in self.model.trainable_variables] + self.optimizer.apply_gradients(zip(dummy_gradients, self.model.trainable_variables)) + print("✅ Optimizer ready for TPU training") self.lr_scheduler = self._create_lr_scheduler() self.ctc_loss = CTCLoss(blank_index=0, reduction='none')