#!/usr/bin/env python3
"""
Advanced Parameter Optimization for TTA-E Neural Decoding

Supports several optimization algorithms: differential evolution, genetic
algorithm, Bayesian optimization and particle swarm optimization.
"""

import argparse
import json
import os
import pickle
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Tuple, Optional

import numpy as np

# GPU acceleration support
try:
    import cupy as cp
    # Importing CuPy can succeed on a host without a usable CUDA device; probe
    # for one so that we fall back to NumPy instead of failing on first use.
    if cp.cuda.runtime.getDeviceCount() < 1:
        raise RuntimeError("no CUDA device visible")
    GPU_AVAILABLE = True
    print("GPU acceleration available with CuPy")
except Exception:  # ImportError, or a CUDA runtime/driver error from the probe
    import numpy as cp
    GPU_AVAILABLE = False
    print("Using CPU computation with NumPy")

# Optimization libraries
try:
    from scipy.optimize import differential_evolution
    SCIPY_AVAILABLE = True
except ImportError:
    SCIPY_AVAILABLE = False
    print("SciPy not available, some algorithms will be disabled")

try:
    import skopt
    from skopt import gp_minimize
    from skopt.space import Real
    SKOPT_AVAILABLE = True
except ImportError:
    SKOPT_AVAILABLE = False
    print("scikit-optimize not available, Bayesian optimization disabled")

# Fix the random seeds so that runs are reproducible
np.random.seed(42)
if GPU_AVAILABLE:
    cp.random.seed(42)


@dataclass
class OptimizationResult:
    """Outcome of one optimizer run."""
    best_params: Dict  # parameter name -> best value found
    best_score: float  # objective value at best_params (lower is better)
    optimization_history: List[Tuple[Dict, float]]  # (params-or-vector, score) pairs
    total_evaluations: int  # number of objective evaluations reported
    elapsed_time: float  # wall-clock seconds spent in optimize()
    algorithm: str  # human-readable algorithm name, also used for file names


def to_cpu(x):
    """Return *x* as a host (NumPy) array if it is a CuPy array, else unchanged."""
    if GPU_AVAILABLE and hasattr(x, 'get'):
        return x.get()
    return x


def load_base_predictions(cache_file='base_predictions_cache.pkl'):
    """Load the pre-computed base predictions from *cache_file*.

    Returns the unpickled object, or ``None`` (after printing a hint) when the
    file does not exist.
    """
    if os.path.exists(cache_file):
        print(f"Loading base predictions from {cache_file}")
        # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
        # file; only load cache files that you generated yourself.
        with open(cache_file, 'rb') as f:
            return pickle.load(f)
    else:
        print(f"Cache file {cache_file} not found. Please run generate_base_predictions() first.")
        return None


def evaluate_config_gpu(params_dict, base_predictions_data):
    """Score one parameter configuration (GPU-accelerated when CuPy is usable).

    Args:
        params_dict: mapping with keys ``tta_weight_0`` .. ``tta_weight_4`` and
            ``gru_weight``.
        base_predictions_data: iterable of dicts with keys ``gru_probs``,
            ``lstm_probs`` (indexed as (tta, seq_len, vocab_size)) and
            ``true_chars``.

    Returns:
        The length-weighted average PER over all trials (lower is better).
        Returns 1.0 when there is nothing to score, and also when any
        exception occurs (the error is printed), so that optimizers treat a
        broken configuration as the worst possible score.
    """
    try:
        # Parse parameters
        gru_weight = params_dict['gru_weight']
        tta_weights = cp.array([params_dict[f'tta_weight_{i}'] for i in range(5)])

        # The normalised TTA weights do not depend on the trial, so compute
        # them once. All-zero weights fall back to a uniform average.
        weight_sum = cp.sum(tta_weights)
        if weight_sum > 0:
            tta_weights_norm = tta_weights / weight_sum
        else:
            tta_weights_norm = cp.ones_like(tta_weights) / len(tta_weights)
        tta_broadcast = tta_weights_norm[:, None, None]

        total_per = 0.0
        total_chars = 0

        for trial_data in base_predictions_data:
            gru_probs = trial_data['gru_probs']    # shape: (5, seq_len, vocab_size)
            lstm_probs = trial_data['lstm_probs']  # shape: (5, seq_len, vocab_size)
            true_chars = trial_data['true_chars']

            # Move to the GPU when available
            if GPU_AVAILABLE:
                gru_probs = cp.asarray(gru_probs)
                lstm_probs = cp.asarray(lstm_probs)

            # Weighted average over the TTA samples
            gru_weighted = cp.sum(gru_probs * tta_broadcast, axis=0)
            lstm_weighted = cp.sum(lstm_probs * tta_broadcast, axis=0)

            # Model ensemble: the two weights (w and 1 - w) always sum to 1,
            # so no further normalisation is required.
            ensemble_probs = gru_weighted * gru_weight + lstm_weighted * (1 - gru_weight)

            # Decode: most likely symbol at every time step
            predicted_chars = to_cpu(cp.argmax(ensemble_probs, axis=1))

            # Accumulate PER weighted by reference length
            per = calculate_per(predicted_chars, true_chars)
            total_per += per * len(true_chars)
            total_chars += len(true_chars)

        avg_per = total_per / total_chars if total_chars > 0 else 1.0
        return avg_per

    except Exception as e:
        print(f"Error in evaluate_config_gpu: {e}")
        return 1.0  # worst score


def calculate_per(predicted, true):
    """Compute the phoneme error rate (PER) between two sequences.

    The value is the Levenshtein edit distance between *predicted* and *true*
    divided by the length of the longer sequence.

    Returns:
        0.0 when both sequences are empty, 1.0 when exactly one is empty,
        otherwise ``edit_distance / max(len(predicted), len(true))``.
    """
    n, m = len(predicted), len(true)
    if n == 0 and m == 0:
        return 0.0
    if n == 0 or m == 0:
        return 1.0

    # Plain Python sequences are much cheaper to index in the inner loop than
    # NumPy arrays.
    predicted = list(predicted)
    true = list(true)

    # Two-row dynamic programme: previous[j] is the distance between the first
    # i-1 predicted symbols and the first j reference symbols.
    previous = list(range(m + 1))
    for i in range(1, n + 1):
        current = [i] + [0] * m
        symbol = predicted[i - 1]
        for j in range(1, m + 1):
            if symbol == true[j - 1]:
                current[j] = previous[j - 1]
            else:
                current[j] = 1 + min(previous[j], current[j - 1], previous[j - 1])
        previous = current

    return previous[m] / max(n, m)


class DifferentialEvolution:
    """Differential evolution optimizer."""

    def __init__(self, bounds, popsize=15, maxiter=100, mutation=0.5, recombination=0.7):
        self.bounds = bounds
        self.popsize = popsize
        self.maxiter = maxiter
        self.mutation = mutation
        self.recombination = recombination

    def optimize(self, objective_func, base_predictions):
        """Run differential evolution.

        Args:
            objective_func: callable ``(params_dict, base_predictions) -> float``
                to minimise.
            base_predictions: passed through unchanged to *objective_func*.

        Returns:
            An ``OptimizationResult``. Uses SciPy when available, otherwise the
            built-in fallback implementation.
        """
        print(f"Starting Differential Evolution (popsize={self.popsize}, maxiter={self.maxiter})")

        param_names = list(self.bounds.keys())
        scipy_bounds = [self.bounds[name] for name in param_names]

        def objective_wrapper(x):
            params = dict(zip(param_names, x))
            return objective_func(params, base_predictions)

        start_time = time.time()

        if SCIPY_AVAILABLE:
            # NOTE: SciPy interprets popsize as a multiplier of the number of
            # parameters, whereas the manual fallback uses it as the absolute
            # population size.
            result = differential_evolution(
                objective_wrapper,
                scipy_bounds,
                popsize=self.popsize,
                maxiter=self.maxiter,
                mutation=self.mutation,
                recombination=self.recombination,
                seed=42,
                disp=True
            )

            best_params = {name: result.x[i] for i, name in enumerate(param_names)}

            return OptimizationResult(
                best_params=best_params,
                best_score=result.fun,
                optimization_history=[],  # SciPy doesn't provide history
                total_evaluations=result.nfev,
                elapsed_time=time.time() - start_time,
                algorithm="Differential Evolution"
            )
        else:
            # Simple manual implementation
            return self._manual_de(objective_wrapper, scipy_bounds, param_names, start_time)

    def _manual_de(self, objective_func, bounds, param_names, start_time):
        """Hand-written differential evolution used when SciPy is missing.

        Args:
            objective_func: callable taking a parameter vector.
            bounds: list of ``(low, high)`` pairs, one per parameter.
            param_names: parameter names, aligned with *bounds*.
            start_time: ``time.time()`` value taken when optimisation began.

        Raises:
            ValueError: if ``popsize`` is below 4; each mutation needs three
                individuals distinct from the current one.
        """
        if self.popsize < 4:
            raise ValueError("Differential evolution requires popsize >= 4")

        n_params = len(bounds)
        lows = np.array([low for low, _ in bounds], dtype=float)
        highs = np.array([high for _, high in bounds], dtype=float)

        # Initialise the population uniformly inside the bounds
        population = np.random.rand(self.popsize, n_params) * (highs - lows) + lows

        # Evaluate the initial population
        fitness = np.array([objective_func(ind) for ind in population])

        history = []
        best_idx = np.argmin(fitness)
        best_individual = population[best_idx].copy()
        best_fitness = fitness[best_idx]

        print(f"Initial best fitness: {best_fitness:.6f}")

        for generation in range(self.maxiter):
            # Winners are collected separately so that every mutant of this
            # generation is built from the previous generation only.
            new_population = population.copy()

            for i in range(self.popsize):
                # Pick three distinct individuals other than i
                candidates = [k for k in range(self.popsize) if k != i]
                a, b, c = np.random.choice(candidates, 3, replace=False)

                # Mutation, kept inside the bounds
                mutant = population[a] + self.mutation * (population[b] - population[c])
                mutant = np.clip(mutant, lows, highs)

                # Crossover
                trial = population[i].copy()
                crossover_points = np.random.rand(n_params) < self.recombination
                trial[crossover_points] = mutant[crossover_points]

                # Evaluate the trial individual
                trial_fitness = objective_func(trial)

                # Selection
                if trial_fitness < fitness[i]:
                    new_population[i] = trial
                    fitness[i] = trial_fitness

                    if trial_fitness < best_fitness:
                        best_individual = trial.copy()
                        best_fitness = trial_fitness
                        print(f"Generation {generation+1}: New best fitness {best_fitness:.6f}")

            population = new_population
            history.append((best_individual.copy(), best_fitness))

        best_params = {name: best_individual[i] for i, name in enumerate(param_names)}

        return OptimizationResult(
            best_params=best_params,
            best_score=best_fitness,
            optimization_history=history,
            total_evaluations=self.popsize * (1 + self.maxiter),
            elapsed_time=time.time() - start_time,
            algorithm="Differential Evolution (Manual)"
        )


class GeneticAlgorithm:
    """Genetic algorithm optimizer."""

    def __init__(self, bounds, popsize=50, generations=100, mutation_rate=0.1, crossover_rate=0.8):
        self.bounds = bounds
        self.popsize = popsize
        self.generations = generations
        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate

    def optimize(self, objective_func, base_predictions):
        """Run the genetic algorithm.

        Args:
            objective_func: callable ``(params_dict, base_predictions) -> float``
                to minimise.
            base_predictions: passed through unchanged to *objective_func*.

        Returns:
            An ``OptimizationResult`` holding the best individual seen in any
            generation.
        """
        print(f"Starting Genetic Algorithm (popsize={self.popsize}, generations={self.generations})")

        param_names = list(self.bounds.keys())
        start_time = time.time()

        # Initialise the population uniformly inside the bounds
        population = []
        for _ in range(self.popsize):
            individual = {}
            for name in param_names:
                low, high = self.bounds[name]
                individual[name] = np.random.uniform(low, high)
            population.append(individual)

        # Evaluate the initial population
        fitness_scores = [objective_func(ind, base_predictions) for ind in population]

        history = []
        best_idx = np.argmin(fitness_scores)
        best_individual = population[best_idx].copy()
        best_score = fitness_scores[best_idx]

        print(f"Initial best fitness: {best_score:.6f}")

        for generation in range(self.generations):
            # Selection (tournament), crossover and mutation
            new_population = []

            for _ in range(self.popsize):
                parent1 = self._tournament_selection(population, fitness_scores)
                parent2 = self._tournament_selection(population, fitness_scores)

                # Crossover
                if np.random.rand() < self.crossover_rate:
                    child = self._crossover(parent1, parent2, param_names)
                else:
                    child = parent1.copy()

                # Mutation
                child = self._mutate(child, param_names)
                new_population.append(child)

            # Evaluate the new population
            population = new_population
            fitness_scores = [objective_func(ind, base_predictions) for ind in population]

            # Track the best individual seen so far
            current_best_idx = np.argmin(fitness_scores)
            current_best_score = fitness_scores[current_best_idx]

            if current_best_score < best_score:
                best_individual = population[current_best_idx].copy()
                best_score = current_best_score
                print(f"Generation {generation+1}: New best fitness {best_score:.6f}")

            history.append((best_individual.copy(), best_score))

        return OptimizationResult(
            best_params=best_individual,
            best_score=best_score,
            optimization_history=history,
            total_evaluations=self.popsize * (1 + self.generations),
            elapsed_time=time.time() - start_time,
            algorithm="Genetic Algorithm"
        )

    def _tournament_selection(self, population, fitness_scores, tournament_size=3):
        """Return a copy of the fittest of ``tournament_size`` random individuals.

        The tournament is shrunk to the population size when the population is
        smaller than ``tournament_size``; sampling is without replacement and
        would otherwise fail.
        """
        tournament_size = min(tournament_size, len(population))
        tournament_indices = np.random.choice(len(population), tournament_size, replace=False)
        tournament_fitness = [fitness_scores[i] for i in tournament_indices]
        winner_idx = tournament_indices[np.argmin(tournament_fitness)]
        return population[winner_idx].copy()

    def _crossover(self, parent1, parent2, param_names):
        """Uniform crossover: each parameter comes from either parent with p=0.5."""
        child = {}
        for name in param_names:
            source = parent1 if np.random.rand() < 0.5 else parent2
            child[name] = source[name]
        return child

    def _mutate(self, individual, param_names):
        """Gaussian mutation; returns a new dict and leaves *individual* intact."""
        mutated = individual.copy()
        for name in param_names:
            if np.random.rand() < self.mutation_rate:
                low, high = self.bounds[name]
                # Gaussian noise with a standard deviation of 10% of the range
                noise = np.random.normal(0, (high - low) * 0.1)
                mutated[name] = np.clip(individual[name] + noise, low, high)
        return mutated


class ParticleSwarmOptimization:
    """Particle swarm optimizer."""

    def __init__(self, bounds, n_particles=30, max_iter=100, w=0.5, c1=1.5, c2=1.5):
        self.bounds = bounds
        self.n_particles = n_particles
        self.max_iter = max_iter
        self.w = w    # inertia weight
        self.c1 = c1  # cognitive (personal best) factor
        self.c2 = c2  # social (global best) factor

    def optimize(self, objective_func, base_predictions):
        """Run particle swarm optimisation.

        Args:
            objective_func: callable ``(params_dict, base_predictions) -> float``
                to minimise.
            base_predictions: passed through unchanged to *objective_func*.

        Returns:
            An ``OptimizationResult`` for the best position found.
        """
        print(f"Starting Particle Swarm Optimization (particles={self.n_particles}, iterations={self.max_iter})")

        param_names = list(self.bounds.keys())
        n_params = len(param_names)
        start_time = time.time()

        lows = np.array([self.bounds[name][0] for name in param_names], dtype=float)
        highs = np.array([self.bounds[name][1] for name in param_names], dtype=float)

        # Initialise particle positions and velocities
        positions = np.zeros((self.n_particles, n_params))
        velocities = np.zeros((self.n_particles, n_params))
        personal_best_positions = np.zeros((self.n_particles, n_params))
        personal_best_scores = np.full(self.n_particles, float('inf'))

        for i in range(n_params):
            low, high = self.bounds[param_names[i]]
            span = abs(high - low) * 0.1  # initial speed limited to 10% of the range
            positions[:, i] = np.random.uniform(low, high, self.n_particles)
            velocities[:, i] = np.random.uniform(-span, span, self.n_particles)

        # Evaluate the initial positions
        for i in range(self.n_particles):
            params = dict(zip(param_names, positions[i]))
            score = objective_func(params, base_predictions)
            personal_best_positions[i] = positions[i].copy()
            personal_best_scores[i] = score

        global_best_idx = np.argmin(personal_best_scores)
        global_best_position = personal_best_positions[global_best_idx].copy()
        global_best_score = personal_best_scores[global_best_idx]

        print(f"Initial best fitness: {global_best_score:.6f}")

        history = []

        for iteration in range(self.max_iter):
            for i in range(self.n_particles):
                # Velocity update
                r1, r2 = np.random.rand(2)
                velocities[i] = (self.w * velocities[i] +
                                 self.c1 * r1 * (personal_best_positions[i] - positions[i]) +
                                 self.c2 * r2 * (global_best_position - positions[i]))

                # Position update, kept inside the bounds
                positions[i] = np.clip(positions[i] + velocities[i], lows, highs)

                # Evaluate the new position
                params = dict(zip(param_names, positions[i]))
                score = objective_func(params, base_predictions)

                # Update the personal best
                if score < personal_best_scores[i]:
                    personal_best_positions[i] = positions[i].copy()
                    personal_best_scores[i] = score

                    # Update the global best
                    if score < global_best_score:
                        global_best_position = positions[i].copy()
                        global_best_score = score
                        print(f"Iteration {iteration+1}: New best fitness {global_best_score:.6f}")

            history.append((global_best_position.copy(), global_best_score))

        best_params = {name: global_best_position[i] for i, name in enumerate(param_names)}

        return OptimizationResult(
            best_params=best_params,
            best_score=global_best_score,
            optimization_history=history,
            total_evaluations=self.n_particles * (1 + self.max_iter),
            elapsed_time=time.time() - start_time,
            algorithm="Particle Swarm Optimization"
        )


class BayesianOptimization:
    """Bayesian optimizer (requires scikit-optimize)."""

    def __init__(self, bounds, n_calls=100, n_initial_points=10, acq_func='gp_hedge'):
        self.bounds = bounds
        self.n_calls = n_calls
        self.n_initial_points = n_initial_points
        self.acq_func = acq_func

    def optimize(self, objective_func, base_predictions):
        """Run Bayesian optimisation with ``skopt.gp_minimize``.

        Args:
            objective_func: callable ``(params_dict, base_predictions) -> float``
                to minimise.
            base_predictions: passed through unchanged to *objective_func*.

        Raises:
            ImportError: if scikit-optimize is not installed.
        """
        if not SKOPT_AVAILABLE:
            raise ImportError("scikit-optimize is required for Bayesian optimization")

        print(f"Starting Bayesian Optimization (calls={self.n_calls}, initial_points={self.n_initial_points})")

        param_names = list(self.bounds.keys())
        dimensions = [Real(self.bounds[name][0], self.bounds[name][1], name=name) for name in param_names]

        def objective_wrapper(x):
            params = dict(zip(param_names, x))
            return objective_func(params, base_predictions)

        start_time = time.time()

        result = gp_minimize(
            func=objective_wrapper,
            dimensions=dimensions,
            n_calls=self.n_calls,
            n_initial_points=self.n_initial_points,
            acq_func=self.acq_func,
            random_state=42
        )

        best_params = {name: result.x[i] for i, name in enumerate(param_names)}

        # Build the history from every evaluated point
        history = []
        for point, score in zip(result.x_iters, result.func_vals):
            params = {name: point[j] for j, name in enumerate(param_names)}
            history.append((params, score))

        return OptimizationResult(
            best_params=best_params,
            best_score=result.fun,
            optimization_history=history,
            total_evaluations=len(result.x_iters),
            elapsed_time=time.time() - start_time,
            algorithm="Bayesian Optimization"
        )


def generate_base_predictions(cache_file='base_predictions_cache.pkl'):
    """Create the base-predictions cache at *cache_file*.

    Does nothing (besides printing a notice) when the file already exists.
    The data written here is random mock data for demonstration; replace it
    with the real prediction-generation code for actual use.

    Args:
        cache_file: destination path; defaults to the same file that
            ``load_base_predictions`` reads by default.
    """
    print("Generating base predictions cache...")

    # This should call the original evaluation code to produce the predictions
    # for every TTA sample. For demonstration we create simple mock data.
    if os.path.exists(cache_file):
        print(f"Cache file {cache_file} already exists.")
        return

    # Mock data - replace with real prediction generation for actual use
    print("Generating mock base predictions for demonstration...")

    n_trials = 10    # number of mock trials
    seq_len = 50     # sequence length
    vocab_size = 31  # vocabulary size
    n_tta = 5        # number of TTA samples

    base_predictions = []

    for trial in range(n_trials):
        # Mock GRU and LSTM probability predictions
        gru_probs = np.random.rand(n_tta, seq_len, vocab_size)
        lstm_probs = np.random.rand(n_tta, seq_len, vocab_size)

        # Normalise into probability distributions over the vocabulary
        gru_probs = gru_probs / np.sum(gru_probs, axis=2, keepdims=True)
        lstm_probs = lstm_probs / np.sum(lstm_probs, axis=2, keepdims=True)

        # Mock reference character sequence
        true_chars = np.random.randint(0, vocab_size, seq_len)

        base_predictions.append({
            'gru_probs': gru_probs,
            'lstm_probs': lstm_probs,
            'true_chars': true_chars
        })

    # Save the cache
    with open(cache_file, 'wb') as f:
        pickle.dump(base_predictions, f)

    print(f"Base predictions cache saved to {cache_file}")


def _to_jsonable(value):
    """Return a copy of *value* in which NumPy arrays/scalars are plain Python."""
    if isinstance(value, dict):
        return {key: _to_jsonable(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_to_jsonable(item) for item in value]
    if hasattr(value, 'tolist'):  # NumPy (or CuPy) arrays and NumPy scalars
        return value.tolist()
    return value


def save_results(results: List[OptimizationResult], output_dir='optimization_results'):
    """Write one JSON file per result plus a comparison summary.

    The *results* objects are not modified. Array-valued history entries are
    stored as JSON lists. When *results* is empty only the output directory is
    created.

    Args:
        results: optimizer results to store.
        output_dir: directory that receives ``<algorithm>_result.json`` files
            and ``optimization_summary.json``; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    if not results:
        # min() below needs at least one result
        return

    # Save the detailed per-algorithm results
    for result in results:
        filename = f"{result.algorithm.lower().replace(' ', '_')}_result.json"
        filepath = os.path.join(output_dir, filename)

        result_dict = {
            'algorithm': result.algorithm,
            'best_params': _to_jsonable(result.best_params),
            'best_score': _to_jsonable(result.best_score),
            'total_evaluations': _to_jsonable(result.total_evaluations),
            'elapsed_time': result.elapsed_time,
            'optimization_history': [
                {'params': _to_jsonable(params), 'score': _to_jsonable(score)}
                for params, score in result.optimization_history
            ]
        }

        with open(filepath, 'w') as f:
            json.dump(result_dict, f, indent=2)

        print(f"Results saved to {filepath}")

    # Save the summary comparison
    summary_file = os.path.join(output_dir, 'optimization_summary.json')
    best_result = min(results, key=lambda x: x.best_score)
    summary = {
        'comparison': [
            {
                'algorithm': result.algorithm,
                'best_score': _to_jsonable(result.best_score),
                'total_evaluations': _to_jsonable(result.total_evaluations),
                'elapsed_time': result.elapsed_time,
                'best_params': _to_jsonable(result.best_params)
            }
            for result in results
        ],
        # Converted copy: the winning result object itself stays untouched
        'best_overall': _to_jsonable(vars(best_result))
    }

    with open(summary_file, 'w') as f:
        # default=str is kept only as a last-resort fallback for exotic values
        json.dump(summary, f, indent=2, default=str)

    print(f"Summary saved to {summary_file}")


def main():
    """Command-line entry point: run the selected optimizers and compare them."""
    parser = argparse.ArgumentParser(description='Advanced Parameter Optimization for TTA-E')
    parser.add_argument('--algorithms', type=str, nargs='+',
                        default=['de', 'ga', 'pso'],
                        choices=['de', 'ga', 'pso', 'bayes', 'all'],
                        help='Optimization algorithms to run')
    parser.add_argument('--generate_cache', action='store_true',
                        help='Generate base predictions cache')
    parser.add_argument('--cache_file', type=str, default='base_predictions_cache.pkl',
                        help='Base predictions cache file')
    parser.add_argument('--output_dir', type=str, default='optimization_results',
                        help='Output directory for results')

    # Algorithm-specific parameters
    parser.add_argument('--de_popsize', type=int, default=15, help='DE population size')
    parser.add_argument('--de_maxiter', type=int, default=50, help='DE max iterations')
    parser.add_argument('--ga_popsize', type=int, default=30, help='GA population size')
    parser.add_argument('--ga_generations', type=int, default=50, help='GA generations')
    parser.add_argument('--pso_particles', type=int, default=20, help='PSO particle count')
    parser.add_argument('--pso_iterations', type=int, default=50, help='PSO iterations')
    parser.add_argument('--bayes_calls', type=int, default=100, help='Bayesian optimization calls')

    args = parser.parse_args()

    # Generate the cache if requested, at the same path that is loaded later
    if args.generate_cache:
        generate_base_predictions(args.cache_file)
        return

    # Load the base predictions
    base_predictions = load_base_predictions(args.cache_file)
    if base_predictions is None:
        print("Please run with --generate_cache first to create base predictions")
        return

    # Parameter bounds
    bounds = {
        'tta_weight_0': (0.0, 2.0),  # original
        'tta_weight_1': (0.0, 2.0),  # noise
        'tta_weight_2': (0.0, 2.0),  # scaling
        'tta_weight_3': (0.0, 2.0),  # shift
        'tta_weight_4': (0.0, 2.0),  # smoothing
        'gru_weight': (0.0, 1.0)     # GRU weight
    }

    # Resolve the algorithm selection
    if 'all' in args.algorithms:
        algorithms_to_run = ['de', 'ga', 'pso']
        if SKOPT_AVAILABLE:
            algorithms_to_run.append('bayes')
    else:
        # Drop repeated names (order preserved): a second run of the same
        # algorithm would only overwrite the first run's result file.
        algorithms_to_run = list(dict.fromkeys(args.algorithms))

    # Run the optimizers
    results = []

    for algo in algorithms_to_run:
        print(f"\n{'='*60}")
        print(f"Running {algo.upper()} optimization...")
        print(f"{'='*60}")

        try:
            if algo == 'de':
                optimizer = DifferentialEvolution(
                    bounds=bounds,
                    popsize=args.de_popsize,
                    maxiter=args.de_maxiter
                )
            elif algo == 'ga':
                optimizer = GeneticAlgorithm(
                    bounds=bounds,
                    popsize=args.ga_popsize,
                    generations=args.ga_generations
                )
            elif algo == 'pso':
                optimizer = ParticleSwarmOptimization(
                    bounds=bounds,
                    n_particles=args.pso_particles,
                    max_iter=args.pso_iterations
                )
            elif algo == 'bayes':
                if not SKOPT_AVAILABLE:
                    print("Skipping Bayesian optimization (scikit-optimize not available)")
                    continue
                optimizer = BayesianOptimization(
                    bounds=bounds,
                    n_calls=args.bayes_calls
                )
            else:
                print(f"Unknown algorithm: {algo}")
                continue

            result = optimizer.optimize(evaluate_config_gpu, base_predictions)
            results.append(result)

            print(f"\n{result.algorithm} Results:")
            print(f"Best Score: {result.best_score:.6f}")
            print(f"Best Parameters: {result.best_params}")
            print(f"Total Evaluations: {result.total_evaluations}")
            print(f"Elapsed Time: {result.elapsed_time:.2f}s")

        except Exception as e:
            # Top-level boundary: one failing optimizer must not stop the rest
            print(f"Error running {algo}: {e}")
            continue

    # Save and compare the results
    if results:
        save_results(results, args.output_dir)

        print(f"\n{'='*60}")
        print("FINAL COMPARISON")
        print(f"{'='*60}")

        # Sort by score (best first)
        results.sort(key=lambda x: x.best_score)

        for i, result in enumerate(results, 1):
            print(f"{i}. {result.algorithm}")
            print(f" Score: {result.best_score:.6f}")
            print(f" Time: {result.elapsed_time:.2f}s")
            print(f" Evaluations: {result.total_evaluations}")
            print(f" Parameters: {result.best_params}")
            print()

        print(f"Best overall: {results[0].algorithm} with score {results[0].best_score:.6f}")


if __name__ == '__main__':
    main()