Files
Test_AI/benchmark/optimized_visualizer.py

364 lines
14 KiB
Python
Raw Normal View History

2026-01-20 10:54:30 +08:00
"""
优化压力测试可视化模块
专门为原生 TensorRT 测试结果生成图表
"""
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Dict, Any
# Configure matplotlib defaults for all charts in this module.
plt.rcParams['font.family'] = ['Arial', 'DejaVu Sans']  # preferred font, with a fallback
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly with non-default fonts
plt.rcParams['font.size'] = 10
def load_optimized_results(results_dir: str) -> pd.DataFrame:
    """Load the most recent optimized benchmark results as a DataFrame.

    Scans *results_dir* for result files in either of the two supported
    naming schemes and parses the newest one by file modification time.

    Args:
        results_dir: Directory containing the JSON result files.

    Returns:
        A pandas DataFrame built from the parsed JSON payload.

    Raises:
        FileNotFoundError: If no matching result file exists.
    """
    base = Path(results_dir)
    # Both naming schemes produced by the two benchmark runners.
    candidates = [
        *base.glob("optimized_results_*.json"),
        *base.glob("ultralytics_optimized_*.json"),
    ]
    if not candidates:
        raise FileNotFoundError("未找到优化测试结果文件")
    newest = max(candidates, key=lambda p: p.stat().st_mtime)
    payload = json.loads(newest.read_text(encoding='utf-8'))
    return pd.DataFrame(payload)
def create_performance_comparison(df: pd.DataFrame, output_dir: str) -> str:
    """Create the 2x2 performance-comparison figure and save it as a PNG.

    Panels: max FPS before/after, GPU utilization before/after,
    batch size vs FPS, and parallelism (streams/threads) vs FPS.

    Args:
        df: Benchmark results; expected columns include 'resolution',
            'actual_fps', 'gpu_utilization' and optionally 'batch_size'
            plus 'num_streams' or 'num_threads'.
        output_dir: Directory the PNG is written into.

    Returns:
        Path (as str) of the saved chart file.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('RTX 3050 Optimized Performance Analysis', fontsize=20, fontweight='bold')

    # 1. Max FPS: baseline (Ultralytics) vs optimized (native TensorRT).
    resolutions = [320, 480]
    original_fps = [33.8, 33.9]  # baseline numbers from the earlier test run
    optimized_fps = []
    for res in resolutions:
        res_data = df[df['resolution'] == res]
        optimized_fps.append(res_data['actual_fps'].max() if len(res_data) > 0 else 0)

    x = np.arange(len(resolutions))
    width = 0.35
    ax1.bar(x - width/2, original_fps, width, label='Original (Ultralytics)',
            color='#FF6B6B', alpha=0.8)
    ax1.bar(x + width/2, optimized_fps, width, label='Optimized (Native TensorRT)',
            color='#4ECDC4', alpha=0.8)
    ax1.set_title('Max FPS Comparison', fontweight='bold')
    ax1.set_xlabel('Resolution')
    ax1.set_ylabel('FPS')
    ax1.set_xticks(x)
    ax1.set_xticklabels([f'{res}x{res}' for res in resolutions])
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    # Annotate each resolution with its speed-up factor.
    for i, (orig, opt) in enumerate(zip(original_fps, optimized_fps)):
        if orig > 0 and opt > 0:
            improvement = opt / orig
            ax1.text(i, max(orig, opt) + 5, f'{improvement:.1f}x',
                     ha='center', va='bottom', fontweight='bold', color='green')

    # 2. GPU utilization: baseline vs optimized.
    gpu_utils_orig = [30, 34]  # baseline GPU utilization from the earlier run
    gpu_utils_opt = []
    for res in resolutions:
        res_data = df[df['resolution'] == res]
        gpu_utils_opt.append(res_data['gpu_utilization'].max() if len(res_data) > 0 else 0)
    ax2.bar(x - width/2, gpu_utils_orig, width, label='Original',
            color='#FF6B6B', alpha=0.8)
    ax2.bar(x + width/2, gpu_utils_opt, width, label='Optimized',
            color='#4ECDC4', alpha=0.8)
    # Fix: draw the target line BEFORE legend() so its label actually
    # appears in the legend (the original added it afterwards, so the
    # 'Target (70%)' entry was silently dropped).
    ax2.axhline(y=70, color='green', linestyle='--', alpha=0.7, label='Target (70%)')
    ax2.set_title('GPU Utilization Comparison', fontweight='bold')
    ax2.set_xlabel('Resolution')
    ax2.set_ylabel('GPU Utilization (%)')
    ax2.set_xticks(x)
    ax2.set_xticklabels([f'{res}x{res}' for res in resolutions])
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    ax2.set_ylim(0, 100)

    # 3. Batch size vs mean FPS (panel left empty when the column is absent).
    if 'batch_size' in df.columns:
        batch_perf = df.groupby('batch_size')['actual_fps'].mean()
        ax3.plot(batch_perf.index, batch_perf.values, marker='o', linewidth=3,
                 markersize=8, color='#95E1D3')
        ax3.set_title('Batch Size vs Performance', fontweight='bold')
        ax3.set_xlabel('Batch Size')
        ax3.set_ylabel('Average FPS')
        ax3.grid(True, alpha=0.3)

    # 4. Parallelism vs mean FPS; supports both field names.
    parallel_field = 'num_streams' if 'num_streams' in df.columns else 'num_threads'
    if parallel_field in df.columns:
        parallel_perf = df.groupby(parallel_field)['actual_fps'].mean()
        ax4.plot(parallel_perf.index, parallel_perf.values, marker='s', linewidth=3,
                 markersize=8, color='#F38BA8')
        is_streams = parallel_field == 'num_streams'
        title = 'Number of Streams vs Performance' if is_streams else 'Number of Threads vs Performance'
        xlabel = 'Number of CUDA Streams' if is_streams else 'Number of Threads'
        ax4.set_title(title, fontweight='bold')
        ax4.set_xlabel(xlabel)
        ax4.set_ylabel('Average FPS')
        ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    output_file = Path(output_dir) / "optimized_performance_comparison.png"
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close()
    return str(output_file)
def create_optimization_analysis(df: pd.DataFrame, output_dir: str) -> str:
    """Create the 2x2 optimization-analysis figure and save it as a PNG.

    Panels: inference-latency histogram, throughput vs GPU-utilization
    scatter, memory usage by batch size, and a text summary comparing
    against the original (unoptimized) baseline.

    Args:
        df: Benchmark results. Every panel guards its own columns, so a
            sparse result file only blanks out the affected panel.
        output_dir: Directory the PNG is written into.

    Returns:
        Path (as str) of the saved chart file.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('TensorRT Optimization Analysis', fontsize=16, fontweight='bold')

    # 1. Inference latency distribution.
    if 'avg_inference_time_ms' in df.columns:
        latencies = df['avg_inference_time_ms'].dropna()
        ax1.hist(latencies, bins=20, alpha=0.7, color='#95E1D3', edgecolor='black')
        ax1.axvline(latencies.mean(), color='red', linestyle='--', linewidth=2,
                    label=f'Mean: {latencies.mean():.2f}ms')
        ax1.set_title('Inference Latency Distribution', fontweight='bold')
        ax1.set_xlabel('Latency (ms)')
        ax1.set_ylabel('Frequency')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

    # 2. Throughput vs GPU utilization (points colored by batch size when known).
    if 'gpu_utilization' in df.columns and 'actual_fps' in df.columns:
        ax2.scatter(df['gpu_utilization'], df['actual_fps'],
                    c=df['batch_size'] if 'batch_size' in df.columns else 'blue',
                    cmap='viridis', s=100, alpha=0.7)
        ax2.set_title('Throughput vs GPU Utilization', fontweight='bold')
        ax2.set_xlabel('GPU Utilization (%)')
        ax2.set_ylabel('Throughput (FPS)')
        ax2.grid(True, alpha=0.3)
        if 'batch_size' in df.columns:
            cbar = plt.colorbar(ax2.collections[0], ax=ax2)
            cbar.set_label('Batch Size')

    # 3. Memory usage, aggregated by batch size when that column exists.
    if 'memory_used_mb' in df.columns:
        memory_by_batch = df.groupby('batch_size')['memory_used_mb'].mean() if 'batch_size' in df.columns else df['memory_used_mb']
        if isinstance(memory_by_batch, pd.Series) and len(memory_by_batch) > 1:
            ax3.bar(range(len(memory_by_batch)), memory_by_batch.values,
                    color='#FFEAA7', alpha=0.8)
            ax3.set_title('Memory Usage by Batch Size', fontweight='bold')
            ax3.set_xlabel('Batch Size')
            ax3.set_ylabel('Memory Usage (MB)')
            ax3.set_xticks(range(len(memory_by_batch)))
            ax3.set_xticklabels(memory_by_batch.index)
            # Reference line at the card's total VRAM.
            ax3.axhline(y=8192, color='red', linestyle='--', alpha=0.7, label='Total VRAM (8GB)')
            ax3.legend()
        else:
            ax3.text(0.5, 0.5, 'Insufficient data for memory analysis',
                     ha='center', va='center', transform=ax3.transAxes)

    # 4. Text summary of the optimization results.
    ax4.text(0.05, 0.95, 'Optimization Results Summary',
             fontsize=16, fontweight='bold', transform=ax4.transAxes)
    if len(df) > 0:
        # Fix: guard every column like the panels above do — the original
        # indexed 'actual_fps' and 'gpu_utilization' unconditionally and
        # raised KeyError on result files missing either column.
        max_fps = df['actual_fps'].max() if 'actual_fps' in df.columns else 0
        max_gpu_util = df['gpu_utilization'].max() if 'gpu_utilization' in df.columns else 0
        avg_latency = df['avg_inference_time_ms'].mean() if 'avg_inference_time_ms' in df.columns else 0
        # Baseline figures from the earlier (unoptimized) test run.
        original_max_fps = 33.8
        original_gpu_util = 30
        fps_improvement = max_fps / original_max_fps if original_max_fps > 0 else 0
        gpu_improvement = max_gpu_util / original_gpu_util if original_gpu_util > 0 else 0
        summary_text = [
            f'🚀 Max FPS: {max_fps:.1f} (vs {original_max_fps:.1f})',
            f'📈 FPS Improvement: {fps_improvement:.1f}x',
            f'🔥 Max GPU Util: {max_gpu_util:.1f}% (vs {original_gpu_util}%)',
            f'📊 GPU Improvement: {gpu_improvement:.1f}x',
            f'⚡ Avg Latency: {avg_latency:.2f}ms',
            '',
            '✅ Optimization Success!' if fps_improvement > 2 else '⚠️ Needs More Optimization',
            'Target: 70%+ GPU utilization',
            f'Achieved: {max_gpu_util:.1f}% GPU utilization'
        ]
        for i, text in enumerate(summary_text):
            ax4.text(0.05, 0.85 - i*0.08, text, fontsize=12,
                     transform=ax4.transAxes)
    ax4.set_xlim(0, 1)
    ax4.set_ylim(0, 1)
    ax4.axis('off')

    plt.tight_layout()
    output_file = Path(output_dir) / "optimization_analysis.png"
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close()
    return str(output_file)
def create_deployment_recommendations(df: pd.DataFrame, output_dir: str) -> str:
    """Create the deployment-recommendation figure and save it as a PNG.

    Left panel: batch-size x stream-count FPS heatmap. Right panel: text
    recommendations derived from the best-performing run.

    Args:
        df: Benchmark results. The heatmap needs 'batch_size' and
            'num_streams'; the recommendations need 'actual_fps'.
        output_dir: Directory the PNG is written into.

    Returns:
        Path (as str) of the saved chart file.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
    fig.suptitle('Optimized Deployment Recommendations', fontsize=16, fontweight='bold')

    # 1. Heatmap of mean FPS per (batch_size, num_streams) configuration.
    if 'batch_size' in df.columns and 'num_streams' in df.columns:
        pivot_data = df.pivot_table(
            values='actual_fps',
            index='batch_size',
            columns='num_streams',
            aggfunc='mean'
        )
        if not pivot_data.empty:
            im1 = ax1.imshow(pivot_data.values, cmap='RdYlGn', aspect='auto')
            ax1.set_title('Performance Heatmap (FPS)', fontweight='bold')
            ax1.set_xlabel('Number of Streams')
            ax1.set_ylabel('Batch Size')
            ax1.set_xticks(range(len(pivot_data.columns)))
            ax1.set_xticklabels(pivot_data.columns)
            ax1.set_yticks(range(len(pivot_data.index)))
            ax1.set_yticklabels(pivot_data.index)
            # Numeric label inside each populated cell.
            for i in range(len(pivot_data.index)):
                for j in range(len(pivot_data.columns)):
                    if not np.isnan(pivot_data.iloc[i, j]):
                        ax1.text(j, i, f'{pivot_data.iloc[i, j]:.1f}',
                                 ha="center", va="center", color="black", fontweight='bold')
            plt.colorbar(im1, ax=ax1, label='FPS')

    # 2. Text recommendations based on the best observed configuration.
    ax2.text(0.05, 0.95, 'Recommended Configurations',
             fontsize=16, fontweight='bold', transform=ax2.transAxes)
    # Fix: guard the 'actual_fps' column — the original called
    # df['actual_fps'].idxmax() unconditionally, which raised KeyError
    # on result files lacking that column (every other accessor in this
    # module guards its columns).
    if len(df) > 0 and 'actual_fps' in df.columns:
        best_config = df.loc[df['actual_fps'].idxmax()]
        recommendations = [
            '🏆 Best Performance Configuration:',
            f' • Resolution: {best_config["resolution"]}x{best_config["resolution"]}',
            f' • Batch Size: {best_config.get("batch_size", "N/A")}',
            f' • Streams: {best_config.get("num_streams", "N/A")}',
            f' • Performance: {best_config["actual_fps"]:.1f} FPS',
            f' • GPU Util: {best_config["gpu_utilization"]:.1f}%',
            '',
            '💡 Deployment Scenarios:',
            '',
            '🎯 High Throughput (Max FPS):',
            ' • Use batch size 16-32',
            ' • Use 4-8 CUDA streams',
            f' • Expected: {best_config["actual_fps"]:.0f}+ FPS',
            '',
            '⚖️ Balanced (GPU ~70%):',
            ' • Use batch size 8-16',
            ' • Use 2-4 CUDA streams',
            f' • Expected: {best_config["actual_fps"]*0.7:.0f} FPS',
            '',
            '🔋 Power Efficient (GPU ~50%):',
            ' • Use batch size 4-8',
            ' • Use 2 CUDA streams',
            f' • Expected: {best_config["actual_fps"]*0.5:.0f} FPS'
        ]
        for i, rec in enumerate(recommendations):
            ax2.text(0.05, 0.85 - i*0.04, rec, fontsize=10,
                     transform=ax2.transAxes)
    ax2.set_xlim(0, 1)
    ax2.set_ylim(0, 1)
    ax2.axis('off')

    plt.tight_layout()
    output_file = Path(output_dir) / "deployment_recommendations.png"
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close()
    return str(output_file)
def generate_optimized_charts(results_dir: str) -> List[str]:
    """Generate every chart for an optimized benchmark run.

    Best-effort: any failure (missing result file, malformed data,
    plotting error) is reported to stdout and an empty list is returned
    instead of propagating the exception.

    Args:
        results_dir: Directory holding the JSON results; charts are
            written back into the same directory.

    Returns:
        Paths of the generated chart files, or an empty list on failure.
    """
    try:
        results = load_optimized_results(results_dir)
        builders = (
            create_performance_comparison,      # before/after comparison
            create_optimization_analysis,       # latency / utilization analysis
            create_deployment_recommendations,  # deployment guidance
        )
        return [build(results, results_dir) for build in builders]
    except Exception as e:
        print(f"生成图表失败: {e}")
        return []
if __name__ == "__main__":
    import sys

    # Optional CLI argument: the results directory (defaults to the
    # benchmark's standard output location).
    results_dir = sys.argv[1] if len(sys.argv) > 1 else "./optimized_stress_results"
    chart_files = generate_optimized_charts(results_dir)
    if not chart_files:
        print("❌ 未生成任何图表")
    else:
        print(f"✅ 生成了 {len(chart_files)} 个图表:")
        for file in chart_files:
            print(f" 📊 {file}")