""" 优化压力测试可视化模块 专门为原生 TensorRT 测试结果生成图表 """ import json import matplotlib.pyplot as plt import numpy as np import pandas as pd from pathlib import Path from typing import List, Dict, Any # 设置字体 plt.rcParams['font.family'] = ['Arial', 'DejaVu Sans'] plt.rcParams['axes.unicode_minus'] = False plt.rcParams['font.size'] = 10 def load_optimized_results(results_dir: str) -> pd.DataFrame: """加载优化测试结果""" results_path = Path(results_dir) # 查找最新的结果文件 (支持两种格式) json_files = list(results_path.glob("optimized_results_*.json")) + \ list(results_path.glob("ultralytics_optimized_*.json")) if not json_files: raise FileNotFoundError("未找到优化测试结果文件") latest_file = max(json_files, key=lambda x: x.stat().st_mtime) with open(latest_file, 'r', encoding='utf-8') as f: data = json.load(f) return pd.DataFrame(data) def create_performance_comparison(df: pd.DataFrame, output_dir: str) -> str: """创建性能对比图表""" fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12)) fig.suptitle('RTX 3050 Optimized Performance Analysis', fontsize=20, fontweight='bold') # 1. 最大 FPS 对比 (优化前 vs 优化后) resolutions = [320, 480] original_fps = [33.8, 33.9] # 之前的结果 # 从测试结果中获取最大 FPS optimized_fps = [] for res in resolutions: res_data = df[df['resolution'] == res] if len(res_data) > 0: max_fps = res_data['actual_fps'].max() optimized_fps.append(max_fps) else: optimized_fps.append(0) x = np.arange(len(resolutions)) width = 0.35 bars1 = ax1.bar(x - width/2, original_fps, width, label='Original (Ultralytics)', color='#FF6B6B', alpha=0.8) bars2 = ax1.bar(x + width/2, optimized_fps, width, label='Optimized (Native TensorRT)', color='#4ECDC4', alpha=0.8) ax1.set_title('Max FPS Comparison', fontweight='bold') ax1.set_xlabel('Resolution') ax1.set_ylabel('FPS') ax1.set_xticks(x) ax1.set_xticklabels([f'{res}x{res}' for res in resolutions]) ax1.legend() ax1.grid(True, alpha=0.3) # 添加提升倍数标签 for i, (orig, opt) in enumerate(zip(original_fps, optimized_fps)): if orig > 0 and opt > 0: improvement = opt / orig ax1.text(i, max(orig, opt) + 5, f'{improvement:.1f}x', ha='center', va='bottom', fontweight='bold', color='green') # 2. GPU 利用率对比 gpu_utils_orig = [30, 34] # 原始测试的 GPU 利用率 gpu_utils_opt = [] for res in resolutions: res_data = df[df['resolution'] == res] if len(res_data) > 0: max_util = res_data['gpu_utilization'].max() gpu_utils_opt.append(max_util) else: gpu_utils_opt.append(0) bars3 = ax2.bar(x - width/2, gpu_utils_orig, width, label='Original', color='#FF6B6B', alpha=0.8) bars4 = ax2.bar(x + width/2, gpu_utils_opt, width, label='Optimized', color='#4ECDC4', alpha=0.8) ax2.set_title('GPU Utilization Comparison', fontweight='bold') ax2.set_xlabel('Resolution') ax2.set_ylabel('GPU Utilization (%)') ax2.set_xticks(x) ax2.set_xticklabels([f'{res}x{res}' for res in resolutions]) ax2.legend() ax2.grid(True, alpha=0.3) ax2.set_ylim(0, 100) # 添加目标线 ax2.axhline(y=70, color='green', linestyle='--', alpha=0.7, label='Target (70%)') # 3. 批次大小 vs 性能 (支持两种字段名) batch_field = 'batch_size' if 'batch_size' in df.columns else None if batch_field: batch_perf = df.groupby(batch_field)['actual_fps'].mean() ax3.plot(batch_perf.index, batch_perf.values, marker='o', linewidth=3, markersize=8, color='#95E1D3') ax3.set_title('Batch Size vs Performance', fontweight='bold') ax3.set_xlabel('Batch Size') ax3.set_ylabel('Average FPS') ax3.grid(True, alpha=0.3) # 4. 流数量/线程数量 vs 性能 (支持两种字段名) parallel_field = 'num_streams' if 'num_streams' in df.columns else 'num_threads' if parallel_field in df.columns: parallel_perf = df.groupby(parallel_field)['actual_fps'].mean() ax4.plot(parallel_perf.index, parallel_perf.values, marker='s', linewidth=3, markersize=8, color='#F38BA8') title = 'Number of Streams vs Performance' if parallel_field == 'num_streams' else 'Number of Threads vs Performance' xlabel = 'Number of CUDA Streams' if parallel_field == 'num_streams' else 'Number of Threads' ax4.set_title(title, fontweight='bold') ax4.set_xlabel(xlabel) ax4.set_ylabel('Average FPS') ax4.grid(True, alpha=0.3) plt.tight_layout() output_file = Path(output_dir) / "optimized_performance_comparison.png" plt.savefig(output_file, dpi=300, bbox_inches='tight') plt.close() return str(output_file) def create_optimization_analysis(df: pd.DataFrame, output_dir: str) -> str: """创建优化分析图表""" fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12)) fig.suptitle('TensorRT Optimization Analysis', fontsize=16, fontweight='bold') # 1. 延迟分布对比 if 'avg_inference_time_ms' in df.columns: latencies = df['avg_inference_time_ms'].dropna() ax1.hist(latencies, bins=20, alpha=0.7, color='#95E1D3', edgecolor='black') ax1.axvline(latencies.mean(), color='red', linestyle='--', linewidth=2, label=f'Mean: {latencies.mean():.2f}ms') ax1.set_title('Inference Latency Distribution', fontweight='bold') ax1.set_xlabel('Latency (ms)') ax1.set_ylabel('Frequency') ax1.legend() ax1.grid(True, alpha=0.3) # 2. 吞吐量 vs GPU 利用率 if 'gpu_utilization' in df.columns and 'actual_fps' in df.columns: ax2.scatter(df['gpu_utilization'], df['actual_fps'], c=df['batch_size'] if 'batch_size' in df.columns else 'blue', cmap='viridis', s=100, alpha=0.7) ax2.set_title('Throughput vs GPU Utilization', fontweight='bold') ax2.set_xlabel('GPU Utilization (%)') ax2.set_ylabel('Throughput (FPS)') ax2.grid(True, alpha=0.3) if 'batch_size' in df.columns: cbar = plt.colorbar(ax2.collections[0], ax=ax2) cbar.set_label('Batch Size') # 3. 内存使用分析 if 'memory_used_mb' in df.columns: memory_by_batch = df.groupby('batch_size')['memory_used_mb'].mean() if 'batch_size' in df.columns else df['memory_used_mb'] if isinstance(memory_by_batch, pd.Series) and len(memory_by_batch) > 1: ax3.bar(range(len(memory_by_batch)), memory_by_batch.values, color='#FFEAA7', alpha=0.8) ax3.set_title('Memory Usage by Batch Size', fontweight='bold') ax3.set_xlabel('Batch Size') ax3.set_ylabel('Memory Usage (MB)') ax3.set_xticks(range(len(memory_by_batch))) ax3.set_xticklabels(memory_by_batch.index) # 添加总显存线 ax3.axhline(y=8192, color='red', linestyle='--', alpha=0.7, label='Total VRAM (8GB)') ax3.legend() else: ax3.text(0.5, 0.5, 'Insufficient data for memory analysis', ha='center', va='center', transform=ax3.transAxes) # 4. 优化效果总结 ax4.text(0.05, 0.95, 'Optimization Results Summary', fontsize=16, fontweight='bold', transform=ax4.transAxes) # 计算改进指标 if len(df) > 0: max_fps = df['actual_fps'].max() max_gpu_util = df['gpu_utilization'].max() avg_latency = df['avg_inference_time_ms'].mean() if 'avg_inference_time_ms' in df.columns else 0 original_max_fps = 33.8 original_gpu_util = 30 fps_improvement = max_fps / original_max_fps if original_max_fps > 0 else 0 gpu_improvement = max_gpu_util / original_gpu_util if original_gpu_util > 0 else 0 summary_text = [ f'🚀 Max FPS: {max_fps:.1f} (vs {original_max_fps:.1f})', f'📈 FPS Improvement: {fps_improvement:.1f}x', f'🔥 Max GPU Util: {max_gpu_util:.1f}% (vs {original_gpu_util}%)', f'📊 GPU Improvement: {gpu_improvement:.1f}x', f'⚡ Avg Latency: {avg_latency:.2f}ms', '', '✅ Optimization Success!' if fps_improvement > 2 else '⚠️ Needs More Optimization', f'Target: 70%+ GPU utilization', f'Achieved: {max_gpu_util:.1f}% GPU utilization' ] for i, text in enumerate(summary_text): ax4.text(0.05, 0.85 - i*0.08, text, fontsize=12, transform=ax4.transAxes) ax4.set_xlim(0, 1) ax4.set_ylim(0, 1) ax4.axis('off') plt.tight_layout() output_file = Path(output_dir) / "optimization_analysis.png" plt.savefig(output_file, dpi=300, bbox_inches='tight') plt.close() return str(output_file) def create_deployment_recommendations(df: pd.DataFrame, output_dir: str) -> str: """创建部署建议图表""" fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8)) fig.suptitle('Optimized Deployment Recommendations', fontsize=16, fontweight='bold') # 1. 最优配置热力图 if 'batch_size' in df.columns and 'num_streams' in df.columns: # 创建配置性能矩阵 pivot_data = df.pivot_table( values='actual_fps', index='batch_size', columns='num_streams', aggfunc='mean' ) if not pivot_data.empty: im1 = ax1.imshow(pivot_data.values, cmap='RdYlGn', aspect='auto') ax1.set_title('Performance Heatmap (FPS)', fontweight='bold') ax1.set_xlabel('Number of Streams') ax1.set_ylabel('Batch Size') ax1.set_xticks(range(len(pivot_data.columns))) ax1.set_xticklabels(pivot_data.columns) ax1.set_yticks(range(len(pivot_data.index))) ax1.set_yticklabels(pivot_data.index) # 添加数值标签 for i in range(len(pivot_data.index)): for j in range(len(pivot_data.columns)): if not np.isnan(pivot_data.iloc[i, j]): ax1.text(j, i, f'{pivot_data.iloc[i, j]:.1f}', ha="center", va="center", color="black", fontweight='bold') plt.colorbar(im1, ax=ax1, label='FPS') # 2. 推荐配置 ax2.text(0.05, 0.95, 'Recommended Configurations', fontsize=16, fontweight='bold', transform=ax2.transAxes) # 基于测试结果生成推荐 if len(df) > 0: # 找到最佳配置 best_config = df.loc[df['actual_fps'].idxmax()] recommendations = [ '🏆 Best Performance Configuration:', f' • Resolution: {best_config["resolution"]}x{best_config["resolution"]}', f' • Batch Size: {best_config.get("batch_size", "N/A")}', f' • Streams: {best_config.get("num_streams", "N/A")}', f' • Performance: {best_config["actual_fps"]:.1f} FPS', f' • GPU Util: {best_config["gpu_utilization"]:.1f}%', '', '💡 Deployment Scenarios:', '', '🎯 High Throughput (Max FPS):', f' • Use batch size 16-32', f' • Use 4-8 CUDA streams', f' • Expected: {best_config["actual_fps"]:.0f}+ FPS', '', '⚖️ Balanced (GPU ~70%):', f' • Use batch size 8-16', f' • Use 2-4 CUDA streams', f' • Expected: {best_config["actual_fps"]*0.7:.0f} FPS', '', '🔋 Power Efficient (GPU ~50%):', f' • Use batch size 4-8', f' • Use 2 CUDA streams', f' • Expected: {best_config["actual_fps"]*0.5:.0f} FPS' ] for i, rec in enumerate(recommendations): ax2.text(0.05, 0.85 - i*0.04, rec, fontsize=10, transform=ax2.transAxes) ax2.set_xlim(0, 1) ax2.set_ylim(0, 1) ax2.axis('off') plt.tight_layout() output_file = Path(output_dir) / "deployment_recommendations.png" plt.savefig(output_file, dpi=300, bbox_inches='tight') plt.close() return str(output_file) def generate_optimized_charts(results_dir: str) -> List[str]: """生成优化测试的所有图表""" try: df = load_optimized_results(results_dir) chart_files = [] # 1. 性能对比 chart_files.append(create_performance_comparison(df, results_dir)) # 2. 优化分析 chart_files.append(create_optimization_analysis(df, results_dir)) # 3. 部署建议 chart_files.append(create_deployment_recommendations(df, results_dir)) return chart_files except Exception as e: print(f"生成图表失败: {e}") return [] if __name__ == "__main__": import sys if len(sys.argv) > 1: results_dir = sys.argv[1] else: results_dir = "./optimized_stress_results" chart_files = generate_optimized_charts(results_dir) if chart_files: print(f"✅ 生成了 {len(chart_files)} 个图表:") for file in chart_files: print(f" 📊 {file}") else: print("❌ 未生成任何图表")