#!/usr/bin/env python3
"""
Benchmark result visualization script.

Generates PyTorch vs TensorRT performance comparison charts.
"""

import json
import os

import matplotlib.pyplot as plt

# Prefer CJK-capable fonts (SimHei / Microsoft YaHei) with DejaVu Sans as fallback,
# and render the minus sign correctly when a CJK font is active.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False


def load_results(json_file):
    """Load benchmark results from a JSON file."""
    with open(json_file, 'r', encoding='utf-8') as f:
        return json.load(f)


def create_fps_comparison_chart(results, output_dir):
    """Create the FPS comparison chart."""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    # Single-frame inference FPS comparison
    pytorch_single_fps = results['pytorch']['single_inference']['avg_fps']
    tensorrt_single_fps = results['tensorrt']['single_inference']['avg_fps']

    engines = ['PyTorch', 'TensorRT']
    fps_values = [pytorch_single_fps, tensorrt_single_fps]
    colors = ['#FF6B6B', '#4ECDC4']

    bars1 = ax1.bar(engines, fps_values, color=colors, alpha=0.8)
    ax1.set_title('Single-frame inference performance', fontsize=14, fontweight='bold')
    ax1.set_ylabel('FPS (frames/sec)', fontsize=12)
    ax1.grid(True, alpha=0.3)

    # Value labels
    for bar, value in zip(bars1, fps_values):
        height = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width() / 2., height + 1,
                 f'{value:.1f}', ha='center', va='bottom', fontweight='bold')

    # Annotate the relative speedup
    improvement = (tensorrt_single_fps - pytorch_single_fps) / pytorch_single_fps * 100
    ax1.text(0.5, max(fps_values) * 0.8, f'TensorRT speedup: {improvement:.1f}%',
             ha='center', transform=ax1.transData, fontsize=12,
             bbox=dict(boxstyle="round,pad=0.3", facecolor="yellow", alpha=0.7))

    # Batch inference FPS (PyTorch only, since the TensorRT batch test failed)
    if 'batch_inference' in results['pytorch']:
        batch_sizes = []
        pytorch_batch_fps = []
        for batch_result in results['pytorch']['batch_inference']:
            batch_sizes.append(batch_result['batch_size'])
            pytorch_batch_fps.append(batch_result['avg_fps'])

        ax2.plot(batch_sizes, pytorch_batch_fps, 'o-', color='#FF6B6B',
                 linewidth=2, markersize=8, label='PyTorch')
        ax2.axhline(y=tensorrt_single_fps, color='#4ECDC4', linestyle='--',
                    linewidth=2, label='TensorRT (single frame)')
        ax2.set_title('Batch inference performance (PyTorch)', fontsize=14, fontweight='bold')
        ax2.set_xlabel('Batch size', fontsize=12)
        ax2.set_ylabel('FPS (frames/sec)', fontsize=12)
        ax2.grid(True, alpha=0.3)
        ax2.legend()

        # Value labels
        for batch_size, fps in zip(batch_sizes, pytorch_batch_fps):
            ax2.text(batch_size, fps + 2, f'{fps:.1f}', ha='center', va='bottom')

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'fps_comparison.png'), dpi=300, bbox_inches='tight')
    plt.show()


def create_latency_comparison_chart(results, output_dir):
    """Create the inference latency comparison chart."""
    fig, ax = plt.subplots(1, 1, figsize=(10, 6))

    # Single-frame inference latency comparison
    pytorch_latency = results['pytorch']['single_inference']['avg_latency_ms']
    tensorrt_latency = results['tensorrt']['single_inference']['avg_latency_ms']

    engines = ['PyTorch', 'TensorRT']
    latency_values = [pytorch_latency, tensorrt_latency]
    colors = ['#FF6B6B', '#4ECDC4']

    bars = ax.bar(engines, latency_values, color=colors, alpha=0.8)
    ax.set_title('Inference latency comparison', fontsize=14, fontweight='bold')
    ax.set_ylabel('Latency (ms)', fontsize=12)
    ax.grid(True, alpha=0.3)

    # Value labels
    for bar, value in zip(bars, latency_values):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height + 0.1,
                f'{value:.1f}ms', ha='center', va='bottom', fontweight='bold')

    # Annotate the latency reduction
    improvement = (pytorch_latency - tensorrt_latency) / pytorch_latency * 100
    ax.text(0.5, max(latency_values) * 0.8, f'TensorRT latency reduction: {improvement:.1f}%',
            ha='center', transform=ax.transData, fontsize=12,
            bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgreen", alpha=0.7))

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'latency_comparison.png'), dpi=300, bbox_inches='tight')
    plt.show()


def create_concurrent_performance_chart(results, output_dir):
    """Create the concurrent-stream performance chart."""
    if 'concurrent_streams' not in results['pytorch']:
        return

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    # Extract concurrency data
    concurrent_counts = []
    pytorch_total_fps = []
    pytorch_single_fps = []

    for result in results['pytorch']['concurrent_streams']:
        concurrent_counts.append(result['concurrent_streams'])
        pytorch_total_fps.append(result['avg_fps'] * result['concurrent_streams'])
        pytorch_single_fps.append(result['avg_fps'])

    # Total FPS comparison
    ax1.plot(concurrent_counts, pytorch_total_fps, 'o-', color='#FF6B6B',
             linewidth=2, markersize=8, label='PyTorch total FPS')

    # Theoretical TensorRT total FPS (extrapolated from single-frame performance)
    tensorrt_single_fps = results['tensorrt']['single_inference']['avg_fps']
    tensorrt_theoretical_fps = [tensorrt_single_fps * count for count in concurrent_counts]
    ax1.plot(concurrent_counts, tensorrt_theoretical_fps, '--', color='#4ECDC4',
             linewidth=2, label='TensorRT theoretical total FPS')

    ax1.set_title('Total FPS under concurrency', fontsize=14, fontweight='bold')
    ax1.set_xlabel('Number of concurrent cameras', fontsize=12)
    ax1.set_ylabel('Total FPS (frames/sec)', fontsize=12)
    ax1.grid(True, alpha=0.3)
    ax1.legend()

    # Average per-stream FPS
    ax2.plot(concurrent_counts, pytorch_single_fps, 'o-', color='#FF6B6B',
             linewidth=2, markersize=8, label='PyTorch per-stream FPS')
    ax2.axhline(y=tensorrt_single_fps, color='#4ECDC4', linestyle='--',
                linewidth=2, label='TensorRT per-stream FPS')

    ax2.set_title('Average per-stream FPS', fontsize=14, fontweight='bold')
    ax2.set_xlabel('Number of concurrent cameras', fontsize=12)
    ax2.set_ylabel('Per-stream FPS (frames/sec)', fontsize=12)
    ax2.grid(True, alpha=0.3)
    ax2.legend()

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'concurrent_performance.png'), dpi=300, bbox_inches='tight')
    plt.show()


def create_resource_utilization_chart(results, output_dir):
    """Create the resource utilization comparison chart."""
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

    # GPU utilization comparison
    pytorch_gpu = results['pytorch']['single_inference']['avg_gpu_util']
    tensorrt_gpu = results['tensorrt']['single_inference']['avg_gpu_util']

    engines = ['PyTorch', 'TensorRT']
    gpu_utils = [pytorch_gpu, tensorrt_gpu]
    colors = ['#FF6B6B', '#4ECDC4']

    bars1 = ax1.bar(engines, gpu_utils, color=colors, alpha=0.8)
    ax1.set_title('GPU utilization', fontsize=12, fontweight='bold')
    ax1.set_ylabel('GPU utilization (%)', fontsize=10)
    ax1.grid(True, alpha=0.3)

    for bar, value in zip(bars1, gpu_utils):
        height = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width() / 2., height + 0.5,
                 f'{value:.1f}%', ha='center', va='bottom', fontweight='bold')

    # GPU memory usage comparison
    pytorch_mem = results['pytorch']['single_inference']['avg_gpu_memory_mb']
    tensorrt_mem = results['tensorrt']['single_inference']['avg_gpu_memory_mb']
    gpu_mems = [pytorch_mem, tensorrt_mem]

    bars2 = ax2.bar(engines, gpu_mems, color=colors, alpha=0.8)
    ax2.set_title('GPU memory usage', fontsize=12, fontweight='bold')
    ax2.set_ylabel('GPU memory (MB)', fontsize=10)
    ax2.grid(True, alpha=0.3)

    for bar, value in zip(bars2, gpu_mems):
        height = bar.get_height()
        ax2.text(bar.get_x() + bar.get_width() / 2., height + 10,
                 f'{value:.0f}MB', ha='center', va='bottom', fontweight='bold')

    # CPU utilization comparison
    pytorch_cpu = results['pytorch']['single_inference']['avg_cpu_util']
    tensorrt_cpu = results['tensorrt']['single_inference']['avg_cpu_util']
    cpu_utils = [pytorch_cpu, tensorrt_cpu]

    bars3 = ax3.bar(engines, cpu_utils, color=colors, alpha=0.8)
    ax3.set_title('CPU utilization', fontsize=12, fontweight='bold')
    ax3.set_ylabel('CPU utilization (%)', fontsize=10)
    ax3.grid(True, alpha=0.3)

    for bar, value in zip(bars3, cpu_utils):
        height = bar.get_height()
        ax3.text(bar.get_x() + bar.get_width() / 2., height + 0.2,
                 f'{value:.1f}%', ha='center', va='bottom', fontweight='bold')

    # Overall efficiency (FPS per percentage point of GPU utilization)
    pytorch_single_fps = results['pytorch']['single_inference']['avg_fps']
    tensorrt_single_fps = results['tensorrt']['single_inference']['avg_fps']
    pytorch_efficiency = pytorch_single_fps / pytorch_gpu if pytorch_gpu > 0 else 0
    tensorrt_efficiency = tensorrt_single_fps / tensorrt_gpu if tensorrt_gpu > 0 else 0
    efficiencies = [pytorch_efficiency, tensorrt_efficiency]

    bars4 = ax4.bar(engines, efficiencies, color=colors, alpha=0.8)
    ax4.set_title('Performance efficiency (FPS / GPU utilization)', fontsize=12, fontweight='bold')
    ax4.set_ylabel('Efficiency (FPS per % GPU)', fontsize=10)
    ax4.grid(True, alpha=0.3)

    for bar, value in zip(bars4, efficiencies):
        height = bar.get_height()
        ax4.text(bar.get_x() + bar.get_width() / 2., height + 0.05,
                 f'{value:.1f}', ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'resource_utilization.png'), dpi=300, bbox_inches='tight')
    plt.show()


def generate_summary_report(results, output_dir):
    """Generate the plain-text summary report."""
    pytorch_single = results['pytorch']['single_inference']
    tensorrt_single = results['tensorrt']['single_inference']

    # Compute the performance deltas
    fps_improvement = (tensorrt_single['avg_fps'] - pytorch_single['avg_fps']) / pytorch_single['avg_fps'] * 100
    latency_improvement = (pytorch_single['avg_latency_ms'] - tensorrt_single['avg_latency_ms']) / pytorch_single['avg_latency_ms'] * 100
    gpu_util_change = tensorrt_single['avg_gpu_util'] - pytorch_single['avg_gpu_util']

    report = f"""
YOLOv11 Performance Comparison Summary Report
{'='*50}

Test time: {results['timestamp']}
Model path: {results['model_path']}

Single-frame inference:
{'='*30}
PyTorch:
  - Average FPS: {pytorch_single['avg_fps']:.1f}
  - Average latency: {pytorch_single['avg_latency_ms']:.1f}ms
  - GPU utilization: {pytorch_single['avg_gpu_util']:.1f}%
  - GPU memory: {pytorch_single['avg_gpu_memory_mb']:.0f}MB

TensorRT:
  - Average FPS: {tensorrt_single['avg_fps']:.1f}
  - Average latency: {tensorrt_single['avg_latency_ms']:.1f}ms
  - GPU utilization: {tensorrt_single['avg_gpu_util']:.1f}%
  - GPU memory: {tensorrt_single['avg_gpu_memory_mb']:.0f}MB

Performance gains:
{'='*30}
🚀 FPS improvement: {fps_improvement:.1f}%
⚡ Latency reduction: {latency_improvement:.1f}%
📊 GPU utilization change: {gpu_util_change:+.1f}%

Batch inference (PyTorch):
{'='*30}"""

    if 'batch_inference' in results['pytorch']:
        for batch_result in results['pytorch']['batch_inference']:
            batch_size = batch_result['batch_size']
            batch_fps = batch_result['avg_fps']
            batch_latency = batch_result['avg_latency_ms']
            report += f"\nBatch size {batch_size}: {batch_fps:.1f} FPS, {batch_latency:.1f}ms"

    if 'concurrent_streams' in results['pytorch']:
        report += f"\n\nConcurrent performance (PyTorch):\n{'='*30}"
        for conc_result in results['pytorch']['concurrent_streams']:
            conc_count = conc_result['concurrent_streams']
            total_fps = conc_result['avg_fps'] * conc_count
            single_fps = conc_result['avg_fps']
            report += f"\n{conc_count} concurrent streams: total FPS {total_fps:.1f}, per-stream FPS {single_fps:.1f}"

    report += f"""

Recommendations:
{'='*30}
✅ Single-camera scenarios: TensorRT recommended ({fps_improvement:.1f}% FPS improvement)
✅ Multi-camera scenarios: choose the engine based on the number of concurrent streams
✅ Resource-constrained environments: TensorRT uses less GPU at comparable performance

Caveats:
{'='*30}
⚠️ The TensorRT engine must be exported ahead of time; the first export takes a while
⚠️ TensorRT batch inference requires an engine built for a fixed batch size
⚠️ Real deployments must also account for model load time and memory footprint
"""

    # Save the report
    report_file = os.path.join(output_dir, 'performance_summary.txt')
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report)

    print(report)
    print(f"\n📁 Summary report saved to: {report_file}")
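

# The functions above assume a benchmark-results JSON with roughly the shape
# sketched below. The sketch is inferred from the keys read in this script; it is
# not an authoritative schema, and the benchmark script that produces the file may
# include additional fields.
#
# {
#   "timestamp": "...",
#   "model_path": "...",
#   "pytorch": {
#     "single_inference": {"avg_fps": ..., "avg_latency_ms": ..., "avg_gpu_util": ...,
#                          "avg_gpu_memory_mb": ..., "avg_cpu_util": ...},
#     "batch_inference": [{"batch_size": ..., "avg_fps": ..., "avg_latency_ms": ...}, ...],
#     "concurrent_streams": [{"concurrent_streams": ..., "avg_fps": ...}, ...]
#   },
#   "tensorrt": {
#     "single_inference": {...same fields as for PyTorch...}
#   }
# }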


def main():
    """Locate the latest benchmark results and generate all visualizations."""
    # Look for the benchmark results directory
    results_dir = "benchmark_results"
    if not os.path.exists(results_dir):
        print("❌ Benchmark results directory not found")
        return

    json_files = [f for f in os.listdir(results_dir)
                  if f.startswith('benchmark_results_') and f.endswith('.json')]
    if not json_files:
        print("❌ No benchmark result files found")
        return

    # Use the most recent results file (lexicographic order, which matches
    # chronological order when the filenames embed a sortable timestamp)
    latest_file = sorted(json_files)[-1]
    json_path = os.path.join(results_dir, latest_file)

    print(f"📊 Loading benchmark results: {json_path}")
    results = load_results(json_path)

    # Create the visualization output directory
    viz_dir = os.path.join(results_dir, "visualizations")
    os.makedirs(viz_dir, exist_ok=True)

    print("🎨 Generating charts...")

    # Generate the charts
    create_fps_comparison_chart(results, viz_dir)
    create_latency_comparison_chart(results, viz_dir)
    create_concurrent_performance_chart(results, viz_dir)
    create_resource_utilization_chart(results, viz_dir)

    # Generate the summary report
    generate_summary_report(results, viz_dir)

    print("\n✅ All visualizations generated!")
    print(f"📁 Output directory: {viz_dir}")


if __name__ == "__main__":
    main()
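

# Minimal usage sketch (commented out): regenerate individual charts from a specific
# results file without running main(). The path below is a placeholder, not a real file.
#
# results = load_results("benchmark_results/benchmark_results_<timestamp>.json")
# create_fps_comparison_chart(results, "benchmark_results/visualizations")
# generate_summary_report(results, "benchmark_results/visualizations")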