#!/usr/bin/env python3
"""
Generate the final PyTorch vs TensorRT performance comparison report.

Reads a benchmark-results JSON file produced by an earlier comparison run,
renders a four-panel comparison chart (PNG), and writes a formatted text
report. All user-facing chart labels and report text are intentionally in
Chinese.
"""
import json

import numpy as np
import matplotlib.pyplot as plt

# Input/output locations, kept identical to the original hard-coded paths.
# Defined once so the success messages can never drift from the real paths.
RESULTS_JSON = 'comparison_results/comparison_results_20260119_144639.json'
CHART_PATH = 'comparison_results/complete_performance_comparison.png'
REPORT_PATH = 'comparison_results/final_report.txt'

# Configure fonts so CJK labels render; DejaVu Sans is the ASCII fallback.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False  # keep minus signs with CJK fonts


def _load_results(path):
    """Load the benchmark JSON at *path*.

    Returns (data, batch_sizes, pytorch_fps, tensorrt_fps, tensorrt_latency),
    with all lists ordered by ascending batch size.

    Expected schema (from the visible lookups):
      data['pytorch'][str(bs)]  -> fps number
      data['tensorrt'][str(bs)] -> {'avg_fps': ..., 'avg_latency_ms': ...}
      data['timestamp']         -> test-time string
    """
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    pytorch_data = data['pytorch']
    tensorrt_data = data['tensorrt']
    batch_sizes = sorted(int(k) for k in pytorch_data)
    pytorch_fps = [pytorch_data[str(bs)] for bs in batch_sizes]
    tensorrt_fps = [tensorrt_data[str(bs)]['avg_fps'] for bs in batch_sizes]
    tensorrt_latency = [tensorrt_data[str(bs)]['avg_latency_ms']
                        for bs in batch_sizes]
    return data, batch_sizes, pytorch_fps, tensorrt_fps, tensorrt_latency


def _annotate_bars(ax, bars):
    """Label each bar with its height (shared by both FPS bar series)."""
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height + 2,
                f'{height:.1f}', ha='center', va='bottom',
                fontsize=9, fontweight='bold')


def _plot_charts(batch_sizes, pytorch_fps, tensorrt_fps, tensorrt_latency,
                 improvements, out_path=CHART_PATH):
    """Render the 2x2 comparison figure and save it to *out_path*."""
    plt.figure(figsize=(18, 10))

    # Panel 1: side-by-side FPS bars per batch size.
    ax1 = plt.subplot(2, 2, 1)
    x = np.arange(len(batch_sizes))
    width = 0.35
    bars1 = ax1.bar(x - width / 2, pytorch_fps, width, label='PyTorch',
                    color='#FF6B6B', alpha=0.8)
    bars2 = ax1.bar(x + width / 2, tensorrt_fps, width, label='TensorRT',
                    color='#4ECDC4', alpha=0.8)
    ax1.set_xlabel('批次大小', fontsize=12, fontweight='bold')
    ax1.set_ylabel('FPS (帧/秒)', fontsize=12, fontweight='bold')
    ax1.set_title('PyTorch vs TensorRT 性能对比', fontsize=14, fontweight='bold')
    ax1.set_xticks(x)
    ax1.set_xticklabels(batch_sizes)
    ax1.legend(fontsize=11)
    ax1.grid(True, alpha=0.3, axis='y')
    _annotate_bars(ax1, bars1)
    _annotate_bars(ax1, bars2)

    # Panel 2: relative improvement bars (green = gain, red = regression).
    ax2 = plt.subplot(2, 2, 2)
    colors = ['green' if imp > 0 else 'red' for imp in improvements]
    bars3 = ax2.bar(batch_sizes, improvements, color=colors, alpha=0.8,
                    edgecolor='black')
    ax2.set_xlabel('批次大小', fontsize=12, fontweight='bold')
    ax2.set_ylabel('性能提升 (%)', fontsize=12, fontweight='bold')
    ax2.set_title('TensorRT 相对 PyTorch 的性能提升', fontsize=14,
                  fontweight='bold')
    ax2.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
    ax2.grid(True, alpha=0.3, axis='y')
    for bar, imp in zip(bars3, improvements):
        height = bar.get_height()
        # Label above positive bars, below negative ones.
        ax2.text(bar.get_x() + bar.get_width() / 2.,
                 height + (3 if height > 0 else -3),
                 f'{imp:+.1f}%', ha='center',
                 va='bottom' if height > 0 else 'top',
                 fontsize=10, fontweight='bold')

    # Panel 3: FPS trend lines across batch sizes.
    ax3 = plt.subplot(2, 2, 3)
    ax3.plot(batch_sizes, pytorch_fps, 'o-', color='#FF6B6B', linewidth=3,
             markersize=10, label='PyTorch', markeredgecolor='white',
             markeredgewidth=2)
    ax3.plot(batch_sizes, tensorrt_fps, 's-', color='#4ECDC4', linewidth=3,
             markersize=10, label='TensorRT', markeredgecolor='white',
             markeredgewidth=2)
    ax3.set_xlabel('批次大小', fontsize=12, fontweight='bold')
    ax3.set_ylabel('FPS (帧/秒)', fontsize=12, fontweight='bold')
    ax3.set_title('批量推理性能趋势', fontsize=14, fontweight='bold')
    ax3.grid(True, alpha=0.3, linestyle='--')
    ax3.legend(fontsize=11)
    ax3.set_xticks(batch_sizes)
    # PyTorch labels above the point, TensorRT below, to avoid overlap.
    for bs, pt_fps, trt_fps in zip(batch_sizes, pytorch_fps, tensorrt_fps):
        ax3.text(bs, pt_fps + 3, f'{pt_fps:.1f}', ha='center', va='bottom',
                 fontweight='bold', fontsize=9, color='#FF6B6B')
        ax3.text(bs, trt_fps - 3, f'{trt_fps:.1f}', ha='center', va='top',
                 fontweight='bold', fontsize=9, color='#4ECDC4')

    # Panel 4: TensorRT inference latency.
    ax4 = plt.subplot(2, 2, 4)
    ax4.plot(batch_sizes, tensorrt_latency, 'D-', color='#4ECDC4',
             linewidth=3, markersize=10, label='TensorRT 延迟',
             markeredgecolor='white', markeredgewidth=2)
    ax4.set_xlabel('批次大小', fontsize=12, fontweight='bold')
    ax4.set_ylabel('延迟 (ms)', fontsize=12, fontweight='bold')
    ax4.set_title('TensorRT 推理延迟', fontsize=14, fontweight='bold')
    ax4.grid(True, alpha=0.3, linestyle='--')
    ax4.legend(fontsize=11)
    ax4.set_xticks(batch_sizes)
    for bs, lat in zip(batch_sizes, tensorrt_latency):
        ax4.text(bs, lat + 2, f'{lat:.1f}ms', ha='center', va='bottom',
                 fontweight='bold', fontsize=9, color='#4ECDC4')

    plt.tight_layout()
    plt.savefig(out_path, dpi=300, bbox_inches='tight')


def _build_report(data, batch_sizes, pytorch_fps, tensorrt_fps,
                  tensorrt_latency, improvements):
    """Assemble the full Chinese text report and return it as one string.

    NOTE(review): the segment breakdown ([:2], [2:4], [4:]) and the
    recommendation table (indices 0..5) assume exactly six batch sizes
    [1, 2, 4, 8, 16, 32]; other inputs raise IndexError — confirm against
    the benchmark driver that produced the JSON.
    """
    sep = '=' * 70
    report = f"""
{sep}
PyTorch vs TensorRT 完整性能对比报告
{sep}
测试时间: {data['timestamp']}
测试设备: NVIDIA GeForce RTX 3050 OEM
{sep}
详细性能数据
{sep}
批次 | PyTorch FPS | TensorRT FPS | 性能提升 | TensorRT延迟
{sep}
"""
    # One fixed-width table row per batch size.
    for bs, pt_fps, trt_fps, improvement, latency in zip(
            batch_sizes, pytorch_fps, tensorrt_fps, improvements,
            tensorrt_latency):
        report += (f"{bs:4d} | {pt_fps:11.1f} | {trt_fps:12.1f} | "
                   f"{improvement:+8.1f}% | {latency:8.1f}ms\n")

    avg_improvement = np.mean(improvements)
    best_bs = batch_sizes[int(np.argmax(tensorrt_fps))]
    best_fps = max(tensorrt_fps)

    report += f"""
{sep}
关键发现
{sep}
✅ 平均性能提升: {avg_improvement:+.1f}%
✅ 最佳配置: 批次大小 {best_bs} ({best_fps:.1f} FPS)
✅ TensorRT 在所有批次下均优于 PyTorch

性能分析:
"""
    # Positional segments: batches 1-2 / 4-8 / 16-32 (see NOTE above).
    small_batch_improvement = np.mean(improvements[:2])
    medium_batch_improvement = np.mean(improvements[2:4])
    large_batch_improvement = np.mean(improvements[4:])

    report += f"""
• 小批次 (1-2): 平均提升 {small_batch_improvement:+.1f}%
• 中批次 (4-8): 平均提升 {medium_batch_improvement:+.1f}%
• 大批次 (16-32): 平均提升 {large_batch_improvement:+.1f}%

趋势观察:
"""
    # Compare the largest batch against the second largest for each backend.
    if pytorch_fps[-1] > pytorch_fps[-2]:
        pt_trend = (f"PyTorch 在批次 32 相比批次 16 提升 "
                    f"{(pytorch_fps[-1] / pytorch_fps[-2] - 1) * 100:.1f}%")
    else:
        pt_trend = "PyTorch 在批次 32 相比批次 16 性能持平或下降"
    if tensorrt_fps[-1] > tensorrt_fps[-2]:
        trt_trend = (f"TensorRT 在批次 32 相比批次 16 提升 "
                     f"{(tensorrt_fps[-1] / tensorrt_fps[-2] - 1) * 100:.1f}%")
    else:
        trt_trend = "TensorRT 在批次 32 相比批次 16 性能持平"

    report += f"""
• {pt_trend}
• {trt_trend}
• TensorRT 在大批次下性能趋于稳定 (批次 16-32: {tensorrt_fps[-2]:.1f} → {tensorrt_fps[-1]:.1f} FPS)

{sep}
推荐配置
{sep}
场景 | 推荐批次 | 预期性能 (TensorRT)
{sep}
实时检测 (低延迟优先) | 1-2 | {tensorrt_fps[0]:.1f}-{tensorrt_fps[1]:.1f} FPS, 延迟 {tensorrt_latency[0]:.1f}-{tensorrt_latency[1]:.1f}ms
平衡场景 (延迟+吞吐量) | 4-8 | {tensorrt_fps[2]:.1f}-{tensorrt_fps[3]:.1f} FPS, 延迟 {tensorrt_latency[2]:.1f}-{tensorrt_latency[3]:.1f}ms
高吞吐量 (批量处理) | 16-32 | {tensorrt_fps[4]:.1f}-{tensorrt_fps[5]:.1f} FPS, 延迟 {tensorrt_latency[4]:.1f}-{tensorrt_latency[5]:.1f}ms

{sep}
结论
{sep}
🎯 TensorRT 在所有批次大小下均显著优于 PyTorch
🚀 小批次下性能提升最显著 (批次 1: +{improvements[0]:.1f}%)
📈 大批次下吞吐量最高 (批次 16-32: ~{np.mean(tensorrt_fps[4:]):.1f} FPS)
⚡ 延迟随批次增大线性增长,符合预期

建议:
• 实时应用使用批次 1-2 以获得最低延迟
• 离线批量处理使用批次 16-32 以最大化吞吐量
• TensorRT 优化效果显著,强烈推荐用于生产环境
{sep}
"""
    return report


def main(results_path=RESULTS_JSON):
    """Run the full pipeline: load results, save the chart, write the report.

    Parameters:
        results_path: benchmark JSON to read (defaults to the original
            hard-coded results file).
    """
    (data, batch_sizes, pytorch_fps, tensorrt_fps,
     tensorrt_latency) = _load_results(results_path)
    # Per-batch relative speedup of TensorRT over PyTorch, in percent.
    improvements = [(trt - pt) / pt * 100
                    for pt, trt in zip(pytorch_fps, tensorrt_fps)]

    _plot_charts(batch_sizes, pytorch_fps, tensorrt_fps, tensorrt_latency,
                 improvements, CHART_PATH)
    print(f"✅ 综合对比图已保存: {CHART_PATH}")

    report = _build_report(data, batch_sizes, pytorch_fps, tensorrt_fps,
                           tensorrt_latency, improvements)
    with open(REPORT_PATH, 'w', encoding='utf-8') as f:
        f.write(report)
    print(report)
    print(f"\n✅ 完整报告已保存: {REPORT_PATH}")
    print("🎉 所有测试和分析完成!")


if __name__ == '__main__':
    main()