#!/usr/bin/env python3
"""Benchmark-result visualization script.

Generates PyTorch vs TensorRT performance comparison charts and a text
summary report from a JSON results file produced by the benchmark run.
"""

import json
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np

# Font setup so CJK axis labels/titles render correctly; keep the minus
# sign displayable when a CJK font is active.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False
def load_results(json_file):
    """Read a benchmark-results JSON file and return the parsed object."""
    with open(json_file, encoding='utf-8') as fh:
        return json.load(fh)
def create_fps_comparison_chart(results, output_dir):
    """Render single-frame FPS bars (PyTorch vs TensorRT) plus a PyTorch
    batch-size FPS curve, and save the figure as fps_comparison.png.

    Only PyTorch batch results are plotted; TensorRT appears as a
    single-frame reference line (its batch benchmark is unavailable).
    """
    fig, (single_ax, batch_ax) = plt.subplots(1, 2, figsize=(15, 6))

    pt_fps = results['pytorch']['single_inference']['avg_fps']
    trt_fps = results['tensorrt']['single_inference']['avg_fps']

    engine_names = ['PyTorch', 'TensorRT']
    single_fps = [pt_fps, trt_fps]
    palette = ['#FF6B6B', '#4ECDC4']

    single_bars = single_ax.bar(engine_names, single_fps, color=palette, alpha=0.8)
    single_ax.set_title('单帧推理性能对比', fontsize=14, fontweight='bold')
    single_ax.set_ylabel('FPS (帧/秒)', fontsize=12)
    single_ax.grid(True, alpha=0.3)

    # Numeric label just above each bar.
    for rect, fps in zip(single_bars, single_fps):
        single_ax.text(rect.get_x() + rect.get_width() / 2., rect.get_height() + 1,
                       f'{fps:.1f}', ha='center', va='bottom', fontweight='bold')

    # Relative speedup annotation, centered between the two bars (data coords).
    speedup = (trt_fps - pt_fps) / pt_fps * 100
    single_ax.text(0.5, max(single_fps) * 0.8, f'TensorRT 提升: {speedup:.1f}%',
                   ha='center', transform=single_ax.transData, fontsize=12,
                   bbox=dict(boxstyle="round,pad=0.3", facecolor="yellow", alpha=0.7))

    if 'batch_inference' in results['pytorch']:
        batch_runs = results['pytorch']['batch_inference']
        sizes = [run['batch_size'] for run in batch_runs]
        batch_fps = [run['avg_fps'] for run in batch_runs]

        batch_ax.plot(sizes, batch_fps, 'o-', color='#FF6B6B',
                      linewidth=2, markersize=8, label='PyTorch')
        batch_ax.axhline(y=trt_fps, color='#4ECDC4', linestyle='--',
                         linewidth=2, label='TensorRT (单帧)')

        batch_ax.set_title('批量推理性能 (PyTorch)', fontsize=14, fontweight='bold')
        batch_ax.set_xlabel('批次大小', fontsize=12)
        batch_ax.set_ylabel('FPS (帧/秒)', fontsize=12)
        batch_ax.grid(True, alpha=0.3)
        batch_ax.legend()

        # Numeric label above each data point.
        for size, fps in zip(sizes, batch_fps):
            batch_ax.text(size, fps + 2, f'{fps:.1f}', ha='center', va='bottom')

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'fps_comparison.png'), dpi=300, bbox_inches='tight')
    plt.show()
def create_latency_comparison_chart(results, output_dir):
    """Bar chart of average single-frame latency for both engines, with a
    latency-reduction annotation; saved as latency_comparison.png."""
    fig, axis = plt.subplots(1, 1, figsize=(10, 6))

    pt_latency = results['pytorch']['single_inference']['avg_latency_ms']
    trt_latency = results['tensorrt']['single_inference']['avg_latency_ms']

    engine_names = ['PyTorch', 'TensorRT']
    latencies = [pt_latency, trt_latency]
    palette = ['#FF6B6B', '#4ECDC4']

    rects = axis.bar(engine_names, latencies, color=palette, alpha=0.8)
    axis.set_title('推理延迟对比', fontsize=14, fontweight='bold')
    axis.set_ylabel('延迟 (毫秒)', fontsize=12)
    axis.grid(True, alpha=0.3)

    # Numeric label just above each bar.
    for rect, ms in zip(rects, latencies):
        axis.text(rect.get_x() + rect.get_width() / 2., rect.get_height() + 0.1,
                  f'{ms:.1f}ms', ha='center', va='bottom', fontweight='bold')

    # Latency-reduction annotation, centered between the two bars (data coords).
    reduction = (pt_latency - trt_latency) / pt_latency * 100
    axis.text(0.5, max(latencies) * 0.8, f'TensorRT 延迟减少: {reduction:.1f}%',
              ha='center', transform=axis.transData, fontsize=12,
              bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgreen", alpha=0.7))

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'latency_comparison.png'), dpi=300, bbox_inches='tight')
    plt.show()
def create_concurrent_performance_chart(results, output_dir):
    """Plot aggregate and per-stream FPS for concurrent PyTorch streams
    against TensorRT's single-stream baseline; saved as
    concurrent_performance.png.

    No-op when the results carry no 'concurrent_streams' section.
    """
    if 'concurrent_streams' not in results['pytorch']:
        return

    fig, (total_ax, stream_ax) = plt.subplots(1, 2, figsize=(15, 6))

    runs = results['pytorch']['concurrent_streams']
    stream_counts = [run['concurrent_streams'] for run in runs]
    per_stream_fps = [run['avg_fps'] for run in runs]
    total_fps = [fps * count for fps, count in zip(per_stream_fps, stream_counts)]

    # Aggregate FPS: measured PyTorch vs TensorRT extrapolated linearly
    # from its single-frame rate.
    total_ax.plot(stream_counts, total_fps, 'o-', color='#FF6B6B',
                  linewidth=2, markersize=8, label='PyTorch 总FPS')

    trt_fps = results['tensorrt']['single_inference']['avg_fps']
    total_ax.plot(stream_counts, [trt_fps * count for count in stream_counts],
                  '--', color='#4ECDC4', linewidth=2, label='TensorRT 理论总FPS')

    total_ax.set_title('并发总FPS性能', fontsize=14, fontweight='bold')
    total_ax.set_xlabel('并发摄像头数量', fontsize=12)
    total_ax.set_ylabel('总FPS (帧/秒)', fontsize=12)
    total_ax.grid(True, alpha=0.3)
    total_ax.legend()

    # Average FPS each individual stream achieves as concurrency grows.
    stream_ax.plot(stream_counts, per_stream_fps, 'o-', color='#FF6B6B',
                   linewidth=2, markersize=8, label='PyTorch 单流FPS')
    stream_ax.axhline(y=trt_fps, color='#4ECDC4', linestyle='--',
                      linewidth=2, label='TensorRT 单流FPS')

    stream_ax.set_title('单流平均FPS性能', fontsize=14, fontweight='bold')
    stream_ax.set_xlabel('并发摄像头数量', fontsize=12)
    stream_ax.set_ylabel('单流FPS (帧/秒)', fontsize=12)
    stream_ax.grid(True, alpha=0.3)
    stream_ax.legend()

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'concurrent_performance.png'), dpi=300, bbox_inches='tight')
    plt.show()
def create_resource_utilization_chart(results, output_dir):
    """Create a 2x2 grid comparing resource usage of the two engines:
    GPU utilization, GPU memory, CPU utilization, and FPS-per-GPU-%
    efficiency; saved as resource_utilization.png.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

    pytorch_stats = results['pytorch']['single_inference']
    tensorrt_stats = results['tensorrt']['single_inference']

    engines = ['PyTorch', 'TensorRT']
    colors = ['#FF6B6B', '#4ECDC4']

    # GPU utilization comparison
    pytorch_gpu = pytorch_stats['avg_gpu_util']
    tensorrt_gpu = tensorrt_stats['avg_gpu_util']
    gpu_utils = [pytorch_gpu, tensorrt_gpu]

    bars1 = ax1.bar(engines, gpu_utils, color=colors, alpha=0.8)
    ax1.set_title('GPU 利用率对比', fontsize=12, fontweight='bold')
    ax1.set_ylabel('GPU 利用率 (%)', fontsize=10)
    ax1.grid(True, alpha=0.3)

    for bar, value in zip(bars1, gpu_utils):
        ax1.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.5,
                 f'{value:.1f}%', ha='center', va='bottom', fontweight='bold')

    # GPU memory comparison
    gpu_mems = [pytorch_stats['avg_gpu_memory_mb'], tensorrt_stats['avg_gpu_memory_mb']]
    bars2 = ax2.bar(engines, gpu_mems, color=colors, alpha=0.8)
    ax2.set_title('GPU 内存使用对比', fontsize=12, fontweight='bold')
    ax2.set_ylabel('GPU 内存 (MB)', fontsize=10)
    ax2.grid(True, alpha=0.3)

    for bar, value in zip(bars2, gpu_mems):
        ax2.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 10,
                 f'{value:.0f}MB', ha='center', va='bottom', fontweight='bold')

    # CPU utilization comparison
    cpu_utils = [pytorch_stats['avg_cpu_util'], tensorrt_stats['avg_cpu_util']]
    bars3 = ax3.bar(engines, cpu_utils, color=colors, alpha=0.8)
    ax3.set_title('CPU 利用率对比', fontsize=12, fontweight='bold')
    ax3.set_ylabel('CPU 利用率 (%)', fontsize=10)
    ax3.grid(True, alpha=0.3)

    for bar, value in zip(bars3, cpu_utils):
        ax3.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.2,
                 f'{value:.1f}%', ha='center', va='bottom', fontweight='bold')

    # Efficiency = FPS per percentage point of GPU utilization.
    # Bug fix: the original referenced pytorch_single_fps / tensorrt_single_fps,
    # names never defined in this function (NameError at runtime); pull the
    # FPS figures from the results dict instead.
    pytorch_single_fps = pytorch_stats['avg_fps']
    tensorrt_single_fps = tensorrt_stats['avg_fps']
    pytorch_efficiency = pytorch_single_fps / pytorch_gpu if pytorch_gpu > 0 else 0
    tensorrt_efficiency = tensorrt_single_fps / tensorrt_gpu if tensorrt_gpu > 0 else 0

    efficiencies = [pytorch_efficiency, tensorrt_efficiency]
    bars4 = ax4.bar(engines, efficiencies, color=colors, alpha=0.8)
    ax4.set_title('性能效率对比 (FPS/GPU利用率)', fontsize=12, fontweight='bold')
    ax4.set_ylabel('效率 (FPS/%)', fontsize=10)
    ax4.grid(True, alpha=0.3)

    for bar, value in zip(bars4, efficiencies):
        ax4.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.05,
                 f'{value:.1f}', ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'resource_utilization.png'), dpi=300, bbox_inches='tight')
    plt.show()
def generate_summary_report(results, output_dir):
    """Compose the PyTorch-vs-TensorRT summary report, write it to
    performance_summary.txt under output_dir, and echo it to stdout.

    Expects results['pytorch'] / results['tensorrt'] to each carry a
    'single_inference' dict; batch and concurrent sections are optional.
    """
    pt = results['pytorch']['single_inference']
    trt = results['tensorrt']['single_inference']

    # Relative gains of TensorRT over the PyTorch baseline.
    fps_gain = (trt['avg_fps'] - pt['avg_fps']) / pt['avg_fps'] * 100
    latency_gain = (pt['avg_latency_ms'] - trt['avg_latency_ms']) / pt['avg_latency_ms'] * 100
    gpu_delta = trt['avg_gpu_util'] - pt['avg_gpu_util']

    pieces = [f"""
YOLOv11 性能对比测试总结报告
{'='*50}

测试时间: {results['timestamp']}
模型路径: {results['model_path']}

单帧推理性能对比:
{'='*30}
PyTorch:
- 平均FPS: {pt['avg_fps']:.1f}
- 平均延迟: {pt['avg_latency_ms']:.1f}ms
- GPU利用率: {pt['avg_gpu_util']:.1f}%
- GPU内存: {pt['avg_gpu_memory_mb']:.0f}MB

TensorRT:
- 平均FPS: {trt['avg_fps']:.1f}
- 平均延迟: {trt['avg_latency_ms']:.1f}ms
- GPU利用率: {trt['avg_gpu_util']:.1f}%
- GPU内存: {trt['avg_gpu_memory_mb']:.0f}MB

性能提升:
{'='*30}
🚀 FPS 提升: {fps_gain:.1f}%
⚡ 延迟减少: {latency_gain:.1f}%
📊 GPU利用率变化: {gpu_delta:+.1f}%

批量推理性能 (PyTorch):
{'='*30}"""]

    # One line per batch size, when batch results exist.
    for entry in results['pytorch'].get('batch_inference', []):
        pieces.append(
            f"\n批次大小 {entry['batch_size']}: {entry['avg_fps']:.1f} FPS, "
            f"{entry['avg_latency_ms']:.1f}ms"
        )

    # One line per concurrency level, when concurrent results exist.
    if 'concurrent_streams' in results['pytorch']:
        pieces.append(f"\n\n并发性能 (PyTorch):\n{'='*30}")
        for entry in results['pytorch']['concurrent_streams']:
            streams = entry['concurrent_streams']
            pieces.append(
                f"\n{streams}路并发: 总FPS {entry['avg_fps'] * streams:.1f}, "
                f"单流FPS {entry['avg_fps']:.1f}"
            )

    pieces.append(f"""

推荐配置:
{'='*30}
✅ 单摄像头场景: 推荐使用 TensorRT (性能提升 {fps_gain:.1f}%)
✅ 多摄像头场景: 需要根据并发数量选择合适的引擎
✅ 资源受限环境: TensorRT 在相同性能下GPU利用率更低

注意事项:
{'='*30}
⚠️ TensorRT 引擎需要预先导出,首次导出耗时较长
⚠️ TensorRT 批量推理需要固定批次大小的引擎
⚠️ 实际部署时需要考虑模型加载时间和内存占用
""")

    report = ''.join(pieces)

    # Persist, then echo to stdout.
    report_file = os.path.join(output_dir, 'performance_summary.txt')
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report)

    print(report)
    print(f"\n📁 总结报告已保存: {report_file}")
def main():
    """Locate the newest benchmark JSON under benchmark_results/ and render
    every chart plus the text summary into benchmark_results/visualizations/."""
    results_dir = "benchmark_results"
    if not os.path.exists(results_dir):
        print("❌ 未找到测试结果目录")
        return

    candidates = [name for name in os.listdir(results_dir)
                  if name.startswith('benchmark_results_') and name.endswith('.json')]
    if not candidates:
        print("❌ 未找到测试结果文件")
        return

    # Lexicographic max == latest file (assumes timestamped filenames sort
    # chronologically — TODO confirm against the benchmark writer).
    json_path = os.path.join(results_dir, max(candidates))
    print(f"📊 加载测试结果: {json_path}")
    results = load_results(json_path)

    # Charts and report all land in a dedicated subdirectory.
    viz_dir = os.path.join(results_dir, "visualizations")
    os.makedirs(viz_dir, exist_ok=True)

    print("🎨 生成可视化图表...")
    for renderer in (create_fps_comparison_chart,
                     create_latency_comparison_chart,
                     create_concurrent_performance_chart,
                     create_resource_utilization_chart):
        renderer(results, viz_dir)

    generate_summary_report(results, viz_dir)

    print(f"\n✅ 所有可视化图表已生成完成!")
    print(f"📁 输出目录: {viz_dir}")


if __name__ == "__main__":
    main()