#!/usr/bin/env python3
"""
PyTorch vs TensorRT batch performance comparison test.

Benchmarks a TensorRT engine against pre-recorded PyTorch FPS numbers
and generates comparison charts plus a text report.
"""

import os
import time
import json
import numpy as np
import torch
import matplotlib.pyplot as plt
from datetime import datetime
from ultralytics import YOLO

# Configure fonts so the Chinese chart labels render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
# Keep the minus sign rendering correctly with non-ASCII font families.
plt.rcParams['axes.unicode_minus'] = False
# Pre-recorded PyTorch FPS results keyed by batch size.
# Values for batches 1-8 were extracted from an earlier chart; 16 and 32
# are newly measured. Downstream code treats a value of None as "not tested".
PYTORCH_DATA = {
    1: 64.4,
    2: 91.2,
    4: 122.8,
    8: 131.4,
    16: 145.9,  # newly measured
    32: 147.8   # newly measured
}
def test_tensorrt_batch_performance(engine_path, batch_sizes, test_duration=20):
    """Benchmark TensorRT engine throughput and latency per batch size.

    Args:
        engine_path: Path to the exported TensorRT ``.engine`` file.
        batch_sizes: Iterable of batch sizes to benchmark.
        test_duration: Seconds to run the timed loop for each batch size.

    Returns:
        dict mapping batch size -> result dict with keys ``avg_fps``,
        ``avg_latency_ms``, ``total_frames``, ``test_duration`` and
        ``success`` (plus ``error`` when a run failed).
    """
    print("🚀 开始测试 TensorRT 批次性能")
    print("=" * 60)

    # Load the TensorRT engine through the Ultralytics wrapper.
    print(f"📦 加载 TensorRT 引擎: {engine_path}")
    model = YOLO(engine_path)
    print("✅ 引擎加载成功")

    results = {}

    for batch_size in batch_sizes:
        print(f"\n🔄 测试批次大小: {batch_size} (测试时长: {test_duration}秒)")

        try:
            # Pre-generate one fixed random batch and reuse it for both the
            # warmup and the timed loop. Previously a fresh batch of random
            # 640x640 images was built on every iteration; that generation
            # cost was counted inside the FPS wall-clock interval and
            # deflated the measured inference throughput.
            test_images = [np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
                           for _ in range(batch_size)]

            # Warmup (engine/context initialization, allocator warm paths).
            print("🔥 预热中...")
            for _ in range(5):
                model(test_images, verbose=False)

            # Timed benchmark loop.
            fps_list = []
            latency_list = []
            batch_count = 0

            start_time = time.time()
            last_fps_time = start_time
            fps_batch_count = 0

            while time.time() - start_time < test_duration:
                # Latency is measured around the inference call only.
                infer_start = time.time()
                model(test_images, verbose=False)
                infer_end = time.time()

                latency_ms = (infer_end - infer_start) * 1000
                latency_list.append(latency_ms)

                batch_count += 1
                fps_batch_count += 1

                # Emit one FPS sample roughly every second.
                current_time = time.time()
                if current_time - last_fps_time >= 1.0:
                    fps = (fps_batch_count * batch_size) / (current_time - last_fps_time)
                    fps_list.append(fps)
                    fps_batch_count = 0
                    last_fps_time = current_time

                    # Progress output.
                    elapsed = current_time - start_time
                    print(f" 进度: {elapsed:.1f}s/{test_duration}s, "
                          f"当前FPS: {fps:.1f}, 延迟: {latency_ms:.1f}ms")

            # Aggregate the run.
            total_time = time.time() - start_time
            total_frames = batch_count * batch_size

            avg_fps = np.mean(fps_list) if fps_list else 0
            avg_latency_ms = np.mean(latency_list)

            results[batch_size] = {
                'avg_fps': avg_fps,
                'avg_latency_ms': avg_latency_ms,
                'total_frames': total_frames,
                'test_duration': total_time,
                'success': True
            }

            print(f"✅ 批次 {batch_size} 测试完成:")
            print(f" 平均FPS: {avg_fps:.1f}")
            print(f" 平均延迟: {avg_latency_ms:.1f}ms")

        except Exception as e:
            # Record the failure (e.g. out-of-memory at large batches)
            # and keep benchmarking the remaining batch sizes.
            print(f"❌ 批次 {batch_size} 测试失败: {e}")
            results[batch_size] = {
                'avg_fps': 0,
                'avg_latency_ms': 0,
                'success': False,
                'error': str(e)
            }

    return results
def _annotate_fps_bars(ax, bars):
    """Write each non-zero bar's height (FPS) just above the bar."""
    for bar in bars:
        height = bar.get_height()
        if height > 0:
            ax.text(bar.get_x() + bar.get_width()/2., height + 1,
                    f'{height:.1f}', ha='center', va='bottom', fontweight='bold')


def create_comparison_chart(pytorch_data, tensorrt_data, output_dir):
    """Render the PyTorch vs TensorRT comparison figure.

    Left panel: grouped FPS bars per batch size. Right panel: TensorRT's
    percentage improvement over PyTorch for batches measured on both sides.
    Saves ``pytorch_vs_tensorrt_comparison.png`` into *output_dir*.

    Args:
        pytorch_data: dict {batch_size: fps or None}.
        tensorrt_data: dict {batch_size: benchmark result dict}.
        output_dir: output directory (created if missing).
    """
    print("\n🎨 生成对比图表...")

    os.makedirs(output_dir, exist_ok=True)

    # Extract aligned FPS series; 0 stands in for missing/failed entries.
    batch_sizes = sorted(pytorch_data.keys())
    pytorch_fps = [pytorch_data[bs] if pytorch_data[bs] is not None else 0 for bs in batch_sizes]
    tensorrt_fps = [tensorrt_data[bs]['avg_fps'] if tensorrt_data[bs]['success'] else 0
                    for bs in batch_sizes]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

    # Panel 1: grouped FPS bars.
    x = np.arange(len(batch_sizes))
    width = 0.35

    # Only draw PyTorch bars for batches that actually have PyTorch data.
    pytorch_mask = [pytorch_data[bs] is not None for bs in batch_sizes]
    pytorch_x = x[pytorch_mask]
    pytorch_values = [pytorch_fps[i] for i, m in enumerate(pytorch_mask) if m]

    bars1 = ax1.bar(pytorch_x - width/2, pytorch_values, width, label='PyTorch',
                    color='#FF6B6B', alpha=0.8)
    bars2 = ax1.bar(x + width/2, tensorrt_fps, width, label='TensorRT',
                    color='#4ECDC4', alpha=0.8)

    ax1.set_xlabel('批次大小', fontsize=12)
    ax1.set_ylabel('FPS (帧/秒)', fontsize=12)
    ax1.set_title('PyTorch vs TensorRT 批量推理性对比'.replace('性对比', '性能对比'), fontsize=14, fontweight='bold') if False else ax1.set_title('PyTorch vs TensorRT 批量推理性能对比', fontsize=14, fontweight='bold')
    ax1.set_xticks(x)
    ax1.set_xticklabels(batch_sizes)
    ax1.legend()
    ax1.grid(True, alpha=0.3, axis='y')

    # Value labels (previously two duplicated loops — now a shared helper).
    _annotate_fps_bars(ax1, bars1)
    _annotate_fps_bars(ax1, bars2)

    # Panel 2: percentage improvement, only where PyTorch data exists.
    improvements = []
    improvement_labels = []
    for bs in batch_sizes:
        if pytorch_data[bs] is not None and tensorrt_data[bs]['success'] and pytorch_data[bs] > 0:
            improvement = (tensorrt_data[bs]['avg_fps'] - pytorch_data[bs]) / pytorch_data[bs] * 100
            improvements.append(improvement)
            improvement_labels.append(bs)

    if improvements:
        # Green = faster than PyTorch, red = slower.
        colors = ['green' if imp > 0 else 'red' for imp in improvements]
        bars3 = ax2.bar(improvement_labels, improvements, color=colors, alpha=0.8, edgecolor='black')
        ax2.set_xlabel('批次大小', fontsize=12)
        ax2.set_ylabel('性能提升 (%)', fontsize=12)
        ax2.set_title('TensorRT 相对 PyTorch 的性能提升', fontsize=14, fontweight='bold')
        ax2.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
        ax2.grid(True, alpha=0.3, axis='y')

        # Signed percentage labels, placed above positive / below negative bars.
        for bar, imp in zip(bars3, improvements):
            height = bar.get_height()
            ax2.text(bar.get_x() + bar.get_width()/2., height + (2 if height > 0 else -2),
                     f'{imp:+.1f}%', ha='center', va='bottom' if height > 0 else 'top',
                     fontweight='bold')

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'pytorch_vs_tensorrt_comparison.png'),
                dpi=300, bbox_inches='tight')
    plt.show()
    print("✅ 对比图表已保存: pytorch_vs_tensorrt_comparison.png")
def create_combined_line_chart(pytorch_data, tensorrt_data, output_dir, tensorrt_single_fps=140.1):
    """Render a combined FPS-vs-batch-size line chart.

    Saves ``batch_performance_line_chart.png`` into *output_dir*.

    Args:
        pytorch_data: dict {batch_size: fps or None}.
        tensorrt_data: dict {batch_size: benchmark result dict}.
        output_dir: output directory for the image.
        tensorrt_single_fps: single-frame TensorRT reference FPS drawn as a
            dashed horizontal line. Default 140.1 comes from an earlier
            measured run (was previously a hard-coded constant).
    """
    print("🎨 生成组合折线图...")

    batch_sizes = sorted(pytorch_data.keys())
    pytorch_fps = [pytorch_data[bs] if pytorch_data[bs] is not None else None for bs in batch_sizes]
    tensorrt_fps = [tensorrt_data[bs]['avg_fps'] if tensorrt_data[bs]['success'] else 0
                    for bs in batch_sizes]

    fig, ax = plt.subplots(figsize=(12, 7))

    # PyTorch line — plot only the batches that have data.
    pytorch_valid_x = [bs for bs, fps in zip(batch_sizes, pytorch_fps) if fps is not None]
    pytorch_valid_y = [fps for fps in pytorch_fps if fps is not None]

    if pytorch_valid_x:
        ax.plot(pytorch_valid_x, pytorch_valid_y, 'o-', color='#FF6B6B',
                linewidth=3, markersize=12, label='PyTorch', markeredgecolor='white', markeredgewidth=2)

    # TensorRT line — all batch sizes (failed entries appear as 0).
    ax.plot(batch_sizes, tensorrt_fps, 's-', color='#4ECDC4',
            linewidth=3, markersize=12, label='TensorRT', markeredgecolor='white', markeredgewidth=2)

    # Single-frame TensorRT reference line.
    ax.axhline(y=tensorrt_single_fps, color='#4ECDC4', linestyle='--',
               linewidth=2, alpha=0.5, label='TensorRT (单帧参考)')

    ax.set_xlabel('批次大小', fontsize=14, fontweight='bold')
    ax.set_ylabel('FPS (帧/秒)', fontsize=14, fontweight='bold')
    ax.set_title('批量推理性能对比 (PyTorch vs TensorRT)', fontsize=16, fontweight='bold', pad=20)
    ax.grid(True, alpha=0.3, linestyle='--')
    ax.legend(fontsize=12, loc='upper left')

    # Per-point value labels (PyTorch above the marker, TensorRT below).
    for bs, pt_fps, trt_fps in zip(batch_sizes, pytorch_fps, tensorrt_fps):
        if pt_fps is not None:
            ax.text(bs, pt_fps + 3, f'{pt_fps:.1f}', ha='center', va='bottom',
                    fontweight='bold', fontsize=10, color='#FF6B6B')
        if trt_fps > 0:
            ax.text(bs, trt_fps - 3, f'{trt_fps:.1f}', ha='center', va='top',
                    fontweight='bold', fontsize=10, color='#4ECDC4')

    # One x tick per batch size.
    ax.set_xticks(batch_sizes)
    ax.set_xticklabels(batch_sizes, fontsize=12)

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'batch_performance_line_chart.png'),
                dpi=300, bbox_inches='tight')
    plt.show()
    print("✅ 折线图已保存: batch_performance_line_chart.png")
def generate_comparison_report(pytorch_data, tensorrt_data, output_dir):
    """Build, print and save the PyTorch-vs-TensorRT text report.

    Writes ``comparison_report.txt`` into *output_dir* and echoes the
    full report to stdout.
    """
    print("\n📝 生成对比报告...")

    # Report header (timestamped).
    report = f"""
PyTorch vs TensorRT 批量推理性能对比报告
{'='*60}

测试时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

详细对比数据:
{'='*60}
"""

    batch_sizes = sorted(pytorch_data.keys())

    # Per-batch section: four cases depending on which side has data
    # (TensorRT success x PyTorch availability).
    for bs in batch_sizes:
        pt_fps = pytorch_data[bs]
        trt_result = tensorrt_data[bs]

        if trt_result['success']:
            trt_fps = trt_result['avg_fps']

            if pt_fps is not None:
                improvement = (trt_fps - pt_fps) / pt_fps * 100
                report += f"""
批次大小: {bs}
PyTorch FPS: {pt_fps:.1f}
TensorRT FPS: {trt_fps:.1f}
性能提升: {improvement:+.1f}%
TensorRT 延迟: {trt_result['avg_latency_ms']:.1f}ms
"""
            else:
                report += f"""
批次大小: {bs}
PyTorch FPS: 未测试
TensorRT FPS: {trt_fps:.1f}
TensorRT 延迟: {trt_result['avg_latency_ms']:.1f}ms
"""
        else:
            if pt_fps is not None:
                report += f"""
批次大小: {bs}
PyTorch FPS: {pt_fps:.1f}
TensorRT: 测试失败 - {trt_result.get('error', '未知错误')}
"""
            else:
                report += f"""
批次大小: {bs}
PyTorch: 未测试
TensorRT: 测试失败 - {trt_result.get('error', '未知错误')}
"""

    # Overall statistics over the successful TensorRT runs.
    successful_tests = [bs for bs in batch_sizes if tensorrt_data[bs]['success']]
    if successful_tests:
        # Average improvement only over batches that also have PyTorch data.
        comparable_tests = [bs for bs in successful_tests if pytorch_data[bs] is not None]

        if comparable_tests:
            avg_improvement = np.mean([
                (tensorrt_data[bs]['avg_fps'] - pytorch_data[bs]) / pytorch_data[bs] * 100
                for bs in comparable_tests
            ])
        else:
            avg_improvement = None

        # Best TensorRT configuration by average FPS.
        best_bs = max(successful_tests, key=lambda bs: tensorrt_data[bs]['avg_fps'])
        best_fps = tensorrt_data[best_bs]['avg_fps']

        report += f"""

总体统计:
{'='*60}
成功测试: {len(successful_tests)}/{len(batch_sizes)}
"""

        if avg_improvement is not None:
            report += f"平均性能提升 (相对PyTorch): {avg_improvement:+.1f}%\n"

        report += f"""最佳配置: 批次大小 {best_bs} ({best_fps:.1f} FPS)

推荐配置:
{'='*60}
✅ 实时场景 (低延迟): 批次大小 1-2
✅ 平衡场景: 批次大小 4-8
✅ 高吞吐量场景: 批次大小 16-32

关键发现:
{'='*60}
"""

        # Analyze the throughput trend across batch sizes.
        if len(successful_tests) >= 2:
            fps_values = [tensorrt_data[bs]['avg_fps'] for bs in successful_tests]
            if fps_values[-1] > fps_values[0] * 1.5:
                report += "🚀 TensorRT 在大批次下表现优异,吞吐量显著提升\n"

            if comparable_tests and all(tensorrt_data[bs]['avg_fps'] > pytorch_data[bs] for bs in comparable_tests):
                report += "✅ TensorRT 在所有可对比批次下均优于 PyTorch\n"

        # Compare batch-16 vs batch-32 scaling when both succeeded.
        if 16 in successful_tests and 32 in successful_tests:
            fps_16 = tensorrt_data[16]['avg_fps']
            fps_32 = tensorrt_data[32]['avg_fps']
            if fps_32 > fps_16 * 1.3:
                report += f"🎯 批次 32 相比批次 16 吞吐量提升 {(fps_32/fps_16-1)*100:.1f}%,GPU 利用率更高\n"
            elif fps_32 < fps_16 * 1.1:
                report += "⚠️ 批次 32 性能提升有限,可能受 GPU 显存或计算能力限制\n"

    # Persist the report.
    report_file = os.path.join(output_dir, 'comparison_report.txt')
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report)

    print(report)
    print(f"\n📁 报告已保存: {report_file}")
def main(engine_path="C:/Users/16337/PycharmProjects/Security/yolo11n.engine",
         output_dir="comparison_results"):
    """Entry point: benchmark TensorRT and compare against PyTorch data.

    Args:
        engine_path: TensorRT engine file to benchmark. Was previously a
            hard-coded constant; now overridable while keeping the old
            value as the default.
        output_dir: Directory receiving the JSON data, charts and report.
    """
    print("PyTorch vs TensorRT 批量推理性能对比测试")
    print("=" * 60)

    # Bail out early when the engine file is missing.
    if not os.path.exists(engine_path):
        print(f"❌ TensorRT 引擎不存在: {engine_path}")
        return

    # TensorRT inference requires a CUDA device.
    if not torch.cuda.is_available():
        print("❌ CUDA 不可用")
        return

    print(f"✅ CUDA 可用,设备: {torch.cuda.get_device_name(0)}")
    print(f"✅ TensorRT 引擎: {engine_path}")

    # Batch sizes to benchmark and the timed duration per batch size.
    batch_sizes = [1, 2, 4, 8, 16, 32]
    test_duration = 20  # seconds per batch size

    print(f"\n📊 测试配置:")
    print(f" 批次大小: {batch_sizes}")
    print(f" 每批次测试时长: {test_duration}秒")
    print(f"\n📈 PyTorch 参考数据:")
    for bs, fps in PYTORCH_DATA.items():
        if fps is not None:
            print(f" 批次 {bs}: {fps:.1f} FPS")
        else:
            print(f" 批次 {bs}: 待测试")

    try:
        # Run the TensorRT benchmark.
        tensorrt_results = test_tensorrt_batch_performance(engine_path, batch_sizes, test_duration)

        os.makedirs(output_dir, exist_ok=True)

        # Persist raw results as timestamped JSON.
        results_data = {
            'pytorch': PYTORCH_DATA,
            'tensorrt': tensorrt_results,
            'timestamp': datetime.now().isoformat()
        }

        json_file = os.path.join(output_dir, f"comparison_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(results_data, f, indent=2, ensure_ascii=False)

        print(f"\n✅ 测试数据已保存: {json_file}")

        # Visualization.
        create_comparison_chart(PYTORCH_DATA, tensorrt_results, output_dir)
        create_combined_line_chart(PYTORCH_DATA, tensorrt_results, output_dir)

        # Text report.
        generate_comparison_report(PYTORCH_DATA, tensorrt_results, output_dir)

        print(f"\n🎉 测试完成!")
        print(f"📁 所有结果已保存到: {output_dir}/")

    except KeyboardInterrupt:
        print("\n\n⏹️ 测试被用户中断")
    except Exception as e:
        # Top-level boundary: report the error with a traceback, don't crash.
        print(f"\n❌ 测试过程中发生错误: {e}")
        import traceback
        traceback.print_exc()
# Script entry point: run the full benchmark + reporting pipeline.
if __name__ == "__main__":
    main()