Files
Test_AI/benchmark/comparison_visualizer.py
2026-01-20 10:54:30 +08:00

527 lines
22 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
TensorRT vs PyTorch comparison visualization module.

Generates detailed performance comparison charts.
"""
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Dict, Any
import seaborn as sns
# Apply the seaborn theme FIRST: set_style() rewrites 'font.family' and
# 'font.sans-serif', so calling it after the font setup below would discard
# the CJK fallback font that the Chinese chart labels depend on.
sns.set_style("whitegrid")
# Font fallback chain; SimHei is the entry intended to cover Chinese glyphs.
plt.rcParams['font.family'] = ['Arial', 'DejaVu Sans', 'SimHei']
# Draw minus signs with the ASCII hyphen instead of U+2212.
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['font.size'] = 10
def load_comparison_results(results_dir: str) -> pd.DataFrame:
    """Load the most recently modified comparison result file as a DataFrame.

    Looks directly inside ``results_dir`` for files named
    ``comparison_results_*.json`` and parses the one with the newest
    modification time.

    Args:
        results_dir: Directory that holds the benchmark result files.

    Returns:
        A DataFrame built from the decoded JSON payload.

    Raises:
        FileNotFoundError: If no matching result file is present.
    """
    newest = max(
        Path(results_dir).glob("comparison_results_*.json"),
        key=lambda candidate: candidate.stat().st_mtime,
        default=None,
    )
    if newest is None:
        raise FileNotFoundError("未找到对比测试结果文件")

    with newest.open('r', encoding='utf-8') as handle:
        records = json.load(handle)
    return pd.DataFrame(records)
def create_performance_overview(df: pd.DataFrame, output_dir: str) -> str:
    """Render the 2x2 performance overview figure and save it as a PNG.

    Panels:
        1. Best single-camera FPS at 320 and 480 resolution, with the
           TensorRT/PyTorch speed-up printed above each pair of bars.
        2. Histogram of GPU utilisation per backend with dashed mean markers
           (only when a ``gpu_utilization`` column exists).
        3. Box plot of average latency per backend (only when an
           ``avg_latency_ms`` column exists).
        4. Mean per-camera FPS against the number of cameras.

    Args:
        df: Benchmark rows. Reads ``test_mode`` ('pytorch' / 'tensorrt'),
            ``num_cameras``, ``resolution``, ``actual_fps`` and
            ``per_camera_fps``, plus the optional columns named above.
        output_dir: Directory that receives the image; it is not created here.

    Returns:
        Path of the written file, as a string.
    """
    backends = [('pytorch', '#FF6B6B', 'PyTorch'),
                ('tensorrt', '#4ECDC4', 'TensorRT')]

    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('TensorRT vs PyTorch 性能对比概览', fontsize=20, fontweight='bold')

    # 1. Best single-camera FPS per resolution.
    single_camera = df[df['num_cameras'] == 1]
    if not single_camera.empty:
        pytorch_data = single_camera[single_camera['test_mode'] == 'pytorch']
        tensorrt_data = single_camera[single_camera['test_mode'] == 'tensorrt']
        # Only these two resolutions are charted; other values in the data
        # are ignored by this panel.
        resolutions = [320, 480]
        pytorch_fps = []
        tensorrt_fps = []
        for res in resolutions:
            pytorch_res = pytorch_data[pytorch_data['resolution'] == res]
            tensorrt_res = tensorrt_data[tensorrt_data['resolution'] == res]
            pytorch_fps.append(pytorch_res['actual_fps'].max() if not pytorch_res.empty else 0)
            tensorrt_fps.append(tensorrt_res['actual_fps'].max() if not tensorrt_res.empty else 0)

        x = np.arange(len(resolutions))
        width = 0.35
        ax1.bar(x - width / 2, pytorch_fps, width, label='PyTorch',
                color='#FF6B6B', alpha=0.8)
        ax1.bar(x + width / 2, tensorrt_fps, width, label='TensorRT',
                color='#4ECDC4', alpha=0.8)
        ax1.set_title('单路最大 FPS 对比', fontweight='bold')
        ax1.set_xlabel('分辨率')
        ax1.set_ylabel('FPS')
        ax1.set_xticks(x)
        ax1.set_xticklabels([f'{res}×{res}' for res in resolutions])
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Speed-up factor above each bar pair; skipped when either backend
        # has no positive measurement (NaN also fails the comparison).
        for i, (pytorch, tensorrt) in enumerate(zip(pytorch_fps, tensorrt_fps)):
            if pytorch > 0 and tensorrt > 0:
                improvement = tensorrt / pytorch
                ax1.text(i, max(pytorch, tensorrt) + 2, f'{improvement:.1f}x',
                         ha='center', va='bottom', fontweight='bold', color='green')

    # 2. GPU utilisation distribution.
    if 'gpu_utilization' in df.columns:
        # Drop missing samples before binning, as the latency box plot below
        # already does, and skip a backend that has no samples at all.
        gpu_samples = [
            (df.loc[df['test_mode'] == mode, 'gpu_utilization'].dropna(), color, label)
            for mode, color, label in backends
        ]
        # Histograms first, mean markers second, to keep the legend order.
        for samples, color, label in gpu_samples:
            if not samples.empty:
                ax2.hist(samples, bins=15, alpha=0.7, label=label, color=color)
        for samples, color, label in gpu_samples:
            if not samples.empty:
                mean_util = samples.mean()
                ax2.axvline(mean_util, color=color, linestyle='--',
                            label=f'{label} 平均: {mean_util:.1f}%')
        ax2.set_title('GPU 利用率分布对比', fontweight='bold')
        ax2.set_xlabel('GPU 利用率 (%)')
        ax2.set_ylabel('频次')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

    # 3. Latency box plot.
    if 'avg_latency_ms' in df.columns:
        pytorch_latency = df[df['test_mode'] == 'pytorch']['avg_latency_ms']
        tensorrt_latency = df[df['test_mode'] == 'tensorrt']['avg_latency_ms']
        box_data = [pytorch_latency.dropna(), tensorrt_latency.dropna()]
        box_labels = ['PyTorch', 'TensorRT']
        bp = ax3.boxplot(box_data, labels=box_labels, patch_artist=True)
        bp['boxes'][0].set_facecolor('#FF6B6B')
        bp['boxes'][1].set_facecolor('#4ECDC4')
        ax3.set_title('推理延迟对比', fontweight='bold')
        ax3.set_ylabel('延迟 (ms)')
        ax3.grid(True, alpha=0.3)

    # 4. Mean per-camera FPS for each camera count.
    for test_mode, color, label in backends:
        mode_data = df[df['test_mode'] == test_mode]
        if not mode_data.empty:
            camera_fps = mode_data.groupby('num_cameras')['per_camera_fps'].mean()
            ax4.plot(camera_fps.index, camera_fps.values, 'o-',
                     color=color, label=label, linewidth=2, markersize=6)
    ax4.set_title('摄像头数量 vs 单路 FPS', fontweight='bold')
    ax4.set_xlabel('摄像头数量')
    ax4.set_ylabel('单路 FPS')
    ax4.legend()
    ax4.grid(True, alpha=0.3)

    fig.tight_layout()
    output_file = Path(output_dir) / "performance_overview_comparison.png"
    # Save and close this specific figure rather than pyplot's "current" one.
    fig.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close(fig)
    return str(output_file)
def create_scalability_analysis(df: pd.DataFrame, output_dir: str) -> str:
    """Render the 2x2 scalability figure and save it as a PNG.

    Every panel plots a per-backend mean against the number of cameras:
        1. Total throughput (``actual_fps``).
        2. GPU utilisation (needs ``gpu_utilization``), y-axis fixed to 0-100.
        3. Memory use (needs ``avg_memory_mb``) with a reference line at
           8192 MB, which the legend labels as the 8 GB total.
        4. Efficiency, i.e. the mean of ``actual_fps / gpu_utilization``
           (needs ``gpu_utilization``).

    Args:
        df: Benchmark rows. Reads ``test_mode`` ('pytorch' / 'tensorrt'),
            ``num_cameras`` and ``actual_fps``, plus the optional columns above.
        output_dir: Directory that receives the image; it is not created here.

    Returns:
        Path of the written file, as a string.
    """
    backends = [('pytorch', '#FF6B6B', 'PyTorch'),
                ('tensorrt', '#4ECDC4', 'TensorRT')]
    has_gpu = 'gpu_utilization' in df.columns

    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('TensorRT vs PyTorch 扩展性分析', fontsize=16, fontweight='bold')

    # 1. Total throughput vs camera count.
    for test_mode, color, label in backends:
        mode_data = df[df['test_mode'] == test_mode]
        if not mode_data.empty:
            throughput_data = mode_data.groupby('num_cameras')['actual_fps'].mean()
            ax1.plot(throughput_data.index, throughput_data.values, 'o-',
                     color=color, label=label, linewidth=2, markersize=6)
    ax1.set_title('总吞吐量 vs 摄像头数量', fontweight='bold')
    ax1.set_xlabel('摄像头数量')
    ax1.set_ylabel('总 FPS')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # 2. GPU utilisation vs camera count.
    for test_mode, color, label in backends:
        mode_data = df[df['test_mode'] == test_mode]
        if not mode_data.empty and has_gpu:
            gpu_data = mode_data.groupby('num_cameras')['gpu_utilization'].mean()
            ax2.plot(gpu_data.index, gpu_data.values, 'o-',
                     color=color, label=label, linewidth=2, markersize=6)
    ax2.set_title('GPU 利用率 vs 摄像头数量', fontweight='bold')
    ax2.set_xlabel('摄像头数量')
    ax2.set_ylabel('GPU 利用率 (%)')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    ax2.set_ylim(0, 100)

    # 3. Memory use vs camera count.
    if 'avg_memory_mb' in df.columns:
        for test_mode, color, label in backends:
            mode_data = df[df['test_mode'] == test_mode]
            if not mode_data.empty:
                memory_data = mode_data.groupby('num_cameras')['avg_memory_mb'].mean()
                ax3.plot(memory_data.index, memory_data.values, 'o-',
                         color=color, label=label, linewidth=2, markersize=6)
        # Hard-coded capacity reference (8192 MB).
        ax3.axhline(y=8192, color='red', linestyle='--', alpha=0.7, label='总显存 (8GB)')
        ax3.set_title('显存使用 vs 摄像头数量', fontweight='bold')
        ax3.set_xlabel('摄像头数量')
        ax3.set_ylabel('显存使用 (MB)')
        ax3.legend()
        ax3.grid(True, alpha=0.3)

    # 4. Efficiency (FPS per GPU %) vs camera count.
    for test_mode, color, label in backends:
        mode_data = df[df['test_mode'] == test_mode]
        if not mode_data.empty and has_gpu:
            # The 1e-6 keeps a 0 % utilisation sample from dividing by zero.
            fps_per_util = mode_data['actual_fps'] / (mode_data['gpu_utilization'] + 1e-6)
            # Group the ratio series directly; this yields the same
            # per-camera-count means as groupby(...).apply(...) on the frame
            # without applying a function over the grouping column.
            efficiency_by_cameras = fps_per_util.groupby(mode_data['num_cameras']).mean()
            ax4.plot(efficiency_by_cameras.index, efficiency_by_cameras.values, 'o-',
                     color=color, label=label, linewidth=2, markersize=6)
    ax4.set_title('性能效率对比 (FPS/GPU%)', fontweight='bold')
    ax4.set_xlabel('摄像头数量')
    ax4.set_ylabel('效率 (FPS/GPU%)')
    ax4.legend()
    ax4.grid(True, alpha=0.3)

    fig.tight_layout()
    output_file = Path(output_dir) / "scalability_analysis.png"
    # Save and close this specific figure rather than pyplot's "current" one.
    fig.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close(fig)
    return str(output_file)
def create_detailed_comparison(df: pd.DataFrame, output_dir: str) -> str:
    """Render the 2x2 detailed comparison figure and save it as a PNG.

    Panels:
        1. Mean FPS per resolution with standard-deviation error bars.
        2. Mean FPS per batch size (only when ``batch_size`` exists).
        3. Latency histograms per backend and resolution (only when
           ``avg_latency_ms`` exists).
        4. Radar chart of four normalised scores (GPU utilisation / 100,
           mean FPS / peak FPS, 1 - memory / 8192, 1 - latency / peak
           latency). Drawn only when both backends have rows; otherwise the
           panel is left blank.

    Args:
        df: Benchmark rows. Reads ``test_mode`` ('pytorch' / 'tensorrt'),
            ``resolution`` and ``actual_fps``, plus the optional columns
            ``batch_size``, ``avg_latency_ms``, ``gpu_utilization`` and
            ``avg_memory_mb``.
        output_dir: Directory that receives the image; it is not created here.

    Returns:
        Path of the written file, as a string.
    """
    backends = [('pytorch', '#FF6B6B', 'PyTorch'),
                ('tensorrt', '#4ECDC4', 'TensorRT')]

    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('TensorRT vs PyTorch 详细性能分析', fontsize=16, fontweight='bold')

    # Sort once and ignore missing resolutions, which cannot be shown as
    # "N×N" tick labels.
    resolutions = sorted(df['resolution'].dropna().unique())

    # 1. Mean FPS per resolution.
    fps_mean = {mode: [] for mode, _, _ in backends}
    fps_std = {mode: [] for mode, _, _ in backends}
    for res in resolutions:
        for mode, _, _ in backends:
            fps = df[(df['test_mode'] == mode) & (df['resolution'] == res)]['actual_fps']
            fps_mean[mode].append(fps.mean() if not fps.empty else 0)
            # The sample standard deviation is NaN for fewer than two
            # values; use a zero-length error bar instead.
            fps_std[mode].append(fps.std() if fps.count() > 1 else 0)

    x = np.arange(len(resolutions))
    width = 0.35
    for offset, (mode, color, label) in zip((-width / 2, width / 2), backends):
        ax1.bar(x + offset, fps_mean[mode], width, yerr=fps_std[mode],
                label=label, color=color, alpha=0.8, capsize=5)
    ax1.set_title('不同分辨率下的平均性能', fontweight='bold')
    ax1.set_xlabel('分辨率')
    ax1.set_ylabel('平均 FPS')
    ax1.set_xticks(x)
    ax1.set_xticklabels([f'{int(res)}×{int(res)}' for res in resolutions])
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # 2. Mean FPS per batch size.
    if 'batch_size' in df.columns:
        for test_mode, color, label in backends:
            mode_data = df[df['test_mode'] == test_mode]
            if not mode_data.empty:
                batch_perf = mode_data.groupby('batch_size')['actual_fps'].mean()
                ax2.plot(batch_perf.index, batch_perf.values, 'o-',
                         color=color, label=label, linewidth=2, markersize=6)
        ax2.set_title('批次大小 vs 性能', fontweight='bold')
        ax2.set_xlabel('批次大小')
        ax2.set_ylabel('平均 FPS')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

    # 3. Latency distribution per backend and resolution.
    if 'avg_latency_ms' in df.columns:
        for i, res in enumerate(resolutions):
            # Each further resolution gets a lighter shade; clamp so the
            # colormap position never drops below its light end.
            shade = max(0.7 - i * 0.2, 0.1)
            for mode, cmap, label in (('pytorch', plt.cm.Reds, 'PyTorch'),
                                      ('tensorrt', plt.cm.Blues, 'TensorRT')):
                latency = df[(df['test_mode'] == mode) &
                             (df['resolution'] == res)]['avg_latency_ms'].dropna()
                if not latency.empty:
                    ax3.hist(latency, bins=10, alpha=0.6,
                             label=f'{label} {int(res)}×{int(res)}',
                             color=cmap(shade))
        ax3.set_title('延迟分布对比(按分辨率)', fontweight='bold')
        ax3.set_xlabel('延迟 (ms)')
        ax3.set_ylabel('频次')
        ax3.legend()
        ax3.grid(True, alpha=0.3)

    # 4. Radar chart of normalised scores.
    categories = ['GPU利用率', 'FPS性能', '内存效率', '延迟性能']
    pytorch_data = df[df['test_mode'] == 'pytorch']
    tensorrt_data = df[df['test_mode'] == 'tensorrt']
    if not pytorch_data.empty and not tensorrt_data.empty:
        fps_peak = df['actual_fps'].max()
        latency_peak = df['avg_latency_ms'].max() if 'avg_latency_ms' in df.columns else 0

        def normalised_scores(rows: pd.DataFrame) -> list:
            """Return the four radar scores for one backend's rows."""
            return [
                rows['gpu_utilization'].mean() / 100 if 'gpu_utilization' in df.columns else 0,
                rows['actual_fps'].mean() / fps_peak if fps_peak > 0 else 0,
                # 8192 MB is the hard-coded memory budget used for scaling.
                1 - rows['avg_memory_mb'].mean() / 8192 if 'avg_memory_mb' in df.columns else 0,
                # Guarded so a zero/NaN peak (or a missing column) scores 0
                # instead of dividing by zero.
                1 - rows['avg_latency_ms'].mean() / latency_peak if latency_peak > 0 else 0,
            ]

        angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False)
        # Repeat the first point so each polygon closes.
        closed_angles = np.concatenate((angles, [angles[0]]))

        # Swap the cartesian axes created by plt.subplots() for a polar one.
        # Merely creating a second axes in the same slot would leave the
        # original frame and ticks drawn underneath the radar chart.
        ax4.remove()
        radar = fig.add_subplot(2, 2, 4, projection='polar')
        for rows, color, label in ((pytorch_data, '#FF6B6B', 'PyTorch'),
                                   (tensorrt_data, '#4ECDC4', 'TensorRT')):
            scores = normalised_scores(rows)
            scores.append(scores[0])
            radar.plot(closed_angles, scores, 'o-', linewidth=2, label=label, color=color)
            radar.fill(closed_angles, scores, alpha=0.25, color=color)
        radar.set_xticks(angles)
        radar.set_xticklabels(categories)
        radar.set_ylim(0, 1)
        radar.set_title('综合性能雷达图', fontweight='bold', pad=20)
        radar.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0))
    else:
        # Nothing to compare: hide the unused panel instead of showing an
        # empty set of axes.
        ax4.axis('off')

    fig.tight_layout()
    output_file = Path(output_dir) / "detailed_comparison.png"
    # Save and close this specific figure rather than pyplot's "current" one.
    fig.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close(fig)
    return str(output_file)
def _draw_fps_heatmap(ax, rows: pd.DataFrame, cmap: str, title: str) -> None:
    """Draw a batch-size x camera-count heatmap of mean FPS on ``ax``."""
    if rows.empty or 'batch_size' not in rows.columns:
        return
    grid = rows.pivot_table(
        values='actual_fps',
        index='batch_size',
        columns='num_cameras',
        aggfunc='mean',
    )
    if grid.empty:
        return
    image = ax.imshow(grid.values, cmap=cmap, aspect='auto')
    ax.set_title(title, fontweight='bold')
    ax.set_xlabel('摄像头数量')
    ax.set_ylabel('批次大小')
    ax.set_xticks(range(len(grid.columns)))
    ax.set_xticklabels(grid.columns)
    ax.set_yticks(range(len(grid.index)))
    ax.set_yticklabels(grid.index)
    ax.figure.colorbar(image, ax=ax, label='FPS')


def _resource_efficiency(rows: pd.DataFrame) -> float:
    """Return mean FPS per unit of GPU and memory use, or 0 if not computable."""
    if rows.empty or 'gpu_utilization' not in rows.columns or 'avg_memory_mb' not in rows.columns:
        return 0
    # The "+ 1" terms keep the denominator non-zero for idle measurements.
    return rows['actual_fps'].mean() / (
        (rows['gpu_utilization'].mean() + 1) *
        (rows['avg_memory_mb'].mean() / 1000 + 1)
    )


def _ratio_label(numerator, denominator) -> str:
    """Format ``numerator / denominator`` like '2.5x', or 'N/A' if undefined."""
    if pd.isna(numerator) or pd.isna(denominator) or denominator == 0:
        return 'N/A'
    return f'{numerator / denominator:.1f}x'


def create_deployment_recommendations(df: pd.DataFrame, output_dir: str) -> str:
    """Render the 2x2 deployment recommendation figure and save it as a PNG.

    Panels:
        1. PyTorch mean-FPS heatmap (batch size x camera count).
        2. TensorRT mean-FPS heatmap (batch size x camera count).
        3. Efficiency index per camera count:
           mean FPS / ((mean GPU % + 1) * (mean memory MB / 1000 + 1)).
        4. Text panel with recommendations derived from the peak FPS and
           the largest camera count that still reaches 5 FPS per camera.

    Args:
        df: Benchmark rows. Reads ``test_mode`` ('pytorch' / 'tensorrt'),
            ``num_cameras``, ``actual_fps`` and ``per_camera_fps``, plus the
            optional columns ``batch_size``, ``gpu_utilization`` and
            ``avg_memory_mb``.
        output_dir: Directory that receives the image; it is not created here.

    Returns:
        Path of the written file, as a string.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('TensorRT vs PyTorch 部署建议分析', fontsize=16, fontweight='bold')

    pytorch_data = df[df['test_mode'] == 'pytorch']
    tensorrt_data = df[df['test_mode'] == 'tensorrt']

    # 1-2. One FPS heatmap per backend.
    _draw_fps_heatmap(ax1, pytorch_data, 'Reds', 'PyTorch 性能热力图 (FPS)')
    _draw_fps_heatmap(ax2, tensorrt_data, 'Blues', 'TensorRT 性能热力图 (FPS)')

    # 3. Efficiency index per camera count.
    camera_counts = sorted(df['num_cameras'].unique())
    pytorch_efficiency = [
        _resource_efficiency(pytorch_data[pytorch_data['num_cameras'] == cameras])
        for cameras in camera_counts
    ]
    tensorrt_efficiency = [
        _resource_efficiency(tensorrt_data[tensorrt_data['num_cameras'] == cameras])
        for cameras in camera_counts
    ]
    ax3.plot(camera_counts, pytorch_efficiency, 'o-', color='#FF6B6B',
             label='PyTorch', linewidth=2, markersize=6)
    ax3.plot(camera_counts, tensorrt_efficiency, 'o-', color='#4ECDC4',
             label='TensorRT', linewidth=2, markersize=6)
    ax3.set_title('成本效益分析 (FPS/资源消耗)', fontweight='bold')
    ax3.set_xlabel('摄像头数量')
    ax3.set_ylabel('效率指数')
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # 4. Text recommendations.
    ax4.text(0.05, 0.95, '部署场景建议', fontsize=16, fontweight='bold',
             transform=ax4.transAxes)

    pytorch_max_fps = pytorch_data['actual_fps'].max()
    tensorrt_max_fps = tensorrt_data['actual_fps'].max()
    # Largest camera count at which a backend still reaches 5 FPS per camera;
    # NaN when no run qualifies.
    pytorch_max_cameras = pytorch_data[pytorch_data['per_camera_fps'] >= 5]['num_cameras'].max()
    tensorrt_max_cameras = tensorrt_data[tensorrt_data['per_camera_fps'] >= 5]['num_cameras'].max()
    half_pytorch_cameras = (
        'N/A' if pd.isna(pytorch_max_cameras) else pytorch_max_cameras // 2
    )

    recommendations = [
        '🎯 高性能场景 (推荐 TensorRT):',
        f' • 最大 FPS: {tensorrt_max_fps:.0f} (vs PyTorch {pytorch_max_fps:.0f})',
        f' • 最大摄像头数: {tensorrt_max_cameras} (vs PyTorch {pytorch_max_cameras})',
        ' • 适用: 生产环境、多路并发',
        '',
        '⚖️ 平衡场景 (可选 PyTorch):',
        f' • 摄像头数 ≤ {half_pytorch_cameras}',
        ' • 单路 FPS ≥ 10',
        ' • 适用: 开发测试、小规模部署',
        '',
        '🔋 资源受限场景:',
        ' • 320×320 分辨率',
        ' • 批次大小 4-8',
        ' • 适用: 边缘设备、功耗敏感',
        '',
        '📊 关键指标对比:',
        f' • TensorRT 性能提升: {_ratio_label(tensorrt_max_fps, pytorch_max_fps)}',
        f' • TensorRT 扩展性提升: {_ratio_label(tensorrt_max_cameras, pytorch_max_cameras)}',
        ' • 推荐阈值: >10路选TensorRT'
    ]
    # Choose a line pitch that keeps every line inside the axes; a fixed
    # 0.05 step would push the last lines of this list below y = 0.
    line_step = min(0.05, 0.85 / len(recommendations))
    for i, rec in enumerate(recommendations):
        ax4.text(0.05, 0.85 - i * line_step, rec, fontsize=10,
                 transform=ax4.transAxes)
    ax4.set_xlim(0, 1)
    ax4.set_ylim(0, 1)
    ax4.axis('off')

    fig.tight_layout()
    output_file = Path(output_dir) / "deployment_recommendations.png"
    # Save and close this specific figure rather than pyplot's "current" one.
    fig.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close(fig)
    return str(output_file)
def generate_comparison_charts(results_dir: str) -> List[str]:
    """Build every comparison chart for the results found in ``results_dir``.

    Loads the latest result file and runs the four chart builders in order
    (overview, scalability, detailed comparison, deployment advice), writing
    each image back into ``results_dir``.

    Args:
        results_dir: Directory holding the result JSON; also the output folder.

    Returns:
        Paths of the generated images in creation order. If any step raises,
        the error is printed and an empty list is returned.
    """
    try:
        df = load_comparison_results(results_dir)
        builders = (
            create_performance_overview,
            create_scalability_analysis,
            create_detailed_comparison,
            create_deployment_recommendations,
        )
        return [build(df, results_dir) for build in builders]
    except Exception as e:
        # Best-effort entry point: report the failure instead of propagating it.
        print(f"生成对比图表失败: {e}")
        return []
if __name__ == "__main__":
    import sys

    # The first command-line argument, when given, overrides the default
    # results directory.
    results_dir = sys.argv[1] if len(sys.argv) > 1 else "./comparison_results"
    chart_files = generate_comparison_charts(results_dir)

    if not chart_files:
        print("❌ 未生成任何图表")
    else:
        print(f"✅ 生成了 {len(chart_files)} 个对比图表:")
        for chart_path in chart_files:
            print(f" 📊 {chart_path}")