Files
Test_AI/benchmark/optimized_visualizer.py

364 lines
14 KiB
Python
Raw Normal View History

2026-01-20 10:54:30 +08:00
"""
优化压力测试可视化模块
专门为原生 TensorRT 测试结果生成图表
"""
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Dict, Any
# Configure matplotlib defaults for all charts in this module.
plt.rcParams['font.family'] = ['Arial', 'DejaVu Sans']  # preferred font, with a fallback
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly with non-default fonts
plt.rcParams['font.size'] = 10
def load_optimized_results(results_dir: str) -> pd.DataFrame:
    """Load the most recent optimized benchmark results as a DataFrame.

    Scans *results_dir* for result files in either of the two supported
    naming schemes and parses the newest one by file modification time.

    Args:
        results_dir: Directory containing the JSON result files.

    Returns:
        A pandas DataFrame built from the parsed JSON payload.

    Raises:
        FileNotFoundError: If no matching result file exists.
    """
    base = Path(results_dir)
    # Both naming schemes produced by the two benchmark runners.
    candidates = [
        *base.glob("optimized_results_*.json"),
        *base.glob("ultralytics_optimized_*.json"),
    ]
    if not candidates:
        raise FileNotFoundError("未找到优化测试结果文件")
    newest = max(candidates, key=lambda p: p.stat().st_mtime)
    payload = json.loads(newest.read_text(encoding='utf-8'))
    return pd.DataFrame(payload)
def create_performance_comparison(df: pd.DataFrame, output_dir: str) -> str:
    """Create the 2x2 performance-comparison figure and save it as a PNG.

    Panels: max FPS before/after, GPU utilization before/after,
    batch size vs FPS, and parallelism (streams/threads) vs FPS.

    Args:
        df: Benchmark results; expected columns include 'resolution',
            'actual_fps', 'gpu_utilization' and optionally 'batch_size'
            plus 'num_streams' or 'num_threads'.
        output_dir: Directory the PNG is written into.

    Returns:
        Path (as str) of the saved chart file.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('RTX 3050 Optimized Performance Analysis', fontsize=20, fontweight='bold')

    # 1. Max FPS: baseline (Ultralytics) vs optimized (native TensorRT).
    resolutions = [320, 480]
    original_fps = [33.8, 33.9]  # baseline numbers from the earlier test run
    optimized_fps = []
    for res in resolutions:
        res_data = df[df['resolution'] == res]
        optimized_fps.append(res_data['actual_fps'].max() if len(res_data) > 0 else 0)

    x = np.arange(len(resolutions))
    width = 0.35
    ax1.bar(x - width/2, original_fps, width, label='Original (Ultralytics)',
            color='#FF6B6B', alpha=0.8)
    ax1.bar(x + width/2, optimized_fps, width, label='Optimized (Native TensorRT)',
            color='#4ECDC4', alpha=0.8)
    ax1.set_title('Max FPS Comparison', fontweight='bold')
    ax1.set_xlabel('Resolution')
    ax1.set_ylabel('FPS')
    ax1.set_xticks(x)
    ax1.set_xticklabels([f'{res}x{res}' for res in resolutions])
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    # Annotate each resolution with its speed-up factor.
    for i, (orig, opt) in enumerate(zip(original_fps, optimized_fps)):
        if orig > 0 and opt > 0:
            improvement = opt / orig
            ax1.text(i, max(orig, opt) + 5, f'{improvement:.1f}x',
                     ha='center', va='bottom', fontweight='bold', color='green')

    # 2. GPU utilization: baseline vs optimized.
    gpu_utils_orig = [30, 34]  # baseline GPU utilization from the earlier run
    gpu_utils_opt = []
    for res in resolutions:
        res_data = df[df['resolution'] == res]
        gpu_utils_opt.append(res_data['gpu_utilization'].max() if len(res_data) > 0 else 0)
    ax2.bar(x - width/2, gpu_utils_orig, width, label='Original',
            color='#FF6B6B', alpha=0.8)
    ax2.bar(x + width/2, gpu_utils_opt, width, label='Optimized',
            color='#4ECDC4', alpha=0.8)
    # Fix: draw the target line BEFORE legend() so its label actually
    # appears in the legend (the original added it afterwards, so the
    # 'Target (70%)' entry was silently dropped).
    ax2.axhline(y=70, color='green', linestyle='--', alpha=0.7, label='Target (70%)')
    ax2.set_title('GPU Utilization Comparison', fontweight='bold')
    ax2.set_xlabel('Resolution')
    ax2.set_ylabel('GPU Utilization (%)')
    ax2.set_xticks(x)
    ax2.set_xticklabels([f'{res}x{res}' for res in resolutions])
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    ax2.set_ylim(0, 100)

    # 3. Batch size vs mean FPS (panel left empty when the column is absent).
    if 'batch_size' in df.columns:
        batch_perf = df.groupby('batch_size')['actual_fps'].mean()
        ax3.plot(batch_perf.index, batch_perf.values, marker='o', linewidth=3,
                 markersize=8, color='#95E1D3')
        ax3.set_title('Batch Size vs Performance', fontweight='bold')
        ax3.set_xlabel('Batch Size')
        ax3.set_ylabel('Average FPS')
        ax3.grid(True, alpha=0.3)

    # 4. Parallelism vs mean FPS; supports both field names.
    parallel_field = 'num_streams' if 'num_streams' in df.columns else 'num_threads'
    if parallel_field in df.columns:
        parallel_perf = df.groupby(parallel_field)['actual_fps'].mean()
        ax4.plot(parallel_perf.index, parallel_perf.values, marker='s', linewidth=3,
                 markersize=8, color='#F38BA8')
        is_streams = parallel_field == 'num_streams'
        title = 'Number of Streams vs Performance' if is_streams else 'Number of Threads vs Performance'
        xlabel = 'Number of CUDA Streams' if is_streams else 'Number of Threads'
        ax4.set_title(title, fontweight='bold')
        ax4.set_xlabel(xlabel)
        ax4.set_ylabel('Average FPS')
        ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    output_file = Path(output_dir) / "optimized_performance_comparison.png"
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close()
    return str(output_file)
def create_optimization_analysis(df: pd.DataFrame, output_dir: str) -> str:
    """Create the 2x2 optimization-analysis figure and save it as a PNG.

    Panels: inference-latency histogram, throughput vs GPU-utilization
    scatter, memory usage by batch size, and a text summary comparing
    against the original (unoptimized) baseline.

    Args:
        df: Benchmark results. Every panel guards its own columns, so a
            sparse result file only blanks out the affected panel.
        output_dir: Directory the PNG is written into.

    Returns:
        Path (as str) of the saved chart file.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('TensorRT Optimization Analysis', fontsize=16, fontweight='bold')

    # 1. Inference latency distribution.
    if 'avg_inference_time_ms' in df.columns:
        latencies = df['avg_inference_time_ms'].dropna()
        ax1.hist(latencies, bins=20, alpha=0.7, color='#95E1D3', edgecolor='black')
        ax1.axvline(latencies.mean(), color='red', linestyle='--', linewidth=2,
                    label=f'Mean: {latencies.mean():.2f}ms')
        ax1.set_title('Inference Latency Distribution', fontweight='bold')
        ax1.set_xlabel('Latency (ms)')
        ax1.set_ylabel('Frequency')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

    # 2. Throughput vs GPU utilization (points colored by batch size when known).
    if 'gpu_utilization' in df.columns and 'actual_fps' in df.columns:
        ax2.scatter(df['gpu_utilization'], df['actual_fps'],
                    c=df['batch_size'] if 'batch_size' in df.columns else 'blue',
                    cmap='viridis', s=100, alpha=0.7)
        ax2.set_title('Throughput vs GPU Utilization', fontweight='bold')
        ax2.set_xlabel('GPU Utilization (%)')
        ax2.set_ylabel('Throughput (FPS)')
        ax2.grid(True, alpha=0.3)
        if 'batch_size' in df.columns:
            cbar = plt.colorbar(ax2.collections[0], ax=ax2)
            cbar.set_label('Batch Size')

    # 3. Memory usage, aggregated by batch size when that column exists.
    if 'memory_used_mb' in df.columns:
        memory_by_batch = df.groupby('batch_size')['memory_used_mb'].mean() if 'batch_size' in df.columns else df['memory_used_mb']
        if isinstance(memory_by_batch, pd.Series) and len(memory_by_batch) > 1:
            ax3.bar(range(len(memory_by_batch)), memory_by_batch.values,
                    color='#FFEAA7', alpha=0.8)
            ax3.set_title('Memory Usage by Batch Size', fontweight='bold')
            ax3.set_xlabel('Batch Size')
            ax3.set_ylabel('Memory Usage (MB)')
            ax3.set_xticks(range(len(memory_by_batch)))
            ax3.set_xticklabels(memory_by_batch.index)
            # Reference line at the card's total VRAM.
            ax3.axhline(y=8192, color='red', linestyle='--', alpha=0.7, label='Total VRAM (8GB)')
            ax3.legend()
        else:
            ax3.text(0.5, 0.5, 'Insufficient data for memory analysis',
                     ha='center', va='center', transform=ax3.transAxes)

    # 4. Text summary of the optimization results.
    ax4.text(0.05, 0.95, 'Optimization Results Summary',
             fontsize=16, fontweight='bold', transform=ax4.transAxes)
    if len(df) > 0:
        # Fix: guard every column like the panels above do — the original
        # indexed 'actual_fps' and 'gpu_utilization' unconditionally and
        # raised KeyError on result files missing either column.
        max_fps = df['actual_fps'].max() if 'actual_fps' in df.columns else 0
        max_gpu_util = df['gpu_utilization'].max() if 'gpu_utilization' in df.columns else 0
        avg_latency = df['avg_inference_time_ms'].mean() if 'avg_inference_time_ms' in df.columns else 0
        # Baseline figures from the earlier (unoptimized) test run.
        original_max_fps = 33.8
        original_gpu_util = 30
        fps_improvement = max_fps / original_max_fps if original_max_fps > 0 else 0
        gpu_improvement = max_gpu_util / original_gpu_util if original_gpu_util > 0 else 0
        summary_text = [
            f'🚀 Max FPS: {max_fps:.1f} (vs {original_max_fps:.1f})',
            f'📈 FPS Improvement: {fps_improvement:.1f}x',
            f'🔥 Max GPU Util: {max_gpu_util:.1f}% (vs {original_gpu_util}%)',
            f'📊 GPU Improvement: {gpu_improvement:.1f}x',
            f'⚡ Avg Latency: {avg_latency:.2f}ms',
            '',
            '✅ Optimization Success!' if fps_improvement > 2 else '⚠️ Needs More Optimization',
            'Target: 70%+ GPU utilization',
            f'Achieved: {max_gpu_util:.1f}% GPU utilization'
        ]
        for i, text in enumerate(summary_text):
            ax4.text(0.05, 0.85 - i*0.08, text, fontsize=12,
                     transform=ax4.transAxes)
    ax4.set_xlim(0, 1)
    ax4.set_ylim(0, 1)
    ax4.axis('off')

    plt.tight_layout()
    output_file = Path(output_dir) / "optimization_analysis.png"
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close()
    return str(output_file)
def create_deployment_recommendations(df: pd.DataFrame, output_dir: str) -> str:
    """Create the deployment-recommendation figure and save it as a PNG.

    Left panel: batch-size x stream-count FPS heatmap. Right panel: text
    recommendations derived from the best-performing run.

    Args:
        df: Benchmark results. The heatmap needs 'batch_size' and
            'num_streams'; the recommendations need 'actual_fps'.
        output_dir: Directory the PNG is written into.

    Returns:
        Path (as str) of the saved chart file.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
    fig.suptitle('Optimized Deployment Recommendations', fontsize=16, fontweight='bold')

    # 1. Heatmap of mean FPS per (batch_size, num_streams) configuration.
    if 'batch_size' in df.columns and 'num_streams' in df.columns:
        pivot_data = df.pivot_table(
            values='actual_fps',
            index='batch_size',
            columns='num_streams',
            aggfunc='mean'
        )
        if not pivot_data.empty:
            im1 = ax1.imshow(pivot_data.values, cmap='RdYlGn', aspect='auto')
            ax1.set_title('Performance Heatmap (FPS)', fontweight='bold')
            ax1.set_xlabel('Number of Streams')
            ax1.set_ylabel('Batch Size')
            ax1.set_xticks(range(len(pivot_data.columns)))
            ax1.set_xticklabels(pivot_data.columns)
            ax1.set_yticks(range(len(pivot_data.index)))
            ax1.set_yticklabels(pivot_data.index)
            # Numeric label inside each populated cell.
            for i in range(len(pivot_data.index)):
                for j in range(len(pivot_data.columns)):
                    if not np.isnan(pivot_data.iloc[i, j]):
                        ax1.text(j, i, f'{pivot_data.iloc[i, j]:.1f}',
                                 ha="center", va="center", color="black", fontweight='bold')
            plt.colorbar(im1, ax=ax1, label='FPS')

    # 2. Text recommendations based on the best observed configuration.
    ax2.text(0.05, 0.95, 'Recommended Configurations',
             fontsize=16, fontweight='bold', transform=ax2.transAxes)
    # Fix: guard the 'actual_fps' column — the original called
    # df['actual_fps'].idxmax() unconditionally, which raised KeyError
    # on result files lacking that column (every other accessor in this
    # module guards its columns).
    if len(df) > 0 and 'actual_fps' in df.columns:
        best_config = df.loc[df['actual_fps'].idxmax()]
        recommendations = [
            '🏆 Best Performance Configuration:',
            f' • Resolution: {best_config["resolution"]}x{best_config["resolution"]}',
            f' • Batch Size: {best_config.get("batch_size", "N/A")}',
            f' • Streams: {best_config.get("num_streams", "N/A")}',
            f' • Performance: {best_config["actual_fps"]:.1f} FPS',
            f' • GPU Util: {best_config["gpu_utilization"]:.1f}%',
            '',
            '💡 Deployment Scenarios:',
            '',
            '🎯 High Throughput (Max FPS):',
            ' • Use batch size 16-32',
            ' • Use 4-8 CUDA streams',
            f' • Expected: {best_config["actual_fps"]:.0f}+ FPS',
            '',
            '⚖️ Balanced (GPU ~70%):',
            ' • Use batch size 8-16',
            ' • Use 2-4 CUDA streams',
            f' • Expected: {best_config["actual_fps"]*0.7:.0f} FPS',
            '',
            '🔋 Power Efficient (GPU ~50%):',
            ' • Use batch size 4-8',
            ' • Use 2 CUDA streams',
            f' • Expected: {best_config["actual_fps"]*0.5:.0f} FPS'
        ]
        for i, rec in enumerate(recommendations):
            ax2.text(0.05, 0.85 - i*0.04, rec, fontsize=10,
                     transform=ax2.transAxes)
    ax2.set_xlim(0, 1)
    ax2.set_ylim(0, 1)
    ax2.axis('off')

    plt.tight_layout()
    output_file = Path(output_dir) / "deployment_recommendations.png"
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close()
    return str(output_file)
def generate_optimized_charts(results_dir: str) -> List[str]:
    """Generate every chart for an optimized benchmark run.

    Best-effort: any failure (missing result file, malformed data,
    plotting error) is reported to stdout and an empty list is returned
    instead of propagating the exception.

    Args:
        results_dir: Directory holding the JSON results; charts are
            written back into the same directory.

    Returns:
        Paths of the generated chart files, or an empty list on failure.
    """
    try:
        results = load_optimized_results(results_dir)
        builders = (
            create_performance_comparison,      # before/after comparison
            create_optimization_analysis,       # latency / utilization analysis
            create_deployment_recommendations,  # deployment guidance
        )
        return [build(results, results_dir) for build in builders]
    except Exception as e:
        print(f"生成图表失败: {e}")
        return []
if __name__ == "__main__":
    import sys

    # Optional CLI argument: the results directory (defaults to the
    # benchmark's standard output location).
    results_dir = sys.argv[1] if len(sys.argv) > 1 else "./optimized_stress_results"
    chart_files = generate_optimized_charts(results_dir)
    if not chart_files:
        print("❌ 未生成任何图表")
    else:
        print(f"✅ 生成了 {len(chart_files)} 个图表:")
        for file in chart_files:
            print(f" 📊 {file}")