Files
Test_AI/benchmark/optimized_visualizer.py
2026-01-20 10:54:30 +08:00

364 lines
14 KiB
Python

"""
优化压力测试可视化模块
专门为原生 TensorRT 测试结果生成图表
"""
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Dict, Any
# Configure matplotlib fonts (falls back to DejaVu Sans when Arial is absent)
plt.rcParams['font.family'] = ['Arial', 'DejaVu Sans']
# Render the minus sign as an ASCII hyphen so it survives non-Unicode fonts
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['font.size'] = 10
def load_optimized_results(results_dir: str) -> pd.DataFrame:
    """Load the newest optimized benchmark result file as a DataFrame.

    Scans *results_dir* for result files in either of the two supported
    naming schemes and loads the most recently modified one.

    Args:
        results_dir: Directory containing the JSON result files.

    Returns:
        DataFrame built from the JSON payload (a list of result records).

    Raises:
        FileNotFoundError: when no matching result file exists.
    """
    base = Path(results_dir)
    # Both naming schemes are accepted (native TensorRT and Ultralytics runs).
    candidates = [
        *base.glob("optimized_results_*.json"),
        *base.glob("ultralytics_optimized_*.json"),
    ]
    if not candidates:
        raise FileNotFoundError("未找到优化测试结果文件")
    # Pick the newest file by modification time.
    newest = max(candidates, key=lambda p: p.stat().st_mtime)
    with open(newest, 'r', encoding='utf-8') as fh:
        payload = json.load(fh)
    return pd.DataFrame(payload)
def create_performance_comparison(df: pd.DataFrame, output_dir: str) -> str:
    """Render the 2x2 performance-comparison figure; return the saved PNG path.

    Panels:
      1. Max FPS, original (Ultralytics) vs optimized (native TensorRT).
      2. Max GPU utilization, original vs optimized, with a 70% target line.
      3. Batch size vs mean FPS (only when ``batch_size`` is present).
      4. Stream/thread count vs mean FPS (field name depends on backend).

    The "original" baseline numbers are hard-coded from the earlier test run.

    Args:
        df: Benchmark results; expected columns include ``resolution`` and
            ``actual_fps`` (others are optional and guarded).
        output_dir: Directory where the PNG is written.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('RTX 3050 Optimized Performance Analysis', fontsize=20, fontweight='bold')

    resolutions = [320, 480]

    def _max_by_resolution(column: str) -> list:
        # Per-resolution maximum of *column*; 0 when a resolution was not tested.
        values = []
        for res in resolutions:
            subset = df[df['resolution'] == res]
            values.append(subset[column].max() if len(subset) > 0 else 0)
        return values

    # --- 1. Max FPS comparison (original vs optimized) ---
    original_fps = [33.8, 33.9]  # baseline from the previous Ultralytics run
    optimized_fps = _max_by_resolution('actual_fps')

    x = np.arange(len(resolutions))
    width = 0.35
    ax1.bar(x - width / 2, original_fps, width, label='Original (Ultralytics)',
            color='#FF6B6B', alpha=0.8)
    ax1.bar(x + width / 2, optimized_fps, width, label='Optimized (Native TensorRT)',
            color='#4ECDC4', alpha=0.8)
    ax1.set_title('Max FPS Comparison', fontweight='bold')
    ax1.set_xlabel('Resolution')
    ax1.set_ylabel('FPS')
    ax1.set_xticks(x)
    ax1.set_xticklabels([f'{res}x{res}' for res in resolutions])
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    # Speed-up factor labels above each resolution pair.
    for i, (orig, opt) in enumerate(zip(original_fps, optimized_fps)):
        if orig > 0 and opt > 0:
            ax1.text(i, max(orig, opt) + 5, f'{opt / orig:.1f}x',
                     ha='center', va='bottom', fontweight='bold', color='green')

    # --- 2. GPU utilization comparison ---
    gpu_utils_orig = [30, 34]  # baseline GPU utilization from the previous run
    gpu_utils_opt = _max_by_resolution('gpu_utilization')
    ax2.bar(x - width / 2, gpu_utils_orig, width, label='Original',
            color='#FF6B6B', alpha=0.8)
    ax2.bar(x + width / 2, gpu_utils_opt, width, label='Optimized',
            color='#4ECDC4', alpha=0.8)
    # BUG FIX: the target line must be drawn BEFORE legend() is called,
    # otherwise its 'Target (70%)' label never appears in the legend.
    ax2.axhline(y=70, color='green', linestyle='--', alpha=0.7, label='Target (70%)')
    ax2.set_title('GPU Utilization Comparison', fontweight='bold')
    ax2.set_xlabel('Resolution')
    ax2.set_ylabel('GPU Utilization (%)')
    ax2.set_xticks(x)
    ax2.set_xticklabels([f'{res}x{res}' for res in resolutions])
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    ax2.set_ylim(0, 100)

    # --- 3. Batch size vs mean FPS ---
    if 'batch_size' in df.columns:
        batch_perf = df.groupby('batch_size')['actual_fps'].mean()
        ax3.plot(batch_perf.index, batch_perf.values, marker='o', linewidth=3,
                 markersize=8, color='#95E1D3')
        ax3.set_title('Batch Size vs Performance', fontweight='bold')
        ax3.set_xlabel('Batch Size')
        ax3.set_ylabel('Average FPS')
        ax3.grid(True, alpha=0.3)

    # --- 4. Stream/thread count vs mean FPS (field name differs per backend) ---
    parallel_field = 'num_streams' if 'num_streams' in df.columns else 'num_threads'
    if parallel_field in df.columns:
        parallel_perf = df.groupby(parallel_field)['actual_fps'].mean()
        ax4.plot(parallel_perf.index, parallel_perf.values, marker='s', linewidth=3,
                 markersize=8, color='#F38BA8')
        is_streams = parallel_field == 'num_streams'
        ax4.set_title('Number of Streams vs Performance' if is_streams
                      else 'Number of Threads vs Performance', fontweight='bold')
        ax4.set_xlabel('Number of CUDA Streams' if is_streams else 'Number of Threads')
        ax4.set_ylabel('Average FPS')
        ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    output_file = Path(output_dir) / "optimized_performance_comparison.png"
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close()
    return str(output_file)
def create_optimization_analysis(df: pd.DataFrame, output_dir: str) -> str:
    """Render the 2x2 TensorRT optimization-analysis figure; return the saved PNG path.

    Panels: (1) inference-latency histogram, (2) throughput vs GPU
    utilization scatter, (3) mean memory usage per batch size, (4) a text
    summary comparing the results against hard-coded baseline numbers.
    All panels are guarded on the presence of their columns in *df*.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('TensorRT Optimization Analysis', fontsize=16, fontweight='bold')
    # 1. Latency distribution histogram with a mean-marker line
    if 'avg_inference_time_ms' in df.columns:
        latencies = df['avg_inference_time_ms'].dropna()
        ax1.hist(latencies, bins=20, alpha=0.7, color='#95E1D3', edgecolor='black')
        ax1.axvline(latencies.mean(), color='red', linestyle='--', linewidth=2,
                    label=f'Mean: {latencies.mean():.2f}ms')
        ax1.set_title('Inference Latency Distribution', fontweight='bold')
        ax1.set_xlabel('Latency (ms)')
        ax1.set_ylabel('Frequency')
        ax1.legend()
        ax1.grid(True, alpha=0.3)
    # 2. Throughput vs GPU utilization, colored by batch size when available
    if 'gpu_utilization' in df.columns and 'actual_fps' in df.columns:
        ax2.scatter(df['gpu_utilization'], df['actual_fps'],
                    c=df['batch_size'] if 'batch_size' in df.columns else 'blue',
                    cmap='viridis', s=100, alpha=0.7)
        ax2.set_title('Throughput vs GPU Utilization', fontweight='bold')
        ax2.set_xlabel('GPU Utilization (%)')
        ax2.set_ylabel('Throughput (FPS)')
        ax2.grid(True, alpha=0.3)
        if 'batch_size' in df.columns:
            # ax2.collections[0] is the PathCollection created by the scatter above
            cbar = plt.colorbar(ax2.collections[0], ax=ax2)
            cbar.set_label('Batch Size')
    # 3. Memory usage analysis (mean per batch size when batch_size exists)
    if 'memory_used_mb' in df.columns:
        memory_by_batch = df.groupby('batch_size')['memory_used_mb'].mean() if 'batch_size' in df.columns else df['memory_used_mb']
        if isinstance(memory_by_batch, pd.Series) and len(memory_by_batch) > 1:
            ax3.bar(range(len(memory_by_batch)), memory_by_batch.values,
                    color='#FFEAA7', alpha=0.8)
            ax3.set_title('Memory Usage by Batch Size', fontweight='bold')
            ax3.set_xlabel('Batch Size')
            ax3.set_ylabel('Memory Usage (MB)')
            ax3.set_xticks(range(len(memory_by_batch)))
            ax3.set_xticklabels(memory_by_batch.index)
            # Reference line at total VRAM (8 GB = 8192 MB)
            ax3.axhline(y=8192, color='red', linestyle='--', alpha=0.7, label='Total VRAM (8GB)')
            ax3.legend()
        # NOTE(review): indentation was lost in this copy; this else is read as
        # belonging to the inner length check — confirm against the repository.
        else:
            ax3.text(0.5, 0.5, 'Insufficient data for memory analysis',
                     ha='center', va='center', transform=ax3.transAxes)
    # 4. Text-only summary of the optimization outcome
    ax4.text(0.05, 0.95, 'Optimization Results Summary',
             fontsize=16, fontweight='bold', transform=ax4.transAxes)
    # Improvement metrics relative to the hard-coded baseline run
    if len(df) > 0:
        max_fps = df['actual_fps'].max()
        max_gpu_util = df['gpu_utilization'].max()
        avg_latency = df['avg_inference_time_ms'].mean() if 'avg_inference_time_ms' in df.columns else 0
        # Baseline numbers from the earlier (pre-optimization) test run
        original_max_fps = 33.8
        original_gpu_util = 30
        fps_improvement = max_fps / original_max_fps if original_max_fps > 0 else 0
        gpu_improvement = max_gpu_util / original_gpu_util if original_gpu_util > 0 else 0
        summary_text = [
            f'🚀 Max FPS: {max_fps:.1f} (vs {original_max_fps:.1f})',
            f'📈 FPS Improvement: {fps_improvement:.1f}x',
            f'🔥 Max GPU Util: {max_gpu_util:.1f}% (vs {original_gpu_util}%)',
            f'📊 GPU Improvement: {gpu_improvement:.1f}x',
            f'⚡ Avg Latency: {avg_latency:.2f}ms',
            '',
            '✅ Optimization Success!' if fps_improvement > 2 else '⚠️ Needs More Optimization',
            f'Target: 70%+ GPU utilization',
            f'Achieved: {max_gpu_util:.1f}% GPU utilization'
        ]
        # Stack the summary lines top-down inside the axes
        for i, text in enumerate(summary_text):
            ax4.text(0.05, 0.85 - i*0.08, text, fontsize=12,
                     transform=ax4.transAxes)
    ax4.set_xlim(0, 1)
    ax4.set_ylim(0, 1)
    ax4.axis('off')
    plt.tight_layout()
    output_file = Path(output_dir) / "optimization_analysis.png"
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close()
    return str(output_file)
def create_deployment_recommendations(df: pd.DataFrame, output_dir: str) -> str:
    """Render the deployment-recommendations figure; return the saved PNG path.

    Left panel: batch-size x stream-count FPS heatmap (when both columns
    exist). Right panel: text recommendations derived from the best row.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
    fig.suptitle('Optimized Deployment Recommendations', fontsize=16, fontweight='bold')
    # 1. Optimal-configuration heatmap
    if 'batch_size' in df.columns and 'num_streams' in df.columns:
        # Mean FPS for each (batch_size, num_streams) combination
        pivot_data = df.pivot_table(
            values='actual_fps',
            index='batch_size',
            columns='num_streams',
            aggfunc='mean'
        )
        if not pivot_data.empty:
            im1 = ax1.imshow(pivot_data.values, cmap='RdYlGn', aspect='auto')
            ax1.set_title('Performance Heatmap (FPS)', fontweight='bold')
            ax1.set_xlabel('Number of Streams')
            ax1.set_ylabel('Batch Size')
            ax1.set_xticks(range(len(pivot_data.columns)))
            ax1.set_xticklabels(pivot_data.columns)
            ax1.set_yticks(range(len(pivot_data.index)))
            ax1.set_yticklabels(pivot_data.index)
            # Numeric FPS labels on each heatmap cell (skip missing combos)
            for i in range(len(pivot_data.index)):
                for j in range(len(pivot_data.columns)):
                    if not np.isnan(pivot_data.iloc[i, j]):
                        ax1.text(j, i, f'{pivot_data.iloc[i, j]:.1f}',
                                 ha="center", va="center", color="black", fontweight='bold')
            plt.colorbar(im1, ax=ax1, label='FPS')
    # 2. Recommended configurations (text panel)
    ax2.text(0.05, 0.95, 'Recommended Configurations',
             fontsize=16, fontweight='bold', transform=ax2.transAxes)
    # Derive recommendations from the measured results
    if len(df) > 0:
        # Row with the single highest FPS drives all recommendations
        best_config = df.loc[df['actual_fps'].idxmax()]
        recommendations = [
            '🏆 Best Performance Configuration:',
            f'   • Resolution: {best_config["resolution"]}x{best_config["resolution"]}',
            f'   • Batch Size: {best_config.get("batch_size", "N/A")}',
            f'   • Streams: {best_config.get("num_streams", "N/A")}',
            f'   • Performance: {best_config["actual_fps"]:.1f} FPS',
            f'   • GPU Util: {best_config["gpu_utilization"]:.1f}%',
            '',
            '💡 Deployment Scenarios:',
            '',
            '🎯 High Throughput (Max FPS):',
            f'   • Use batch size 16-32',
            f'   • Use 4-8 CUDA streams',
            f'   • Expected: {best_config["actual_fps"]:.0f}+ FPS',
            '',
            '⚖️ Balanced (GPU ~70%):',
            f'   • Use batch size 8-16',
            f'   • Use 2-4 CUDA streams',
            f'   • Expected: {best_config["actual_fps"]*0.7:.0f} FPS',
            '',
            '🔋 Power Efficient (GPU ~50%):',
            f'   • Use batch size 4-8',
            f'   • Use 2 CUDA streams',
            f'   • Expected: {best_config["actual_fps"]*0.5:.0f} FPS'
        ]
        # Stack the recommendation lines top-down inside the axes
        for i, rec in enumerate(recommendations):
            ax2.text(0.05, 0.85 - i*0.04, rec, fontsize=10,
                     transform=ax2.transAxes)
    ax2.set_xlim(0, 1)
    ax2.set_ylim(0, 1)
    ax2.axis('off')
    plt.tight_layout()
    output_file = Path(output_dir) / "deployment_recommendations.png"
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close()
    return str(output_file)
def generate_optimized_charts(results_dir: str) -> List[str]:
    """Generate all charts for the optimized test run.

    Loads the newest results from *results_dir* and renders each chart into
    the same directory.

    Returns:
        Paths of the generated images, or an empty list when loading or
        rendering fails (the error is printed, not raised).
    """
    try:
        results = load_optimized_results(results_dir)
        builders = (
            create_performance_comparison,      # 1. performance comparison
            create_optimization_analysis,       # 2. optimization analysis
            create_deployment_recommendations,  # 3. deployment recommendations
        )
        return [build(results, results_dir) for build in builders]
    except Exception as exc:
        # Best-effort boundary: report and return an empty list.
        print(f"生成图表失败: {exc}")
        return []
if __name__ == "__main__":
    import sys

    # Results directory comes from argv[1]; otherwise use the default folder.
    target_dir = sys.argv[1] if len(sys.argv) > 1 else "./optimized_stress_results"
    charts = generate_optimized_charts(target_dir)
    if not charts:
        print("❌ 未生成任何图表")
    else:
        print(f"✅ 生成了 {len(charts)} 个图表:")
        for chart in charts:
            print(f" 📊 {chart}")