"""
优化压力测试可视化模块

专门为原生 TensorRT 测试结果生成图表
"""
|
|
|
|
import json
from pathlib import Path
from typing import Any, Dict, List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
|
|
|
# Configure Matplotlib defaults for the generated charts:
# a font stack with wide glyph coverage, proper minus-sign rendering,
# and a compact base font size.
plt.rcParams.update({
    'font.family': ['Arial', 'DejaVu Sans'],
    'axes.unicode_minus': False,
    'font.size': 10,
})
|
|
|
|
|
|
def load_optimized_results(results_dir: str) -> pd.DataFrame:
    """Load the optimized stress-test results into a DataFrame.

    Searches ``results_dir`` for result JSON files in either of the two
    supported naming schemes and loads the most recently modified one.

    Args:
        results_dir: Directory containing the result JSON files.

    Returns:
        DataFrame with one row per test record.

    Raises:
        FileNotFoundError: If no matching result file exists in the directory.
    """
    results_path = Path(results_dir)

    # Two writers produce results, each with its own filename prefix:
    # the native-TensorRT runner and the Ultralytics runner.
    json_files = list(results_path.glob("optimized_results_*.json")) + \
                 list(results_path.glob("ultralytics_optimized_*.json"))

    if not json_files:
        # Include the searched directory so the failure is diagnosable.
        raise FileNotFoundError(f"未找到优化测试结果文件: {results_path}")

    # Pick the newest file by modification time.
    latest_file = max(json_files, key=lambda x: x.stat().st_mtime)

    with open(latest_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    return pd.DataFrame(data)
|
|
|
|
|
|
def create_performance_comparison(df: pd.DataFrame, output_dir: str) -> str:
    """Create the 2x2 performance-comparison figure.

    Panels: max-FPS comparison (baseline vs. optimized), GPU-utilization
    comparison, batch size vs. performance, and stream/thread count vs.
    performance.

    Args:
        df: Test results; expects 'resolution', 'actual_fps' and
            'gpu_utilization' columns, with optional 'batch_size' and
            'num_streams' / 'num_threads'.
        output_dir: Directory where the PNG is written.

    Returns:
        Path of the saved chart as a string.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('RTX 3050 Optimized Performance Analysis', fontsize=20, fontweight='bold')

    # 1. Max FPS comparison (before vs. after optimization)
    resolutions = [320, 480]
    original_fps = [33.8, 33.9]  # baseline numbers from the earlier run

    # Best observed FPS per resolution from the new results
    # (0 when a resolution was not tested).
    optimized_fps = []
    for res in resolutions:
        res_data = df[df['resolution'] == res]
        if len(res_data) > 0:
            optimized_fps.append(res_data['actual_fps'].max())
        else:
            optimized_fps.append(0)

    x = np.arange(len(resolutions))
    width = 0.35

    ax1.bar(x - width/2, original_fps, width, label='Original (Ultralytics)',
            color='#FF6B6B', alpha=0.8)
    ax1.bar(x + width/2, optimized_fps, width, label='Optimized (Native TensorRT)',
            color='#4ECDC4', alpha=0.8)

    ax1.set_title('Max FPS Comparison', fontweight='bold')
    ax1.set_xlabel('Resolution')
    ax1.set_ylabel('FPS')
    ax1.set_xticks(x)
    ax1.set_xticklabels([f'{res}x{res}' for res in resolutions])
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Annotate each pair of bars with the speed-up factor.
    for i, (orig, opt) in enumerate(zip(original_fps, optimized_fps)):
        if orig > 0 and opt > 0:
            improvement = opt / orig
            ax1.text(i, max(orig, opt) + 5, f'{improvement:.1f}x',
                     ha='center', va='bottom', fontweight='bold', color='green')

    # 2. GPU utilization comparison
    gpu_utils_orig = [30, 34]  # utilization measured in the baseline run
    gpu_utils_opt = []
    for res in resolutions:
        res_data = df[df['resolution'] == res]
        if len(res_data) > 0:
            gpu_utils_opt.append(res_data['gpu_utilization'].max())
        else:
            gpu_utils_opt.append(0)

    ax2.bar(x - width/2, gpu_utils_orig, width, label='Original',
            color='#FF6B6B', alpha=0.8)
    ax2.bar(x + width/2, gpu_utils_opt, width, label='Optimized',
            color='#4ECDC4', alpha=0.8)

    # BUGFIX: the target line must be drawn BEFORE legend() is called,
    # otherwise its 'Target (70%)' label never appears in the legend.
    ax2.axhline(y=70, color='green', linestyle='--', alpha=0.7, label='Target (70%)')

    ax2.set_title('GPU Utilization Comparison', fontweight='bold')
    ax2.set_xlabel('Resolution')
    ax2.set_ylabel('GPU Utilization (%)')
    ax2.set_xticks(x)
    ax2.set_xticklabels([f'{res}x{res}' for res in resolutions])
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    ax2.set_ylim(0, 100)

    # 3. Batch size vs. performance (column may be absent in older results)
    if 'batch_size' in df.columns:
        batch_perf = df.groupby('batch_size')['actual_fps'].mean()

        ax3.plot(batch_perf.index, batch_perf.values, marker='o', linewidth=3,
                 markersize=8, color='#95E1D3')
        ax3.set_title('Batch Size vs Performance', fontweight='bold')
        ax3.set_xlabel('Batch Size')
        ax3.set_ylabel('Average FPS')
        ax3.grid(True, alpha=0.3)

    # 4. Stream count / thread count vs. performance (two possible field names)
    parallel_field = 'num_streams' if 'num_streams' in df.columns else 'num_threads'
    if parallel_field in df.columns:
        parallel_perf = df.groupby(parallel_field)['actual_fps'].mean()

        ax4.plot(parallel_perf.index, parallel_perf.values, marker='s', linewidth=3,
                 markersize=8, color='#F38BA8')

        title = 'Number of Streams vs Performance' if parallel_field == 'num_streams' else 'Number of Threads vs Performance'
        xlabel = 'Number of CUDA Streams' if parallel_field == 'num_streams' else 'Number of Threads'

        ax4.set_title(title, fontweight='bold')
        ax4.set_xlabel(xlabel)
        ax4.set_ylabel('Average FPS')
        ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    output_file = Path(output_dir) / "optimized_performance_comparison.png"
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close()

    return str(output_file)
|
|
|
|
|
|
def create_optimization_analysis(df: pd.DataFrame, output_dir: str) -> str:
    """Create the TensorRT optimization-analysis figure.

    Panels: inference-latency histogram, throughput vs. GPU-utilization
    scatter, memory usage by batch size, and a textual results summary.

    Args:
        df: Test results; all columns are optional and each panel is
            skipped (or degrades) when its columns are missing.
        output_dir: Directory where the PNG is written.

    Returns:
        Path of the saved chart as a string.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('TensorRT Optimization Analysis', fontsize=16, fontweight='bold')

    # 1. Inference latency distribution
    if 'avg_inference_time_ms' in df.columns:
        latencies = df['avg_inference_time_ms'].dropna()

        ax1.hist(latencies, bins=20, alpha=0.7, color='#95E1D3', edgecolor='black')
        ax1.axvline(latencies.mean(), color='red', linestyle='--', linewidth=2,
                    label=f'Mean: {latencies.mean():.2f}ms')
        ax1.set_title('Inference Latency Distribution', fontweight='bold')
        ax1.set_xlabel('Latency (ms)')
        ax1.set_ylabel('Frequency')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

    # 2. Throughput vs. GPU utilization (colored by batch size when available)
    if 'gpu_utilization' in df.columns and 'actual_fps' in df.columns:
        ax2.scatter(df['gpu_utilization'], df['actual_fps'],
                    c=df['batch_size'] if 'batch_size' in df.columns else 'blue',
                    cmap='viridis', s=100, alpha=0.7)
        ax2.set_title('Throughput vs GPU Utilization', fontweight='bold')
        ax2.set_xlabel('GPU Utilization (%)')
        ax2.set_ylabel('Throughput (FPS)')
        ax2.grid(True, alpha=0.3)

        if 'batch_size' in df.columns:
            cbar = plt.colorbar(ax2.collections[0], ax=ax2)
            cbar.set_label('Batch Size')

    # 3. Memory usage analysis
    if 'memory_used_mb' in df.columns:
        memory_by_batch = df.groupby('batch_size')['memory_used_mb'].mean() if 'batch_size' in df.columns else df['memory_used_mb']

        if isinstance(memory_by_batch, pd.Series) and len(memory_by_batch) > 1:
            ax3.bar(range(len(memory_by_batch)), memory_by_batch.values,
                    color='#FFEAA7', alpha=0.8)
            ax3.set_title('Memory Usage by Batch Size', fontweight='bold')
            ax3.set_xlabel('Batch Size')
            ax3.set_ylabel('Memory Usage (MB)')
            ax3.set_xticks(range(len(memory_by_batch)))
            ax3.set_xticklabels(memory_by_batch.index)

            # Reference line showing the card's total VRAM.
            ax3.axhline(y=8192, color='red', linestyle='--', alpha=0.7, label='Total VRAM (8GB)')
            ax3.legend()
        else:
            ax3.text(0.5, 0.5, 'Insufficient data for memory analysis',
                     ha='center', va='center', transform=ax3.transAxes)

    # 4. Optimization results summary (text-only panel)
    ax4.text(0.05, 0.95, 'Optimization Results Summary',
             fontsize=16, fontweight='bold', transform=ax4.transAxes)

    # Compute improvement metrics over the baseline run.
    if len(df) > 0:
        # Guard every column access so partial result files still yield a
        # summary instead of raising KeyError (consistent with the other
        # panels, which all check column presence first).
        max_fps = df['actual_fps'].max() if 'actual_fps' in df.columns else 0
        max_gpu_util = df['gpu_utilization'].max() if 'gpu_utilization' in df.columns else 0
        avg_latency = df['avg_inference_time_ms'].mean() if 'avg_inference_time_ms' in df.columns else 0

        # Baseline figures from the earlier (non-optimized) run.
        original_max_fps = 33.8
        original_gpu_util = 30

        fps_improvement = max_fps / original_max_fps if original_max_fps > 0 else 0
        gpu_improvement = max_gpu_util / original_gpu_util if original_gpu_util > 0 else 0

        summary_text = [
            f'🚀 Max FPS: {max_fps:.1f} (vs {original_max_fps:.1f})',
            f'📈 FPS Improvement: {fps_improvement:.1f}x',
            f'🔥 Max GPU Util: {max_gpu_util:.1f}% (vs {original_gpu_util}%)',
            f'📊 GPU Improvement: {gpu_improvement:.1f}x',
            f'⚡ Avg Latency: {avg_latency:.2f}ms',
            '',
            '✅ Optimization Success!' if fps_improvement > 2 else '⚠️ Needs More Optimization',
            f'Target: 70%+ GPU utilization',
            f'Achieved: {max_gpu_util:.1f}% GPU utilization'
        ]

        for i, text in enumerate(summary_text):
            ax4.text(0.05, 0.85 - i*0.08, text, fontsize=12,
                     transform=ax4.transAxes)

    ax4.set_xlim(0, 1)
    ax4.set_ylim(0, 1)
    ax4.axis('off')

    plt.tight_layout()
    output_file = Path(output_dir) / "optimization_analysis.png"
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close()

    return str(output_file)
|
|
|
|
|
|
def create_deployment_recommendations(df: pd.DataFrame, output_dir: str) -> str:
    """Create the deployment-recommendations figure.

    Left panel: heatmap of mean FPS per (batch size, stream count)
    configuration. Right panel: textual recommendations derived from the
    best-performing test row.

    Args:
        df: Test results; expects 'actual_fps', 'resolution' and
            'gpu_utilization' columns, plus 'batch_size'/'num_streams'
            to enable the heatmap.
        output_dir: Directory where the PNG is written.

    Returns:
        Path of the saved chart as a string.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
    fig.suptitle('Optimized Deployment Recommendations', fontsize=16, fontweight='bold')

    # 1. Optimal-configuration heatmap (only when both axes are available)
    if 'batch_size' in df.columns and 'num_streams' in df.columns:
        # Build a configuration -> mean-FPS matrix.
        pivot_data = df.pivot_table(
            values='actual_fps',
            index='batch_size',
            columns='num_streams',
            aggfunc='mean'
        )

        if not pivot_data.empty:
            im1 = ax1.imshow(pivot_data.values, cmap='RdYlGn', aspect='auto')
            ax1.set_title('Performance Heatmap (FPS)', fontweight='bold')
            ax1.set_xlabel('Number of Streams')
            ax1.set_ylabel('Batch Size')
            ax1.set_xticks(range(len(pivot_data.columns)))
            ax1.set_xticklabels(pivot_data.columns)
            ax1.set_yticks(range(len(pivot_data.index)))
            ax1.set_yticklabels(pivot_data.index)

            # Overlay the FPS value on every populated cell.
            for i in range(len(pivot_data.index)):
                for j in range(len(pivot_data.columns)):
                    if not np.isnan(pivot_data.iloc[i, j]):
                        ax1.text(j, i, f'{pivot_data.iloc[i, j]:.1f}',
                                 ha="center", va="center", color="black", fontweight='bold')

            plt.colorbar(im1, ax=ax1, label='FPS')

    # 2. Recommended configurations (text-only panel)
    ax2.text(0.05, 0.95, 'Recommended Configurations',
             fontsize=16, fontweight='bold', transform=ax2.transAxes)

    # Derive recommendations from the measured results.
    if len(df) > 0:
        # Row with the highest observed FPS.
        best_config = df.loc[df['actual_fps'].idxmax()]

        recommendations = [
            '🏆 Best Performance Configuration:',
            f'   • Resolution: {best_config["resolution"]}x{best_config["resolution"]}',
            f'   • Batch Size: {best_config.get("batch_size", "N/A")}',
            f'   • Streams: {best_config.get("num_streams", "N/A")}',
            f'   • Performance: {best_config["actual_fps"]:.1f} FPS',
            f'   • GPU Util: {best_config["gpu_utilization"]:.1f}%',
            '',
            '💡 Deployment Scenarios:',
            '',
            '🎯 High Throughput (Max FPS):',
            f'   • Use batch size 16-32',
            f'   • Use 4-8 CUDA streams',
            f'   • Expected: {best_config["actual_fps"]:.0f}+ FPS',
            '',
            '⚖️ Balanced (GPU ~70%):',
            f'   • Use batch size 8-16',
            f'   • Use 2-4 CUDA streams',
            f'   • Expected: {best_config["actual_fps"]*0.7:.0f} FPS',
            '',
            '🔋 Power Efficient (GPU ~50%):',
            f'   • Use batch size 4-8',
            f'   • Use 2 CUDA streams',
            f'   • Expected: {best_config["actual_fps"]*0.5:.0f} FPS'
        ]

        for i, rec in enumerate(recommendations):
            ax2.text(0.05, 0.85 - i*0.04, rec, fontsize=10,
                     transform=ax2.transAxes)

    ax2.set_xlim(0, 1)
    ax2.set_ylim(0, 1)
    ax2.axis('off')

    plt.tight_layout()
    output_file = Path(output_dir) / "deployment_recommendations.png"
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close()

    return str(output_file)
|
|
|
|
|
|
def generate_optimized_charts(results_dir: str) -> List[str]:
    """Generate every chart for the optimized test results.

    Loads the latest result file from ``results_dir`` and renders the
    performance-comparison, optimization-analysis and
    deployment-recommendation figures into the same directory.

    Args:
        results_dir: Directory holding the result JSON files; charts are
            written alongside them.

    Returns:
        Paths of the generated chart files, or an empty list on failure.
    """
    # Chart builders, in the order the charts should be produced.
    chart_builders = (
        create_performance_comparison,
        create_optimization_analysis,
        create_deployment_recommendations,
    )

    try:
        data = load_optimized_results(results_dir)
        return [build(data, results_dir) for build in chart_builders]
    except Exception as e:
        # Best effort: chart generation must never crash the caller.
        print(f"生成图表失败: {e}")
        return []
|
|
|
|
|
|
if __name__ == "__main__":
    import sys

    # Results directory: first CLI argument, or the default location.
    if len(sys.argv) > 1:
        results_dir = sys.argv[1]
    else:
        results_dir = "./optimized_stress_results"

    chart_files = generate_optimized_charts(results_dir)

    # Report the outcome (chart generation is best-effort and may
    # return an empty list on failure).
    if chart_files:
        print(f"✅ 生成了 {len(chart_files)} 个图表:")
        for file in chart_files:
            print(f"   📊 {file}")
    else:
        print("❌ 未生成任何图表")