Files
Test_AI/benchmark_engines.py
16337 942244bd88 Add YOLO11 TensorRT quantization benchmark scripts
- Engine build scripts (FP16/INT8)
- Benchmark validation scripts
- Result parsing and analysis tools
- COCO dataset configuration
2026-01-29 13:59:42 +08:00

362 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
YOLO11n TensorRT Engine 性能对比分析
对比 INT8 640p vs FP16 480p 的性能、精度、速度等指标
"""
import os
import subprocess
import time
import json
import numpy as np
from pathlib import Path
from datetime import datetime
class EngineBenchmark:
def __init__(self):
self.results_dir = Path("benchmark_results")
self.results_dir.mkdir(exist_ok=True)
self.engines = {
"INT8_640": "yolo11n_int8_b1_8.engine",
"FP16_640": "yolo11n_fp16_640.engine",
"INT8_480": "yolo11n_int8_480.engine",
"FP16_480": "yolo11n_fp16_480.engine"
}
self.benchmark_data = {}
def build_engine(self, name, onnx_path, precision, input_size):
"""构建指定配置的engine"""
engine_path = self.engines[name]
if os.path.exists(engine_path):
print(f"[✓] {name} engine already exists: {engine_path}")
return True
print(f"\n{'='*60}")
print(f"Building {name} engine ({precision}, {input_size}p)")
print(f"{'='*60}")
# 构建命令
cmd = [
"trtexec",
f"--onnx={onnx_path}",
f"--saveEngine={engine_path}",
"--explicitBatch",
f"--{precision.lower()}",
"--workspace=4096",
"--builderOptimizationLevel=4",
"--profilingVerbosity=detailed",
"--optShapes=input:4x3x{input_size}x{input_size}",
"--maxShapes=input:8x3x{input_size}x{input_size}",
"--useCudaGraph",
"--useSpinWait",
"--noTF32"
]
print(f"Command: {' '.join(cmd)}")
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=600
)
if result.returncode == 0:
print(f"[✓] {name} engine built successfully!")
return True
else:
print(f"[✗] {name} engine build failed!")
print(f"Error: {result.stderr}")
return False
except Exception as e:
print(f"[✗] Error building {name}: {e}")
return False
def validate_engine(self, name, engine_path):
"""验证engine并获取mAP"""
print(f"\nValidating {name}...")
# 运行验证
cmd = [
"yolo", "val",
f"model={engine_path}",
"data=coco.yaml",
"imgsz=640" if "640" in name else "imgsz=480",
"rect=False",
"batch=1"
]
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=3600
)
# 解析输出获取mAP
output = result.stdout + result.stderr
# 提取关键指标
metrics = {
"name": name,
"engine": engine_path,
"timestamp": datetime.now().isoformat()
}
# 查找AP指标
for line in output.split('\n'):
if 'Average Precision' in line and 'IoU=0.50:0.95' in line:
try:
ap_value = float(line.split('=')[1].strip().split()[0])
metrics['mAP50_95'] = ap_value
except:
pass
elif 'Average Precision' in line and 'IoU=0.50' in line and '0.95' not in line:
try:
ap_value = float(line.split('=')[1].strip().split()[0])
metrics['mAP50'] = ap_value
except:
pass
# 查找速度
for line in output.split('\n'):
if 'preprocess' in line and 'inference' in line:
try:
parts = line.split()
inf_idx = parts.index('inference')
metrics['inference_ms'] = float(parts[inf_idx-1])
except:
pass
print(f"[✓] {name} validation complete")
print(f" mAP50-95: {metrics.get('mAP50_95', 'N/A')}")
print(f" mAP50: {metrics.get('mAP50', 'N/A')}")
print(f" Inference: {metrics.get('inference_ms', 'N/A')}ms")
return metrics
except Exception as e:
print(f"[✗] Error validating {name}: {e}")
return {"name": name, "error": str(e)}
def run_benchmark(self):
"""运行完整基准测试"""
print("="*60)
print("YOLO11n TensorRT Engine 性能对比分析")
print("="*60)
print(f"\n配置:")
print(" - INT8 640p: 8位整数量化, 640x640输入")
print(" - FP16 640p: 半精度浮点, 640x640输入")
print(" - INT8 480p: 8位整数量化, 480x480输入")
print(" - FP16 480p: 半精度浮点, 480x480输入")
print(" - Batch: 1-8, Opt: 4, 优化级别: 4")
print()
# 验证FP32基线
print("\n" + "="*60)
print("Step 1: 获取FP32基线 (PyTorch)")
print("="*60)
fp32_metrics = self.validate_engine("FP32_PyTorch", "yolo11n.pt")
self.benchmark_data['FP32'] = fp32_metrics
# 构建并验证各engine
configs = [
("INT8_640", "yolo11n.onnx", "INT8", 640),
("FP16_640", "yolo11n.onnx", "FP16", 640),
("INT8_480", "yolo11n.onnx", "INT8", 480),
("FP16_480", "yolo11n.onnx", "FP16", 480),
]
for name, onnx_path, precision, size in configs:
self.build_engine(name, onnx_path, precision, size)
if os.path.exists(self.engines[name]):
metrics = self.validate_engine(name, self.engines[name])
self.benchmark_data[name] = metrics
# 保存结果
self.save_results()
# 生成报告
self.generate_report()
def save_results(self):
"""保存原始结果"""
results_file = self.results_dir / "benchmark_raw.json"
with open(results_file, 'w', encoding='utf-8') as f:
json.dump(self.benchmark_data, f, indent=2, ensure_ascii=False)
print(f"\n结果已保存到: {results_file}")
def generate_report(self):
"""生成详细分析报告"""
report = []
report.append("="*70)
report.append("YOLO11n TensorRT Engine 性能对比分析报告")
report.append("="*70)
report.append(f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
report.append("")
# 表头
report.append("-"*70)
report.append("一、性能指标对比表")
report.append("-"*70)
report.append(f"{'配置':<15} {'mAP50-95':<12} {'mAP50':<12} {'推理速度':<12} {'FPS':<10}")
report.append("-"*70)
fp32_map = self.benchmark_data.get('FP32', {}).get('mAP50_95', 0)
for name, data in self.benchmark_data.items():
if 'error' in data:
continue
map50_95 = data.get('mAP50_95', 'N/A')
map50 = data.get('mAP50', 'N/A')
inf_ms = data.get('inference_ms', 'N/A')
fps = round(1000/inf_ms, 1) if inf_ms != 'N/A' else 'N/A'
report.append(f"{name:<15} {map50_95:<12} {map50:<12} {inf_ms}ms{' '*5} {fps}")
report.append("")
report.append("-"*70)
report.append("二、精度掉点分析 (相对于FP32)")
report.append("-"*70)
for name, data in self.benchmark_data.items():
if name == 'FP32' or 'error' in data:
continue
map50_95 = data.get('mAP50_95', 0)
if fp32_map > 0 and map50_95 > 0:
drop = (fp32_map - map50_95) / fp32_map * 100
report.append(f"{name:<15}: mAP50-95 掉点 {drop:.2f}%")
report.append("")
report.append("-"*70)
report.append("三、速度对比")
report.append("-"*70)
# 找最快速度
speeds = []
for name, data in self.benchmark_data.items():
if 'error' not in data and 'inference_ms' in data:
inf_ms = data.get('inference_ms')
if inf_ms != 'N/A':
speeds.append((name, inf_ms))
if speeds:
speeds.sort(key=lambda x: x[1])
fastest = speeds[0]
report.append(f"最快配置: {fastest[0]} ({fastest[1]}ms)")
report.append("")
report.append("速度排名:")
for i, (name, ms) in enumerate(speeds, 1):
report.append(f" {i}. {name}: {ms}ms")
report.append("")
report.append("="*70)
report.append("四、结论与建议")
report.append("="*70)
report.append("")
report.append("1. 如果追求最高精度: 选择 INT8 640p 或 FP16 640p")
report.append("2. 如果追求最快速度: 选择 FP16 480p")
report.append("3. 如果平衡精度和速度: 选择 FP16 640p")
report.append("4. INT8量化会有约10%的mAP掉点但推理速度显著提升")
report.append("")
# 保存报告
report_text = '\n'.join(report)
report_file = self.results_dir / "benchmark_report.txt"
with open(report_file, 'w', encoding='utf-8') as f:
f.write(report_text)
print(f"\n报告已保存到: {report_file}")
print("\n" + report_text)
def generate_charts(self):
"""生成可视化图表需要matplotlib"""
try:
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')
# 创建图表
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('YOLO11n TensorRT Engine 性能对比', fontsize=14, fontweight='bold')
# 数据准备
configs = []
map50_95_values = []
map50_values = []
inference_values = []
for name, data in self.benchmark_data.items():
if 'error' in data:
continue
configs.append(name)
map50_95_values.append(data.get('mAP50_95', 0))
map50_values.append(data.get('mAP50', 0))
inf_ms = data.get('inference_ms', 0)
if inf_ms != 'N/A' and inf_ms > 0:
inference_values.append(inf_ms)
else:
inference_values.append(0)
colors = ['#2ecc71', '#3498db', '#e74c3c', '#9b59b6']
# 1. mAP50-95对比
ax1 = axes[0, 0]
bars1 = ax1.bar(configs, map50_95_values, color=colors[:len(configs)])
ax1.set_title('mAP50-95 对比', fontsize=12)
ax1.set_ylabel('mAP50-95')
ax1.set_ylim(0, max(map50_95_values) * 1.2)
for bar, val in zip(bars1, map50_95_values):
ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
f'{val:.3f}', ha='center', va='bottom', fontsize=9)
# 2. mAP50对比
ax2 = axes[0, 1]
bars2 = ax2.bar(configs, map50_values, color=colors[:len(configs)])
ax2.set_title('mAP50 对比', fontsize=12)
ax2.set_ylabel('mAP50')
ax2.set_ylim(0, max(map50_values) * 1.2)
for bar, val in zip(bars2, map50_values):
ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
f'{val:.3f}', ha='center', va='bottom', fontsize=9)
# 3. 推理速度对比
ax3 = axes[1, 0]
bars3 = ax3.bar(configs, inference_values, color=colors[:len(configs)])
ax3.set_title('推理速度对比 (ms)', fontsize=12)
ax3.set_ylabel('推理时间 (ms)')
for bar, val in zip(bars3, inference_values):
ax3.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
f'{val:.1f}ms', ha='center', va='bottom', fontsize=9)
# 4. FPS对比
ax4 = axes[1, 1]
fps_values = [1000/v if v > 0 else 0 for v in inference_values]
bars4 = ax4.bar(configs, fps_values, color=colors[:len(configs)])
ax4.set_title('FPS 对比', fontsize=12)
ax4.set_ylabel('FPS')
for bar, val in zip(bars4, fps_values):
ax4.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
f'{val:.1f}', ha='center', va='bottom', fontsize=9)
plt.tight_layout()
chart_file = self.results_dir / "benchmark_charts.png"
plt.savefig(chart_file, dpi=150, bbox_inches='tight')
print(f"\n图表已保存到: {chart_file}")
except ImportError:
print("\n[提示] 需要安装matplotlib才能生成图表: pip install matplotlib")
def main():
benchmark = EngineBenchmark()
benchmark.run_benchmark()
benchmark.generate_charts()
if __name__ == "__main__":
main()