#!/usr/bin/env python3
"""
YOLOv11 performance comparison test system.

Complete PyTorch vs TensorRT performance benchmark.
"""
import os
|
||
import sys
|
||
import time
|
||
import json
|
||
import threading
|
||
import numpy as np
|
||
import cv2
|
||
import torch
|
||
import psutil
|
||
from pathlib import Path
|
||
from datetime import datetime
|
||
from typing import Dict, List, Tuple, Optional
|
||
from dataclasses import dataclass, asdict
|
||
from ultralytics import YOLO
|
||
|
||
# 性能指标数据类
|
||
@dataclass
class PerformanceMetrics:
    """One sampled performance measurement for a single inference engine.

    Only ``timestamp`` and ``engine_type`` are required; every metric field
    is optional so a sample can still be recorded when a particular reading
    is unavailable (e.g. no GPU monitoring backend).
    """

    timestamp: float                            # time.time() when sampled
    engine_type: str                            # "pytorch" or "tensorrt"
    fps: Optional[float] = None                 # frames per second
    latency_ms: Optional[float] = None          # per-frame latency
    gpu_utilization: Optional[float] = None     # percent
    gpu_memory_mb: Optional[float] = None
    cpu_utilization: Optional[float] = None     # percent
    memory_mb: Optional[float] = None           # host RAM in use
    concurrent_streams: Optional[int] = None
    batch_size: Optional[int] = None
|
||
|
||
@dataclass
class TestResult:
    """Aggregated outcome of one benchmark run for one engine.

    Holds avg/max/min statistics for FPS, latency, and resource usage, plus
    the run configuration (duration, frame count, concurrency, batch size).
    """

    engine_type: str        # "pytorch" or "tensorrt"
    test_type: str          # "single_inference" / "batch_inference" / "concurrent_streams"
    # Throughput statistics (frames per second)
    avg_fps: float
    max_fps: float
    min_fps: float
    # Latency statistics (milliseconds)
    avg_latency_ms: float
    max_latency_ms: float
    min_latency_ms: float
    # GPU statistics
    avg_gpu_util: float
    max_gpu_util: float
    avg_gpu_memory_mb: float
    max_gpu_memory_mb: float
    # CPU statistics
    avg_cpu_util: float
    max_cpu_util: float
    # Run configuration
    test_duration: float    # actual wall-clock seconds
    total_frames: int
    concurrent_streams: int = 1
    batch_size: int = 1
|
||
class ResourceMonitor:
    """Background sampler for CPU / memory / GPU usage.

    Runs a daemon thread that polls psutil (and GPUtil when installed) at a
    fixed interval, appending raw readings to ``metrics_history``. Call
    ``start_monitoring`` / ``stop_monitoring`` around a benchmark run, then
    ``get_average_metrics`` for the aggregates.
    """

    def __init__(self, sampling_interval: float = 0.1):
        self.sampling_interval = sampling_interval  # seconds between samples
        self.is_monitoring = False
        self.metrics_history = []                   # raw sample dicts
        self.monitor_thread = None

    def start_monitoring(self):
        """Clear previous history and launch the sampling thread."""
        self.is_monitoring = True
        self.metrics_history = []
        self.monitor_thread = threading.Thread(target=self._monitor_loop, daemon=True)
        self.monitor_thread.start()

    def stop_monitoring(self):
        """Signal the loop to stop and wait briefly for the thread to exit."""
        self.is_monitoring = False
        if self.monitor_thread:
            self.monitor_thread.join(timeout=1.0)

    def _monitor_loop(self):
        """Sampling loop executed on the monitor thread."""
        try:
            import GPUtil
        except ImportError:
            print("警告: GPUtil 未安装,GPU 监控不可用")
            GPUtil = None

        while self.is_monitoring:
            try:
                # CPU and host-memory usage via psutil
                cpu_util = psutil.cpu_percent(interval=None)
                memory_info = psutil.virtual_memory()
                memory_mb = memory_info.used / 1024 / 1024

                # GPU readings stay None when no backend can report them
                gpu_util = None
                gpu_memory_mb = None

                if GPUtil and torch.cuda.is_available():
                    try:
                        gpus = GPUtil.getGPUs()
                        if gpus:
                            gpu = gpus[0]  # only the first GPU is sampled
                            gpu_util = gpu.load * 100
                            gpu_memory_mb = gpu.memoryUsed
                    except Exception:
                        # GPUtil can fail transiently; fall back to torch below
                        pass

                # torch fallback for GPU memory when GPUtil gave nothing
                if gpu_util is None and torch.cuda.is_available():
                    try:
                        gpu_memory_mb = torch.cuda.memory_allocated(0) / 1024 / 1024
                        # Utilization is hard to obtain via torch; placeholder
                        gpu_util = 0.0
                    except Exception:
                        pass

                metrics = {
                    'timestamp': time.time(),
                    'cpu_utilization': cpu_util,
                    'memory_mb': memory_mb,
                    'gpu_utilization': gpu_util,
                    'gpu_memory_mb': gpu_memory_mb
                }

                self.metrics_history.append(metrics)

            except Exception as e:
                print(f"监控错误: {e}")

            time.sleep(self.sampling_interval)

    def get_average_metrics(self) -> Dict:
        """Return avg/max/min of each sampled metric, skipping None readings."""
        if not self.metrics_history:
            return {}

        metrics = {}
        for key in ['cpu_utilization', 'memory_mb', 'gpu_utilization', 'gpu_memory_mb']:
            values = [m[key] for m in self.metrics_history if m[key] is not None]
            if values:
                metrics[f'avg_{key}'] = np.mean(values)
                metrics[f'max_{key}'] = np.max(values)
                metrics[f'min_{key}'] = np.min(values)

        return metrics
|
||
class MockCamera:
    """Synthetic frame source used for benchmarking.

    Produces random HxWx3 uint8 images; half of the frames (frame indices
    0-4 of every run of 10) get a solid white rectangle drawn in to imitate
    a person-sized detection target.
    """

    def __init__(self, width: int = 640, height: int = 640, fps: int = 30):
        self.width = width
        self.height = height
        self.fps = fps          # nominal rate only; generation is not throttled
        self.frame_count = 0    # total frames produced so far

    def generate_frame(self) -> np.ndarray:
        """Return one synthetic frame of shape (height, width, 3)."""
        frame = np.random.randint(0, 255, (self.height, self.width, 3), dtype=np.uint8)

        # 50% of frames carry a simulated target rectangle
        if self.frame_count % 10 < 5:
            x1 = np.random.randint(50, self.width - 100)
            y1 = np.random.randint(50, self.height - 150)
            x2, y2 = x1 + 50, y1 + 100
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 255), -1)

        self.frame_count += 1
        return frame

    def generate_batch(self, batch_size: int) -> List[np.ndarray]:
        """Return ``batch_size`` consecutive synthetic frames."""
        return [self.generate_frame() for _ in range(batch_size)]
|
||
|
||
class InferenceEngine:
    """Abstract base for the benchmarked inference backends.

    Subclasses implement ``load_model`` / ``infer_single`` / ``infer_batch``;
    this base only holds the common state and CUDA cleanup logic.
    """

    def __init__(self, model_path: str, engine_type: str):
        self.model_path = model_path
        self.engine_type = engine_type   # e.g. "pytorch" / "tensorrt"
        self.model = None                # populated by load_model()
        backend = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(backend)

    def load_model(self):
        """Load the underlying model; must be provided by subclasses."""
        raise NotImplementedError

    def infer_single(self, image: np.ndarray) -> Dict:
        """Run inference on one frame; must be provided by subclasses."""
        raise NotImplementedError

    def infer_batch(self, images: List[np.ndarray]) -> List[Dict]:
        """Run inference on a batch of frames; must be provided by subclasses."""
        raise NotImplementedError

    def cleanup(self):
        """Drop the loaded model and release cached CUDA memory if any."""
        if hasattr(self, 'model') and self.model:
            del self.model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
|
||
|
||
class PyTorchEngine(InferenceEngine):
    """Runs the Ultralytics YOLO model directly through PyTorch."""

    def __init__(self, model_path: str):
        super().__init__(model_path, "pytorch")

    def load_model(self):
        """Load the .pt checkpoint with Ultralytics and move it to the device."""
        print(f"加载 PyTorch 模型: {self.model_path}")
        self.model = YOLO(self.model_path)
        self.model.to(self.device)
        print(f"✅ PyTorch 模型加载完成,设备: {self.device}")

    def infer_single(self, image: np.ndarray) -> Dict:
        """Run one frame; report wall-clock latency and detection count."""
        started = time.time()
        results = self.model(image, verbose=False, device=self.device)
        elapsed_ms = (time.time() - started) * 1000

        boxes = results[0].boxes
        return {
            'latency_ms': elapsed_ms,
            'detections': len(boxes) if boxes is not None else 0
        }

    def infer_batch(self, images: List[np.ndarray]) -> List[Dict]:
        """Run a batch; per-frame latency is the batch time split evenly."""
        started = time.time()
        results = self.model(images, verbose=False, device=self.device)
        per_frame_ms = (time.time() - started) * 1000 / len(images)

        return [
            {
                'latency_ms': per_frame_ms,
                'detections': len(r.boxes) if r.boxes is not None else 0
            }
            for r in results
        ]
|
||
class TensorRTEngine(InferenceEngine):
    """Runs YOLO through an exported TensorRT engine."""

    def __init__(self, model_path: str):
        super().__init__(model_path, "tensorrt")
        self.engine_path = None  # path to the .engine file once resolved

    def load_model(self):
        """Reuse an existing .engine next to the .pt file, or export a new one."""
        engine_path = self.model_path.replace('.pt', '.engine')

        if os.path.exists(engine_path):
            print(f"找到现有 TensorRT 引擎: {engine_path}")
            self.engine_path = engine_path
        else:
            print(f"创建 TensorRT 引擎: {self.model_path} -> {engine_path}")
            self._export_tensorrt_engine(engine_path)

        self.model = YOLO(self.engine_path)
        print(f"✅ TensorRT 模型加载完成")

    def _export_tensorrt_engine(self, engine_path: str):
        """Export the .pt model to a TensorRT engine (FP16, static 640 input).

        NOTE(review): ``engine_path`` is only used for logging -- the actual
        output location is whatever ``model.export`` returns; confirm the two
        agree on this setup.
        """
        print("正在导出 TensorRT 引擎,这可能需要几分钟...")

        model = YOLO(self.model_path)

        try:
            exported_model = model.export(
                format='engine',
                imgsz=640,
                device=0 if torch.cuda.is_available() else 'cpu',
                half=True,        # FP16
                dynamic=False,
                simplify=True,
                workspace=4,      # GB
                verbose=True
            )
            self.engine_path = exported_model
            print(f"✅ TensorRT 引擎导出完成: {self.engine_path}")

        except Exception as e:
            print(f"❌ TensorRT 引擎导出失败: {e}")
            raise

    def infer_single(self, image: np.ndarray) -> Dict:
        """Run one frame; report wall-clock latency and detection count."""
        started = time.time()
        results = self.model(image, verbose=False)
        elapsed_ms = (time.time() - started) * 1000

        boxes = results[0].boxes
        return {
            'latency_ms': elapsed_ms,
            'detections': len(boxes) if boxes is not None else 0
        }

    def infer_batch(self, images: List[np.ndarray]) -> List[Dict]:
        """Run a batch; per-frame latency is the batch time split evenly."""
        started = time.time()
        results = self.model(images, verbose=False)
        per_frame_ms = (time.time() - started) * 1000 / len(images)

        return [
            {
                'latency_ms': per_frame_ms,
                'detections': len(r.boxes) if r.boxes is not None else 0
            }
            for r in results
        ]
|
||
class PerformanceTester:
    """Orchestrates the single-frame, batch, and concurrent-stream benchmarks.

    Drives an ``InferenceEngine`` through the three test modes, samples
    system resources during each run via ``ResourceMonitor``, aggregates
    everything into ``TestResult`` records, and builds a PyTorch-vs-TensorRT
    comparison summary.
    """

    def __init__(self, model_path: str):
        self.model_path = model_path
        self.results = []  # reserved for accumulated TestResult objects
        self.resource_monitor = ResourceMonitor()

    @staticmethod
    def _safe_pct(delta: float, base: float) -> float:
        # Percentage that ``delta`` represents of ``base``; 0.0 when base is 0
        # (the original code raised ZeroDivisionError on a zero-FPS baseline).
        return delta / base * 100 if base else 0.0

    @staticmethod
    def _make_result(engine_type: str, test_type: str, fps_list, latency_list,
                     resource_metrics: Dict, test_duration: float, total_frames: int,
                     concurrent_streams: int = 1, batch_size: int = 1) -> TestResult:
        # Fold raw per-second FPS/latency samples plus resource metrics into one
        # TestResult (empty sample lists yield 0 rather than NaN).
        return TestResult(
            engine_type=engine_type,
            test_type=test_type,
            avg_fps=np.mean(fps_list) if fps_list else 0,
            max_fps=np.max(fps_list) if fps_list else 0,
            min_fps=np.min(fps_list) if fps_list else 0,
            avg_latency_ms=np.mean(latency_list) if latency_list else 0,
            max_latency_ms=np.max(latency_list) if latency_list else 0,
            min_latency_ms=np.min(latency_list) if latency_list else 0,
            avg_gpu_util=resource_metrics.get('avg_gpu_utilization', 0),
            max_gpu_util=resource_metrics.get('max_gpu_utilization', 0),
            avg_gpu_memory_mb=resource_metrics.get('avg_gpu_memory_mb', 0),
            max_gpu_memory_mb=resource_metrics.get('max_gpu_memory_mb', 0),
            avg_cpu_util=resource_metrics.get('avg_cpu_utilization', 0),
            max_cpu_util=resource_metrics.get('max_cpu_utilization', 0),
            test_duration=test_duration,
            total_frames=total_frames,
            concurrent_streams=concurrent_streams,
            batch_size=batch_size
        )

    def test_single_inference(self, engine: InferenceEngine, test_duration: int = 30) -> TestResult:
        """Benchmark frame-by-frame inference for ``test_duration`` seconds."""
        print(f"\n🔄 测试 {engine.engine_type} 单帧推理性能 ({test_duration}秒)...")

        camera = MockCamera()
        fps_list = []
        latency_list = []
        frame_count = 0

        self.resource_monitor.start_monitoring()

        start_time = time.time()
        last_fps_time = start_time
        fps_frame_count = 0

        while time.time() - start_time < test_duration:
            frame = camera.generate_frame()

            result = engine.infer_single(frame)
            latency_list.append(result['latency_ms'])

            frame_count += 1
            fps_frame_count += 1

            # Recompute FPS once per second of wall time
            current_time = time.time()
            if current_time - last_fps_time >= 1.0:
                fps = fps_frame_count / (current_time - last_fps_time)
                fps_list.append(fps)
                fps_frame_count = 0
                last_fps_time = current_time

                elapsed = current_time - start_time
                print(f" 进度: {elapsed:.1f}s/{test_duration}s, 当前FPS: {fps:.1f}, 延迟: {result['latency_ms']:.1f}ms")

        self.resource_monitor.stop_monitoring()
        resource_metrics = self.resource_monitor.get_average_metrics()

        total_time = time.time() - start_time

        result = self._make_result(
            engine.engine_type, "single_inference",
            fps_list, latency_list, resource_metrics,
            total_time, frame_count
        )

        print(f"✅ {engine.engine_type} 单帧推理测试完成:")
        print(f" 平均FPS: {result.avg_fps:.1f}")
        print(f" 平均延迟: {result.avg_latency_ms:.1f}ms")
        print(f" GPU利用率: {result.avg_gpu_util:.1f}%")
        print(f" GPU内存: {result.avg_gpu_memory_mb:.1f}MB")

        return result

    def test_batch_inference(self, engine: InferenceEngine, batch_sizes: List[int], test_duration: int = 20) -> List[TestResult]:
        """Benchmark batched inference for each size in ``batch_sizes``."""
        results = []

        for batch_size in batch_sizes:
            print(f"\n🔄 测试 {engine.engine_type} 批量推理性能 (批次大小: {batch_size}, {test_duration}秒)...")

            camera = MockCamera()
            fps_list = []
            latency_list = []
            batch_count = 0

            self.resource_monitor.start_monitoring()

            start_time = time.time()
            last_fps_time = start_time
            fps_batch_count = 0

            while time.time() - start_time < test_duration:
                batch_frames = camera.generate_batch(batch_size)

                batch_results = engine.infer_batch(batch_frames)
                avg_latency = np.mean([r['latency_ms'] for r in batch_results])
                latency_list.append(avg_latency)

                batch_count += 1
                fps_batch_count += 1

                current_time = time.time()
                if current_time - last_fps_time >= 1.0:
                    # batched FPS = batches * batch size / elapsed window
                    fps = (fps_batch_count * batch_size) / (current_time - last_fps_time)
                    fps_list.append(fps)
                    fps_batch_count = 0
                    last_fps_time = current_time

                    elapsed = current_time - start_time
                    print(f" 进度: {elapsed:.1f}s/{test_duration}s, 当前FPS: {fps:.1f}, 延迟: {avg_latency:.1f}ms")

            self.resource_monitor.stop_monitoring()
            resource_metrics = self.resource_monitor.get_average_metrics()

            total_time = time.time() - start_time

            result = self._make_result(
                engine.engine_type, "batch_inference",
                fps_list, latency_list, resource_metrics,
                total_time, batch_count * batch_size,
                batch_size=batch_size
            )

            print(f"✅ {engine.engine_type} 批量推理测试完成 (批次大小: {batch_size}):")
            print(f" 平均FPS: {result.avg_fps:.1f}")
            print(f" 平均延迟: {result.avg_latency_ms:.1f}ms")
            print(f" GPU利用率: {result.avg_gpu_util:.1f}%")
            print(f" GPU内存: {result.avg_gpu_memory_mb:.1f}MB")

            results.append(result)

        return results

    def test_concurrent_streams(self, engine: InferenceEngine, concurrent_counts: List[int], test_duration: int = 30) -> List[TestResult]:
        """Benchmark N independent streams hitting one shared engine from N threads."""
        results = []

        for concurrent_count in concurrent_counts:
            print(f"\n🔄 测试 {engine.engine_type} 并发性能 (并发数: {concurrent_count}, {test_duration}秒)...")

            cameras = [MockCamera() for _ in range(concurrent_count)]

            threads = []
            thread_results = [[] for _ in range(concurrent_count)]
            stop_flag = threading.Event()

            self.resource_monitor.start_monitoring()

            def worker_thread(thread_id: int, camera: MockCamera, results_list: List):
                # Per-stream worker: infer frames until stop_flag is set,
                # accumulating its own FPS/latency samples.
                local_fps_list = []
                local_latency_list = []
                frame_count = 0

                last_fps_time = time.time()
                fps_frame_count = 0

                while not stop_flag.is_set():
                    try:
                        frame = camera.generate_frame()

                        result = engine.infer_single(frame)
                        local_latency_list.append(result['latency_ms'])

                        frame_count += 1
                        fps_frame_count += 1

                        # Recompute FPS once per second of wall time
                        current_time = time.time()
                        if current_time - last_fps_time >= 1.0:
                            fps = fps_frame_count / (current_time - last_fps_time)
                            local_fps_list.append(fps)
                            fps_frame_count = 0
                            last_fps_time = current_time

                    except Exception as e:
                        print(f"线程 {thread_id} 错误: {e}")
                        break

                results_list.append({
                    'fps_list': local_fps_list,
                    'latency_list': local_latency_list,
                    'frame_count': frame_count
                })

            start_time = time.time()
            for i in range(concurrent_count):
                thread = threading.Thread(
                    target=worker_thread,
                    args=(i, cameras[i], thread_results[i]),
                    daemon=True
                )
                threads.append(thread)
                thread.start()

            # Let the workers run for the test window, then signal shutdown
            time.sleep(test_duration)
            stop_flag.set()

            for thread in threads:
                thread.join(timeout=5.0)

            self.resource_monitor.stop_monitoring()
            resource_metrics = self.resource_monitor.get_average_metrics()

            # Merge per-thread samples
            all_fps = []
            all_latency = []
            total_frames = 0

            for thread_result_list in thread_results:
                if thread_result_list:
                    summary = thread_result_list[0]
                    all_fps.extend(summary['fps_list'])
                    all_latency.extend(summary['latency_list'])
                    total_frames += summary['frame_count']

            total_time = time.time() - start_time

            result = self._make_result(
                engine.engine_type, "concurrent_streams",
                all_fps, all_latency, resource_metrics,
                total_time, total_frames,
                concurrent_streams=concurrent_count
            )

            print(f"✅ {engine.engine_type} 并发测试完成 (并发数: {concurrent_count}):")
            print(f" 总FPS: {result.avg_fps * concurrent_count:.1f}")
            print(f" 平均单流FPS: {result.avg_fps:.1f}")
            print(f" 平均延迟: {result.avg_latency_ms:.1f}ms")
            print(f" GPU利用率: {result.avg_gpu_util:.1f}%")
            print(f" GPU内存: {result.avg_gpu_memory_mb:.1f}MB")

            results.append(result)

        return results

    def run_full_benchmark(self) -> Dict:
        """Run the complete PyTorch and TensorRT benchmark suite.

        Returns a dict with per-engine results, a comparison summary (when
        the TensorRT leg succeeds), a timestamp, and the model path. TensorRT
        failures are captured under ``results['tensorrt']['error']`` rather
        than aborting the run.
        """
        print("🚀 开始 YOLOv11 性能对比测试")
        print("=" * 60)

        all_results = {
            'pytorch': {},
            'tensorrt': {},
            'comparison': {},
            'timestamp': datetime.now().isoformat(),
            'model_path': self.model_path
        }

        # Shared sweep configuration for both engines
        batch_sizes = [1, 2, 4, 8]
        concurrent_counts = [1, 2, 4, 6, 8, 10]

        print("\n📊 测试 PyTorch 引擎")
        print("-" * 40)
        pytorch_engine = PyTorchEngine(self.model_path)
        pytorch_engine.load_model()

        pytorch_single = self.test_single_inference(pytorch_engine, test_duration=30)
        all_results['pytorch']['single_inference'] = asdict(pytorch_single)

        pytorch_batch = self.test_batch_inference(pytorch_engine, batch_sizes, test_duration=20)
        all_results['pytorch']['batch_inference'] = [asdict(r) for r in pytorch_batch]

        pytorch_concurrent = self.test_concurrent_streams(pytorch_engine, concurrent_counts, test_duration=30)
        all_results['pytorch']['concurrent_streams'] = [asdict(r) for r in pytorch_concurrent]

        pytorch_engine.cleanup()

        print("\n📊 测试 TensorRT 引擎")
        print("-" * 40)
        try:
            tensorrt_engine = TensorRTEngine(self.model_path)
            tensorrt_engine.load_model()

            tensorrt_single = self.test_single_inference(tensorrt_engine, test_duration=30)
            all_results['tensorrt']['single_inference'] = asdict(tensorrt_single)

            tensorrt_batch = self.test_batch_inference(tensorrt_engine, batch_sizes, test_duration=20)
            all_results['tensorrt']['batch_inference'] = [asdict(r) for r in tensorrt_batch]

            tensorrt_concurrent = self.test_concurrent_streams(tensorrt_engine, concurrent_counts, test_duration=30)
            all_results['tensorrt']['concurrent_streams'] = [asdict(r) for r in tensorrt_concurrent]

            tensorrt_engine.cleanup()

            all_results['comparison'] = self._analyze_performance_comparison(
                pytorch_single, tensorrt_single,
                pytorch_batch, tensorrt_batch,
                pytorch_concurrent, tensorrt_concurrent
            )

        except Exception as e:
            print(f"❌ TensorRT 测试失败: {e}")
            all_results['tensorrt']['error'] = str(e)

        return all_results

    def _analyze_performance_comparison(self, pytorch_single, tensorrt_single,
                                        pytorch_batch, tensorrt_batch,
                                        pytorch_concurrent, tensorrt_concurrent) -> Dict:
        """Build the TensorRT-vs-PyTorch comparison summary for all test modes.

        Improvement percentages are relative to the PyTorch baseline; a zero
        baseline yields 0.0 instead of raising ZeroDivisionError.
        """
        comparison = {}

        # Single-frame comparison
        comparison['single_inference'] = {
            'fps_improvement_percent': self._safe_pct(
                tensorrt_single.avg_fps - pytorch_single.avg_fps,
                pytorch_single.avg_fps),
            'latency_improvement_percent': self._safe_pct(
                pytorch_single.avg_latency_ms - tensorrt_single.avg_latency_ms,
                pytorch_single.avg_latency_ms),
            'pytorch_fps': pytorch_single.avg_fps,
            'tensorrt_fps': tensorrt_single.avg_fps,
            'pytorch_latency_ms': pytorch_single.avg_latency_ms,
            'tensorrt_latency_ms': tensorrt_single.avg_latency_ms
        }

        # Batch comparison, paired by position (same batch_sizes sweep)
        batch_comparison = []
        for pt_batch, trt_batch in zip(pytorch_batch, tensorrt_batch):
            batch_comparison.append({
                'batch_size': pt_batch.batch_size,
                'fps_improvement_percent': self._safe_pct(
                    trt_batch.avg_fps - pt_batch.avg_fps, pt_batch.avg_fps),
                'latency_improvement_percent': self._safe_pct(
                    pt_batch.avg_latency_ms - trt_batch.avg_latency_ms,
                    pt_batch.avg_latency_ms),
                'pytorch_fps': pt_batch.avg_fps,
                'tensorrt_fps': trt_batch.avg_fps
            })
        comparison['batch_inference'] = batch_comparison

        # Concurrency comparison, paired by position (same concurrent_counts sweep)
        concurrent_comparison = []
        for pt_conc, trt_conc in zip(pytorch_concurrent, tensorrt_concurrent):
            concurrent_comparison.append({
                'concurrent_streams': pt_conc.concurrent_streams,
                'fps_improvement_percent': self._safe_pct(
                    trt_conc.avg_fps - pt_conc.avg_fps, pt_conc.avg_fps),
                'pytorch_total_fps': pt_conc.avg_fps * pt_conc.concurrent_streams,
                'tensorrt_total_fps': trt_conc.avg_fps * trt_conc.concurrent_streams
            })
        comparison['concurrent_streams'] = concurrent_comparison

        return comparison
|
||
def save_results(results: Dict, output_dir: str = "benchmark_results"):
    """Persist benchmark output to ``output_dir``.

    Writes the raw results as a timestamped JSON dump plus a short
    human-readable text report. Returns ``(json_file, report_file)``.
    """
    os.makedirs(output_dir, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Full machine-readable dump
    json_file = os.path.join(output_dir, f"benchmark_results_{stamp}.json")
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"✅ 测试结果已保存: {json_file}")

    # Short human-readable summary, assembled as a list of lines
    report_file = os.path.join(output_dir, f"benchmark_report_{stamp}.txt")
    lines = [
        "YOLOv11 性能对比测试报告\n",
        "=" * 50 + "\n",
        f"测试时间: {results['timestamp']}\n",
        f"模型路径: {results['model_path']}\n\n",
    ]

    comp = results.get('comparison')
    if comp:
        if 'single_inference' in comp:
            single = comp['single_inference']
            lines.append("单帧推理性能对比:\n")
            lines.append(f" PyTorch FPS: {single['pytorch_fps']:.1f}\n")
            lines.append(f" TensorRT FPS: {single['tensorrt_fps']:.1f}\n")
            lines.append(f" FPS 提升: {single['fps_improvement_percent']:.1f}%\n")
            lines.append(f" PyTorch 延迟: {single['pytorch_latency_ms']:.1f}ms\n")
            lines.append(f" TensorRT 延迟: {single['tensorrt_latency_ms']:.1f}ms\n")
            lines.append(f" 延迟改善: {single['latency_improvement_percent']:.1f}%\n\n")

        if 'batch_inference' in comp:
            lines.append("批量推理性能对比:\n")
            for batch in comp['batch_inference']:
                lines.append(f" 批次大小 {batch['batch_size']}: TensorRT FPS提升 {batch['fps_improvement_percent']:.1f}%\n")
            lines.append("\n")

        if 'concurrent_streams' in comp:
            lines.append("并发性能对比:\n")
            for conc in comp['concurrent_streams']:
                lines.append(f" {conc['concurrent_streams']}路并发: TensorRT总FPS提升 {conc['fps_improvement_percent']:.1f}%\n")

    lines.append("\n详细数据请查看 JSON 文件。\n")

    with open(report_file, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    print(f"✅ 测试报告已保存: {report_file}")

    return json_file, report_file
|
||
|
||
def print_summary(results: Dict):
    """Print a console summary of the benchmark comparison results."""
    print("\n" + "=" * 60)
    print("🎯 性能测试总结")
    print("=" * 60)

    comp = results.get('comparison')
    if comp:
        # Single-frame head-to-head
        if 'single_inference' in comp:
            single = comp['single_inference']
            print(f"\n📈 单帧推理性能:")
            print(f" PyTorch: {single['pytorch_fps']:.1f} FPS, {single['pytorch_latency_ms']:.1f}ms")
            print(f" TensorRT: {single['tensorrt_fps']:.1f} FPS, {single['tensorrt_latency_ms']:.1f}ms")
            print(f" 🚀 TensorRT FPS 提升: {single['fps_improvement_percent']:.1f}%")
            print(f" ⚡ TensorRT 延迟改善: {single['latency_improvement_percent']:.1f}%")

        # Batch size with the largest TensorRT gain
        if comp.get('batch_inference'):
            best_batch = max(comp['batch_inference'], key=lambda x: x['fps_improvement_percent'])
            print(f"\n📦 最佳批量推理 (批次大小 {best_batch['batch_size']}):")
            print(f" PyTorch: {best_batch['pytorch_fps']:.1f} FPS")
            print(f" TensorRT: {best_batch['tensorrt_fps']:.1f} FPS")
            print(f" 🚀 TensorRT FPS 提升: {best_batch['fps_improvement_percent']:.1f}%")

        # Highest concurrency tested (last entry of the sweep)
        if comp.get('concurrent_streams'):
            max_concurrent = comp['concurrent_streams'][-1]
            print(f"\n🔄 最大并发能力 ({max_concurrent['concurrent_streams']}路):")
            print(f" PyTorch 总FPS: {max_concurrent['pytorch_total_fps']:.1f}")
            print(f" TensorRT 总FPS: {max_concurrent['tensorrt_total_fps']:.1f}")
            print(f" 🚀 TensorRT 总FPS 提升: {max_concurrent['fps_improvement_percent']:.1f}%")

    print("\n" + "=" * 60)
|
||
|
||
def main():
    """Entry point: run the full benchmark, save results, print a summary."""
    print("YOLOv11 性能对比测试系统")
    print("PyTorch vs TensorRT 完整性能测试")
    print("=" * 60)

    # Model checkpoint location.
    # NOTE(review): hard-coded absolute Windows path -- only works on the
    # original author's workstation; consider a CLI argument.
    model_path = "C:/Users/16337/PycharmProjects/Security/yolo11n.pt"

    if not os.path.exists(model_path):
        print(f"❌ 模型文件不存在: {model_path}")
        return

    tester = PerformanceTester(model_path)

    try:
        results = tester.run_full_benchmark()

        json_file, report_file = save_results(results)

        print_summary(results)

        print(f"\n📁 结果文件:")
        print(f" JSON: {json_file}")
        print(f" 报告: {report_file}")

    except KeyboardInterrupt:
        print("\n\n⏹️ 测试被用户中断")
    except Exception as e:
        print(f"\n❌ 测试过程中发生错误: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()