TensorRT测试
This commit is contained in:
259
export_dynamic_tensorrt.py
Normal file
259
export_dynamic_tensorrt.py
Normal file
@@ -0,0 +1,259 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
导出支持动态批次的 TensorRT 引擎
|
||||
支持 batch size: 1, 2, 4, 8, 16, 32
|
||||
"""
|
||||
|
||||
import os
|
||||
import torch
|
||||
from ultralytics import YOLO
|
||||
import time
|
||||
|
||||
def _ultralytics_engine_export(model):
    """Export a dynamic-batch TensorRT engine via the ultralytics exporter.

    Fallback path used when the ``trtexec`` CLI is missing, fails, or
    times out.  Returns the path of the exported ``.engine`` file.
    """
    export_args = {
        'format': 'engine',   # TensorRT engine format
        'imgsz': 640,         # Input image size
        'device': 0,          # GPU device
        'half': True,         # FP16 precision
        'dynamic': True,      # Enable dynamic shapes
        'simplify': True,     # Simplify ONNX model
        'workspace': 8,       # Workspace size in GB
        'verbose': True,      # Verbose output
    }
    return model.export(**export_args)


def export_dynamic_tensorrt_engine(model_path, output_path=None):
    """Export a TensorRT engine supporting dynamic batch sizes (1-32).

    Strategy: export a dynamic-shape ONNX model first, then convert it with
    the ``trtexec`` CLI (explicit min/opt/max batch profiles); fall back to
    the plain ultralytics engine export if ``trtexec`` is unavailable.

    Args:
        model_path: Path to the source YOLO ``.pt`` weights file.
        output_path: Destination ``.engine`` path.  Defaults to
            ``<model stem>_dynamic.engine`` next to the weights.

    Returns:
        Path of the exported engine file, or ``None`` on failure.
    """
    print("🚀 开始导出动态批次 TensorRT 引擎")
    print("=" * 60)

    # TensorRT export requires a CUDA-capable device.
    if not torch.cuda.is_available():
        print("❌ CUDA 不可用,无法导出 TensorRT 引擎")
        print("请确保:")
        print("1. 已安装 CUDA 驱动")
        print("2. PyTorch 支持 CUDA")
        print("3. 在正确的 conda 环境中")
        return None

    print(f"✅ CUDA 可用,设备数量: {torch.cuda.device_count()}")
    print(f"✅ 当前设备: {torch.cuda.get_device_name(0)}")

    if not os.path.exists(model_path):
        print(f"❌ 模型文件不存在: {model_path}")
        return None

    print(f"📁 模型路径: {model_path}")

    # Derive the default output path from the model file name.
    # BUG FIX: base_name is now scoped to this branch only; the original
    # read it unconditionally later and raised NameError whenever a custom
    # output_path was passed.
    if output_path is None:
        base_name = os.path.splitext(model_path)[0]
        output_path = f"{base_name}_dynamic.engine"

    print(f"📁 输出路径: {output_path}")

    # Remove a stale engine so the export always produces a fresh file.
    if os.path.exists(output_path):
        os.remove(output_path)
        print(f"🗑️ 删除现有引擎文件: {output_path}")

    try:
        print("\n📦 加载 YOLO 模型...")
        model = YOLO(model_path)

        print("\n🔧 导出动态 TensorRT 引擎...")
        print("配置参数:")
        print(" - 格式: TensorRT Engine")
        print(" - 输入尺寸: 640x640")
        print(" - 精度: FP16")
        print(" - 动态批次: 1-32")
        print(" - 工作空间: 8GB")
        print(" - 设备: GPU")

        start_time = time.time()

        # Step 1: export a dynamic-shape ONNX model.
        print("🔧 第一步: 导出动态 ONNX 模型...")
        onnx_export_args = {
            'format': 'onnx',     # ONNX format
            'imgsz': 640,         # Input image size
            'device': 0,          # GPU device
            'dynamic': True,      # Enable dynamic shapes
            'simplify': True,     # Simplify ONNX model
            'verbose': True,      # Verbose output
        }
        onnx_model = model.export(**onnx_export_args)
        print(f"✅ ONNX 模型导出完成: {onnx_model}")

        # Step 2: convert the ONNX model into a dynamic TensorRT engine.
        print("\n🔧 第二步: 转换为动态 TensorRT 引擎...")

        import subprocess

        trtexec_cmd = [
            "trtexec",
            f"--onnx={onnx_model}",
            f"--saveEngine={output_path}",
            "--fp16",                            # FP16 precision
            "--workspace=8192",                  # 8 GB workspace
            "--minShapes=images:1x3x640x640",    # minimum batch size
            "--optShapes=images:8x3x640x640",    # optimal batch size
            "--maxShapes=images:32x3x640x640",   # maximum batch size
            "--verbose"
        ]

        print(f"执行命令: {' '.join(trtexec_cmd)}")

        try:
            result = subprocess.run(trtexec_cmd, capture_output=True, text=True, timeout=600)
            if result.returncode == 0:
                print("✅ TensorRT 引擎创建成功!")
                # BUG FIX: record the produced engine path explicitly instead
                # of relying on the later `'exported_model' in locals()` check.
                exported_model = output_path
            else:
                print(f"❌ trtexec 执行失败:")
                print(f"stdout: {result.stdout}")
                print(f"stderr: {result.stderr}")
                print("\n🔄 回退到 ultralytics 导出方式...")
                exported_model = _ultralytics_engine_export(model)

        except subprocess.TimeoutExpired:
            print("❌ trtexec 执行超时,回退到 ultralytics 导出方式...")
            exported_model = _ultralytics_engine_export(model)

        except FileNotFoundError:
            print("❌ trtexec 未找到,回退到 ultralytics 导出方式...")
            exported_model = _ultralytics_engine_export(model)

        print(f"\n⏳ 开始导出(预计需要 5-10 分钟)...")

        export_time = time.time() - start_time

        print(f"\n✅ TensorRT 引擎导出完成!")
        print(f"⏱️ 导出耗时: {export_time:.1f} 秒")
        print(f"📁 引擎文件: {exported_model}")

        # Report the resulting file size, if the engine was written to disk.
        if os.path.exists(exported_model):
            file_size = os.path.getsize(exported_model) / (1024 * 1024)  # MB
            print(f"📊 文件大小: {file_size:.1f} MB")

        return exported_model

    except Exception as e:
        print(f"\n❌ 导出失败: {e}")
        import traceback
        traceback.print_exc()
        return None
|
||||
|
||||
def test_dynamic_engine(engine_path):
|
||||
"""测试动态引擎的不同批次大小"""
|
||||
print(f"\n🧪 测试动态引擎: {engine_path}")
|
||||
|
||||
if not os.path.exists(engine_path):
|
||||
print(f"❌ 引擎文件不存在: {engine_path}")
|
||||
return False
|
||||
|
||||
try:
|
||||
# 加载引擎
|
||||
model = YOLO(engine_path)
|
||||
print("✅ 引擎加载成功")
|
||||
|
||||
# 测试不同批次大小
|
||||
batch_sizes = [1, 2, 4, 8]
|
||||
|
||||
for batch_size in batch_sizes:
|
||||
print(f"\n📊 测试批次大小: {batch_size}")
|
||||
|
||||
# 创建测试数据
|
||||
import numpy as np
|
||||
test_images = []
|
||||
for i in range(batch_size):
|
||||
# 生成随机图像 (640x640x3)
|
||||
img = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
test_images.append(img)
|
||||
|
||||
try:
|
||||
# 执行推理
|
||||
start_time = time.time()
|
||||
results = model(test_images, verbose=False)
|
||||
inference_time = time.time() - start_time
|
||||
|
||||
print(f" ✅ 批次 {batch_size}: {inference_time*1000:.1f}ms")
|
||||
print(f" 📈 平均每帧: {inference_time*1000/batch_size:.1f}ms")
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ 批次 {batch_size} 测试失败: {e}")
|
||||
return False
|
||||
|
||||
print("\n🎉 所有批次测试通过!")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ 引擎测试失败: {e}")
|
||||
return False
|
||||
|
||||
def main(model_path="C:/Users/16337/PycharmProjects/Security/yolo11n.pt"):
    """Export a dynamic-batch TensorRT engine and smoke-test it.

    Args:
        model_path: Path to the YOLO ``.pt`` weights.  Defaults to the
            original hard-coded location so existing callers are unaffected;
            generalized to a parameter so other models can be exported.
    """
    print("动态批次 TensorRT 引擎导出工具")
    print("=" * 60)

    if not os.path.exists(model_path):
        print(f"❌ 模型文件不存在: {model_path}")
        return

    # Export the dynamic engine, then verify it if the export succeeded.
    engine_path = export_dynamic_tensorrt_engine(model_path)

    if not engine_path:
        print(f"\n❌ 引擎导出失败")
        return

    success = test_dynamic_engine(engine_path)

    if success:
        print(f"\n🎯 动态 TensorRT 引擎准备就绪!")
        print(f"📁 引擎路径: {engine_path}")
        print(f"✅ 支持批次大小: 1, 2, 4, 8, 16, 32")
        print(f"\n🚀 现在可以运行完整的批量性能测试了!")
    else:
        print(f"\n⚠️ 引擎导出成功但测试失败,请检查配置")
|
||||
|
||||
# Script entry point: run the export-and-test workflow when executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user