fix: 修复 TensorRT bindings 问题
- tensorrt_engine.py 添加 pycuda 支持
- CUDA 上下文和流管理
- _is_in_working_hours 支持字符串格式
This commit is contained in:
Binary file not shown.
Binary file not shown.
@@ -3,6 +3,7 @@ TensorRT推理引擎模块
|
||||
实现引擎加载、显存优化、异步推理、性能监控
|
||||
"""
|
||||
|
||||
import ctypes
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
@@ -12,10 +13,13 @@ import numpy as np
|
||||
|
||||
try:
|
||||
import tensorrt as trt
|
||||
import pycuda.driver as cuda
|
||||
import pycuda.autoinit
|
||||
TRT_AVAILABLE = True
|
||||
except ImportError:
|
||||
TRT_AVAILABLE = False
|
||||
trt = None
|
||||
cuda = None
|
||||
|
||||
from config.settings import get_settings, InferenceConfig
|
||||
from utils.logger import get_logger
|
||||
@@ -50,6 +54,7 @@ class TensorRTEngine:
|
||||
self._output_bindings = []
|
||||
self._stream = None
|
||||
self._released = False
|
||||
self._cuda_context = None
|
||||
|
||||
self._logger = get_logger("tensorrt")
|
||||
self._lock = threading.Lock()
|
||||
@@ -90,6 +95,10 @@ class TensorRTEngine:
|
||||
if self._context is not None:
|
||||
self._release_resources()
|
||||
|
||||
if cuda is not None:
|
||||
self._cuda_context = cuda.Device(0).make_context()
|
||||
self._stream = cuda.Stream()
|
||||
|
||||
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
|
||||
|
||||
with open(engine_path, "rb") as f:
|
||||
@@ -241,24 +250,60 @@ class TensorRTEngine:
|
||||
input_data.shape
|
||||
)
|
||||
|
||||
input_tensor = input_data
|
||||
output_tensors = []
|
||||
|
||||
for output in self._output_bindings:
|
||||
output_shape = list(output["shape"])
|
||||
output_shape[0] = batch_size
|
||||
output_tensor = np.zeros(output_shape, dtype=self._get_numpy_dtype(output["dtype"]))
|
||||
output_tensors.append(output_tensor)
|
||||
|
||||
bindings = [input_tensor] + output_tensors
|
||||
|
||||
self._context.execute_v2(bindings=bindings)
|
||||
|
||||
inference_time_ms = (time.perf_counter() - start_time) * 1000
|
||||
|
||||
self._update_performance_stats(inference_time_ms, batch_size)
|
||||
|
||||
return output_tensors, inference_time_ms
|
||||
if cuda is not None and self._cuda_context is not None:
|
||||
self._cuda_context.push()
|
||||
|
||||
try:
|
||||
input_data = np.ascontiguousarray(input_data)
|
||||
|
||||
input_ptr = cuda.mem_alloc(input_data.nbytes)
|
||||
cuda.memcpy_htod(input_ptr, input_data)
|
||||
|
||||
bindings = [int(input_ptr)]
|
||||
output_tensors = []
|
||||
|
||||
for output in self._output_bindings:
|
||||
output_shape = list(output["shape"])
|
||||
output_shape[0] = batch_size
|
||||
output_tensor = np.zeros(output_shape, dtype=self._get_numpy_dtype(output["dtype"]))
|
||||
output_tensor = np.ascontiguousarray(output_tensor)
|
||||
output_ptr = cuda.mem_alloc(output_tensor.nbytes)
|
||||
cuda.memcpy_htod(output_ptr, output_tensor)
|
||||
bindings.append(int(output_ptr))
|
||||
output_tensors.append((output_tensor, output_ptr))
|
||||
|
||||
self._context.execute_v2(bindings=bindings)
|
||||
|
||||
for output_tensor, output_ptr in output_tensors:
|
||||
cuda.memcpy_dtoh(output_tensor, output_ptr)
|
||||
|
||||
inference_time_ms = (time.perf_counter() - start_time) * 1000
|
||||
|
||||
self._update_performance_stats(inference_time_ms, batch_size)
|
||||
|
||||
return [t[0] for t in output_tensors], inference_time_ms
|
||||
|
||||
finally:
|
||||
self._cuda_context.pop()
|
||||
else:
|
||||
input_tensor = input_data
|
||||
output_tensors = []
|
||||
|
||||
for output in self._output_bindings:
|
||||
output_shape = list(output["shape"])
|
||||
output_shape[0] = batch_size
|
||||
output_tensor = np.zeros(output_shape, dtype=self._get_numpy_dtype(output["dtype"]))
|
||||
output_tensors.append(output_tensor)
|
||||
|
||||
bindings = [int(input_tensor.ctypes.data)] + [int(t.ctypes.data) for t in output_tensors]
|
||||
|
||||
self._context.execute_v2(bindings=bindings)
|
||||
|
||||
inference_time_ms = (time.perf_counter() - start_time) * 1000
|
||||
|
||||
self._update_performance_stats(inference_time_ms, batch_size)
|
||||
|
||||
return output_tensors, inference_time_ms
|
||||
|
||||
def infer_async(self, input_data: np.ndarray) -> Tuple[List[np.ndarray], float]:
|
||||
"""
|
||||
@@ -333,6 +378,14 @@ class TensorRTEngine:
|
||||
|
||||
def _release_resources(self):
|
||||
"""释放资源(Python TensorRT 由 GC 管理,无需 destroy)"""
|
||||
if self._cuda_context:
|
||||
try:
|
||||
self._cuda_context.pop()
|
||||
self._cuda_context.detach()
|
||||
except Exception:
|
||||
pass
|
||||
self._cuda_context = None
|
||||
|
||||
if self._stream:
|
||||
try:
|
||||
self._stream.synchronize()
|
||||
|
||||
Reference in New Issue
Block a user