feat: TensorRT 固定 batch=4 重构

- tensorrt_engine.py 工业级 Buffer Pool
- preprocessor.py 添加 pad_to_batch4()
- postprocessor.py 支持批量输出
- settings.py 固定 batch_size=4
This commit is contained in:
2026-02-02 14:49:47 +08:00
parent 956bcbbc3e
commit 745cadc8e7
18 changed files with 68258 additions and 130 deletions

View File

@@ -227,13 +227,14 @@ class LetterboxPreprocessor:
class BatchPreprocessor:
    """Batch preprocessor.

    Converts frames to NCHW layout with optional FP16 precision.
    Batch size is fixed at 4; shorter inputs are padded up to 4
    (see pad_to_batch4).
    """
    # Fixed batch size required by the TensorRT engine profile.
    BATCH_SIZE = 4
def __init__(
    self,
    target_size: Tuple[int, int] = (480, 480),
    max_batch_size: int = 8,
    fp16_mode: bool = True
):
    """Initialize the batch preprocessor.

    Args:
        target_size: Target size as (width, height).
        max_batch_size: Upper bound used by the legacy memory-pool
            allocation path (the processing batch itself is fixed at 4).
        fp16_mode: Whether batch tensors are produced in FP16.
    """
    self.target_size = target_size
    self.max_batch_size = max_batch_size
    self.fp16_mode = fp16_mode
    # Batch size is fixed at BATCH_SIZE (=4); inputs are padded up to it.
    self.batch_size = self.BATCH_SIZE
    self._letterbox = LetterboxPreprocessor(target_size)
    self._logger = get_logger("preprocessor")
    self._lock = threading.Lock()
    # Pool of preallocated batch buffers; access guarded by _lock.
    self._memory_pool: List[np.ndarray] = []
    self._preallocated_size = max_batch_size
    self._logger.info(
        f"Batch预处理器: batch={self.batch_size}, "
        f"target_size={target_size}, fp16={fp16_mode}"
    )
@staticmethod
def pad_to_batch4(frames: List[np.ndarray]) -> np.ndarray:
"""
Padding 到 batch=4重复最后一帧
Args:
frames: list of [3, 480, 480] numpy arrays
Returns:
np.ndarray: [4, 3, 480, 480]
"""
if len(frames) == 0:
raise ValueError("Empty frames list")
if len(frames) == 4:
return np.stack(frames)
pad_frame = frames[-1].copy()
while len(frames) < 4:
frames.append(pad_frame)
return np.stack(frames)
def preprocess_batch(
    self,
    images: List[np.ndarray]
) -> Tuple[np.ndarray, List[Tuple[float, float, float, float]]]:
    """Preprocess a batch of images, auto-padding to batch=4.

    Args:
        images: List of input images.

    Returns:
        tuple: (batch data [4, 3, H, W], list of scale-info tuples)
    """
    # Fix: the old locals `batch_size = len(images)` /
    # `min(batch_size, self.max_batch_size)` were computed but never
    # used — the fixed-batch pipeline sizes itself in _preprocess_batch.
    return self._preprocess_batch(images)
def _preprocess_batch(
self,
images: List[np.ndarray]
) -> Tuple[np.ndarray, List[Tuple[float, float, float, float]]]:
"""内部预处理实现"""
padded_images = self.pad_to_batch4(images)
scale_info_list = []
processed_images = []
for i in range(batch_size):
if i >= len(images):
break
processed, scale_info = self._letterbox.preprocess(images[i])
for i in range(self.batch_size):
processed, scale_info = self._letterbox.preprocess(padded_images[i])
processed_images.append(processed)
scale_info_list.append(scale_info)
@@ -298,53 +327,6 @@ class BatchPreprocessor:
stacked = stacked.astype(np.float16)
return stacked
def allocate_batch_memory(self, batch_size: int) -> np.ndarray:
"""
分配批次内存
Args:
batch_size: 批次大小
Returns:
预分配的numpy数组
"""
batch_size = min(batch_size, self.max_batch_size)
with self._lock:
for mem in self._memory_pool:
if mem.shape[0] == batch_size:
return mem
height, width = self.target_size
shape = (batch_size, 3, height, width)
if self.fp16_mode:
mem = np.zeros(shape, dtype=np.float16)
else:
mem = np.zeros(shape, dtype=np.float32)
self._memory_pool.append(mem)
return mem
def release_memory(self):
"""释放内存池"""
with self._lock:
self._memory_pool.clear()
self._logger.info("预处理内存池已释放")
def get_memory_usage(self) -> Dict[str, int]:
"""获取内存使用情况"""
with self._lock:
total_bytes = sum(
mem.nbytes for mem in self._memory_pool
)
return {
"total_bytes": total_bytes,
"total_mb": total_bytes / (1024 ** 2),
"block_count": len(self._memory_pool)
}
class ImagePreprocessor:
@@ -372,7 +354,6 @@ class ImagePreprocessor:
)
self._batch_preprocessor = BatchPreprocessor(
target_size=(config.input_width, config.input_height),
max_batch_size=config.max_batch_size,
fp16_mode=config.fp16_mode
)
@@ -380,7 +361,7 @@ class ImagePreprocessor:
self._logger.info(
f"图像预处理器初始化完成: "
f"输入尺寸 {config.input_width}x{config.input_height}, "
f"Batch大小 {config.batch_size}-{config.max_batch_size}, "
f"Batch大小 {self._batch_preprocessor.batch_size}, "
f"FP16模式 {config.fp16_mode}"
)
@@ -416,15 +397,17 @@ class ImagePreprocessor:
rois: Optional[List[Optional[ROIInfo]]] = None
) -> Tuple[np.ndarray, List[Tuple[float, float, float, float]]]:
"""
预处理批次图像
预处理批次图像,自动 padding 到 batch=4
Args:
images: 原始图像列表
rois: 可选的ROI配置列表
Returns:
tuple: (批次数据, 缩放信息列表)
tuple: (批次数据 [4, 3, H, W], 缩放信息列表)
"""
from core.tensorrt_engine import pad_to_batch4
if rois is None:
rois = [None] * len(images)
@@ -436,7 +419,7 @@ class ImagePreprocessor:
processed_images.append(processed)
scale_info_list.append(scale_info)
batch_data = self._batch_preprocessor._stack_and_normalize(processed_images)
batch_data = self._batch_preprocessor.preprocess_batch(processed_images)
return batch_data, scale_info_list
@@ -463,13 +446,11 @@ class ImagePreprocessor:
"config": {
"input_width": self.config.input_width,
"input_height": self.config.input_height,
"batch_size": self.config.batch_size,
"max_batch_size": self.config.max_batch_size,
"batch_size": self._batch_preprocessor.batch_size,
"fp16_mode": self.config.fp16_mode,
},
"memory": self._batch_preprocessor.get_memory_usage(),
}
def release_resources(self):
    """Release preprocessor resources (clears the batch memory pool)."""
    self._batch_preprocessor.release_memory()
    self._logger.info("预处理器资源已释放")