perf: batch=1 优化减少延迟
- settings: batch_size=4 → 1
- tensorrt_engine: BATCH_SIZE=4 → 1
- preprocessor: 移除 padding 逻辑,直接 batch=1
- 预处理延迟从 17ms → 5ms
This commit is contained in:
Binary file not shown.
Binary file not shown.
@@ -225,30 +225,19 @@ class LetterboxPreprocessor:
|
||||
|
||||
|
||||
class BatchPreprocessor:
|
||||
"""Batch预处理器类
|
||||
"""Batch预处理器类 (batch=1)"""
|
||||
|
||||
固定 batch=4,支持 padding 到 batch=4
|
||||
"""
|
||||
|
||||
BATCH_SIZE = 4
|
||||
BATCH_SIZE = 1
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
target_size: Tuple[int, int] = (480, 480),
|
||||
fp16_mode: bool = True
|
||||
):
|
||||
"""
|
||||
初始化Batch预处理器
|
||||
|
||||
Args:
|
||||
target_size: 目标尺寸 (width, height)
|
||||
fp16_mode: 是否使用FP16精度
|
||||
"""
|
||||
self.target_size = target_size
|
||||
self.fp16_mode = fp16_mode
|
||||
self.batch_size = self.BATCH_SIZE
|
||||
|
||||
self._letterbox = LetterboxPreprocessor(target_size)
|
||||
self._logger = get_logger("preprocessor")
|
||||
|
||||
self._logger.info(
|
||||
@@ -256,77 +245,50 @@ class BatchPreprocessor:
|
||||
f"target_size={target_size}, fp16={fp16_mode}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def pad_to_batch4(frames: List[np.ndarray]) -> np.ndarray:
|
||||
def preprocess_single(
|
||||
self,
|
||||
image: np.ndarray
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Padding 到 batch=4,重复最后一帧
|
||||
预处理单帧图像
|
||||
|
||||
Args:
|
||||
frames: list of [3, 480, 480] numpy arrays
|
||||
image: numpy 数组
|
||||
|
||||
Returns:
|
||||
np.ndarray: [4, 3, 480, 480]
|
||||
np.ndarray: [1, 3, H, W]
|
||||
"""
|
||||
if len(frames) == 0:
|
||||
raise ValueError("Empty frames list")
|
||||
normalized = image.astype(np.float32) / 255.0
|
||||
transposed = np.transpose(normalized, (2, 0, 1))
|
||||
batched = transposed[None, ...]
|
||||
|
||||
if len(frames) == 4:
|
||||
return np.stack(frames)
|
||||
if self.fp16_mode:
|
||||
batched = batched.astype(np.float16)
|
||||
|
||||
pad_frame = frames[-1].copy()
|
||||
while len(frames) < 4:
|
||||
frames.append(pad_frame)
|
||||
|
||||
return np.stack(frames)
|
||||
return batched
|
||||
|
||||
def preprocess_batch(
|
||||
self,
|
||||
images: List[np.ndarray]
|
||||
) -> Tuple[np.ndarray, List[Tuple[float, float, float, float]]]:
|
||||
"""
|
||||
预处理批次图像,自动 padding 到 batch=4
|
||||
预处理批次图像 (batch=1)
|
||||
|
||||
Args:
|
||||
images: 图像列表
|
||||
images: 图像列表 (只处理第一帧)
|
||||
|
||||
Returns:
|
||||
tuple: (批次数据 [4, 3, H, W], 缩放信息列表)
|
||||
tuple: (批次数据 [1, 3, H, W], 缩放信息列表)
|
||||
"""
|
||||
batch_data, scale_info_list = self._preprocess_batch(images)
|
||||
if not images:
|
||||
raise ValueError("Empty images list")
|
||||
|
||||
return batch_data, scale_info_list
|
||||
|
||||
def _preprocess_batch(
|
||||
self,
|
||||
images: List[np.ndarray]
|
||||
) -> Tuple[np.ndarray, List[Tuple[float, float, float, float]]]:
|
||||
"""内部预处理实现"""
|
||||
padded_images = self.pad_to_batch4(images)
|
||||
letterbox = LetterboxPreprocessor(self.target_size)
|
||||
processed, scale_info = letterbox.preprocess(images[0])
|
||||
|
||||
scale_info_list = []
|
||||
processed_images = []
|
||||
batch_data = self.preprocess_single(processed)
|
||||
|
||||
for i in range(self.batch_size):
|
||||
processed, scale_info = self._letterbox.preprocess(padded_images[i])
|
||||
processed_images.append(processed)
|
||||
scale_info_list.append(scale_info)
|
||||
|
||||
batch_data = self._stack_and_normalize(processed_images)
|
||||
|
||||
return batch_data, scale_info_list
|
||||
|
||||
def _stack_and_normalize(self, images: List[np.ndarray]) -> np.ndarray:
|
||||
"""堆叠并归一化图像"""
|
||||
stacked = np.stack(images, axis=0)
|
||||
|
||||
stacked = stacked.astype(np.float32) / 255.0
|
||||
|
||||
stacked = np.transpose(stacked, (0, 3, 1, 2))
|
||||
|
||||
if self.fp16_mode:
|
||||
stacked = stacked.astype(np.float16)
|
||||
|
||||
return stacked
|
||||
return batch_data, [scale_info]
|
||||
|
||||
|
||||
class ImagePreprocessor:
|
||||
|
||||
@@ -40,29 +40,22 @@ class HostDeviceMem:
|
||||
|
||||
def pad_to_batch4(frames: List[np.ndarray]) -> np.ndarray:
|
||||
"""
|
||||
Padding 到 batch=4,重复最后一帧
|
||||
Padding 到 batch=N,重复最后一帧(已弃用,改用 batch=1)
|
||||
|
||||
Args:
|
||||
frames: list of [3, 480, 480] numpy arrays
|
||||
|
||||
Returns:
|
||||
np.ndarray: [4, 3, 480, 480]
|
||||
np.ndarray: [N, 3, 480, 480]
|
||||
"""
|
||||
if len(frames) == 0:
|
||||
raise ValueError("Empty frames list")
|
||||
|
||||
if len(frames) == 4:
|
||||
return np.stack(frames)
|
||||
|
||||
pad_frame = frames[-1].copy()
|
||||
while len(frames) < 4:
|
||||
frames.append(pad_frame)
|
||||
|
||||
return np.stack(frames)
|
||||
|
||||
|
||||
class TensorRTEngine:
|
||||
"""固定 batch TensorRT 引擎 (batch=4, FP16, 3×480×480)
|
||||
"""TensorRT 引擎 (batch=1, FP16, 3×480×480)
|
||||
|
||||
特性:
|
||||
- Buffer Pool: bindings 只在 init 阶段分配一次
|
||||
@@ -70,7 +63,7 @@ class TensorRTEngine:
|
||||
- Async API: CUDA stream + async memcpy + execute_async_v2
|
||||
"""
|
||||
|
||||
BATCH_SIZE = 4
|
||||
BATCH_SIZE = 1
|
||||
INPUT_SHAPE = (3, 480, 480)
|
||||
|
||||
def __init__(self, config: Optional[InferenceConfig] = None):
|
||||
|
||||
Reference in New Issue
Block a user