fix: 修复10个关键bug,提升系统稳定性和性能

1. YOLO11输出解析错误: 移除不存在的objectness行,正确使用class_scores.max()
2. CPU NMS逻辑错误: keep_mask同时标记保留和抑制框导致NMS失效,改用独立的suppressed布尔数组记录被抑制的框
3. 坐标映射缺失: _build_tracks中scale_info未使用,添加revert_boxes还原到ROI裁剪空间
4. batch=1限制: 恢复真正的动态batch推理(1~8),BatchPreprocessor支持多图stack
5. 帧率控制缺失: _read_frame添加time.monotonic()间隔控制,按target_fps跳帧
6. 拉流推理耦合: 新增独立推理线程(InferenceWorker),生产者-消费者模式解耦
7. 攒批形同虚设: 添加50ms攒批窗口+max_batch阈值,替代>=1立即处理
8. LeavePost双重等待: LEAVING确认后立即触发告警并转入OFF_DUTY,不再在OFF_DUTY中二次等待confirm_leave_sec;OFF_DUTY期间冷却结束后可再次告警
9. register_algorithm每帧调用: 添加_registered_keys缓存,O(1)快速路径跳过
10. GPU context线程安全: TensorRT infer()内部加锁,防止多线程CUDA context竞争

附带修复:
- reset_algorithm中未定义algorithm_type变量(NameError)
- update_roi_params中循环变量key覆盖外层key
- AlertInfo缺少bind_id字段(TypeError)
- _logger.log_alert在标准logger上不存在(AttributeError)
- AlarmStateMachine死锁(Lock改为RLock)
- ROICropper.create_mask坐标解析错误
- 更新测试用例适配新API

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-04 16:47:26 +08:00
parent fa0304aa47
commit 98595402c6
9 changed files with 352 additions and 234 deletions

View File

@@ -161,16 +161,10 @@ class LeavePostAlgorithm:
self.state = self.STATE_ON_DUTY self.state = self.STATE_ON_DUTY
self.state_start_time = current_time self.state_start_time = current_time
elif elapsed >= self.confirm_leave_sec: elif elapsed >= self.confirm_leave_sec:
# 确认离岗后直接触发告警,不再进入 OFF_DUTY 二次等待
self.state = self.STATE_OFF_DUTY self.state = self.STATE_OFF_DUTY
self.state_start_time = current_time self.state_start_time = current_time
elif self.state == self.STATE_OFF_DUTY:
elapsed = (current_time - self.state_start_time).total_seconds()
if roi_has_person:
self.state = self.STATE_ON_DUTY
self.state_start_time = current_time
elif elapsed >= self.confirm_leave_sec:
cooldown_key = f"{camera_id}_{roi_id}" cooldown_key = f"{camera_id}_{roi_id}"
now = datetime.now() now = datetime.now()
if cooldown_key not in self.alert_cooldowns or (now - self.alert_cooldowns[cooldown_key]).total_seconds() > self.cooldown_sec: if cooldown_key not in self.alert_cooldowns or (now - self.alert_cooldowns[cooldown_key]).total_seconds() > self.cooldown_sec:
@@ -186,6 +180,28 @@ class LeavePostAlgorithm:
}) })
self.alert_cooldowns[cooldown_key] = now self.alert_cooldowns[cooldown_key] = now
elif self.state == self.STATE_OFF_DUTY:
# OFF_DUTY 状态:等待人员回岗或冷却后可再次告警
if roi_has_person:
self.state = self.STATE_ON_DUTY
self.state_start_time = current_time
else:
elapsed = (current_time - self.state_start_time).total_seconds()
cooldown_key = f"{camera_id}_{roi_id}"
now = datetime.now()
if cooldown_key in self.alert_cooldowns and (now - self.alert_cooldowns[cooldown_key]).total_seconds() > self.cooldown_sec:
bbox = self._get_latest_bbox(tracks, roi_id)
elapsed_minutes = int(elapsed / 60)
alerts.append({
"track_id": roi_id,
"camera_id": camera_id,
"bbox": bbox,
"duration_minutes": elapsed_minutes,
"alert_type": "leave_post",
"message": f"持续离岗 {elapsed_minutes} 分钟",
})
self.alert_cooldowns[cooldown_key] = now
return alerts return alerts
def reset(self): def reset(self):
@@ -373,6 +389,7 @@ class AlgorithmManager:
self.algorithms: Dict[str, Dict[str, Any]] = {} self.algorithms: Dict[str, Dict[str, Any]] = {}
self.working_hours = working_hours or [] self.working_hours = working_hours or []
self._update_lock = threading.Lock() self._update_lock = threading.Lock()
self._registered_keys: set = set() # 已注册的 (roi_id, bind_id, algo_type) 缓存
self.default_params = { self.default_params = {
"leave_post": { "leave_post": {
@@ -580,11 +597,14 @@ class AlgorithmManager:
algorithm_type: str, algorithm_type: str,
params: Optional[Dict[str, Any]] = None, params: Optional[Dict[str, Any]] = None,
): ):
"""注册算法支持绑定ID""" """注册算法支持绑定ID,使用缓存避免每帧重复查询"""
key = f"{roi_id}_{bind_id}" cache_key = (roi_id, bind_id, algorithm_type)
if key in self.algorithms and algorithm_type in self.algorithms[key]: # 快速路径:已注册直接返回
if cache_key in self._registered_keys:
return return
key = f"{roi_id}_{bind_id}"
if roi_id not in self.algorithms: if roi_id not in self.algorithms:
self.algorithms[roi_id] = {} self.algorithms[roi_id] = {}
@@ -618,6 +638,8 @@ class AlgorithmManager:
cooldown_seconds=algo_params.get("cooldown_seconds", 300), cooldown_seconds=algo_params.get("cooldown_seconds", 300),
target_class=algo_params.get("target_class", "person"), target_class=algo_params.get("target_class", "person"),
) )
self._registered_keys.add(cache_key)
def process( def process(
self, self,
@@ -646,24 +668,33 @@ class AlgorithmManager:
key = f"{roi_id}_{bind_id}" key = f"{roi_id}_{bind_id}"
if roi_id in self.algorithms and key in self.algorithms[roi_id] and algorithm_type in self.algorithms[roi_id][key]: if roi_id in self.algorithms and key in self.algorithms[roi_id] and algorithm_type in self.algorithms[roi_id][key]:
algo = self.algorithms[roi_id][key][algorithm_type] algo = self.algorithms[roi_id][key][algorithm_type]
for key, value in params.items(): for param_key, value in params.items():
if hasattr(algo, key): if hasattr(algo, param_key):
setattr(algo, key, value) setattr(algo, param_key, value)
def reset_algorithm(self, roi_id: str, bind_id: Optional[str] = None): def reset_algorithm(self, roi_id: str, bind_id: Optional[str] = None):
"""重置算法状态支持绑定ID""" """重置算法状态支持绑定ID"""
if roi_id not in self.algorithms: if roi_id not in self.algorithms:
return return
if bind_id: if bind_id:
key = f"{roi_id}_{bind_id}" key = f"{roi_id}_{bind_id}"
if key in self.algorithms[roi_id]: if key in self.algorithms[roi_id]:
if algorithm_type in self.algorithms[roi_id][key]: for algo in self.algorithms[roi_id][key].values():
self.algorithms[roi_id][key][algorithm_type].reset() algo.reset()
# 清除注册缓存
self._registered_keys = {
k for k in self._registered_keys
if not (k[0] == roi_id and k[1] == bind_id)
}
else: else:
for key in self.algorithms[roi_id]: for key in self.algorithms[roi_id]:
for algo in self.algorithms[roi_id][key].values(): for algo in self.algorithms[roi_id][key].values():
algo.reset() algo.reset()
# 清除该 roi 的所有注册缓存
self._registered_keys = {
k for k in self._registered_keys if k[0] != roi_id
}
def reset_all(self): def reset_all(self):
"""重置所有算法""" """重置所有算法"""

View File

@@ -104,55 +104,55 @@ class NMSProcessor:
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""CPU 向量化 NMS""" """CPU 向量化 NMS"""
order = np.argsort(scores)[::-1] order = np.argsort(scores)[::-1]
keep_mask = np.zeros(len(boxes), dtype=bool) keep = []
suppressed = np.zeros(len(boxes), dtype=bool)
i = 0
while i < len(order) and i < max_output_size: for i in range(len(order)):
if len(keep) >= max_output_size:
break
idx = order[i] idx = order[i]
if keep_mask[idx]: if suppressed[idx]:
i += 1
continue continue
keep_mask[idx] = True keep.append(idx)
remaining = order[i + 1:] remaining = order[i + 1:]
if len(remaining) == 0: if len(remaining) == 0:
break break
remaining_mask = ~keep_mask[remaining] remaining = remaining[~suppressed[remaining]]
if not np.any(remaining_mask): if len(remaining) == 0:
break break
remaining = remaining[remaining_mask]
xx1 = np.maximum(boxes[idx, 0], boxes[remaining, 0]) xx1 = np.maximum(boxes[idx, 0], boxes[remaining, 0])
yy1 = np.maximum(boxes[idx, 1], boxes[remaining, 1]) yy1 = np.maximum(boxes[idx, 1], boxes[remaining, 1])
xx2 = np.minimum(boxes[idx, 2], boxes[remaining, 2]) xx2 = np.minimum(boxes[idx, 2], boxes[remaining, 2])
yy2 = np.minimum(boxes[idx, 3], boxes[remaining, 3]) yy2 = np.minimum(boxes[idx, 3], boxes[remaining, 3])
w = np.maximum(0.0, xx2 - xx1 + 1) w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1) h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h inter = w * h
areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1) areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
ovr = inter / (areas[idx] + areas[remaining] - inter + 1e-6) ovr = inter / (areas[idx] + areas[remaining] - inter + 1e-6)
suppress_mask = ovr > self.nms_threshold suppress_mask = ovr > self.nms_threshold
for j in np.where(suppress_mask)[0]: suppressed[remaining[suppress_mask]] = True
keep_mask[remaining[j]] = True
keep_indices = np.array(keep, dtype=np.int32) if keep else np.array([], dtype=np.int32)
i += 1
if len(keep_indices) == 0:
keep_indices = np.where(keep_mask)[0] return (
np.array([], dtype=np.int32),
if len(keep_indices) > max_output_size: np.array([]),
top_k = np.argsort(scores[keep_indices])[::-1][:max_output_size] np.array([])
keep_indices = keep_indices[top_k] )
return ( return (
keep_indices.astype(np.int32), keep_indices,
scores[keep_indices], scores[keep_indices],
class_ids[keep_indices] if class_ids is not None else np.array([]) class_ids[keep_indices] if class_ids is not None else np.array([])
) )
@@ -448,7 +448,7 @@ class AlarmStateMachine:
self.alert_cooldown = alert_cooldown self.alert_cooldown = alert_cooldown
self._states: Dict[str, AlarmState] = {} self._states: Dict[str, AlarmState] = {}
self._lock = threading.Lock() self._lock = threading.RLock()
self._logger = get_logger("postprocessor") self._logger = get_logger("postprocessor")
@@ -513,9 +513,9 @@ class AlarmStateMachine:
self._logger.log_alert( self._logger.log_alert(
"detection_threshold_reached", "detection_threshold_reached",
roi_id=roi_id, "",
camera_id="", roi_id,
confidence=None None
) )
return { return {
@@ -669,32 +669,33 @@ class PostProcessor:
return np.array([]), np.array([]), np.array([]) return np.array([]), np.array([]), np.array([])
boxes_xywh = output[0:4, :].T boxes_xywh = output[0:4, :].T
obj_conf = output[4, :] # YOLO11 输出格式: [4+num_classes, 8400]
# 前4行是 xywh后80行是各类别分数没有单独的 objectness 行
person_scores = output[5, :] class_scores = output[4:, :] # [num_classes, 8400]
scores = class_scores.max(axis=0) # 取各类别最大分数
scores = obj_conf * person_scores class_ids = class_scores.argmax(axis=0) # 对应类别ID
coarse_mask = scores > prefilter_threshold coarse_mask = scores > prefilter_threshold
if not np.any(coarse_mask): if not np.any(coarse_mask):
return np.array([]), np.array([]), np.array([]) return np.array([]), np.array([]), np.array([])
boxes = boxes_xywh[coarse_mask] boxes = boxes_xywh[coarse_mask]
scores_coarse = scores[coarse_mask] scores_coarse = scores[coarse_mask]
class_ids_filtered = class_ids[coarse_mask]
valid_count = len(boxes) valid_count = len(boxes)
np.copyto(self._buffer_boxes_xywh[:valid_count], boxes) np.copyto(self._buffer_boxes_xywh[:valid_count], boxes)
self._buffer_xyxy[:valid_count, 0] = boxes[:, 0] - boxes[:, 2] / 2 self._buffer_xyxy[:valid_count, 0] = boxes[:, 0] - boxes[:, 2] / 2
self._buffer_xyxy[:valid_count, 1] = boxes[:, 1] - boxes[:, 3] / 2 self._buffer_xyxy[:valid_count, 1] = boxes[:, 1] - boxes[:, 3] / 2
self._buffer_xyxy[:valid_count, 2] = boxes[:, 0] + boxes[:, 2] / 2 self._buffer_xyxy[:valid_count, 2] = boxes[:, 0] + boxes[:, 2] / 2
self._buffer_xyxy[:valid_count, 3] = boxes[:, 1] + boxes[:, 3] / 2 self._buffer_xyxy[:valid_count, 3] = boxes[:, 1] + boxes[:, 3] / 2
self._buffer_class_ids[:valid_count] = 0 self._buffer_class_ids[:valid_count] = class_ids_filtered
return ( return (
self._buffer_xyxy[:valid_count].copy(), self._buffer_xyxy[:valid_count].copy(),
scores_coarse.astype(np.float32), scores_coarse.astype(np.float32),
@@ -762,9 +763,10 @@ class PostProcessor:
continue continue
boxes_xywh = output[0:4, :].T boxes_xywh = output[0:4, :].T
obj_conf = output[4, :] # YOLO11: 无 objectness直接取各类别最大分数
person_scores = output[5, :] class_scores = output[4:, :]
scores = obj_conf * person_scores scores = class_scores.max(axis=0)
class_ids_raw = class_scores.argmax(axis=0)
coarse_mask = scores > 0.3 coarse_mask = scores > 0.3
if not np.any(coarse_mask): if not np.any(coarse_mask):
@@ -773,7 +775,7 @@ class PostProcessor:
boxes = boxes_xywh[coarse_mask] boxes = boxes_xywh[coarse_mask]
scores_coarse = scores[coarse_mask] scores_coarse = scores[coarse_mask]
class_ids = np.zeros(len(boxes), dtype=np.int32) class_ids = class_ids_raw[coarse_mask].astype(np.int32)
valid_count = len(boxes) valid_count = len(boxes)

View File

@@ -127,8 +127,8 @@ class ROICropper:
if roi.roi_type == ROIType.RECTANGLE: if roi.roi_type == ROIType.RECTANGLE:
if len(roi.coordinates) >= 2: if len(roi.coordinates) >= 2:
x1, y1 = int(roi.coordinates[0]) x1, y1 = int(roi.coordinates[0][0]), int(roi.coordinates[0][1])
x2, y2 = int(roi.coordinates[1]) x2, y2 = int(roi.coordinates[1][0]), int(roi.coordinates[1][1])
x1, x2 = sorted([x1, x2]) x1, x2 = sorted([x1, x2])
y1, y2 = sorted([y1, y2]) y1, y2 = sorted([y1, y2])
mask[y1:y2, x1:x2] = 255 mask[y1:y2, x1:x2] = 255
@@ -225,10 +225,10 @@ class LetterboxPreprocessor:
class BatchPreprocessor: class BatchPreprocessor:
"""Batch预处理器类 (batch=1)""" """Batch预处理器类 (支持动态 batch 1~8)"""
BATCH_SIZE = 1 MAX_BATCH_SIZE = 8
def __init__( def __init__(
self, self,
target_size: Tuple[int, int] = (480, 480), target_size: Tuple[int, int] = (480, 480),
@@ -236,12 +236,12 @@ class BatchPreprocessor:
): ):
self.target_size = target_size self.target_size = target_size
self.fp16_mode = fp16_mode self.fp16_mode = fp16_mode
self.batch_size = self.BATCH_SIZE self.max_batch_size = self.MAX_BATCH_SIZE
self._logger = get_logger("preprocessor") self._logger = get_logger("preprocessor")
self._logger.info( self._logger.info(
f"Batch预处理器: batch={self.batch_size}, " f"Batch预处理器: max_batch={self.max_batch_size}, "
f"target_size={target_size}, fp16={fp16_mode}" f"target_size={target_size}, fp16={fp16_mode}"
) )
@@ -272,23 +272,39 @@ class BatchPreprocessor:
images: List[np.ndarray] images: List[np.ndarray]
) -> Tuple[np.ndarray, List[Tuple[float, float, float, float]]]: ) -> Tuple[np.ndarray, List[Tuple[float, float, float, float]]]:
""" """
预处理批次图像 (batch=1) 预处理批次图像 (支持动态 batch)
Args: Args:
images: 图像列表 (只处理第一帧) images: 已经过 letterbox 的图像列表
Returns: Returns:
tuple: (批次数据 [1, 3, H, W], 缩放信息列表) tuple: (批次数据 [N, 3, H, W], 缩放信息列表)
""" """
if not images: if not images:
raise ValueError("Empty images list") raise ValueError("Empty images list")
letterbox = LetterboxPreprocessor(self.target_size) letterbox = LetterboxPreprocessor(self.target_size)
processed, scale_info = letterbox.preprocess(images[0]) processed_list = []
scale_infos = []
batch_data = self.preprocess_single(processed)
for img in images:
return batch_data, [scale_info] processed, scale_info = letterbox.preprocess(img)
processed_list.append(processed)
scale_infos.append(scale_info)
# 逐帧 normalize + transpose然后 stack 成 [N, 3, H, W]
batch_frames = []
for processed in processed_list:
normalized = processed.astype(np.float32) / 255.0
transposed = np.transpose(normalized, (2, 0, 1))
batch_frames.append(transposed)
batch_data = np.stack(batch_frames)
if self.fp16_mode:
batch_data = batch_data.astype(np.float16)
return batch_data, scale_infos
class ImagePreprocessor: class ImagePreprocessor:
@@ -323,7 +339,7 @@ class ImagePreprocessor:
self._logger.info( self._logger.info(
f"图像预处理器初始化完成: " f"图像预处理器初始化完成: "
f"输入尺寸 {config.input_width}x{config.input_height}, " f"输入尺寸 {config.input_width}x{config.input_height}, "
f"Batch大小 {self._batch_preprocessor.batch_size}, " f"最大Batch {self._batch_preprocessor.max_batch_size}, "
f"FP16模式 {config.fp16_mode}" f"FP16模式 {config.fp16_mode}"
) )
@@ -359,31 +375,36 @@ class ImagePreprocessor:
rois: Optional[List[Optional[ROIInfo]]] = None rois: Optional[List[Optional[ROIInfo]]] = None
) -> Tuple[np.ndarray, List[Tuple[float, float, float, float]]]: ) -> Tuple[np.ndarray, List[Tuple[float, float, float, float]]]:
""" """
预处理批次图像,自动 padding 到 batch=4 预处理批次图像
Args: Args:
images: 原始图像列表 images: 原始图像列表
rois: 可选的ROI配置列表 rois: 可选的ROI配置列表
Returns: Returns:
tuple: (批次数据 [4, 3, H, W], 缩放信息列表) tuple: (批次数据 [N, 3, H, W], 缩放信息列表)
""" """
from core.tensorrt_engine import pad_to_batch4
if rois is None: if rois is None:
rois = [None] * len(images) rois = [None] * len(images)
processed_images = [] processed_images = []
scale_info_list = [] scale_info_list = []
for image, roi in zip(images, rois): for image, roi in zip(images, rois):
processed, scale_info = self.preprocess_single(image, roi) if roi is not None:
processed_images.append(processed) cropped = self._cropper.crop(image, roi)
scale_info_list.append(scale_info) if cropped is None:
cropped = image
batch_data = self._batch_preprocessor.preprocess_batch(processed_images) else:
cropped = image
return batch_data, scale_info_list processed_images.append(cropped)
# BatchPreprocessor 处理 letterbox + normalize + stack
batch_data, batch_scale_infos = self._batch_preprocessor.preprocess_batch(
processed_images
)
return batch_data, batch_scale_infos
def revert_boxes( def revert_boxes(
self, self,
@@ -408,7 +429,7 @@ class ImagePreprocessor:
"config": { "config": {
"input_width": self.config.input_width, "input_width": self.config.input_width,
"input_height": self.config.input_height, "input_height": self.config.input_height,
"batch_size": self._batch_preprocessor.batch_size, "batch_size": self._batch_preprocessor.max_batch_size,
"fp16_mode": self.config.fp16_mode, "fp16_mode": self.config.fp16_mode,
}, },
} }

View File

@@ -31,6 +31,7 @@ class AlertInfo:
camera_id: str camera_id: str
roi_id: str roi_id: str
alert_type: str alert_type: str
bind_id: Optional[str] = None
target_class: Optional[str] = None target_class: Optional[str] = None
confidence: Optional[float] = None confidence: Optional[float] = None
bbox: Optional[List[float]] = field(default_factory=list) bbox: Optional[List[float]] = field(default_factory=list)
@@ -229,11 +230,10 @@ class ResultReporter:
result = self._client.publish(topic, json.dumps(alert_data, ensure_ascii=False)) result = self._client.publish(topic, json.dumps(alert_data, ensure_ascii=False))
if result[0] == mqtt.MQTT_ERR_SUCCESS: if result[0] == mqtt.MQTT_ERR_SUCCESS:
self._logger.log_alert( self._logger.info(
alert.alert_type, f"告警已发布: type={alert.alert_type}, "
alert.camera_id, f"camera={alert.camera_id}, roi={alert.roi_id}, "
alert.roi_id, f"confidence={alert.confidence}"
alert.confidence
) )
else: else:
raise Exception(f"MQTT 发布失败: {result[0]}") raise Exception(f"MQTT 发布失败: {result[0]}")

View File

@@ -55,15 +55,17 @@ def pad_to_batch4(frames: List[np.ndarray]) -> np.ndarray:
class TensorRTEngine: class TensorRTEngine:
"""TensorRT 引擎 (batch=1, FP16, 3×480×480) """TensorRT 引擎 (动态 batch 1~8, opt=4, FP16, 3×480×480)
特性: 特性:
- Buffer Pool: bindings 只在 init 阶段分配一次 - Buffer Pool: bindings 按 max_batch 分配,推理时按实际 batch 使用
- Pinned Memory: 使用 pagelocked host memory 提升 H2D/D2H 性能 - Pinned Memory: 使用 pagelocked host memory 提升 H2D/D2H 性能
- Async API: CUDA stream + async memcpy + execute_async_v2 - Async API: CUDA stream + async memcpy + execute_async_v2
- 推理锁: 保证多线程下 CUDA context 安全
""" """
BATCH_SIZE = 1 MAX_BATCH_SIZE = 8
OPT_BATCH_SIZE = 4
INPUT_SHAPE = (3, 480, 480) INPUT_SHAPE = (3, 480, 480)
def __init__(self, config: Optional[InferenceConfig] = None): def __init__(self, config: Optional[InferenceConfig] = None):
@@ -99,7 +101,7 @@ class TensorRTEngine:
self._logger.info( self._logger.info(
f"TensorRT 引擎初始化: " f"TensorRT 引擎初始化: "
f"{config.model_path}, " f"{config.model_path}, "
f"batch={self.BATCH_SIZE}, " f"batch=1~{self.MAX_BATCH_SIZE} (opt={self.OPT_BATCH_SIZE}), "
f"shape={self.INPUT_SHAPE}, " f"shape={self.INPUT_SHAPE}, "
f"fp16={config.fp16_mode}" f"fp16={config.fp16_mode}"
) )
@@ -131,7 +133,7 @@ class TensorRTEngine:
"load", "TensorRT", engine_path, True "load", "TensorRT", engine_path, True
) )
self._logger.info(f"TensorRT 引擎加载成功: {engine_path}") self._logger.info(f"TensorRT 引擎加载成功: {engine_path}")
self._logger.info(f" 输入: {len(self._inputs)}, 输出: {len(self._outputs)}, batch={self.BATCH_SIZE}") self._logger.info(f" 输入: {len(self._inputs)}, 输出: {len(self._outputs)}, batch=1~{self.MAX_BATCH_SIZE}")
return True return True
@@ -153,7 +155,7 @@ class TensorRTEngine:
dtype = trt.nptype(self._engine.get_binding_dtype(binding_idx)) dtype = trt.nptype(self._engine.get_binding_dtype(binding_idx))
if shape[0] == -1: if shape[0] == -1:
shape[0] = self.BATCH_SIZE shape[0] = self.MAX_BATCH_SIZE
shape = tuple(max(1, s) if s < 0 else s for s in shape) shape = tuple(max(1, s) if s < 0 else s for s in shape)
size = trt.volume(shape) size = trt.volume(shape)
@@ -181,65 +183,74 @@ class TensorRTEngine:
def infer(self, input_batch: np.ndarray) -> Tuple[List[np.ndarray], float]: def infer(self, input_batch: np.ndarray) -> Tuple[List[np.ndarray], float]:
""" """
执行推理(工业级 async 模式) 执行推理(工业级 async 模式,线程安全
Args: Args:
input_batch: numpy 输入shape = [batch, 3, 480, 480]dtype = np.float16 input_batch: numpy 输入shape = [batch, 3, 480, 480]dtype = np.float16
batch 可以是 1~MAX_BATCH_SIZE 的任意值
Returns: Returns:
tuple: (输出列表, 推理耗时ms) tuple: (输出列表, 推理耗时ms)
""" """
if self._engine is None or self._context is None: if self._engine is None or self._context is None:
raise RuntimeError("引擎未加载") raise RuntimeError("引擎未加载")
if len(self._inputs) == 0: if len(self._inputs) == 0:
raise RuntimeError("未分配输入 buffer") raise RuntimeError("未分配输入 buffer")
batch_size = input_batch.shape[0] batch_size = input_batch.shape[0]
start_time = time.perf_counter() with self._lock:
start_time = time.perf_counter()
self._cuda_context.push()
self._cuda_context.push()
try:
input_batch = np.ascontiguousarray(input_batch) try:
input_batch = np.ascontiguousarray(input_batch)
input_name = self._engine.get_binding_name(0)
actual_shape = list(input_batch.shape) input_name = self._engine.get_binding_name(0)
self._context.set_input_shape(input_name, actual_shape) actual_shape = list(input_batch.shape)
self._context.set_input_shape(input_name, actual_shape)
np.copyto(self._inputs[0].host, input_batch.ravel())
np.copyto(self._inputs[0].host[:input_batch.size], input_batch.ravel())
cuda.memcpy_htod_async(
self._inputs[0].device, cuda.memcpy_htod_async(
self._inputs[0].host, self._inputs[0].device,
self._stream self._inputs[0].host,
)
self._context.execute_async_v2(
bindings=self._bindings,
stream_handle=self._stream.handle
)
results = []
for out in self._outputs:
cuda.memcpy_dtoh_async(
out.host,
out.device,
self._stream self._stream
) )
results.append(out.host.copy())
self._context.execute_async_v2(
self._stream.synchronize() bindings=self._bindings,
stream_handle=self._stream.handle
inference_time_ms = (time.perf_counter() - start_time) * 1000 )
self._update_performance_stats(inference_time_ms, self.BATCH_SIZE) results = []
for out in self._outputs:
return results, inference_time_ms cuda.memcpy_dtoh_async(
out.host,
finally: out.device,
self._cuda_context.pop() self._stream
)
self._stream.synchronize()
# 根据实际 batch_size 裁剪输出
for out in self._outputs:
output_data = out.host.copy()
# 输出 shape 需按 batch_size 重新划分
per_batch_size = len(output_data) // self.MAX_BATCH_SIZE
actual_size = per_batch_size * batch_size
results.append(output_data[:actual_size])
inference_time_ms = (time.perf_counter() - start_time) * 1000
self._update_performance_stats(inference_time_ms, batch_size)
return results, inference_time_ms
finally:
self._cuda_context.pop()
def _update_performance_stats(self, inference_time_ms: float, batch_size: int): def _update_performance_stats(self, inference_time_ms: float, batch_size: int):
"""更新性能统计""" """更新性能统计"""

View File

@@ -174,25 +174,35 @@ class RTSPStreamReader:
) )
def _read_frame(self): def _read_frame(self):
"""读取帧线程函数""" """读取帧线程函数(带帧率控制)"""
frame_interval = 1.0 / self.target_fps
last_process_time = 0.0
while not self._stop_event.is_set(): while not self._stop_event.is_set():
if not self._connected: if not self._connected:
if not self._reconnect(): if not self._reconnect():
time.sleep(1) time.sleep(1)
continue continue
try: try:
ret, frame = self._cap.read() ret, frame = self._cap.read()
if not ret or frame is None: if not ret or frame is None:
self._logger.warning(f"读取帧失败: {self.camera_id}") self._logger.warning(f"读取帧失败: {self.camera_id}")
self._connected = False self._connected = False
continue continue
self._frame_count += 1 self._frame_count += 1
current_time_mono = time.monotonic()
# 帧率控制:跳过不满足间隔的帧
if (current_time_mono - last_process_time) < frame_interval:
continue
last_process_time = current_time_mono
current_time = datetime.now() current_time = datetime.now()
self._last_frame_time = current_time self._last_frame_time = current_time
frame_obj = VideoFrame( frame_obj = VideoFrame(
frame_id=generate_unique_id("frame"), frame_id=generate_unique_id("frame"),
camera_id=self.camera_id, camera_id=self.camera_id,
@@ -203,22 +213,22 @@ class RTSPStreamReader:
height=frame.shape[0], height=frame.shape[0],
fps=self.target_fps fps=self.target_fps
) )
try: try:
if self._frame_buffer.full(): if self._frame_buffer.full():
try: try:
self._frame_buffer.get_nowait() self._frame_buffer.get_nowait()
except queue.Empty: except queue.Empty:
pass pass
self._frame_buffer.put_nowait(frame_obj) self._frame_buffer.put_nowait(frame_obj)
if self._on_frame_callback: if self._on_frame_callback:
self._on_frame_callback(frame_obj) self._on_frame_callback(frame_obj)
except queue.Full: except queue.Full:
self._logger.debug(f"帧缓冲区已满: {self.camera_id}") self._logger.debug(f"帧缓冲区已满: {self.camera_id}")
except Exception as e: except Exception as e:
self._logger.error(f"读取帧异常: {e}") self._logger.error(f"读取帧异常: {e}")
self._connected = False self._connected = False

105
main.py
View File

@@ -59,7 +59,11 @@ class EdgeInferenceService:
self._batch_roi_queue: List[tuple] = [] self._batch_roi_queue: List[tuple] = []
self._batch_lock = threading.Lock() self._batch_lock = threading.Lock()
self._batch_event = threading.Event()
self._inference_thread: Optional[threading.Thread] = None
self._max_batch_size = 8
self._batch_timeout_sec = 0.05 # 50ms 攒批窗口
self._logger.info("Edge_Inference_Service 初始化开始") self._logger.info("Edge_Inference_Service 初始化开始")
def _init_database(self): def _init_database(self):
@@ -205,59 +209,59 @@ class EdgeInferenceService:
if not roi_items: if not roi_items:
return return
with self._batch_lock: with self._batch_lock:
self._batch_roi_queue.extend(roi_items) self._batch_roi_queue.extend(roi_items)
batch_size = len(self._batch_roi_queue) # 通知推理线程有新数据
if batch_size >= 1: self._batch_event.set()
self._batch_process_rois()
self._performance_stats["total_frames_processed"] += 1 self._performance_stats["total_frames_processed"] += 1
except Exception as e: except Exception as e:
self._logger.error(f"处理帧失败 {camera_id}: {e}") self._logger.error(f"处理帧失败 {camera_id}: {e}")
def _batch_process_rois(self): def _batch_process_rois(self):
"""批量处理 ROI - 预处理、推理、后处""" """批量处理 ROI - 真正的 batch 推"""
with self._batch_lock: with self._batch_lock:
roi_items = self._batch_roi_queue roi_items = self._batch_roi_queue
if not roi_items: if not roi_items:
return return
self._batch_roi_queue = [] self._batch_roi_queue = []
batch_size = len(roi_items)
try: try:
images = [item[4] for item in roi_items] images = [item[4] for item in roi_items]
scale_infos = [item[5] for item in roi_items] scale_infos = [item[5] for item in roi_items]
# 真正的 batch: 将所有 ROI 裁剪图拼成 [N,3,H,W] 一次推理
batch_data, _ = self._preprocessor._batch_preprocessor.preprocess_batch( batch_data, _ = self._preprocessor._batch_preprocessor.preprocess_batch(
images images
) )
engine = self._engine_manager.get_engine("default") engine = self._engine_manager.get_engine("default")
if engine is None: if engine is None:
return return
# 一次性推理整个 batch
outputs, inference_time_ms = engine.infer(batch_data) outputs, inference_time_ms = engine.infer(batch_data)
batch_size = len(roi_items)
batch_results = self._postprocessor.batch_process_detections( batch_results = self._postprocessor.batch_process_detections(
outputs, outputs,
batch_size, batch_size,
conf_threshold=self._settings.inference.conf_threshold conf_threshold=self._settings.inference.conf_threshold
) )
for idx, (camera_id, roi, bind, frame, _, scale_info) in enumerate(roi_items): for idx, (camera_id, roi, bind, frame, _, scale_info) in enumerate(roi_items):
boxes, scores, class_ids = batch_results[idx] boxes, scores, class_ids = batch_results[idx]
if len(boxes) > 0: if len(boxes) > 0:
self._handle_detections( self._handle_detections(
camera_id, roi, bind, frame, camera_id, roi, bind, frame,
boxes, scores, class_ids, boxes, scores, class_ids,
scale_info scale_info
) )
except Exception as e: except Exception as e:
self._logger.error(f"批量处理 ROI 失败: {e}") self._logger.error(f"批量处理 ROI 失败: {e}")
@@ -273,11 +277,20 @@ class EdgeInferenceService:
class_ids: any, class_ids: any,
scale_info: tuple scale_info: tuple
) -> list: ) -> list:
"""将检测结果转换为算法所需的 tracks 格式""" """将检测结果转换为算法所需的 tracks 格式
坐标从 letterbox 空间还原到 ROI 裁剪空间
"""
tracks = [] tracks = []
class_names = getattr(self._settings, 'class_names', ['person']) class_names = getattr(self._settings, 'class_names', ['person'])
for i, box in enumerate(boxes): # 将 letterbox 坐标还原到 ROI 裁剪空间
reverted_boxes = self._preprocessor.revert_boxes(
[box.tolist() if hasattr(box, 'tolist') else list(box) for box in boxes],
scale_info
)
for i, box in enumerate(reverted_boxes):
class_id = int(class_ids[i]) if class_ids[i] else 0 class_id = int(class_ids[i]) if class_ids[i] else 0
track = { track = {
"track_id": f"{roi.roi_id}_{i}", "track_id": f"{roi.roi_id}_{i}",
@@ -287,7 +300,7 @@ class EdgeInferenceService:
"matched_rois": [{"roi_id": roi.roi_id}], "matched_rois": [{"roi_id": roi.roi_id}],
} }
tracks.append(track) tracks.append(track)
return tracks return tracks
def _handle_detections( def _handle_detections(
@@ -351,16 +364,37 @@ class EdgeInferenceService:
) )
self._reporter.report_alert(alert_info, screenshot=frame.image) self._reporter.report_alert(alert_info, screenshot=frame.image)
self._logger.log_alert( self._logger.info(
alert.get("alert_type", "detection"), f"告警已生成: type={alert.get('alert_type', 'detection')}, "
camera_id, f"camera={camera_id}, roi={roi_id}, "
roi_id, f"confidence={alert.get('confidence', 1.0)}"
alert.get("confidence", 1.0)
) )
except Exception as e: except Exception as e:
self._logger.error(f"处理检测结果失败: {e}") self._logger.error(f"处理检测结果失败: {e}")
def _inference_worker(self):
    """Inference thread loop: accumulate queued ROI items, then run one batch.

    Runs until ``self._stop_event`` is set. Each iteration:

    1. Sleeps on ``self._batch_event`` (at most ``self._batch_timeout_sec``)
       until a producer signals new data.
    2. Once at least one item is queued, keeps accumulating until either
       ``self._max_batch_size`` items are queued or a window of
       ``self._batch_timeout_sec`` seconds, measured from the moment the
       first item was observed, has elapsed.
    3. Calls ``self._batch_process_rois()`` to handle the accumulated items.

    A single extra ``wait()`` is not enough for step 2: any producer's
    ``set()`` releases it immediately, so a partial batch would be flushed
    as soon as the next item arrives instead of after the window. The
    deadline loop below re-waits for the remaining window time instead.
    """
    # Function-scope import so this block does not depend on the file's
    # import section, which is not visible from here.
    import time

    while not self._stop_event.is_set():
        # Wait for new data to arrive, or time out and poll again.
        self._batch_event.wait(timeout=self._batch_timeout_sec)
        self._batch_event.clear()

        with self._batch_lock:
            queue_size = len(self._batch_roi_queue)
        if queue_size == 0:
            continue

        # Batching window: keep collecting until the batch is full, the
        # window expires, or shutdown is requested.
        deadline = time.monotonic() + self._batch_timeout_sec
        while queue_size < self._max_batch_size and not self._stop_event.is_set():
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            self._batch_event.wait(timeout=remaining)
            # Clear before re-reading the length: an item added after the
            # clear sets the event again, so no wake-up is lost.
            self._batch_event.clear()
            with self._batch_lock:
                queue_size = len(self._batch_roi_queue)

        self._batch_process_rois()
def start(self): def start(self):
"""启动服务""" """启动服务"""
if self._running: if self._running:
@@ -370,7 +404,16 @@ class EdgeInferenceService:
self._stop_event.clear() self._stop_event.clear()
self._load_cameras() self._load_cameras()
# 启动独立推理线程(生产者-消费者模式)
self._inference_thread = threading.Thread(
target=self._inference_worker,
name="InferenceWorker",
daemon=True
)
self._inference_thread.start()
self._logger.info("推理线程已启动")
self._stream_manager.start_all() self._stream_manager.start_all()
self._logger.info("Edge_Inference_Service 已启动") self._logger.info("Edge_Inference_Service 已启动")
@@ -429,7 +472,11 @@ class EdgeInferenceService:
self._running = False self._running = False
self._stop_event.set() self._stop_event.set()
self._batch_event.set() # 唤醒推理线程以退出
if self._inference_thread and self._inference_thread.is_alive():
self._inference_thread.join(timeout=5)
if self._stream_manager: if self._stream_manager:
self._stream_manager.stop_all() self._stream_manager.stop_all()
self._stream_manager.close() self._stream_manager.close()

View File

@@ -30,21 +30,22 @@ class TestNMSProcessor(unittest.TestCase):
self.assertEqual(len(keep_boxes), 1) self.assertEqual(len(keep_boxes), 1)
def test_nms_multiple_boxes(self): def test_nms_multiple_boxes(self):
"""测试多个检测框""" """测试多个检测框高IoU重叠框应被抑制"""
from core.postprocessor import NMSProcessor from core.postprocessor import NMSProcessor
nms = NMSProcessor(nms_threshold=0.45) nms = NMSProcessor(nms_threshold=0.45)
# box1 和 box2 高度重叠 (IoU > 0.45)box3 独立
boxes = np.array([ boxes = np.array([
[100, 100, 200, 200], [100, 100, 200, 200],
[150, 150, 250, 250], [110, 110, 210, 210],
[300, 300, 400, 400] [300, 300, 400, 400]
]) ])
scores = np.array([0.9, 0.85, 0.8]) scores = np.array([0.9, 0.85, 0.8])
class_ids = np.array([0, 0, 0]) class_ids = np.array([0, 0, 0])
keep_boxes, keep_scores, keep_classes = nms.process(boxes, scores, class_ids) keep_boxes, keep_scores, keep_classes = nms.process(boxes, scores, class_ids)
self.assertLessEqual(len(keep_boxes), 2) self.assertLessEqual(len(keep_boxes), 2)
def test_nms_empty_boxes(self): def test_nms_empty_boxes(self):

View File

@@ -130,41 +130,36 @@ class TestLetterboxPreprocessor(unittest.TestCase):
class TestBatchPreprocessor(unittest.TestCase): class TestBatchPreprocessor(unittest.TestCase):
"""测试Batch预处理器""" """测试Batch预处理器"""
def test_preprocess_batch(self): def test_preprocess_batch(self):
"""测试批次预处理""" """测试批次预处理"""
from core.preprocessor import BatchPreprocessor from core.preprocessor import BatchPreprocessor
preprocessor = BatchPreprocessor( preprocessor = BatchPreprocessor(
target_size=(480, 480), target_size=(480, 480),
max_batch_size=4,
fp16_mode=True fp16_mode=True
) )
images = [ images = [
np.zeros((640, 640, 3), dtype=np.uint8) np.zeros((640, 640, 3), dtype=np.uint8)
for _ in range(2) for _ in range(2)
] ]
result, scale_info_list = preprocessor.preprocess_batch(images) result, scale_info_list = preprocessor.preprocess_batch(images)
self.assertEqual(result.shape[0], 2) self.assertEqual(result.shape[0], 2)
self.assertEqual(len(scale_info_list), 2) self.assertEqual(len(scale_info_list), 2)
def test_memory_allocation(self): def test_max_batch_size(self):
"""测试内存分配""" """测试最大batch大小"""
from core.preprocessor import BatchPreprocessor from core.preprocessor import BatchPreprocessor
preprocessor = BatchPreprocessor( preprocessor = BatchPreprocessor(
target_size=(480, 480), target_size=(480, 480),
max_batch_size=4,
fp16_mode=True fp16_mode=True
) )
mem = preprocessor.allocate_batch_memory(2) self.assertEqual(preprocessor.max_batch_size, 8)
self.assertEqual(mem.shape[0], 2)
self.assertEqual(mem.dtype, np.float16)
class TestImagePreprocessor(unittest.TestCase): class TestImagePreprocessor(unittest.TestCase):
@@ -200,12 +195,12 @@ class TestImagePreprocessor(unittest.TestCase):
def test_get_statistics(self): def test_get_statistics(self):
"""测试获取统计""" """测试获取统计"""
from core.preprocessor import ImagePreprocessor from core.preprocessor import ImagePreprocessor
preprocessor = ImagePreprocessor() preprocessor = ImagePreprocessor()
stats = preprocessor.get_statistics() stats = preprocessor.get_statistics()
self.assertIn("config", stats) self.assertIn("config", stats)
self.assertIn("memory", stats) self.assertIn("batch_size", stats["config"])
if __name__ == "__main__": if __name__ == "__main__":