fix: 修复10个关键bug,提升系统稳定性和性能

1. YOLO11输出解析错误: 移除不存在的objectness行,正确使用class_scores.max()
2. CPU NMS逻辑错误: keep_mask同时标记保留和抑制框导致NMS失效,改用独立suppressed集合
3. 坐标映射缺失: _build_tracks中scale_info未使用,添加revert_boxes还原到ROI裁剪空间
4. batch=1限制: 恢复真正的动态batch推理(1~8),BatchPreprocessor支持多图stack
5. 帧率控制缺失: _read_frame添加time.monotonic()间隔控制,按target_fps跳帧
6. 拉流推理耦合: 新增独立推理线程(InferenceWorker),生产者-消费者模式解耦
7. 攒批形同虚设: 添加50ms攒批窗口+max_batch阈值,替代>=1立即处理
8. LeavePost双重等待: LEAVING确认后直接触发告警,不再进入OFF_DUTY二次等待
9. register_algorithm每帧调用: 添加_registered_keys缓存,O(1)快速路径跳过
10. GPU context线程安全: TensorRT infer()内部加锁,防止多线程CUDA context竞争

附带修复:
- reset_algorithm中未定义algorithm_type变量(NameError)
- update_roi_params中循环变量key覆盖外层key
- AlertInfo缺少bind_id字段(TypeError)
- _logger.log_alert在标准logger上不存在(AttributeError)
- AlarmStateMachine死锁(Lock改为RLock)
- ROICropper.create_mask坐标解析错误
- 更新测试用例适配新API

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-04 16:47:26 +08:00
parent fa0304aa47
commit 98595402c6
9 changed files with 352 additions and 234 deletions

View File

@@ -104,55 +104,55 @@ class NMSProcessor:
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""CPU 向量化 NMS"""
order = np.argsort(scores)[::-1]
keep_mask = np.zeros(len(boxes), dtype=bool)
i = 0
while i < len(order) and i < max_output_size:
keep = []
suppressed = np.zeros(len(boxes), dtype=bool)
for i in range(len(order)):
if len(keep) >= max_output_size:
break
idx = order[i]
if keep_mask[idx]:
i += 1
if suppressed[idx]:
continue
keep_mask[idx] = True
keep.append(idx)
remaining = order[i + 1:]
if len(remaining) == 0:
break
remaining_mask = ~keep_mask[remaining]
if not np.any(remaining_mask):
remaining = remaining[~suppressed[remaining]]
if len(remaining) == 0:
break
remaining = remaining[remaining_mask]
xx1 = np.maximum(boxes[idx, 0], boxes[remaining, 0])
yy1 = np.maximum(boxes[idx, 1], boxes[remaining, 1])
xx2 = np.minimum(boxes[idx, 2], boxes[remaining, 2])
yy2 = np.minimum(boxes[idx, 3], boxes[remaining, 3])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
ovr = inter / (areas[idx] + areas[remaining] - inter + 1e-6)
suppress_mask = ovr > self.nms_threshold
for j in np.where(suppress_mask)[0]:
keep_mask[remaining[j]] = True
i += 1
keep_indices = np.where(keep_mask)[0]
if len(keep_indices) > max_output_size:
top_k = np.argsort(scores[keep_indices])[::-1][:max_output_size]
keep_indices = keep_indices[top_k]
suppressed[remaining[suppress_mask]] = True
keep_indices = np.array(keep, dtype=np.int32) if keep else np.array([], dtype=np.int32)
if len(keep_indices) == 0:
return (
np.array([], dtype=np.int32),
np.array([]),
np.array([])
)
return (
keep_indices.astype(np.int32),
keep_indices,
scores[keep_indices],
class_ids[keep_indices] if class_ids is not None else np.array([])
)
@@ -448,7 +448,7 @@ class AlarmStateMachine:
self.alert_cooldown = alert_cooldown
self._states: Dict[str, AlarmState] = {}
self._lock = threading.Lock()
self._lock = threading.RLock()
self._logger = get_logger("postprocessor")
@@ -513,9 +513,9 @@ class AlarmStateMachine:
self._logger.log_alert(
"detection_threshold_reached",
roi_id=roi_id,
camera_id="",
confidence=None
"",
roi_id,
None
)
return {
@@ -669,32 +669,33 @@ class PostProcessor:
return np.array([]), np.array([]), np.array([])
boxes_xywh = output[0:4, :].T
obj_conf = output[4, :]
person_scores = output[5, :]
scores = obj_conf * person_scores
# YOLO11 输出格式: [4+num_classes, 8400]
# 前4行是 xywh,后80行是各类别分数,没有单独的 objectness 行
class_scores = output[4:, :] # [num_classes, 8400]
scores = class_scores.max(axis=0) # 取各类别最大分数
class_ids = class_scores.argmax(axis=0) # 对应类别ID
coarse_mask = scores > prefilter_threshold
if not np.any(coarse_mask):
return np.array([]), np.array([]), np.array([])
boxes = boxes_xywh[coarse_mask]
scores_coarse = scores[coarse_mask]
class_ids_filtered = class_ids[coarse_mask]
valid_count = len(boxes)
np.copyto(self._buffer_boxes_xywh[:valid_count], boxes)
self._buffer_xyxy[:valid_count, 0] = boxes[:, 0] - boxes[:, 2] / 2
self._buffer_xyxy[:valid_count, 1] = boxes[:, 1] - boxes[:, 3] / 2
self._buffer_xyxy[:valid_count, 2] = boxes[:, 0] + boxes[:, 2] / 2
self._buffer_xyxy[:valid_count, 3] = boxes[:, 1] + boxes[:, 3] / 2
self._buffer_class_ids[:valid_count] = 0
self._buffer_class_ids[:valid_count] = class_ids_filtered
return (
self._buffer_xyxy[:valid_count].copy(),
scores_coarse.astype(np.float32),
@@ -762,9 +763,10 @@ class PostProcessor:
continue
boxes_xywh = output[0:4, :].T
obj_conf = output[4, :]
person_scores = output[5, :]
scores = obj_conf * person_scores
# YOLO11: 无 objectness,直接取各类别最大分数
class_scores = output[4:, :]
scores = class_scores.max(axis=0)
class_ids_raw = class_scores.argmax(axis=0)
coarse_mask = scores > 0.3
if not np.any(coarse_mask):
@@ -773,7 +775,7 @@ class PostProcessor:
boxes = boxes_xywh[coarse_mask]
scores_coarse = scores[coarse_mask]
class_ids = np.zeros(len(boxes), dtype=np.int32)
class_ids = class_ids_raw[coarse_mask].astype(np.int32)
valid_count = len(boxes)