feat: 告警HTTP上报 + 日志精简 + 边缘节点统一为edge

- 新增 alarm_upload_worker.py 异步告警上报(COS+HTTP)
- result_reporter 重构为Redis队列模式
- config_sync 适配WVP直推的聚合配置格式
- settings 默认 EDGE_DEVICE_ID 改为 edge
- 日志设置非告警模块为WARNING级别减少噪音
- main.py 集成新的告警上报流程

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-10 15:21:45 +08:00
parent 93a2278626
commit 0191e498f1
6 changed files with 1279 additions and 1075 deletions

116
main.py
View File

@@ -19,6 +19,7 @@ from core.preprocessor import ImagePreprocessor
from core.tensorrt_engine import TensorRTEngine, EngineManager
from core.postprocessor import PostProcessor
from core.result_reporter import ResultReporter
from core.alarm_upload_worker import AlarmUploadWorker
from algorithms import AlgorithmManager
from utils.logger import get_logger, StructuredLogger
from utils.version_control import get_version_control
@@ -45,6 +46,7 @@ class EdgeInferenceService:
self._engine_manager: Optional[EngineManager] = None
self._postprocessor: Optional[PostProcessor] = None
self._reporter: Optional[ResultReporter] = None
self._alarm_worker: Optional[AlarmUploadWorker] = None
self._algorithm_manager: Optional[AlgorithmManager] = None
self._processing_threads: Dict[str, threading.Thread] = {}
@@ -129,12 +131,22 @@ class EdgeInferenceService:
try:
self._reporter = ResultReporter()
self._logger.info("ResultReporter 对象已创建,准备初始化...")
self._reporter.initialize() # 初始化存储和MQTT连接
self._reporter.initialize() # 初始化 Redis 连接和本地存储
self._logger.info("结果上报器初始化成功")
except Exception as e:
self._logger.error(f"结果上报器初始化失败: {e}")
import traceback
self._logger.error(traceback.format_exc())
# 启动告警上报 Worker
try:
self._alarm_worker = AlarmUploadWorker()
self._alarm_worker.start()
self._logger.info("告警上报 Worker 启动成功")
except Exception as e:
self._logger.error(f"告警上报 Worker 启动失败: {e}")
import traceback
self._logger.error(traceback.format_exc())
def _init_algorithm_manager(self):
"""初始化算法管理器"""
@@ -199,9 +211,9 @@ class EdgeInferenceService:
try:
roi_configs = self._config_manager.get_roi_configs_with_bindings(camera_id)
# 每100帧打印一次状态
# 每100帧打印一次状态(非告警诊断日志,使用 DEBUG 级别)
if self._performance_stats["total_frames_processed"] % 100 == 0:
self._logger.info(f"[{camera_id}] 已处理 {self._performance_stats['total_frames_processed']} 帧, ROI数: {len(roi_configs)}")
self._logger.debug(f"[{camera_id}] 已处理 {self._performance_stats['total_frames_processed']} 帧, ROI数: {len(roi_configs)}")
roi_items = []
for roi in roi_configs:
@@ -260,13 +272,13 @@ class EdgeInferenceService:
# 一次性推理整个 batch
outputs, inference_time_ms = engine.infer(batch_data)
# 诊断:输出原始推理结果形状
# 诊断:输出原始推理结果形状(非告警诊断日志,使用 DEBUG 级别)
import numpy as np
if isinstance(outputs, np.ndarray):
self._logger.info(f"[推理诊断] batch_data shape={batch_data.shape}, output shape={outputs.shape}, 耗时={inference_time_ms:.1f}ms")
self._logger.debug(f"[推理诊断] batch_data shape={batch_data.shape}, output shape={outputs.shape}, 耗时={inference_time_ms:.1f}ms")
elif isinstance(outputs, (list, tuple)):
shapes = [o.shape if hasattr(o, 'shape') else type(o) for o in outputs]
self._logger.info(f"[推理诊断] batch_data shape={batch_data.shape}, outputs={shapes}, 耗时={inference_time_ms:.1f}ms")
self._logger.debug(f"[推理诊断] batch_data shape={batch_data.shape}, outputs={shapes}, 耗时={inference_time_ms:.1f}ms")
batch_size = len(roi_items)
batch_results = self._postprocessor.batch_process_detections(
@@ -276,7 +288,7 @@ class EdgeInferenceService:
)
total_detections = sum(len(r[0]) for r in batch_results)
self._logger.info(f"[推理] batch_size={batch_size}, 总检测数={total_detections}, conf_thresh={self._settings.inference.conf_threshold}")
self._logger.debug(f"[推理] batch_size={batch_size}, 总检测数={total_detections}, conf_thresh={self._settings.inference.conf_threshold}")
for idx, (camera_id, roi, bind, frame, _, scale_info) in enumerate(roi_items):
boxes, scores, class_ids = batch_results[idx]
@@ -350,9 +362,9 @@ class EdgeInferenceService:
algo_code = bind.algo_code
algo_params = bind.params or {}
# 诊断日志:检测到目标(使用 INFO 级别确保能看到)
# 诊断日志:检测到目标(非告警诊断日志,使用 DEBUG 级别)
if len(boxes) > 0:
self._logger.info(f"[{camera_id}] ROI={roi_id[:8]} 检测到 {len(boxes)} 个目标, algo={algo_code}")
self._logger.debug(f"[{camera_id}] ROI={roi_id[:8]} 检测到 {len(boxes)} 个目标, algo={algo_code}")
self._algorithm_manager.register_algorithm(
roi_id=roi_id,
@@ -366,8 +378,8 @@ class EdgeInferenceService:
if not tracks:
return
# 诊断日志:tracks 内容(INFO 级别)
self._logger.info(f"[{camera_id}] tracks: {[t.get('class') for t in tracks]}, target_class={bind.target_class}")
# 诊断日志:tracks 内容(非告警诊断日志,使用 DEBUG 级别)
self._logger.debug(f"[{camera_id}] tracks: {[t.get('class') for t in tracks]}, target_class={bind.target_class}")
alerts = self._algorithm_manager.process(
roi_id=roi_id,
@@ -382,9 +394,9 @@ class EdgeInferenceService:
if alerts:
self._logger.info(f"[{camera_id}] 算法 {algo_code} 返回 {len(alerts)} 个告警")
else:
# 获取算法状态用于诊断
# 获取算法状态用于诊断(非告警诊断日志,使用 DEBUG 级别)
algo_status = self._algorithm_manager.get_status(roi_id)
self._logger.info(f"[{camera_id}] 算法 {algo_code} 无告警, 状态: {algo_status}")
self._logger.debug(f"[{camera_id}] 算法 {algo_code} 无告警, 状态: {algo_status}")
for alert in alerts:
alert_type = alert.get("alert_type", "detection")
@@ -396,7 +408,7 @@ class EdgeInferenceService:
if last_alert_time is not None:
elapsed = (now - last_alert_time).total_seconds()
if elapsed < self._camera_cooldown_seconds:
self._logger.info(
self._logger.debug(
f"[去重] 跳过告警: camera={camera_id}, type={alert_type}, "
f"roi={roi_id}, 距上次={elapsed:.1f}s < {self._camera_cooldown_seconds}s"
)
@@ -405,23 +417,27 @@ class EdgeInferenceService:
self._camera_alert_cooldown[dedup_key] = now
self._performance_stats["total_alerts_generated"] += 1
from core.result_reporter import AlertInfo
alert_info = AlertInfo(
alert_id=f"{roi_id}_{bind.bind_id}_{int(frame.timestamp.timestamp())}",
camera_id=camera_id,
roi_id=roi_id,
bind_id=bind.bind_id,
device_id=self._settings.mqtt.device_id,
alert_type=alert_type,
algorithm=algo_code,
target_class=alert.get("class", bind.target_class or "unknown"),
confidence=alert.get("confidence", 1.0),
bbox=alert.get("bbox", []),
message=alert.get("message", ""),
timestamp=frame.timestamp,
duration_minutes=alert.get("duration_minutes"),
from core.result_reporter import AlarmInfo, generate_alarm_id
alarm_info = AlarmInfo(
alarm_id=generate_alarm_id(self._settings.mqtt.device_id),
alarm_type=alert_type,
device_id=camera_id,
scene_id=roi_id,
event_time=frame.timestamp.isoformat(),
alarm_level=alert.get("alarm_level", 2),
algorithm_code=algo_code,
confidence_score=alert.get("confidence", 1.0),
ext_data={
"duration_ms": int(alert.get("duration_minutes", 0) * 60 * 1000) if alert.get("duration_minutes") else None,
"roi_id": roi_id,
"bbox": alert.get("bbox", []),
"target_class": alert.get("class", bind.target_class or "unknown"),
"bind_id": bind.bind_id,
"message": alert.get("message", ""),
"edge_node_id": self._settings.mqtt.device_id,
},
)
self._reporter.report_alert(alert_info, screenshot=frame.image)
self._reporter.report_alarm(alarm_info, screenshot=frame.image)
self._logger.info(
f"告警已生成: type={alert_type}, "
@@ -474,42 +490,13 @@ class EdgeInferenceService:
self._logger.info("推理线程已启动")
self._stream_manager.start_all()
self._logger.info("Edge_Inference_Service 已启动")
self._start_heartbeat_thread()
self._register_signal_handlers()
self._wait_for_shutdown()
def _start_heartbeat_thread(self):
    """Start a daemon thread that periodically reports service status.

    Until ``self._stop_event`` is set, the thread repeatedly:

    1. recomputes the uptime from ``self._performance_stats["start_time"]``
       and stores it back under ``"uptime_seconds"``;
    2. builds a status dict (running flag, uptime, processed frame count,
       generated alert count, stream statistics when a stream manager
       exists);
    3. passes it to ``self._reporter.report_heartbeat`` together with
       ``self._settings.mqtt.device_id`` when a reporter is configured.

    Any exception raised during one cycle is logged and the loop continues,
    so a failing report never terminates the heartbeat thread.
    """
    heartbeat_interval_seconds = 30

    def heartbeat():
        while not self._stop_event.is_set():
            try:
                uptime = (
                    datetime.now() - self._performance_stats["start_time"]
                ).total_seconds()
                self._performance_stats["uptime_seconds"] = uptime
                status = {
                    "running": True,
                    "uptime_seconds": uptime,
                    "frames_processed": self._performance_stats["total_frames_processed"],
                    "alerts_generated": self._performance_stats["total_alerts_generated"],
                    "stream_stats": self._stream_manager.get_statistics() if self._stream_manager else {},
                }
                if self._reporter:
                    self._reporter.report_heartbeat(self._settings.mqtt.device_id, status)
            except Exception as e:
                self._logger.error(f"心跳上报失败: {e}")
            # Pace the loop by waiting on the stop event rather than
            # time.sleep(): a shutdown request then wakes the thread at once
            # instead of leaving it asleep for up to a full interval.
            # NOTE(review): assumes self._stop_event is a threading.Event
            # (it already exposes is_set()) -- confirm against __init__.
            self._stop_event.wait(heartbeat_interval_seconds)

    thread = threading.Thread(target=heartbeat, name="Heartbeat", daemon=True)
    thread.start()
def _register_signal_handlers(self):
"""注册信号处理器"""
def handle_signal(signum, frame):
@@ -549,7 +536,10 @@ class EdgeInferenceService:
if self._algorithm_manager:
self._algorithm_manager.stop_config_subscription()
if self._alarm_worker:
self._alarm_worker.stop()
if self._reporter:
self._reporter.close()