feat: 告警HTTP上报 + 日志精简 + 边缘节点统一为edge
- 新增 alarm_upload_worker.py 异步告警上报(COS+HTTP)
- result_reporter 重构为Redis队列模式
- config_sync 适配WVP直推的聚合配置格式
- settings 默认 EDGE_DEVICE_ID 改为 edge
- 日志设置非告警模块为WARNING级别减少噪音
- main.py 集成新的告警上报流程

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
116
main.py
116
main.py
@@ -19,6 +19,7 @@ from core.preprocessor import ImagePreprocessor
|
||||
from core.tensorrt_engine import TensorRTEngine, EngineManager
|
||||
from core.postprocessor import PostProcessor
|
||||
from core.result_reporter import ResultReporter
|
||||
from core.alarm_upload_worker import AlarmUploadWorker
|
||||
from algorithms import AlgorithmManager
|
||||
from utils.logger import get_logger, StructuredLogger
|
||||
from utils.version_control import get_version_control
|
||||
@@ -45,6 +46,7 @@ class EdgeInferenceService:
|
||||
self._engine_manager: Optional[EngineManager] = None
|
||||
self._postprocessor: Optional[PostProcessor] = None
|
||||
self._reporter: Optional[ResultReporter] = None
|
||||
self._alarm_worker: Optional[AlarmUploadWorker] = None
|
||||
self._algorithm_manager: Optional[AlgorithmManager] = None
|
||||
|
||||
self._processing_threads: Dict[str, threading.Thread] = {}
|
||||
@@ -129,12 +131,22 @@ class EdgeInferenceService:
|
||||
try:
|
||||
self._reporter = ResultReporter()
|
||||
self._logger.info("ResultReporter 对象已创建,准备初始化...")
|
||||
self._reporter.initialize() # 初始化存储和MQTT连接
|
||||
self._reporter.initialize() # 初始化 Redis 连接和本地存储
|
||||
self._logger.info("结果上报器初始化成功")
|
||||
except Exception as e:
|
||||
self._logger.error(f"结果上报器初始化失败: {e}")
|
||||
import traceback
|
||||
self._logger.error(traceback.format_exc())
|
||||
|
||||
# 启动告警上报 Worker
|
||||
try:
|
||||
self._alarm_worker = AlarmUploadWorker()
|
||||
self._alarm_worker.start()
|
||||
self._logger.info("告警上报 Worker 启动成功")
|
||||
except Exception as e:
|
||||
self._logger.error(f"告警上报 Worker 启动失败: {e}")
|
||||
import traceback
|
||||
self._logger.error(traceback.format_exc())
|
||||
|
||||
def _init_algorithm_manager(self):
|
||||
"""初始化算法管理器"""
|
||||
@@ -199,9 +211,9 @@ class EdgeInferenceService:
|
||||
try:
|
||||
roi_configs = self._config_manager.get_roi_configs_with_bindings(camera_id)
|
||||
|
||||
# 每100帧打印一次状态
|
||||
# 每100帧打印一次状态(非告警诊断日志,使用 DEBUG 级别)
|
||||
if self._performance_stats["total_frames_processed"] % 100 == 0:
|
||||
self._logger.info(f"[{camera_id}] 已处理 {self._performance_stats['total_frames_processed']} 帧, ROI数: {len(roi_configs)}")
|
||||
self._logger.debug(f"[{camera_id}] 已处理 {self._performance_stats['total_frames_processed']} 帧, ROI数: {len(roi_configs)}")
|
||||
|
||||
roi_items = []
|
||||
for roi in roi_configs:
|
||||
@@ -260,13 +272,13 @@ class EdgeInferenceService:
|
||||
# 一次性推理整个 batch
|
||||
outputs, inference_time_ms = engine.infer(batch_data)
|
||||
|
||||
# 诊断:输出原始推理结果形状
|
||||
# 诊断:输出原始推理结果形状(非告警诊断日志,使用 DEBUG 级别)
|
||||
import numpy as np
|
||||
if isinstance(outputs, np.ndarray):
|
||||
self._logger.info(f"[推理诊断] batch_data shape={batch_data.shape}, output shape={outputs.shape}, 耗时={inference_time_ms:.1f}ms")
|
||||
self._logger.debug(f"[推理诊断] batch_data shape={batch_data.shape}, output shape={outputs.shape}, 耗时={inference_time_ms:.1f}ms")
|
||||
elif isinstance(outputs, (list, tuple)):
|
||||
shapes = [o.shape if hasattr(o, 'shape') else type(o) for o in outputs]
|
||||
self._logger.info(f"[推理诊断] batch_data shape={batch_data.shape}, outputs={shapes}, 耗时={inference_time_ms:.1f}ms")
|
||||
self._logger.debug(f"[推理诊断] batch_data shape={batch_data.shape}, outputs={shapes}, 耗时={inference_time_ms:.1f}ms")
|
||||
|
||||
batch_size = len(roi_items)
|
||||
batch_results = self._postprocessor.batch_process_detections(
|
||||
@@ -276,7 +288,7 @@ class EdgeInferenceService:
|
||||
)
|
||||
|
||||
total_detections = sum(len(r[0]) for r in batch_results)
|
||||
self._logger.info(f"[推理] batch_size={batch_size}, 总检测数={total_detections}, conf_thresh={self._settings.inference.conf_threshold}")
|
||||
self._logger.debug(f"[推理] batch_size={batch_size}, 总检测数={total_detections}, conf_thresh={self._settings.inference.conf_threshold}")
|
||||
|
||||
for idx, (camera_id, roi, bind, frame, _, scale_info) in enumerate(roi_items):
|
||||
boxes, scores, class_ids = batch_results[idx]
|
||||
@@ -350,9 +362,9 @@ class EdgeInferenceService:
|
||||
algo_code = bind.algo_code
|
||||
algo_params = bind.params or {}
|
||||
|
||||
# 诊断日志:检测到目标(使用 INFO 级别确保能看到)
|
||||
# 诊断日志:检测到目标(非告警诊断日志,使用 DEBUG 级别)
|
||||
if len(boxes) > 0:
|
||||
self._logger.info(f"[{camera_id}] ROI={roi_id[:8]} 检测到 {len(boxes)} 个目标, algo={algo_code}")
|
||||
self._logger.debug(f"[{camera_id}] ROI={roi_id[:8]} 检测到 {len(boxes)} 个目标, algo={algo_code}")
|
||||
|
||||
self._algorithm_manager.register_algorithm(
|
||||
roi_id=roi_id,
|
||||
@@ -366,8 +378,8 @@ class EdgeInferenceService:
|
||||
if not tracks:
|
||||
return
|
||||
|
||||
# 诊断日志:tracks 内容(INFO 级别)
|
||||
self._logger.info(f"[{camera_id}] tracks: {[t.get('class') for t in tracks]}, target_class={bind.target_class}")
|
||||
# 诊断日志:tracks 内容(非告警诊断日志,使用 DEBUG 级别)
|
||||
self._logger.debug(f"[{camera_id}] tracks: {[t.get('class') for t in tracks]}, target_class={bind.target_class}")
|
||||
|
||||
alerts = self._algorithm_manager.process(
|
||||
roi_id=roi_id,
|
||||
@@ -382,9 +394,9 @@ class EdgeInferenceService:
|
||||
if alerts:
|
||||
self._logger.info(f"[{camera_id}] 算法 {algo_code} 返回 {len(alerts)} 个告警")
|
||||
else:
|
||||
# 获取算法状态用于诊断
|
||||
# 获取算法状态用于诊断(非告警诊断日志,使用 DEBUG 级别)
|
||||
algo_status = self._algorithm_manager.get_status(roi_id)
|
||||
self._logger.info(f"[{camera_id}] 算法 {algo_code} 无告警, 状态: {algo_status}")
|
||||
self._logger.debug(f"[{camera_id}] 算法 {algo_code} 无告警, 状态: {algo_status}")
|
||||
|
||||
for alert in alerts:
|
||||
alert_type = alert.get("alert_type", "detection")
|
||||
@@ -396,7 +408,7 @@ class EdgeInferenceService:
|
||||
if last_alert_time is not None:
|
||||
elapsed = (now - last_alert_time).total_seconds()
|
||||
if elapsed < self._camera_cooldown_seconds:
|
||||
self._logger.info(
|
||||
self._logger.debug(
|
||||
f"[去重] 跳过告警: camera={camera_id}, type={alert_type}, "
|
||||
f"roi={roi_id}, 距上次={elapsed:.1f}s < {self._camera_cooldown_seconds}s"
|
||||
)
|
||||
@@ -405,23 +417,27 @@ class EdgeInferenceService:
|
||||
self._camera_alert_cooldown[dedup_key] = now
|
||||
self._performance_stats["total_alerts_generated"] += 1
|
||||
|
||||
from core.result_reporter import AlertInfo
|
||||
alert_info = AlertInfo(
|
||||
alert_id=f"{roi_id}_{bind.bind_id}_{int(frame.timestamp.timestamp())}",
|
||||
camera_id=camera_id,
|
||||
roi_id=roi_id,
|
||||
bind_id=bind.bind_id,
|
||||
device_id=self._settings.mqtt.device_id,
|
||||
alert_type=alert_type,
|
||||
algorithm=algo_code,
|
||||
target_class=alert.get("class", bind.target_class or "unknown"),
|
||||
confidence=alert.get("confidence", 1.0),
|
||||
bbox=alert.get("bbox", []),
|
||||
message=alert.get("message", ""),
|
||||
timestamp=frame.timestamp,
|
||||
duration_minutes=alert.get("duration_minutes"),
|
||||
from core.result_reporter import AlarmInfo, generate_alarm_id
|
||||
alarm_info = AlarmInfo(
|
||||
alarm_id=generate_alarm_id(self._settings.mqtt.device_id),
|
||||
alarm_type=alert_type,
|
||||
device_id=camera_id,
|
||||
scene_id=roi_id,
|
||||
event_time=frame.timestamp.isoformat(),
|
||||
alarm_level=alert.get("alarm_level", 2),
|
||||
algorithm_code=algo_code,
|
||||
confidence_score=alert.get("confidence", 1.0),
|
||||
ext_data={
|
||||
"duration_ms": int(alert.get("duration_minutes", 0) * 60 * 1000) if alert.get("duration_minutes") else None,
|
||||
"roi_id": roi_id,
|
||||
"bbox": alert.get("bbox", []),
|
||||
"target_class": alert.get("class", bind.target_class or "unknown"),
|
||||
"bind_id": bind.bind_id,
|
||||
"message": alert.get("message", ""),
|
||||
"edge_node_id": self._settings.mqtt.device_id,
|
||||
},
|
||||
)
|
||||
self._reporter.report_alert(alert_info, screenshot=frame.image)
|
||||
self._reporter.report_alarm(alarm_info, screenshot=frame.image)
|
||||
|
||||
self._logger.info(
|
||||
f"告警已生成: type={alert_type}, "
|
||||
@@ -474,42 +490,13 @@ class EdgeInferenceService:
|
||||
self._logger.info("推理线程已启动")
|
||||
|
||||
self._stream_manager.start_all()
|
||||
|
||||
|
||||
self._logger.info("Edge_Inference_Service 已启动")
|
||||
|
||||
self._start_heartbeat_thread()
|
||||
|
||||
|
||||
self._register_signal_handlers()
|
||||
|
||||
|
||||
self._wait_for_shutdown()
|
||||
|
||||
def _start_heartbeat_thread(self):
|
||||
"""启动心跳线程"""
|
||||
def heartbeat():
|
||||
while not self._stop_event.is_set():
|
||||
try:
|
||||
uptime = (datetime.now() - self._performance_stats["start_time"]).total_seconds()
|
||||
self._performance_stats["uptime_seconds"] = uptime
|
||||
|
||||
status = {
|
||||
"running": True,
|
||||
"uptime_seconds": uptime,
|
||||
"frames_processed": self._performance_stats["total_frames_processed"],
|
||||
"alerts_generated": self._performance_stats["total_alerts_generated"],
|
||||
"stream_stats": self._stream_manager.get_statistics() if self._stream_manager else {},
|
||||
}
|
||||
|
||||
if self._reporter:
|
||||
self._reporter.report_heartbeat(self._settings.mqtt.device_id, status)
|
||||
|
||||
except Exception as e:
|
||||
self._logger.error(f"心跳上报失败: {e}")
|
||||
|
||||
time.sleep(30)
|
||||
|
||||
thread = threading.Thread(target=heartbeat, name="Heartbeat", daemon=True)
|
||||
thread.start()
|
||||
|
||||
def _register_signal_handlers(self):
|
||||
"""注册信号处理器"""
|
||||
def handle_signal(signum, frame):
|
||||
@@ -549,7 +536,10 @@ class EdgeInferenceService:
|
||||
|
||||
if self._algorithm_manager:
|
||||
self._algorithm_manager.stop_config_subscription()
|
||||
|
||||
|
||||
if self._alarm_worker:
|
||||
self._alarm_worker.stop()
|
||||
|
||||
if self._reporter:
|
||||
self._reporter.close()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user