优化:推理性能日志改为周期汇总输出

This commit is contained in:
2026-03-31 16:08:53 +08:00
parent 5dd9dc15d5
commit 714361b57f
2 changed files with 40 additions and 10 deletions

41
main.py
View File

@@ -9,6 +9,7 @@ import sys
import threading
import signal
import time
from collections import deque
from datetime import datetime
from typing import Dict, Any, Optional, List, Tuple
@@ -84,6 +85,9 @@ class EdgeInferenceService:
self._scheduler_interval_sec = 0.01
self._max_frame_age_sec = 0.5
self._max_pending_roi_items = self._max_batch_size * 32
self._latency_window = deque(maxlen=256)
self._last_latency_log_time = 0.0
self._latency_log_interval_sec = 5.0
# 摄像头级别告警去重:同一摄像头+告警类型在冷却期内只上报一次
self._camera_alert_cooldown: Dict[str, datetime] = {}
@@ -672,6 +676,35 @@ class EdgeInferenceService:
if not had_frame:
self._stop_event.wait(self._scheduler_interval_sec)
def _log_inference_latency_summary(self, inference_time_ms: float, batch_size: int):
"""聚合输出推理时延,避免逐批 INFO 日志刷屏。"""
self._latency_window.append({
"latency_ms": inference_time_ms,
"batch_size": batch_size,
})
now = time.monotonic()
if now - self._last_latency_log_time < self._latency_log_interval_sec:
return
self._last_latency_log_time = now
samples = list(self._latency_window)
if not samples:
return
latencies = sorted(sample["latency_ms"] for sample in samples)
batches = [sample["batch_size"] for sample in samples]
p95_index = max(0, min(len(latencies) - 1, int(len(latencies) * 0.95) - 1))
self._logger.info(
"推理性能汇总: "
f"样本={len(samples)}, "
f"batch_avg={sum(batches) / len(batches):.2f}, "
f"latency_avg={sum(latencies) / len(latencies):.2f}ms, "
f"latency_p95={latencies[p95_index]:.2f}ms, "
f"latency_max={latencies[-1]:.2f}ms"
)
def _batch_process_rois(self):
"""批量处理 ROI - 真正的 batch 推理(按 max_batch_size 分块)"""
@@ -701,7 +734,13 @@ class EdgeInferenceService:
# 一次性推理整个 batch
outputs, inference_time_ms = engine.infer(batch_data)
self._performance_stats["inference_batches"] += 1
self._logger.log_inference_latency( self._logger.performance(
"inference_latency_ms",
inference_time_ms,
batch_size=len(chunk),
throughput_fps=1000.0 / inference_time_ms if inference_time_ms > 0 else 0
)
self._log_inference_latency_summary(
inference_time_ms,
batch_size=len(chunk),
)

View File

@@ -222,15 +222,6 @@ class StructuredLogger:
duration_ms: Optional[float] = None, **tags):
"""记录性能指标"""
self._performance_logger.record(metric_name, value, tags)
perf_data = {
"metric": metric_name,
"value": value,
"duration_ms": duration_ms,
"tags": tags
}
self.info(f"性能指标: {metric_name} = {value}", **perf_data)
def log_inference_latency(self, latency_ms: float, batch_size: int = 1):
"""记录推理延迟"""