Files
Security_AI_integrated/utils/metrics.py
2026-01-20 17:42:18 +08:00

114 lines
2.9 KiB
Python

from typing import Optional
from prometheus_client import Counter, Gauge, Histogram, Info, start_http_server
from config import get_config
# --- Process-wide Prometheus collectors -----------------------------------
# Each collector is created once at import time; the helpers further down in
# this module update them by name.

# Static key/value description of the host.
SYSTEM_INFO = Info("system", "System information")

# Camera-level gauges.
CAMERA_COUNT = Gauge("camera_count", "Number of active cameras")
CAMERA_FPS = Gauge("camera_fps", "Camera FPS", labelnames=("camera_id",))

# Inference latency distribution, one series per camera. Bucket upper bounds
# are in seconds (per the metric name) and span 10 ms to 5 s.
INFERENCE_LATENCY = Histogram(
    "inference_latency_seconds",
    "Inference latency in seconds",
    labelnames=("camera_id",),
    buckets=(0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0),
)

# Monotonic counters for alerts and detections.
ALERT_COUNT = Counter(
    "alert_total",
    "Total number of alerts",
    labelnames=("camera_id", "event_type"),
)
DETECTION_COUNT = Counter(
    "detection_total",
    "Total number of detections",
    labelnames=("camera_id", "roi_id"),
)

# Unlabelled gauge tracking the event queue size.
EVENT_QUEUE_SIZE = Gauge("event_queue_size", "Current size of event queue")

# Per-device GPU gauges.
GPU_MEMORY_USED = Gauge(
    "gpu_memory_used_bytes",
    "GPU memory used",
    labelnames=("device",),
)
GPU_UTILIZATION = Gauge(
    "gpu_utilization_percent",
    "GPU utilization",
    labelnames=("device",),
)
class MetricsServer:
    """Front end for the module-level Prometheus metrics.

    Holds the port on which the metrics HTTP endpoint is served and offers
    one small method per metric so callers never touch the collector
    objects directly.
    """

    def __init__(self, port: int = 9090):
        """Store the port to serve on; the HTTP server is not started here."""
        self.started = False
        self.port = port

    def start(self):
        """Start the metrics HTTP server unless it is running or disabled.

        Returns without doing anything when this instance has already
        started the server, or when ``monitoring.enabled`` in the loaded
        configuration is falsy.
        """
        # The short-circuit keeps the config lookup off the already-started
        # path, exactly like two separate guard clauses would.
        if self.started or not get_config().monitoring.enabled:
            return

        start_http_server(self.port)
        self.started = True
        print(f"Prometheus metrics server started on port {self.port}")

    def update_camera_metrics(self, camera_id: int, fps: float):
        """Set the FPS gauge for the given camera."""
        fps_gauge = CAMERA_FPS.labels(camera_id=str(camera_id))
        fps_gauge.set(fps)

    def record_inference(self, camera_id: int, latency: float):
        """Add one latency observation to the given camera's histogram."""
        latency_histogram = INFERENCE_LATENCY.labels(camera_id=str(camera_id))
        latency_histogram.observe(latency)

    def record_alert(self, camera_id: int, event_type: str):
        """Increment the alert counter for a camera / event-type pair."""
        alert_counter = ALERT_COUNT.labels(
            camera_id=str(camera_id),
            event_type=event_type,
        )
        alert_counter.inc()

    def update_event_queue(self, size: int):
        """Set the event-queue size gauge."""
        EVENT_QUEUE_SIZE.set(size)

    def record_detection(self, camera_id: int, roi_id: str):
        """Increment the detection counter for a camera / ROI pair."""
        detection_counter = DETECTION_COUNT.labels(
            camera_id=str(camera_id),
            roi_id=roi_id,
        )
        detection_counter.inc()

    def update_gpu_metrics(self, device: int, memory_bytes: float, utilization: float):
        """Set the memory and utilization gauges for one GPU device."""
        device_label = str(device)
        GPU_MEMORY_USED.labels(device=device_label).set(memory_bytes)
        GPU_UTILIZATION.labels(device=device_label).set(utilization)
# Lazily created module-wide MetricsServer; None until first requested.
_metrics_server: Optional[MetricsServer] = None


def get_metrics_server() -> MetricsServer:
    """Return the shared MetricsServer, creating it on first use.

    The first call builds the instance with the port read from
    ``monitoring.port`` in the loaded configuration; later calls return
    that same instance.
    """
    global _metrics_server
    if _metrics_server is not None:
        return _metrics_server

    monitoring_port = get_config().monitoring.port
    _metrics_server = MetricsServer(port=monitoring_port)
    return _metrics_server
def start_metrics_server():
    """Fetch the shared MetricsServer and ask it to start."""
    get_metrics_server().start()
def update_system_info():
    """Publish OS, Python, CPU and memory facts through SYSTEM_INFO.

    All values are strings; total memory is reported in GiB rounded to two
    decimal places.
    """
    import platform
    import psutil

    bytes_per_gib = 1024**3
    host_facts = {
        "os": platform.system(),
        "os_version": platform.version(),
        "python_version": platform.python_version(),
        "cpu_count": str(psutil.cpu_count()),
        "memory_total_gb": str(
            round(psutil.virtual_memory().total / bytes_per_gib, 2)
        ),
    }
    SYSTEM_INFO.info(host_facts)