"""Prometheus metric definitions and a small wrapper for exposing them.

The module declares the metric objects at import time and provides
``MetricsServer`` (plus a lazily created module-level instance) to start the
HTTP exporter and update the metrics.
"""

from typing import Optional

from prometheus_client import Counter, Gauge, Histogram, Info, start_http_server

from config import get_config

# --- Metric objects -------------------------------------------------------
# Declared once at import time; the declaration order is kept as-is.

SYSTEM_INFO = Info("system", "System information")

CAMERA_COUNT = Gauge("camera_count", "Number of active cameras")

CAMERA_FPS = Gauge("camera_fps", "Camera FPS", labelnames=["camera_id"])

# Bucket upper bounds are in seconds (0.01 up to 5.0).
INFERENCE_LATENCY = Histogram(
    "inference_latency_seconds",
    "Inference latency in seconds",
    labelnames=["camera_id"],
    buckets=[0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0],
)

ALERT_COUNT = Counter(
    "alert_total",
    "Total number of alerts",
    labelnames=["camera_id", "event_type"],
)

EVENT_QUEUE_SIZE = Gauge("event_queue_size", "Current size of event queue")

DETECTION_COUNT = Counter(
    "detection_total",
    "Total number of detections",
    labelnames=["camera_id", "roi_id"],
)

GPU_MEMORY_USED = Gauge(
    "gpu_memory_used_bytes",
    "GPU memory used",
    labelnames=["device"],
)

GPU_UTILIZATION = Gauge(
    "gpu_utilization_percent",
    "GPU utilization",
    labelnames=["device"],
)


class MetricsServer:
    """Start the Prometheus HTTP exporter and update the module's metrics.

    Attributes:
        port: TCP port handed to ``start_http_server``.
        started: ``True`` once ``start`` has launched the exporter.
    """

    def __init__(self, port: int = 9090):
        self.port = port
        self.started = False

    def start(self) -> None:
        """Launch the exporter unless it is already running or disabled.

        Does nothing when ``started`` is already set, or when
        ``get_config().monitoring.enabled`` is falsy. The configuration is
        only read when the server has not been started yet.
        """
        if self.started:
            return

        monitoring = get_config().monitoring
        if not monitoring.enabled:
            return

        start_http_server(self.port)
        self.started = True
        print(f"Prometheus metrics server started on port {self.port}")

    def update_camera_metrics(self, camera_id: int, fps: float) -> None:
        """Set the FPS gauge for ``camera_id`` (label is the id as a string)."""
        fps_gauge = CAMERA_FPS.labels(camera_id=str(camera_id))
        fps_gauge.set(fps)

    def record_inference(self, camera_id: int, latency: float) -> None:
        """Observe one inference latency sample for ``camera_id``."""
        latency_histogram = INFERENCE_LATENCY.labels(camera_id=str(camera_id))
        latency_histogram.observe(latency)

    def record_alert(self, camera_id: int, event_type: str) -> None:
        """Increment the alert counter for ``camera_id`` and ``event_type``."""
        alert_counter = ALERT_COUNT.labels(
            camera_id=str(camera_id),
            event_type=event_type,
        )
        alert_counter.inc()

    def update_event_queue(self, size: int) -> None:
        """Set the event-queue size gauge to ``size``."""
        EVENT_QUEUE_SIZE.set(size)

    def record_detection(self, camera_id: int, roi_id: str) -> None:
        """Increment the detection counter for ``camera_id`` and ``roi_id``."""
        detection_counter = DETECTION_COUNT.labels(
            camera_id=str(camera_id),
            roi_id=roi_id,
        )
        detection_counter.inc()

    def update_gpu_metrics(
        self, device: int, memory_bytes: float, utilization: float
    ) -> None:
        """Set the memory and utilization gauges for one GPU device.

        ``utilization`` is stored unchanged in ``gpu_utilization_percent``;
        presumably callers pass a 0-100 value -- confirm against callers.
        """
        device_label = str(device)
        GPU_MEMORY_USED.labels(device=device_label).set(memory_bytes)
        GPU_UTILIZATION.labels(device=device_label).set(utilization)


# Lazily created by get_metrics_server().
_metrics_server: Optional[MetricsServer] = None


def get_metrics_server() -> MetricsServer:
    """Return the module-level ``MetricsServer``, creating it on first use.

    On first call the instance is built with ``get_config().monitoring.port``;
    later calls return the same object without reading the configuration.
    """
    global _metrics_server
    if _metrics_server is not None:
        return _metrics_server

    monitoring_port = get_config().monitoring.port
    _metrics_server = MetricsServer(port=monitoring_port)
    return _metrics_server


def start_metrics_server() -> None:
    """Start the module-level metrics server (see ``MetricsServer.start``)."""
    get_metrics_server().start()


def update_system_info() -> None:
    """Publish host details on the ``system`` Info metric.

    Reports OS name and version, Python version, CPU count and total memory.
    Total memory is ``psutil.virtual_memory().total`` divided by ``1024**3``
    and rounded to two decimals. Every value is sent as a string.
    """
    # Imported here rather than at module level, as in the original code.
    import platform
    import psutil

    bytes_per_unit = 1024**3
    details = {
        "os": platform.system(),
        "os_version": platform.version(),
        "python_version": platform.python_version(),
        "cpu_count": str(psutil.cpu_count()),
        "memory_total_gb": str(
            round(psutil.virtual_memory().total / bytes_per_unit, 2)
        ),
    }
    SYSTEM_INFO.info(details)