114 lines
2.9 KiB
Python
114 lines
2.9 KiB
Python
from typing import Optional
|
|
|
|
from prometheus_client import Counter, Gauge, Histogram, Info, start_http_server
|
|
|
|
from config import get_config
|
|
|
|
# ---------------------------------------------------------------------------
# Module-level Prometheus collectors.
#
# Each object is created once at import time; the names, help strings and
# label names below are exactly what the exporter exposes.
# ---------------------------------------------------------------------------

# Static key/value facts about the host (filled in by update_system_info()).
SYSTEM_INFO = Info("system", "System information")

# --- Cameras ---------------------------------------------------------------
CAMERA_COUNT = Gauge("camera_count", "Number of active cameras")
CAMERA_FPS = Gauge("camera_fps", "Camera FPS", labelnames=["camera_id"])

# --- Inference -------------------------------------------------------------
# Explicit bucket upper bounds, in seconds (0.01 up to 5.0).
INFERENCE_LATENCY = Histogram(
    "inference_latency_seconds",
    "Inference latency in seconds",
    labelnames=["camera_id"],
    buckets=[0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0],
)

# --- Events / alerts / detections -------------------------------------------
ALERT_COUNT = Counter(
    "alert_total",
    "Total number of alerts",
    labelnames=["camera_id", "event_type"],
)
EVENT_QUEUE_SIZE = Gauge("event_queue_size", "Current size of event queue")
DETECTION_COUNT = Counter(
    "detection_total",
    "Total number of detections",
    labelnames=["camera_id", "roi_id"],
)

# --- GPU -------------------------------------------------------------------
GPU_MEMORY_USED = Gauge(
    "gpu_memory_used_bytes",
    "GPU memory used",
    labelnames=["device"],
)
GPU_UTILIZATION = Gauge(
    "gpu_utilization_percent",
    "GPU utilization",
    labelnames=["device"],
)
|
|
|
|
|
|
class MetricsServer:
    """Facade over the Prometheus HTTP exporter and this module's collectors.

    ``start()`` launches the exporter on ``self.port``; the remaining methods
    are small helpers that write a value into one of the module-level
    metric objects, converting numeric identifiers to string label values.

    Attributes:
        port: TCP port handed to ``start_http_server``.
        started: ``True`` once ``start()`` has launched the exporter.
    """

    def __init__(self, port: int = 9090):
        self.started = False
        self.port = port

    def start(self):
        """Launch the exporter once, if monitoring is enabled in the config.

        Does nothing when the server was already started, or when
        ``get_config().monitoring.enabled`` is falsy (in which case
        ``started`` stays ``False``).
        """
        # Short-circuit keeps the config lookup from running once started.
        if self.started or not get_config().monitoring.enabled:
            return

        start_http_server(self.port)
        self.started = True
        print(f"Prometheus metrics server started on port {self.port}")

    def update_camera_metrics(self, camera_id: int, fps: float):
        """Set the ``camera_fps`` gauge for ``camera_id`` to ``fps``."""
        camera_label = str(camera_id)
        CAMERA_FPS.labels(camera_id=camera_label).set(fps)

    def record_inference(self, camera_id: int, latency: float):
        """Observe one ``latency`` sample in the per-camera latency histogram."""
        camera_label = str(camera_id)
        INFERENCE_LATENCY.labels(camera_id=camera_label).observe(latency)

    def record_alert(self, camera_id: int, event_type: str):
        """Increment the alert counter for this camera / event type pair."""
        alert_series = ALERT_COUNT.labels(
            camera_id=str(camera_id),
            event_type=event_type,
        )
        alert_series.inc()

    def update_event_queue(self, size: int):
        """Set the (unlabelled) event queue size gauge to ``size``."""
        EVENT_QUEUE_SIZE.set(size)

    def record_detection(self, camera_id: int, roi_id: str):
        """Increment the detection counter for this camera / ROI pair."""
        detection_series = DETECTION_COUNT.labels(
            camera_id=str(camera_id),
            roi_id=roi_id,
        )
        detection_series.inc()

    def update_gpu_metrics(self, device: int, memory_bytes: float, utilization: float):
        """Set both GPU gauges (memory used, utilization) for ``device``."""
        device_label = str(device)
        GPU_MEMORY_USED.labels(device=device_label).set(memory_bytes)
        GPU_UTILIZATION.labels(device=device_label).set(utilization)
|
|
|
|
|
|
# Module-wide MetricsServer instance; stays None until get_metrics_server()
# creates it on first call.
_metrics_server: Optional[MetricsServer] = None
|
|
|
|
|
|
def get_metrics_server() -> MetricsServer:
    """Return the shared ``MetricsServer``, building it on first use.

    The first call reads ``monitoring.port`` from ``get_config()`` and stores
    the new instance in the module-level ``_metrics_server``; later calls
    return that same object without consulting the config again.
    """
    global _metrics_server

    # Fast path: an instance already exists.
    if _metrics_server is not None:
        return _metrics_server

    monitoring_port = get_config().monitoring.port
    _metrics_server = MetricsServer(port=monitoring_port)
    return _metrics_server
|
|
|
|
|
|
def start_metrics_server():
    """Fetch the shared metrics server and call its ``start()`` method."""
    get_metrics_server().start()
|
|
|
|
|
|
def update_system_info():
    """Publish host facts (OS, Python, CPU count, RAM) to ``SYSTEM_INFO``.

    All values are sent as strings. ``platform`` and ``psutil`` are imported
    inside the function, so they are only loaded when it is called.
    """
    import platform

    import psutil

    os_name = platform.system()
    os_release = platform.version()
    interpreter = platform.python_version()
    cpu_total = str(psutil.cpu_count())

    # Bytes divided by 1024**3 (binary gigabytes), rounded to two decimals.
    total_bytes = psutil.virtual_memory().total
    memory_total = str(round(total_bytes / (1024**3), 2))

    host_facts = {
        "os": os_name,
        "os_version": os_release,
        "python_version": interpreter,
        "cpu_count": cpu_total,
        "memory_total_gb": memory_total,
    }
    SYSTEM_INFO.info(host_facts)
|