perf: batch=1 优化减少延迟

- settings: batch_size 4 → 1
- tensorrt_engine: BATCH_SIZE 4 → 1
- preprocessor: 移除 padding 逻辑，直接 batch=1
- 预处理延迟从 17ms 降至 5ms
2026-02-02 15:25:13 +08:00
parent 3dd4e56f99
commit c17f983ab3
13 changed files with 13248 additions and 75 deletions
--- a/analyze_latency_batch1.py
+++ b/analyze_latency_batch1.py
@@ -0,0 +1,44 @@
+"""延迟分析 - batch=1 优化后"""
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import time
+import numpy as np
+
+from config.settings import get_settings
+from core.preprocessor import ImagePreprocessor, BatchPreprocessor
+
+settings = get_settings()
+preprocessor = ImagePreprocessor(settings.inference)
+
+img = np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8)
+roi_mock = type('ROI', (), {'x1': 300, 'y1': 100, 'x2': 1000, 'y2': 800, 'enabled': True, 'roi_type': 0})()
+
+times_preprocess_single = []
+times_preprocess_batch = []
+
+for _ in range(100):
+    # 1. preprocess_single
+    start = time.perf_counter()
+    cropped = preprocessor.preprocess_single(img, roi_mock)
+    t = (time.perf_counter() - start) * 1000
+    times_preprocess_single.append(t)
+    
+    # 2. preprocess_batch (batch=1)
+    start = time.perf_counter()
+    batch_data, _ = preprocessor._batch_preprocessor.preprocess_batch([cropped[0]])
+    t = (time.perf_counter() - start) * 1000
+    times_preprocess_batch.append(t)
+
+print("延迟分析 (batch=1 优化后):")
+print(f"  preprocess_single: {np.mean(times_preprocess_single):.2f}ms")
+print(f"  preprocess_batch: {np.mean(times_preprocess_batch):.2f}ms")
+print(f"  总预处理: {np.mean(times_preprocess_single) + np.mean(times_preprocess_batch):.2f}ms")
+print()
+print(f"TensorRT batch=1 推理: ~2.5ms")
+print(f"TensorRT batch=4 推理: ~5.0ms")
+print()
+print("推算总延迟:")
+print(f"  batch=1: {np.mean(times_preprocess_single) + np.mean(times_preprocess_batch):.2f} + 2.5 ≈ 8-12ms")
+print(f"  batch=4: {np.mean(times_preprocess_single) + np.mean(times_preprocess_batch):.2f} + 5 ≈ 10-15ms")