修复:P0+P1 生产稳定性和性能优化(6项)
P0 稳定性修复:
- 告警去重字典添加惰性清理机制,防止长时间运行内存溢出
- Redis 连接断开时显式 close() 后再置 None,防止文件描述符泄漏
- 截图消息 ACK 移至成功路径,失败消息留在 pending list 自动重试

P1 性能优化:
- GPU NMS 添加 torch.no_grad() + 显式释放临时张量,减少显存碎片
- 截图存储改为 Redis 原始 bytes,去掉 Base64 编解码开销(兼容旧格式)
- ROI 配置查询 N+1 改为 get_all_bindings() 单次 JOIN 查询

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -47,6 +47,7 @@ class AlarmUploadWorker:
|
||||
self._logger = logging.getLogger("alarm_upload_worker")
|
||||
|
||||
self._redis: Optional[redis.Redis] = None
|
||||
self._redis_binary: Optional[redis.Redis] = None # 用于读取截图 bytes
|
||||
self._cos_client = None # 懒初始化
|
||||
|
||||
self._thread: Optional[threading.Thread] = None
|
||||
@@ -80,6 +81,16 @@ class AlarmUploadWorker:
|
||||
)
|
||||
self._redis.ping()
|
||||
self._logger.info(f"Worker Redis 连接成功: {redis_cfg.host}:{redis_cfg.port}/{redis_cfg.db}")
|
||||
|
||||
# 二进制 Redis 连接(用于读取截图 bytes,不做 decode)
|
||||
self._redis_binary = redis.Redis(
|
||||
host=redis_cfg.host,
|
||||
port=redis_cfg.port,
|
||||
db=redis_cfg.db,
|
||||
password=redis_cfg.password,
|
||||
decode_responses=False,
|
||||
socket_connect_timeout=5,
|
||||
)
|
||||
except Exception as e:
|
||||
self._logger.error(f"Worker Redis 连接失败: {e}")
|
||||
return
|
||||
@@ -136,6 +147,12 @@ class AlarmUploadWorker:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if self._redis_binary:
|
||||
try:
|
||||
self._redis_binary.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self._logger.info("AlarmUploadWorker 已停止")
|
||||
|
||||
def _worker_loop(self):
|
||||
@@ -184,21 +201,43 @@ class AlarmUploadWorker:
|
||||
|
||||
self._logger.info(f"开始处理告警: {alarm_id} (retry={retry_count})")
|
||||
|
||||
# Step 1: 上传截图到 COS(从 base64 解码后直接上传字节流)
|
||||
# Step 1: 上传截图到 COS
|
||||
snapshot_key = (alarm_data.get("ext_data") or {}).get("_snapshot_key")
|
||||
snapshot_b64 = alarm_data.get("snapshot_b64")
|
||||
object_key = None
|
||||
|
||||
if snapshot_b64:
|
||||
if snapshot_key:
|
||||
# 新格式:从独立 Redis key 获取原始 bytes
|
||||
try:
|
||||
image_bytes = self._redis_binary.get(snapshot_key) if self._redis_binary else None
|
||||
if image_bytes is None:
|
||||
self._logger.warning(f"截图 key 已过期: {snapshot_key}, 无截图继续上报")
|
||||
else:
|
||||
object_key = self._upload_snapshot_to_cos(
|
||||
image_bytes, alarm_id, alarm_data.get("device_id", "unknown")
|
||||
)
|
||||
if object_key is None:
|
||||
self._handle_retry(alarm_json, "COS 上传失败")
|
||||
return
|
||||
# 上传成功后删除临时 key
|
||||
try:
|
||||
if self._redis_binary:
|
||||
self._redis_binary.delete(snapshot_key)
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as e:
|
||||
self._logger.error(f"截图获取/上传失败: {e}")
|
||||
self._handle_retry(alarm_json, f"截图处理失败: {e}")
|
||||
return
|
||||
elif snapshot_b64:
|
||||
# 兼容旧格式 (Base64)
|
||||
try:
|
||||
import base64
|
||||
image_bytes = base64.b64decode(snapshot_b64)
|
||||
object_key = self._upload_snapshot_to_cos(
|
||||
image_bytes,
|
||||
alarm_id,
|
||||
alarm_data.get("device_id", "unknown"),
|
||||
image_bytes, alarm_id, alarm_data.get("device_id", "unknown")
|
||||
)
|
||||
if object_key is None:
|
||||
# COS 上传失败,进入重试
|
||||
self._handle_retry(alarm_json, "COS 上传失败")
|
||||
return
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user