157 lines
5.6 KiB
Python
157 lines
5.6 KiB
Python
"""
|
||
结果输出和报告生成模块
|
||
"""
|
||
|
||
import os
|
||
import json
|
||
import csv
|
||
from dataclasses import dataclass, asdict, field
|
||
from typing import List, Dict, Any, Optional
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
|
||
from .utils import ensure_dir, get_timestamp
|
||
|
||
|
||
@dataclass
class TestResult:
    """Result of a single benchmark test run.

    The first four fields describe the test configuration and are required.
    Every other field is a measurement or derived flag with a default value,
    so a result can be created first and filled in afterwards.
    """

    # The class name starts with "Test"; this tells pytest not to try to
    # collect it as a test class when it is imported into a test module.
    # It is an un-annotated class attribute, so it is NOT a dataclass field
    # and does not appear in ``to_dict()`` output.
    __test__ = False

    # --- Test configuration (required) ---
    resolution: int
    batch_size: int
    num_cameras: int
    target_fps: float

    # --- GPU utilization / memory ---
    gpu_utilization_avg: float = 0.0
    gpu_utilization_max: float = 0.0
    gpu_utilization_min: float = 0.0
    memory_used_mb: float = 0.0
    memory_utilization: float = 0.0

    # --- Throughput ---
    total_throughput_fps: float = 0.0
    per_camera_fps: float = 0.0
    total_frames: int = 0
    total_batches: int = 0

    # --- Latency ---
    avg_latency_ms: float = 0.0
    p95_latency_ms: float = 0.0
    p99_latency_ms: float = 0.0
    max_latency_ms: float = 0.0
    min_latency_ms: float = 0.0

    # --- Frame drops ---
    frame_drop_rate: float = 0.0
    dropped_frames: int = 0

    # --- Derived flags ---
    is_gpu_saturated: bool = False
    is_realtime_capable: bool = True
    saturation_reason: Optional[str] = None

    # --- Bookkeeping ---
    test_duration_sec: float = 0.0
    # ISO-8601 string of the (naive, local) time the instance was created.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a plain dictionary (``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "TestResult":
        """Build an instance from a mapping of field names to values.

        Raises:
            TypeError: If a required field is missing or ``data`` contains a
                key that is not a field of this class.
        """
        return cls(**data)

    def check_realtime_capability(self) -> bool:
        """Update and return ``is_realtime_capable``.

        A result counts as real-time capable when its P95 latency is strictly
        below the frame interval implied by ``target_fps``
        (``1000 / target_fps`` milliseconds).

        A non-positive ``target_fps`` has no valid frame interval. Instead of
        raising ``ZeroDivisionError`` the result is reported as not real-time
        capable.

        Returns:
            The new value of ``is_realtime_capable``.
        """
        if self.target_fps <= 0:
            # No meaningful deadline can be derived; be conservative.
            self.is_realtime_capable = False
            return self.is_realtime_capable

        frame_interval_ms = 1000 / self.target_fps
        self.is_realtime_capable = self.p95_latency_ms < frame_interval_ms
        return self.is_realtime_capable
|
||
|
||
|
||
def export_json(results: List["TestResult"], output_path: str) -> None:
    """Export benchmark results to a JSON file.

    The file contains a single object with two keys:
    ``benchmark_results`` (one dictionary per result, produced by its
    ``to_dict()`` method) and ``generated_at`` (ISO-8601 string of the
    current naive local time).

    Args:
        results: Results to serialize; may be empty.
        output_path: Destination file path. It is overwritten if it exists.
    """
    # A bare file name has an empty dirname. There is nothing to create in
    # that case, so do not hand an empty path to the directory helper.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        ensure_dir(parent_dir)

    payload = {
        "benchmark_results": [r.to_dict() for r in results],
        "generated_at": datetime.now().isoformat(),
    }

    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)
|
||
|
||
|
||
def export_csv(results: List["TestResult"], output_path: str) -> None:
    """Export benchmark results to a CSV file.

    The header row is taken from the keys of the first result's
    ``to_dict()`` output; one data row is written per result.

    Args:
        results: Results to write. If empty, no file is written (the parent
            directory, if any, is still created).
        output_path: Destination file path. It is overwritten if it exists.
    """
    # A bare file name has an empty dirname. There is nothing to create in
    # that case, so do not hand an empty path to the directory helper.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        ensure_dir(parent_dir)

    if not results:
        return

    rows = [r.to_dict() for r in results]

    # newline="" lets the csv module control line endings itself.
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0]))
        writer.writeheader()
        writer.writerows(rows)
|
||
|
||
|
||
def _build_report_lines(results: List["TestResult"]) -> List[str]:
    """Render the Markdown report as a list of lines (no file I/O)."""
    lines: List[str] = []

    # Title and generation time.
    lines.append("# FP16 性能评估 Benchmark 报告")
    lines.append("")
    lines.append(f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    lines.append("")

    # Section 1: overview of the tested parameter space.
    lines.append("## 1. 测试概述")
    lines.append("")
    lines.append(f"- 总测试数: {len(results)}")
    lines.append(f"- 测试分辨率: {sorted({r.resolution for r in results})}")
    lines.append(f"- Batch Size: {sorted({r.batch_size for r in results})}")
    lines.append(f"- 摄像头数量: {sorted({r.num_cameras for r in results})}")
    lines.append("")

    # Section 2: fixed explanatory text (not derived from `results`).
    lines.append("## 2. GPU 利用率分析")
    lines.append("")
    lines.append("### 2.1 为什么原系统 GPU 利用率只有 ~30%?")
    lines.append("")
    lines.append("1. **单帧同步推理**: 每次只处理 1 帧,GPU 计算单元大量空闲")
    lines.append("2. **无 TensorRT 优化**: PyTorch 直接推理缺少算子融合和内存优化")
    lines.append("3. **单 CUDA Stream**: 所有操作串行执行,无法实现流水线并行")
    lines.append("4. **CPU 瓶颈**: 解码和推理串行,GPU 等待数据")
    lines.append("")

    # Section 3: one result table per resolution.
    lines.append("## 3. 性能测试结果")
    lines.append("")

    # Group once instead of rescanning `results` for every resolution.
    by_resolution: Dict[int, list] = {}
    for r in results:
        by_resolution.setdefault(r.resolution, []).append(r)

    # Number subsections sequentially (3.1, 3.2, ...). Deriving the number
    # from the resolution value gives gaps and can collide.
    for index, resolution in enumerate(sorted(by_resolution), start=1):
        lines.append(f"### 3.{index}. 分辨率 {resolution}×{resolution}")
        lines.append("")
        lines.append("| Batch | 摄像头 | 目标FPS | 实际吞吐 | GPU利用率 | 平均延迟 | P95延迟 | 实时性 | 饱和 |")
        lines.append("|-------|--------|---------|----------|-----------|----------|---------|--------|------|")

        ordered = sorted(
            by_resolution[resolution],
            key=lambda x: (x.batch_size, x.num_cameras, x.target_fps),
        )
        for r in ordered:
            realtime = "✅" if r.is_realtime_capable else "❌"
            saturated = "🔴" if r.is_gpu_saturated else "🟢"
            lines.append(
                f"| {r.batch_size} | {r.num_cameras} | {r.target_fps:.0f} | "
                f"{r.total_throughput_fps:.1f} | {r.gpu_utilization_avg:.1f}% | "
                f"{r.avg_latency_ms:.2f}ms | {r.p95_latency_ms:.2f}ms | {realtime} | {saturated} |"
            )
        lines.append("")

    # Section 4: fixed recommendation table (not derived from `results`).
    lines.append("## 4. 部署建议")
    lines.append("")
    lines.append("| 应用场景 | 推荐分辨率 | 推荐 Batch Size | 最大摄像头数 | 说明 |")
    lines.append("|----------|------------|-----------------|--------------|------|")
    lines.append("| 离岗检测 | 320×320 | 8 | 15-30 | 低精度要求,追求高并发 |")
    lines.append("| 周界入侵 | 480×480 | 4-8 | 10-15 | 高精度要求,平衡延迟 |")
    lines.append("| 综合部署 | 320×320 | 8 | 10-15 | 兼顾精度和并发 |")
    lines.append("")

    return lines


def generate_report(results: List["TestResult"], output_dir: str) -> str:
    """Generate a Markdown analysis report and write it to ``output_dir``.

    The report contains a test overview, a fixed GPU-utilization analysis,
    one result table per resolution (rows sorted by batch size, camera
    count and target FPS) and a fixed deployment-recommendation table.

    Args:
        results: Benchmark results to summarize; may be empty.
        output_dir: Directory that receives the report file.

    Returns:
        Path of the written file, named
        ``benchmark_report_<get_timestamp()>.md`` inside ``output_dir``.
    """
    ensure_dir(output_dir)
    report_path = os.path.join(output_dir, f"benchmark_report_{get_timestamp()}.md")

    lines = _build_report_lines(results)

    with open(report_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))

    return report_path
|