2026-03-16 11:45:51 +08:00
|
|
|
|
"""综合报告生成 - 汇总所有测试结果"""
|
|
|
|
|
|
import json
|
|
|
|
|
|
import os
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_json(path):
    """Load and return the parsed contents of a JSON file.

    Args:
        path: Filesystem path of the JSON file to read.

    Returns:
        The decoded JSON value, or ``None`` when ``path`` does not exist.
    """
    # A missing file is not an error here: the caller gets None instead.
    if not os.path.exists(path):
        return None

    with open(path, "r", encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _environment_section(speed):
    """Return the title, timestamp and test-environment table lines."""
    lines = [
        "# Qwen3.5-9B 性能测试报告",
        f"\n生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "\n## 1. 测试环境",
        "",
        "| 项目 | 值 |",
        "|------|-----|",
        "| 模型 | Qwen3.5-9B |",
        "| 加载方式 | 4-bit NF4 量化 (bitsandbytes),纯 GPU |",
        "| GPU | NVIDIA GeForce RTX 3050 OEM |",
        "| GPU 显存 | 8 GB |",
        "| CUDA | 12.1 |",
        "| Python 环境 | conda yolo |",
    ]
    # Measured memory figures are appended as extra rows of the same table.
    if speed and "memory" in speed:
        mem = speed["memory"]
        lines.extend([
            f"| 模型显存占用 | {mem.get('gpu_allocated_gb', 'N/A')} GB |",
            f"| 系统内存占用 | {mem.get('ram_used_gb', 'N/A')} GB |",
        ])
    return lines


def _speed_section(speed):
    """Return the inference-speed section lines."""
    lines = ["\n## 2. 推理速度", ""]
    if not (speed and "speed_benchmark" in speed):
        lines.append("*未运行速度测试*")
        return lines
    lines.extend([
        "| 测试场景 | 输入tokens | 输出tokens | 耗时(s) | 速度(tokens/s) |",
        "|---------|-----------|-----------|---------|---------------|",
    ])
    lines.extend(
        f"| {r['test_name']} | {r['input_tokens']} | {r['avg_output_tokens']} | {r['avg_time_s']} | {r['avg_tokens_per_sec']} |"
        for r in speed["speed_benchmark"]
    )
    return lines


def _accuracy_section(accuracy):
    """Return the accuracy section lines, including per-category rates."""
    lines = ["\n## 3. 精度评估", ""]
    if not (accuracy and "accuracy" in accuracy):
        lines.append("*未运行精度测试*")
        return lines
    acc = accuracy["accuracy"]
    lines.append(f"**总准确率: {acc['accuracy']}% ({acc['passed']}/{acc['total']})**\n")
    lines.extend([
        "| 分类 | 通过/总数 | 准确率 |",
        "|------|---------|--------|",
    ])
    for cat, stats in acc.get("category_stats", {}).items():
        total = stats["total"]
        # A category with no test cases would otherwise raise
        # ZeroDivisionError; report it as 0% instead.
        rate = stats["passed"] / total * 100 if total else 0.0
        lines.append(f"| {cat} | {stats['passed']}/{total} | {rate:.0f}% |")
    return lines


def _concurrency_section(concurrency):
    """Return the concurrency section lines followed by the note line."""
    lines = ["\n## 4. 并发性能", ""]
    if not (concurrency and "concurrency_results" in concurrency):
        lines.append("*未运行并发测试*")
        return lines
    lines.extend([
        "| 并发数 | 总耗时(s) | 吞吐量(tokens/s) | 平均延迟(s) |",
        "|-------|---------|----------------|-----------|",
    ])
    lines.extend(
        f"| {r['concurrency']} | {r['total_time_s']} | {r['throughput_tokens_per_sec']} | {r['avg_latency_s']} |"
        for r in concurrency["concurrency_results"]
    )
    lines.append(f"\n> 注: {concurrency.get('note', '单GPU串行推理')}")
    return lines


def _gpu_section(gpu_req):
    """Return the GPU requirement section lines."""
    lines = ["\n## 5. GPU 算力需求", ""]
    # Unlike the other sections, nothing is written when the data is missing.
    if not gpu_req:
        return lines
    lines.extend([
        "| 精度 | 模型大小 | 最低显存 | 推荐显卡 |",
        "|------|---------|---------|---------|",
    ])
    for precision, info in gpu_req.get("precision_requirements", {}).items():
        # Only the first two recommended GPUs are listed per row.
        gpus = ", ".join(info["recommended_gpus"][:2])
        lines.append(
            f"| {precision} | {info['model_size_gb']}GB | {info['min_vram_gb']}GB | {gpus} |"
        )
    return lines


def _conclusion_section():
    """Return the fixed conclusion, caveat and deployment-advice lines."""
    # NOTE(review): every figure below is hard-coded text; none of it is
    # derived from the result files loaded by generate_report().
    return [
        "\n## 6. 实际测试结论",
        "",
        "### RTX 3050 8GB 测试结果 (4-bit NF4 量化,纯 GPU)",
        "",
        "| 指标 | 结果 |",
        "|------|------|",
        "| GPU 显存占用 | 7.13 GB / 8 GB |",
        "| 系统内存占用 | 7.59 GB |",
        "| 推理速度 | 1.0-1.8 tokens/s |",
        "| 精度 | 90% (9/10) |",
        "| 输出质量 | 正常,回答准确 |",
        "",
        "### 注意事项",
        "",
        "1. **必须使用 4-bit NF4 量化**: device_map={\"\":0} 将模型全部放在 GPU 上",
        "2. **必须关闭 thinking 模式**: enable_thinking=False,否则输出中包含思考过程且容易被截断",
        "3. **显存接近上限**: 7.13GB / 8GB,长文本输入可能导致 OOM",
        "4. **并发不可行**: 单 GPU 串行推理,吞吐量恒定 ~2 tokens/s",
        "",
        "### 部署建议",
        "",
        "1. **RTX 3050 8GB 可用于开发测试**,4-bit 量化后勉强可用",
        "2. **推荐**: RTX 3060 12GB (INT8) 或 RTX 4060 Ti 16GB,有更大显存余量",
        "3. **生产部署**: RTX 4090 24GB (FP16) 或 A100 40GB/80GB + vLLM",
        "4. **替代方案**: 使用更小的模型如 Qwen2.5-3B 或 Qwen2.5-7B 在 8GB 显卡上更流畅运行",
    ]


def generate_report(results_dir="vsp/qwen3.5-9b/results"):
    """Build the combined Markdown test report, save it and print it.

    Reads ``benchmark_speed.json``, ``accuracy_results.json``,
    ``concurrency_results.json`` and ``gpu_requirements.json`` from
    ``results_dir``. A section whose file is missing gets a placeholder line
    (the GPU section is simply left empty). The report is written to
    ``REPORT.md`` in the same directory and echoed to stdout.

    Args:
        results_dir: Directory holding the result JSON files and receiving
            ``REPORT.md``. Defaults to the path the script always used.

    Returns:
        None.

    Raises:
        KeyError: If a result file exists but lacks a field that is read
            with direct indexing (for example ``test_name``).
        OSError: If ``REPORT.md`` cannot be written.
    """
    speed = load_json(os.path.join(results_dir, "benchmark_speed.json"))
    accuracy = load_json(os.path.join(results_dir, "accuracy_results.json"))
    concurrency = load_json(os.path.join(results_dir, "concurrency_results.json"))
    gpu_req = load_json(os.path.join(results_dir, "gpu_requirements.json"))

    report_lines = []
    report_lines.extend(_environment_section(speed))
    report_lines.extend(_speed_section(speed))
    report_lines.extend(_accuracy_section(accuracy))
    report_lines.extend(_concurrency_section(concurrency))
    report_lines.extend(_gpu_section(gpu_req))
    report_lines.extend(_conclusion_section())

    # Save the report next to the result files.
    report_text = "\n".join(report_lines)
    report_path = os.path.join(results_dir, "REPORT.md")
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(report_text)

    print(report_text)
    print(f"\n\n报告已保存到 {report_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Switch to the directory two levels above this script before running;
    # generate_report() reads and writes paths relative to the working
    # directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir + "/../..")
    generate_report()
|