feat: 更新 GPU 需求分析,添加实际测试结果和综合报告

- 根据 RTX 3050 8GB 实测结果更新 GPU 需求建议
- 添加 bitsandbytes 兼容性问题记录
- 生成包含实测数据的综合测试报告 REPORT.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-16 13:09:39 +08:00
parent 4ac406572e
commit 42db2b0ca9
4 changed files with 187 additions and 13 deletions

View File

@@ -29,16 +29,30 @@ GPU_REQUIREMENTS = {
"INT4 (NF4)": {
"model_size_gb": 5,
"min_vram_gb": 8,
"recommended_gpus": ["RTX 3050 8GB", "RTX 4060 8GB", "RTX 3060 12GB", "RTX 3070 8GB"],
"note": "适合显存有限的消费级显卡,有一定精度损失",
"recommended_gpus": ["RTX 4060 8GB", "RTX 3060 12GB", "RTX 3070 8GB"],
"note": "理论可行但 bitsandbytes 在 Windows 上兼容性差,不推荐",
},
},
"actual_test_results": {
"gpu": "NVIDIA GeForce RTX 3050 OEM 8GB",
"method": "FP16 + CPU offload (accelerate device_map=auto)",
"gpu_vram_used_gb": 3.91,
"ram_used_gb": 13.60,
"inference_speed_tokens_per_sec": 0.4,
"output_quality": "极差(乱码/重复输出)",
"conclusion": "RTX 3050 8GB 无法有效运行 Qwen3.5-9B显存不足导致大量层 offload 到 CPU推理极慢且输出质量不可用",
"issues": [
"bitsandbytes 4-bit 量化不支持 CPU offload8GB 显存装不下完整 4-bit 模型",
"bitsandbytes INT8 与 accelerate 版本不兼容Windows",
"FP16 + CPU offload 虽可加载但速度仅 0.4 tokens/s输出为乱码",
],
},
"deployment_recommendations": {
"开发测试": {
"gpu": "RTX 3050/4060 (8GB)",
"precision": "INT4",
"gpu": "RTX 3060 12GB / RTX 4060 Ti 16GB",
"precision": "INT8 或 INT4",
"concurrent": 1,
"cost_estimate": "~2000-3000 RMB (显卡)",
"cost_estimate": "~2500-4000 RMB (显卡)",
},
"小规模部署": {
"gpu": "RTX 4090 (24GB)",