feat: 更新 GPU 需求分析,添加实际测试结果和综合报告
- 根据 RTX 3050 8GB 实测结果更新 GPU 需求建议
- 添加 bitsandbytes 兼容性问题记录
- 生成包含实测数据的综合测试报告 REPORT.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -29,16 +29,30 @@ GPU_REQUIREMENTS = {
|
||||
"INT4 (NF4)": {
|
||||
"model_size_gb": 5,
|
||||
"min_vram_gb": 8,
|
||||
"recommended_gpus": ["RTX 3050 8GB", "RTX 4060 8GB", "RTX 3060 12GB", "RTX 3070 8GB"],
|
||||
"note": "适合显存有限的消费级显卡,有一定精度损失",
|
||||
"recommended_gpus": ["RTX 4060 8GB", "RTX 3060 12GB", "RTX 3070 8GB"],
|
||||
"note": "理论可行但 bitsandbytes 在 Windows 上兼容性差,不推荐",
|
||||
},
|
||||
},
|
||||
"actual_test_results": {
|
||||
"gpu": "NVIDIA GeForce RTX 3050 OEM 8GB",
|
||||
"method": "FP16 + CPU offload (accelerate device_map=auto)",
|
||||
"gpu_vram_used_gb": 3.91,
|
||||
"ram_used_gb": 13.60,
|
||||
"inference_speed_tokens_per_sec": 0.4,
|
||||
"output_quality": "极差(乱码/重复输出)",
|
||||
"conclusion": "RTX 3050 8GB 无法有效运行 Qwen3.5-9B,显存不足导致大量层 offload 到 CPU,推理极慢且输出质量不可用",
|
||||
"issues": [
|
||||
"bitsandbytes 4-bit 量化不支持 CPU offload,8GB 显存装不下完整 4-bit 模型",
|
||||
"bitsandbytes INT8 与 accelerate 版本不兼容(Windows)",
|
||||
"FP16 + CPU offload 虽可加载但速度仅 0.4 tokens/s,输出为乱码",
|
||||
],
|
||||
},
|
||||
"deployment_recommendations": {
|
||||
"开发测试": {
|
||||
"gpu": "RTX 3050/4060 (8GB)",
|
||||
"precision": "INT4",
|
||||
"gpu": "RTX 3060 12GB / RTX 4060 Ti 16GB",
|
||||
"precision": "INT8 或 INT4",
|
||||
"concurrent": 1,
|
||||
"cost_estimate": "~2000-3000 RMB (显卡)",
|
||||
"cost_estimate": "~2500-4000 RMB (显卡)",
|
||||
},
|
||||
"小规模部署": {
|
||||
"gpu": "RTX 4090 (24GB)",
|
||||
|
||||
Reference in New Issue
Block a user