- 根据 RTX 3050 8GB 实测结果更新 GPU 需求建议
- 添加 bitsandbytes 兼容性问题记录
- 生成包含实测数据的综合测试报告 REPORT.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
81 lines
2.3 KiB
JSON
{
  "model": "Qwen3.5-9B",
  "parameters": "9B",
  "precision_requirements": {
    "FP32": {
      "model_size_gb": 36,
      "min_vram_gb": 40,
      "recommended_gpus": [
        "A100 80GB",
        "H100 80GB"
      ],
      "note": "不推荐,显存占用过大"
    },
    "FP16/BF16": {
      "model_size_gb": 18,
      "min_vram_gb": 22,
      "recommended_gpus": [
        "A100 40GB",
        "RTX 4090 24GB",
        "RTX A6000 48GB",
        "V100 32GB"
      ],
      "note": "标准推理精度,推荐用于生产环境"
    },
    "INT8": {
      "model_size_gb": 9,
      "min_vram_gb": 12,
      "recommended_gpus": [
        "RTX 4070 Ti 16GB",
        "RTX 3090 24GB",
        "T4 16GB",
        "RTX 4080 16GB"
      ],
      "note": "轻微精度损失,性价比高"
    },
    "INT4 (NF4)": {
      "model_size_gb": 5,
      "min_vram_gb": 8,
      "recommended_gpus": [
        "RTX 4060 8GB",
        "RTX 3060 12GB",
        "RTX 3070 8GB"
      ],
      "note": "理论可行但 bitsandbytes 在 Windows 上兼容性差,不推荐"
    }
  },
  "actual_test_results": {
    "gpu": "NVIDIA GeForce RTX 3050 OEM 8GB",
    "method": "FP16 + CPU offload (accelerate device_map=auto)",
    "gpu_vram_used_gb": 3.91,
    "ram_used_gb": 13.6,
    "inference_speed_tokens_per_sec": 0.4,
    "output_quality": "极差(乱码/重复输出)",
    "conclusion": "RTX 3050 8GB 无法有效运行 Qwen3.5-9B,显存不足导致大量层 offload 到 CPU,推理极慢且输出质量不可用",
    "issues": [
      "bitsandbytes 4-bit 量化不支持 CPU offload,8GB 显存装不下完整 4-bit 模型",
      "bitsandbytes INT8 与 accelerate 版本不兼容(Windows)",
      "FP16 + CPU offload 虽可加载但速度仅 0.4 tokens/s,输出为乱码"
    ]
  },
  "deployment_recommendations": {
    "开发测试": {
      "gpu": "RTX 3060 12GB / RTX 4060 Ti 16GB",
      "precision": "INT8 或 INT4",
      "concurrent": 1,
      "cost_estimate": "~2500-4000 RMB (显卡)"
    },
    "小规模部署": {
      "gpu": "RTX 4090 (24GB)",
      "precision": "FP16",
      "concurrent": "2-4",
      "cost_estimate": "~12000-15000 RMB (显卡)"
    },
    "生产环境": {
      "gpu": "A100 40GB / H100 80GB",
      "precision": "FP16/BF16",
      "concurrent": "8-32 (vLLM)",
      "cost_estimate": "~60000-200000 RMB (显卡) 或云服务按需"
    }
  }
}