- 根据 RTX 3050 8GB 实测结果更新 GPU 需求建议 - 添加 bitsandbytes 兼容性问题记录 - 生成包含实测数据的综合测试报告 REPORT.md Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
105 lines
3.7 KiB
Python
105 lines
3.7 KiB
Python
"""GPU 算力需求分析"""
|
||
import json
|
||
import os
|
||
|
||
|
||
# Estimated VRAM requirements for Qwen3.5-9B at different numeric precisions,
# plus measured results from an RTX 3050 8GB run and per-scenario deployment
# recommendations. The whole mapping is serialized to JSON as-is, so every key
# and value below is runtime data, not commentary.
GPU_REQUIREMENTS = {
    "model": "Qwen3.5-9B",
    "parameters": "9B",
    # One entry per precision: estimated weight size, minimum VRAM, example
    # cards, and a free-form note.
    "precision_requirements": {
        "FP32": {
            "model_size_gb": 36,
            "min_vram_gb": 40,
            "recommended_gpus": ["A100 80GB", "H100 80GB"],
            "note": "不推荐,显存占用过大",
        },
        "FP16/BF16": {
            "model_size_gb": 18,
            "min_vram_gb": 22,
            "recommended_gpus": [
                "A100 40GB",
                "RTX 4090 24GB",
                "RTX A6000 48GB",
                "V100 32GB",
            ],
            "note": "标准推理精度,推荐用于生产环境",
        },
        "INT8": {
            "model_size_gb": 9,
            "min_vram_gb": 12,
            "recommended_gpus": [
                "RTX 4070 Ti 16GB",
                "RTX 3090 24GB",
                "T4 16GB",
                "RTX 4080 16GB",
            ],
            "note": "轻微精度损失,性价比高",
        },
        # NOTE(review): the measured "issues" list below reports that a full
        # 4-bit model did not fit in 8 GB of VRAM, which is in tension with
        # min_vram_gb=8 and the 8 GB cards listed here -- confirm whether this
        # entry should be revised.
        "INT4 (NF4)": {
            "model_size_gb": 5,
            "min_vram_gb": 8,
            "recommended_gpus": ["RTX 4060 8GB", "RTX 3060 12GB", "RTX 3070 8GB"],
            "note": "理论可行但 bitsandbytes 在 Windows 上兼容性差,不推荐",
        },
    },
    # Measurements recorded from a real run on an 8 GB card.
    "actual_test_results": {
        "gpu": "NVIDIA GeForce RTX 3050 OEM 8GB",
        "method": "FP16 + CPU offload (accelerate device_map=auto)",
        "gpu_vram_used_gb": 3.91,
        "ram_used_gb": 13.60,
        "inference_speed_tokens_per_sec": 0.4,
        "output_quality": "极差(乱码/重复输出)",
        "conclusion": "RTX 3050 8GB 无法有效运行 Qwen3.5-9B,显存不足导致大量层 offload 到 CPU,推理极慢且输出质量不可用",
        "issues": [
            "bitsandbytes 4-bit 量化不支持 CPU offload,8GB 显存装不下完整 4-bit 模型",
            "bitsandbytes INT8 与 accelerate 版本不兼容(Windows)",
            "FP16 + CPU offload 虽可加载但速度仅 0.4 tokens/s,输出为乱码",
        ],
    },
    # Suggested hardware per deployment scenario. "concurrent" is an int for
    # the first scenario and a range string for the others, exactly as
    # recorded.
    "deployment_recommendations": {
        "开发测试": {
            "gpu": "RTX 3060 12GB / RTX 4060 Ti 16GB",
            "precision": "INT8 或 INT4",
            "concurrent": 1,
            "cost_estimate": "~2500-4000 RMB (显卡)",
        },
        "小规模部署": {
            "gpu": "RTX 4090 (24GB)",
            "precision": "FP16",
            "concurrent": "2-4",
            "cost_estimate": "~12000-15000 RMB (显卡)",
        },
        "生产环境": {
            "gpu": "A100 40GB / H100 80GB",
            "precision": "FP16/BF16",
            "concurrent": "8-32 (vLLM)",
            "cost_estimate": "~60000-200000 RMB (显卡) 或云服务按需",
        },
    },
}
|
||
|
||
|
||
def analyze_gpu_requirements(*, requirements=None, output_dir="vsp/qwen3.5-9b/results"):
    """Print the GPU requirement analysis and save it as JSON.

    Args:
        requirements: Mapping with "precision_requirements" and
            "deployment_recommendations" sections. Each precision entry must
            provide "model_size_gb", "min_vram_gb", "recommended_gpus" and
            "note". When None, the module-level GPU_REQUIREMENTS is used.
        output_dir: Directory that receives ``gpu_requirements.json``. It is
            created if missing. The default is a relative path, so it is
            resolved against the current working directory.

    Raises:
        KeyError: If a required section or per-precision field is missing.
        OSError: If the output directory or file cannot be written.
    """
    if requirements is None:
        requirements = GPU_REQUIREMENTS

    banner = "=" * 60

    print(banner)
    print("Qwen3.5-9B GPU 算力需求分析")
    print(banner)

    # Per-precision VRAM estimates.
    for precision, info in requirements["precision_requirements"].items():
        print(f"\n{precision}:")
        print(f" 模型大小: ~{info['model_size_gb']} GB")
        print(f" 最低显存: {info['min_vram_gb']} GB")
        print(f" 推荐显卡: {', '.join(info['recommended_gpus'])}")
        print(f" 备注: {info['note']}")

    print(f"\n{banner}")
    print("部署方案推荐")
    print(banner)

    # Per-scenario recommendations: every field is dumped as "key: value".
    for scenario, info in requirements["deployment_recommendations"].items():
        print(f"\n{scenario}:")
        for key, value in info.items():
            print(f" {key}: {value}")

    # Persist the complete mapping (including sections that are not printed
    # above) so downstream tooling can consume it.
    os.makedirs(output_dir, exist_ok=True)
    path = os.path.join(output_dir, "gpu_requirements.json")
    with open(path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps the Chinese text readable in the file.
        json.dump(requirements, f, ensure_ascii=False, indent=2)
    print(f"\n结果已保存到 {path}")
|
||
|
||
|
||
# Run the analysis with its default arguments when executed as a script;
# importing the module performs no I/O.
if __name__ == "__main__":
    analyze_gpu_requirements()
|