Files
qwen-test/vsp/qwen3.5-9b/gpu_requirements.py
16337 42db2b0ca9 feat: 更新 GPU 需求分析,添加实际测试结果和综合报告
- 根据 RTX 3050 8GB 实测结果更新 GPU 需求建议
- 添加 bitsandbytes 兼容性问题记录
- 生成包含实测数据的综合测试报告 REPORT.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 13:09:39 +08:00

105 lines
3.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""GPU 算力需求分析"""
import json
import os
# Estimated VRAM requirements for Qwen3.5-9B at different precisions,
# plus measured results from a real RTX 3050 8GB run and deployment tiers.
GPU_REQUIREMENTS = {
    "model": "Qwen3.5-9B",
    "parameters": "9B",
    # Per-precision sizing: approximate on-disk model size, the minimum
    # VRAM that leaves headroom for activations/KV cache, and example GPUs.
    "precision_requirements": {
        "FP32": {
            "model_size_gb": 36,
            "min_vram_gb": 40,
            "recommended_gpus": ["A100 80GB", "H100 80GB"],
            "note": "不推荐,显存占用过大",
        },
        "FP16/BF16": {
            "model_size_gb": 18,
            "min_vram_gb": 22,
            "recommended_gpus": ["A100 40GB", "RTX 4090 24GB", "RTX A6000 48GB", "V100 32GB"],
            "note": "标准推理精度,推荐用于生产环境",
        },
        "INT8": {
            "model_size_gb": 9,
            "min_vram_gb": 12,
            "recommended_gpus": ["RTX 4070 Ti 16GB", "RTX 3090 24GB", "T4 16GB", "RTX 4080 16GB"],
            "note": "轻微精度损失,性价比高",
        },
        "INT4 (NF4)": {
            "model_size_gb": 5,
            "min_vram_gb": 8,
            "recommended_gpus": ["RTX 4060 8GB", "RTX 3060 12GB", "RTX 3070 8GB"],
            "note": "理论可行但 bitsandbytes 在 Windows 上兼容性差,不推荐",
        },
    },
    # Empirical data from a single test machine (FP16 with CPU offload);
    # documents why an 8GB card is insufficient for this model.
    "actual_test_results": {
        "gpu": "NVIDIA GeForce RTX 3050 OEM 8GB",
        "method": "FP16 + CPU offload (accelerate device_map=auto)",
        "gpu_vram_used_gb": 3.91,
        "ram_used_gb": 13.60,
        "inference_speed_tokens_per_sec": 0.4,
        "output_quality": "极差(乱码/重复输出)",
        "conclusion": "RTX 3050 8GB 无法有效运行 Qwen3.5-9B显存不足导致大量层 offload 到 CPU推理极慢且输出质量不可用",
        "issues": [
            "bitsandbytes 4-bit 量化不支持 CPU offload8GB 显存装不下完整 4-bit 模型",
            "bitsandbytes INT8 与 accelerate 版本不兼容Windows",
            "FP16 + CPU offload 虽可加载但速度仅 0.4 tokens/s输出为乱码",
        ],
    },
    # Hardware/precision/concurrency suggestions per deployment scenario,
    # with rough GPU cost estimates in RMB.
    "deployment_recommendations": {
        "开发测试": {
            "gpu": "RTX 3060 12GB / RTX 4060 Ti 16GB",
            "precision": "INT8 或 INT4",
            "concurrent": 1,
            "cost_estimate": "~2500-4000 RMB (显卡)",
        },
        "小规模部署": {
            "gpu": "RTX 4090 (24GB)",
            "precision": "FP16",
            "concurrent": "2-4",
            "cost_estimate": "~12000-15000 RMB (显卡)",
        },
        "生产环境": {
            "gpu": "A100 40GB / H100 80GB",
            "precision": "FP16/BF16",
            "concurrent": "8-32 (vLLM)",
            "cost_estimate": "~60000-200000 RMB (显卡) 或云服务按需",
        },
    },
}
def analyze_gpu_requirements(output_dir: str = "vsp/qwen3.5-9b/results") -> None:
    """Print the Qwen3.5-9B GPU requirement analysis and save it as JSON.

    Iterates over ``GPU_REQUIREMENTS`` to print a per-precision VRAM
    breakdown and per-scenario deployment recommendations, then writes
    the full structure to ``gpu_requirements.json`` under *output_dir*.

    Args:
        output_dir: Directory the JSON report is written to; created if
            missing. Defaults to the original hard-coded location so
            existing callers are unaffected.
    """
    print("=" * 60)
    print("Qwen3.5-9B GPU 算力需求分析")
    print("=" * 60)
    # Per-precision section: size, minimum VRAM, example GPUs, note.
    for precision, info in GPU_REQUIREMENTS["precision_requirements"].items():
        print(f"\n{precision}:")
        print(f" 模型大小: ~{info['model_size_gb']} GB")
        print(f" 最低显存: {info['min_vram_gb']} GB")
        print(f" 推荐显卡: {', '.join(info['recommended_gpus'])}")
        print(f" 备注: {info['note']}")
    print(f"\n{'='*60}")
    print("部署方案推荐")
    print(f"{'='*60}")
    # Deployment scenarios: dump every key/value pair verbatim.
    for scenario, info in GPU_REQUIREMENTS["deployment_recommendations"].items():
        print(f"\n{scenario}:")
        for k, v in info.items():
            print(f" {k}: {v}")
    # Persist the analysis; ensure_ascii=False keeps Chinese text readable.
    os.makedirs(output_dir, exist_ok=True)
    path = os.path.join(output_dir, "gpu_requirements.json")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(GPU_REQUIREMENTS, f, ensure_ascii=False, indent=2)
    print(f"\n结果已保存到 {path}")


if __name__ == "__main__":
    analyze_gpu_requirements()