"""GPU compute requirement analysis."""

import json
import os


# Estimated VRAM requirements for Qwen3.5-9B at different precisions.
#
# "model_size_gb" equals 9B parameters times the bytes per parameter
# (4 / 2 / 1 / 0.5), rounded up. "min_vram_gb" is a few GB larger in every
# entry -- presumably headroom for activations / KV cache; confirm with the
# author before relying on the exact margins.
GPU_REQUIREMENTS = {
    "model": "Qwen3.5-9B",
    "parameters": "9B",
    # Keyed by precision label; iteration order is the display order.
    "precision_requirements": {
        "FP32": {
            "model_size_gb": 36,
            "min_vram_gb": 40,
            "recommended_gpus": ["A100 80GB", "H100 80GB"],
            "note": "不推荐,显存占用过大",
        },
        "FP16/BF16": {
            "model_size_gb": 18,
            "min_vram_gb": 22,
            "recommended_gpus": ["A100 40GB", "RTX 4090 24GB", "RTX A6000 48GB", "V100 32GB"],
            "note": "标准推理精度,推荐用于生产环境",
        },
        "INT8": {
            "model_size_gb": 9,
            "min_vram_gb": 12,
            # NOTE(review): the RTX 4070 Ti ships with 12 GB; the 16 GB card is
            # the RTX 4070 Ti SUPER -- confirm which one was meant.
            "recommended_gpus": ["RTX 4070 Ti 16GB", "RTX 3090 24GB", "T4 16GB", "RTX 4080 16GB"],
            "note": "轻微精度损失,性价比高",
        },
        "INT4 (NF4)": {
            "model_size_gb": 5,
            "min_vram_gb": 8,
            "recommended_gpus": ["RTX 3050 8GB", "RTX 4060 8GB", "RTX 3060 12GB", "RTX 3070 8GB"],
            "note": "适合显存有限的消费级显卡,有一定精度损失",
        },
    },
    # Keyed by deployment scenario. "concurrent" is an int in the first entry
    # and a free-form string in the others; consumers should treat it as
    # display text only.
    "deployment_recommendations": {
        "开发测试": {
            "gpu": "RTX 3050/4060 (8GB)",
            "precision": "INT4",
            "concurrent": 1,
            "cost_estimate": "~2000-3000 RMB (显卡)",
        },
        "小规模部署": {
            "gpu": "RTX 4090 (24GB)",
            "precision": "FP16",
            "concurrent": "2-4",
            "cost_estimate": "~12000-15000 RMB (显卡)",
        },
        "生产环境": {
            "gpu": "A100 40GB / H100 80GB",
            "precision": "FP16/BF16",
            "concurrent": "8-32 (vLLM)",
            "cost_estimate": "~60000-200000 RMB (显卡) 或云服务按需",
        },
    },
}


def analyze_gpu_requirements(requirements=None, output_dir="vsp/qwen3.5-9b/results"):
    """Print the GPU requirement report and save the data as JSON.

    Args:
        requirements: Mapping with "precision_requirements" and
            "deployment_recommendations" sections shaped like
            ``GPU_REQUIREMENTS``. Defaults to the module-level
            ``GPU_REQUIREMENTS`` when ``None``.
        output_dir: Directory that receives ``gpu_requirements.json``. It is
            created if missing. The default is a relative path, so it
            resolves against the current working directory.

    Returns:
        The path of the JSON file that was written.

    Raises:
        KeyError: If a required section or per-precision field is missing.
        OSError: If the output directory or file cannot be created.
    """
    if requirements is None:
        requirements = GPU_REQUIREMENTS

    separator = "=" * 60
    # Fall back to the original hard-coded heading when no model name is given.
    model_name = requirements.get("model", "Qwen3.5-9B")

    print(separator)
    print(f"{model_name} GPU 算力需求分析")
    print(separator)

    # Per-precision section: size, minimum VRAM, suggested cards, remark.
    for precision, info in requirements["precision_requirements"].items():
        print(f"\n{precision}:")
        print(f" 模型大小: ~{info['model_size_gb']} GB")
        print(f" 最低显存: {info['min_vram_gb']} GB")
        print(f" 推荐显卡: {', '.join(info['recommended_gpus'])}")
        print(f" 备注: {info['note']}")

    print(f"\n{separator}")
    print("部署方案推荐")
    print(separator)

    # Per-scenario section: dump every field as "key: value".
    for scenario, info in requirements["deployment_recommendations"].items():
        print(f"\n{scenario}:")
        for key, value in info.items():
            print(f" {key}: {value}")

    # Persist the raw data; ensure_ascii=False keeps the Chinese text readable.
    os.makedirs(output_dir, exist_ok=True)
    path = os.path.join(output_dir, "gpu_requirements.json")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(requirements, f, ensure_ascii=False, indent=2)
    print(f"\n结果已保存到 {path}")
    return path


if __name__ == "__main__":
    # Run the report only when executed as a script, not on import.
    analyze_gpu_requirements()