"""GPU 算力需求分析""" import json import os # Qwen3.5-9B 不同精度下的显存需求估算 GPU_REQUIREMENTS = { "model": "Qwen3.5-9B", "parameters": "9B", "precision_requirements": { "FP32": { "model_size_gb": 36, "min_vram_gb": 40, "recommended_gpus": ["A100 80GB", "H100 80GB"], "note": "不推荐,显存占用过大", }, "FP16/BF16": { "model_size_gb": 18, "min_vram_gb": 22, "recommended_gpus": ["A100 40GB", "RTX 4090 24GB", "RTX A6000 48GB", "V100 32GB"], "note": "标准推理精度,推荐用于生产环境", }, "INT8": { "model_size_gb": 9, "min_vram_gb": 12, "recommended_gpus": ["RTX 4070 Ti 16GB", "RTX 3090 24GB", "T4 16GB", "RTX 4080 16GB"], "note": "轻微精度损失,性价比高", }, "INT4 (NF4)": { "model_size_gb": 5, "min_vram_gb": 8, "recommended_gpus": ["RTX 3050 8GB", "RTX 4060 8GB", "RTX 3060 12GB", "RTX 3070 8GB"], "note": "适合显存有限的消费级显卡,有一定精度损失", }, }, "deployment_recommendations": { "开发测试": { "gpu": "RTX 3050/4060 (8GB)", "precision": "INT4", "concurrent": 1, "cost_estimate": "~2000-3000 RMB (显卡)", }, "小规模部署": { "gpu": "RTX 4090 (24GB)", "precision": "FP16", "concurrent": "2-4", "cost_estimate": "~12000-15000 RMB (显卡)", }, "生产环境": { "gpu": "A100 40GB / H100 80GB", "precision": "FP16/BF16", "concurrent": "8-32 (vLLM)", "cost_estimate": "~60000-200000 RMB (显卡) 或云服务按需", }, }, } def analyze_gpu_requirements(): """输出 GPU 需求分析""" print("=" * 60) print("Qwen3.5-9B GPU 算力需求分析") print("=" * 60) for precision, info in GPU_REQUIREMENTS["precision_requirements"].items(): print(f"\n{precision}:") print(f" 模型大小: ~{info['model_size_gb']} GB") print(f" 最低显存: {info['min_vram_gb']} GB") print(f" 推荐显卡: {', '.join(info['recommended_gpus'])}") print(f" 备注: {info['note']}") print(f"\n{'='*60}") print("部署方案推荐") print(f"{'='*60}") for scenario, info in GPU_REQUIREMENTS["deployment_recommendations"].items(): print(f"\n{scenario}:") for k, v in info.items(): print(f" {k}: {v}") # 保存 output_dir = "vsp/qwen3.5-9b/results" os.makedirs(output_dir, exist_ok=True) path = os.path.join(output_dir, "gpu_requirements.json") with open(path, "w", encoding="utf-8") as f: json.dump(GPU_REQUIREMENTS, f, ensure_ascii=False, indent=2) print(f"\n结果已保存到 {path}") if __name__ == "__main__": analyze_gpu_requirements()