"""共享模型加载工具 - 统一加载配置""" import os import sys import glob import torch from transformers import AutoModelForCausalLM, AutoTokenizer # 修复 Windows GBK 编码问题 sys.stdout.reconfigure(encoding='utf-8', errors='replace') sys.stderr.reconfigure(encoding='utf-8', errors='replace') def get_model_path(): """获取本地模型路径""" paths = glob.glob("vsp/qwen3.5-9b/model/**/config.json", recursive=True) if paths: return os.path.dirname(paths[0]) return "Qwen/Qwen3.5-9B" def load_model(): """加载模型 (FP16 + GPU/CPU offload) RTX 3050 8GB VRAM 不够放完整模型,使用 FP16 并将部分层 offload 到 CPU。 """ model_path = get_model_path() print(f"模型路径: {model_path}") tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) max_memory = {0: "6GiB", "cpu": "24GiB"} model = AutoModelForCausalLM.from_pretrained( model_path, torch_dtype=torch.float16, device_map="auto", max_memory=max_memory, offload_folder="vsp/qwen3.5-9b/offload", trust_remote_code=True, ) return model, tokenizer