From 2e43d8c22646b8b0265c660b157c4e5e4f8e5bde Mon Sep 17 00:00:00 2001
From: 16337 <1633794139@qq.com>
Date: Mon, 16 Mar 2026 10:17:56 +0800
Subject: [PATCH] feat: initialize qwen35-9b benchmark project structure

---
 .env.example           | 16 +++++++++
 benchmarks/__init__.py |  0
 config.py              | 79 ++++++++++++++++++++++++++++++++++++++++++
 requirements.txt       | 28 +++++++++++++++
 src/__init__.py        |  0
 tests/__init__.py      |  0
 6 files changed, 123 insertions(+)
 create mode 100644 .env.example
 create mode 100644 benchmarks/__init__.py
 create mode 100644 config.py
 create mode 100644 requirements.txt
 create mode 100644 src/__init__.py
 create mode 100644 tests/__init__.py

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..eb444ff
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,16 @@
+# Model Configuration
+MODEL_ID=Qwen/Qwen3.5-9B
+CACHE_DIR=./cache
+TORCH_DTYPE=float16
+
+# Hardware Configuration
+CUDA_VISIBLE_DEVICES=0
+
+# Benchmark Configuration
+WARMUP_RUNS=3
+BENCHMARK_RUNS=10
+MAX_LENGTH=8192
+
+# Optional: Proxy for model download (if needed)
+# HTTP_PROXY=http://proxy.company.com:8080
+# HTTPS_PROXY=http://proxy.company.com:8080
\ No newline at end of file
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/config.py b/config.py
new file mode 100644
--- /dev/null
+++ b/config.py
@@ -0,0 +1,79 @@
+"""Configuration for the Qwen3.5-9B benchmark.
+
+Defaults for model_id, cache_dir, torch_dtype, max_length, warmup_runs and
+benchmark_runs are read once at import time from the environment variables
+MODEL_ID, CACHE_DIR, TORCH_DTYPE, MAX_LENGTH, WARMUP_RUNS and BENCHMARK_RUNS
+(see .env.example); the literal values below apply when a variable is unset.
+"""
+import os
+from dataclasses import dataclass, field
+from typing import Optional, List
+from dotenv import load_dotenv
+
+# Load .env into os.environ before the defaults below are evaluated.
+load_dotenv()
+
+
+def _env_str(name: str, default: str) -> str:
+    """Return environment variable *name*, or *default* if unset or empty."""
+    return os.getenv(name) or default
+
+
+def _env_int(name: str, default: int) -> int:
+    """Return *name* from the environment as an int, or *default* if unset/empty."""
+    raw = os.getenv(name)
+    if not raw:
+        return default
+    try:
+        return int(raw)
+    except ValueError as err:
+        raise ValueError(f"{name} must be an integer, got {raw!r}") from err
+
+
+@dataclass
+class ModelConfig:
+    """Model configuration."""
+    model_id: str = _env_str("MODEL_ID", "Qwen/Qwen3.5-9B")
+    local_path: str = "./models/Qwen3.5-9B"
+    cache_dir: str = _env_str("CACHE_DIR", "./cache")
+    device: str = "cuda"
+    torch_dtype: str = _env_str("TORCH_DTYPE", "float16")  # float16, bfloat16, int8, int4
+    max_length: int = _env_int("MAX_LENGTH", 8192)
+    trust_remote_code: bool = True
+
+
+@dataclass
+class BenchmarkConfig:
+    """Benchmark configuration."""
+    # Test sequences of different lengths
+    input_lengths: List[int] = field(default_factory=lambda: [128, 512, 1024, 2048])
+    output_lengths: List[int] = field(default_factory=lambda: [128, 256, 512, 1024])
+
+    # Concurrency levels
+    concurrency_levels: List[int] = field(default_factory=lambda: [1, 2, 4, 8, 16])
+
+    # Number of warmup runs
+    warmup_runs: int = _env_int("WARMUP_RUNS", 3)
+
+    # Number of benchmark runs
+    benchmark_runs: int = _env_int("BENCHMARK_RUNS", 10)
+
+    # Batch sizes for throughput testing
+    batch_sizes: List[int] = field(default_factory=lambda: [1, 2, 4, 8, 16])
+
+    # Results output
+    results_dir: str = "./results"
+
+
+@dataclass
+class GPUConfig:
+    """GPU monitoring configuration."""
+    monitor_interval: float = 0.1  # seconds
+    log_memory: bool = True
+    log_utilization: bool = True
+
+
+# Global configs
+model_config = ModelConfig()
+benchmark_config = BenchmarkConfig()
+gpu_config = GPUConfig()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b4c98c4
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,28 @@
+# Core dependencies
+torch>=2.0.0
+transformers>=4.40.0
+accelerate>=0.25.0
+modelscope>=1.11.0
+
+# Inference optimization
+vllm>=0.4.0
+auto-gptq>=0.7.0
+optimum>=1.18.0
+
+# Benchmarking
+pytest>=7.4.0
+pytest-asyncio>=0.21.0
+psutil>=5.9.0
+py3nvml>=0.2.7
+
+# Utilities
+pydantic>=2.0.0
+python-dotenv>=1.0.0
+tqdm>=4.66.0
+pandas>=2.0.0
+matplotlib>=3.7.0
+seaborn>=0.12.0
+
+# Optional for accuracy testing
+datasets>=2.14.0
+scikit-learn>=1.3.0
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29