feat: initialize qwen35-9b benchmark project structure
This commit is contained in:
16
.env.example
Normal file
16
.env.example
Normal file
@@ -0,0 +1,16 @@
|
||||
# Model Configuration
|
||||
MODEL_ID=Qwen/Qwen3.5-9B
|
||||
CACHE_DIR=./cache
|
||||
TORCH_DTYPE=float16
|
||||
|
||||
# Hardware Configuration
|
||||
CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
# Benchmark Configuration
|
||||
WARMUP_RUNS=3
|
||||
BENCHMARK_RUNS=10
|
||||
MAX_LENGTH=8192
|
||||
|
||||
# Optional: Proxy for model download (if needed)
|
||||
# HTTP_PROXY=http://proxy.company.com:8080
|
||||
# HTTPS_PROXY=http://proxy.company.com:8080
|
||||
0
benchmarks/__init__.py
Normal file
0
benchmarks/__init__.py
Normal file
52
config.py
Normal file
52
config.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Configuration for Qwen3.5-9B benchmark."""
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional, List
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
@dataclass
class ModelConfig:
    """Model configuration.

    Defaults may be overridden through environment variables (see
    .env.example): MODEL_ID, CACHE_DIR, TORCH_DTYPE, MAX_LENGTH.
    Values are read at instantiation time, after load_dotenv() has
    populated os.environ; with no override set, the defaults below are
    identical to the previous hard-coded values.
    """

    # Hugging Face / ModelScope model identifier; override with MODEL_ID.
    model_id: str = field(
        default_factory=lambda: os.getenv("MODEL_ID", "Qwen/Qwen3.5-9B")
    )
    # Local directory to load weights from if already downloaded.
    local_path: str = "./models/Qwen3.5-9B"
    # Download/cache directory; override with CACHE_DIR.
    cache_dir: str = field(default_factory=lambda: os.getenv("CACHE_DIR", "./cache"))
    # Target device for inference.
    device: str = "cuda"
    # Numeric precision; override with TORCH_DTYPE.
    # One of: float16, bfloat16, int8, int4
    torch_dtype: str = field(
        default_factory=lambda: os.getenv("TORCH_DTYPE", "float16")
    )
    # Maximum sequence length in tokens; override with MAX_LENGTH.
    max_length: int = field(default_factory=lambda: int(os.getenv("MAX_LENGTH", "8192")))
    # Needed for models that ship custom modeling code with their weights.
    trust_remote_code: bool = True
|
||||
@dataclass
class BenchmarkConfig:
    """Benchmark configuration.

    WARMUP_RUNS and BENCHMARK_RUNS may be overridden through environment
    variables (see .env.example); they are read at instantiation time,
    after load_dotenv(). Unset overrides fall back to the previous
    hard-coded defaults.
    """

    # Prompt lengths (tokens) to sweep over.
    input_lengths: List[int] = field(default_factory=lambda: [128, 512, 1024, 2048])
    # Generation lengths (tokens) to sweep over.
    output_lengths: List[int] = field(default_factory=lambda: [128, 256, 512, 1024])

    # Concurrent request counts for load testing.
    concurrency_levels: List[int] = field(default_factory=lambda: [1, 2, 4, 8, 16])

    # Untimed runs before measurement starts; override with WARMUP_RUNS.
    warmup_runs: int = field(default_factory=lambda: int(os.getenv("WARMUP_RUNS", "3")))

    # Timed runs per configuration; override with BENCHMARK_RUNS.
    benchmark_runs: int = field(
        default_factory=lambda: int(os.getenv("BENCHMARK_RUNS", "10"))
    )

    # Batch sizes for throughput testing.
    batch_sizes: List[int] = field(default_factory=lambda: [1, 2, 4, 8, 16])

    # Directory where result files are written.
    results_dir: str = "./results"
|
||||
@dataclass
class GPUConfig:
    """Settings for the GPU monitor that samples the device during runs."""

    # Sampling period of the monitor loop, in seconds.
    monitor_interval: float = 0.1
    # Whether to record GPU memory-usage samples.
    log_memory: bool = True
    # Whether to record GPU utilization samples.
    log_utilization: bool = True
|
||||
# Global configs
# Module-level default instances, created once at import time so callers
# can `from config import model_config` instead of constructing their own.
model_config = ModelConfig()
benchmark_config = BenchmarkConfig()
gpu_config = GPUConfig()
||||
28
requirements.txt
Normal file
28
requirements.txt
Normal file
@@ -0,0 +1,28 @@
|
||||
# Core dependencies
|
||||
torch>=2.0.0
|
||||
transformers>=4.40.0
|
||||
accelerate>=0.25.0
|
||||
modelscope>=1.11.0
|
||||
|
||||
# Inference optimization
|
||||
vllm>=0.4.0
|
||||
auto-gptq>=0.7.0
|
||||
optimum>=1.18.0
|
||||
|
||||
# Benchmarking
|
||||
pytest>=7.4.0
|
||||
pytest-asyncio>=0.21.0
|
||||
psutil>=5.9.0
|
||||
py3nvml>=0.2.7
|
||||
|
||||
# Utilities
|
||||
pydantic>=2.0.0
|
||||
python-dotenv>=1.0.0
|
||||
tqdm>=4.66.0
|
||||
pandas>=2.0.0
|
||||
matplotlib>=3.7.0
|
||||
seaborn>=0.12.0
|
||||
|
||||
# Optional for accuracy testing
|
||||
datasets>=2.14.0
|
||||
scikit-learn>=1.3.0
|
||||
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
Reference in New Issue
Block a user