feat: initialize qwen35-9b benchmark project structure

This commit is contained in:
2026-03-16 10:17:56 +08:00
commit 2e43d8c226
6 changed files with 96 additions and 0 deletions

16
.env.example Normal file
View File

@@ -0,0 +1,16 @@
# Model Configuration
MODEL_ID=Qwen/Qwen3.5-9B
CACHE_DIR=./cache
TORCH_DTYPE=float16
# Hardware Configuration
CUDA_VISIBLE_DEVICES=0
# Benchmark Configuration
WARMUP_RUNS=3
BENCHMARK_RUNS=10
MAX_LENGTH=8192
# Optional: Proxy for model download (if needed)
# HTTP_PROXY=http://proxy.company.com:8080
# HTTPS_PROXY=http://proxy.company.com:8080

0
benchmarks/__init__.py Normal file
View File

52
config.py Normal file
View File

@@ -0,0 +1,52 @@
"""Configuration for Qwen3.5-9B benchmark."""
import os
from dataclasses import dataclass, field
from typing import Optional, List
from dotenv import load_dotenv
load_dotenv()
@dataclass
class ModelConfig:
    """Model configuration.

    The defaults of ``model_id``, ``cache_dir``, ``torch_dtype`` and
    ``max_length`` are taken from the ``MODEL_ID``, ``CACHE_DIR``,
    ``TORCH_DTYPE`` and ``MAX_LENGTH`` environment variables (the keys listed
    in ``.env.example``). They are read each time an instance is created, so
    values loaded by ``load_dotenv()`` at import time are honoured. When a
    variable is unset, the original hard-coded default is used. Explicit
    constructor arguments always take precedence over the environment.

    Raises:
        ValueError: If ``MAX_LENGTH`` is set but is not a valid integer.
    """

    # Model identifier; defaults to the Qwen3.5-9B repository id.
    model_id: str = field(
        default_factory=lambda: os.getenv("MODEL_ID", "Qwen/Qwen3.5-9B")
    )
    # Path of a local copy of the model.
    local_path: str = "./models/Qwen3.5-9B"
    # Cache directory.
    cache_dir: str = field(
        default_factory=lambda: os.getenv("CACHE_DIR", "./cache")
    )
    device: str = "cuda"
    torch_dtype: str = field(
        default_factory=lambda: os.getenv("TORCH_DTYPE", "float16")
    )  # float16, bfloat16, int8, int4
    max_length: int = field(
        default_factory=lambda: int(os.getenv("MAX_LENGTH", "8192"))
    )
    # NOTE(review): presumably forwarded to the model loader, where it allows
    # code shipped with the model repository to run - confirm it is required.
    trust_remote_code: bool = True
@dataclass
class BenchmarkConfig:
    """Benchmark configuration.

    The defaults of ``warmup_runs`` and ``benchmark_runs`` are taken from the
    ``WARMUP_RUNS`` and ``BENCHMARK_RUNS`` environment variables (the keys
    listed in ``.env.example``). They are read each time an instance is
    created; when a variable is unset, the original hard-coded default is
    used. Explicit constructor arguments always take precedence.

    Every list field gets a fresh list per instance, so mutating one
    instance's lists does not affect another instance.

    Raises:
        ValueError: If ``WARMUP_RUNS`` or ``BENCHMARK_RUNS`` is set but is
            not a valid integer.
    """

    # Test sequences of different lengths
    input_lengths: List[int] = field(
        default_factory=lambda: [128, 512, 1024, 2048]
    )
    output_lengths: List[int] = field(
        default_factory=lambda: [128, 256, 512, 1024]
    )

    # Concurrency levels
    concurrency_levels: List[int] = field(
        default_factory=lambda: [1, 2, 4, 8, 16]
    )

    # Number of warmup runs
    warmup_runs: int = field(
        default_factory=lambda: int(os.getenv("WARMUP_RUNS", "3"))
    )

    # Number of benchmark runs
    benchmark_runs: int = field(
        default_factory=lambda: int(os.getenv("BENCHMARK_RUNS", "10"))
    )

    # Batch sizes for throughput testing
    batch_sizes: List[int] = field(
        default_factory=lambda: [1, 2, 4, 8, 16]
    )

    # Results output
    results_dir: str = "./results"
@dataclass
class GPUConfig:
    """GPU monitoring configuration.

    Holds three settings: a monitoring interval and two logging switches,
    one for memory and one for utilization.
    """

    # Monitoring interval, in seconds.
    monitor_interval: float = 0.1
    # Switch for memory logging.
    log_memory: bool = True
    # Switch for utilization logging.
    log_utilization: bool = True
# Global configs: module-level instances created once, when this module is
# imported. Each is constructed with no arguments, so every field takes its
# declared default.
model_config = ModelConfig()
benchmark_config = BenchmarkConfig()
gpu_config = GPUConfig()

28
requirements.txt Normal file
View File

@@ -0,0 +1,28 @@
# Core dependencies
torch>=2.0.0
transformers>=4.40.0
accelerate>=0.25.0
modelscope>=1.11.0
# Inference optimization
vllm>=0.4.0
auto-gptq>=0.7.0
optimum>=1.18.0
# Benchmarking
pytest>=7.4.0
pytest-asyncio>=0.21.0
psutil>=5.9.0
py3nvml>=0.2.7
# Utilities
pydantic>=2.0.0
python-dotenv>=1.0.0
tqdm>=4.66.0
pandas>=2.0.0
matplotlib>=3.7.0
seaborn>=0.12.0
# Optional for accuracy testing
datasets>=2.14.0
scikit-learn>=1.3.0

0
src/__init__.py Normal file
View File

0
tests/__init__.py Normal file
View File