feat: initialize qwen35-9b benchmark project structure
This commit is contained in:
16
.env.example
Normal file
16
.env.example
Normal file
@@ -0,0 +1,16 @@
# Model Configuration
MODEL_ID=Qwen/Qwen3.5-9B
CACHE_DIR=./cache
TORCH_DTYPE=float16

# Hardware Configuration
CUDA_VISIBLE_DEVICES=0

# Benchmark Configuration
WARMUP_RUNS=3
BENCHMARK_RUNS=10
MAX_LENGTH=8192

# Optional: Proxy for model download (if needed)
# HTTP_PROXY=http://proxy.company.com:8080
# HTTPS_PROXY=http://proxy.company.com:8080
0
benchmarks/__init__.py
Normal file
0
benchmarks/__init__.py
Normal file
52
config.py
Normal file
52
config.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
"""Configuration for Qwen3.5-9B benchmark."""
|
||||||
|
import os
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Optional, List
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
@dataclass
class ModelConfig:
    """Model configuration.

    Defaults may be overridden through environment variables (loaded
    from .env by the module-level load_dotenv() call): MODEL_ID,
    CACHE_DIR, TORCH_DTYPE, MAX_LENGTH. These variables are documented
    in .env.example but were previously never read; when a variable is
    unset the default is identical to the old hard-coded value.
    """

    # Hub model identifier used for download.
    model_id: str = field(
        default_factory=lambda: os.getenv("MODEL_ID", "Qwen/Qwen3.5-9B")
    )
    # Local checkpoint directory (for a pre-downloaded model).
    local_path: str = "./models/Qwen3.5-9B"
    # Cache directory for downloaded weights.
    cache_dir: str = field(default_factory=lambda: os.getenv("CACHE_DIR", "./cache"))
    # Target device; "cuda" assumes a visible GPU.
    device: str = "cuda"
    # Weight precision: float16, bfloat16, int8, int4.
    torch_dtype: str = field(
        default_factory=lambda: os.getenv("TORCH_DTYPE", "float16")
    )
    # Maximum sequence length in tokens; raises ValueError at
    # construction if MAX_LENGTH is set but not an integer.
    max_length: int = field(
        default_factory=lambda: int(os.getenv("MAX_LENGTH", "8192"))
    )
    trust_remote_code: bool = True
|
||||||
|
|
||||||
|
@dataclass
class BenchmarkConfig:
    """Benchmark configuration.

    WARMUP_RUNS and BENCHMARK_RUNS environment variables (loaded from
    .env by the module-level load_dotenv() call, and documented in
    .env.example) override the run-count defaults; previously they were
    silently ignored. Defaults are unchanged when the variables are
    unset.
    """

    # Input prompt lengths (tokens) to sweep.
    input_lengths: List[int] = field(default_factory=lambda: [128, 512, 1024, 2048])
    # Generation lengths (tokens) to sweep.
    output_lengths: List[int] = field(default_factory=lambda: [128, 256, 512, 1024])

    # Concurrency levels for load testing.
    concurrency_levels: List[int] = field(default_factory=lambda: [1, 2, 4, 8, 16])

    # Untimed warmup iterations run before measuring; raises ValueError
    # at construction if WARMUP_RUNS is set but not an integer.
    warmup_runs: int = field(default_factory=lambda: int(os.getenv("WARMUP_RUNS", "3")))

    # Timed iterations per measurement point.
    benchmark_runs: int = field(
        default_factory=lambda: int(os.getenv("BENCHMARK_RUNS", "10"))
    )

    # Batch sizes for throughput testing.
    batch_sizes: List[int] = field(default_factory=lambda: [1, 2, 4, 8, 16])

    # Directory where result files are written.
    results_dir: str = "./results"
|
||||||
|
|
||||||
|
@dataclass
class GPUConfig:
    """Settings for the GPU monitoring sampler."""

    # Polling period of the monitor, in seconds.
    monitor_interval: float = field(default=0.1)
    # Whether GPU memory-usage samples are recorded.
    log_memory: bool = field(default=True)
    # Whether GPU utilization samples are recorded.
    log_utilization: bool = field(default=True)
|
||||||
|
|
||||||
|
# Global configs
# Module-level default instances, created once at import time.
# NOTE(review): presumably importing modules share these singletons
# rather than constructing their own configs — verify against callers.
model_config = ModelConfig()
benchmark_config = BenchmarkConfig()
gpu_config = GPUConfig()
|
||||||
28
requirements.txt
Normal file
28
requirements.txt
Normal file
@@ -0,0 +1,28 @@
# Core dependencies
torch>=2.0.0
transformers>=4.40.0
accelerate>=0.25.0
modelscope>=1.11.0

# Inference optimization
vllm>=0.4.0
auto-gptq>=0.7.0
optimum>=1.18.0

# Benchmarking
pytest>=7.4.0
pytest-asyncio>=0.21.0
psutil>=5.9.0
py3nvml>=0.2.7

# Utilities
pydantic>=2.0.0
python-dotenv>=1.0.0
tqdm>=4.66.0
pandas>=2.0.0
matplotlib>=3.7.0
seaborn>=0.12.0

# Optional for accuracy testing
datasets>=2.14.0
scikit-learn>=1.3.0
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
Reference in New Issue
Block a user