feat: initialize qwen35-9b benchmark project structure
This commit is contained in:
16
.env.example
Normal file
16
.env.example
Normal file
@@ -0,0 +1,16 @@
|
||||
# Model Configuration
|
||||
MODEL_ID=Qwen/Qwen3.5-9B
|
||||
CACHE_DIR=./cache
|
||||
TORCH_DTYPE=float16
|
||||
|
||||
# Hardware Configuration
|
||||
CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
# Benchmark Configuration
|
||||
WARMUP_RUNS=3
|
||||
BENCHMARK_RUNS=10
|
||||
MAX_LENGTH=8192
|
||||
|
||||
# Optional: Proxy for model download (if needed)
|
||||
# HTTP_PROXY=http://proxy.company.com:8080
|
||||
# HTTPS_PROXY=http://proxy.company.com:8080
|
||||
0
benchmarks/__init__.py
Normal file
0
benchmarks/__init__.py
Normal file
52
config.py
Normal file
52
config.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Configuration for Qwen3.5-9B benchmark."""
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional, List
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
@dataclass
class ModelConfig:
    """Model configuration.

    Defaults may be overridden through environment variables (see
    .env.example): MODEL_ID, CACHE_DIR, TORCH_DTYPE, MAX_LENGTH.
    Values are read at instantiation time, after load_dotenv() has
    populated os.environ; with no override set, the defaults below are
    identical to the previous hard-coded values.
    """

    # Hugging Face / ModelScope model identifier; override with MODEL_ID.
    model_id: str = field(
        default_factory=lambda: os.getenv("MODEL_ID", "Qwen/Qwen3.5-9B")
    )
    # Local directory to load weights from if already downloaded.
    local_path: str = "./models/Qwen3.5-9B"
    # Download/cache directory; override with CACHE_DIR.
    cache_dir: str = field(default_factory=lambda: os.getenv("CACHE_DIR", "./cache"))
    # Target device for inference.
    device: str = "cuda"
    # Numeric precision; override with TORCH_DTYPE.
    # One of: float16, bfloat16, int8, int4
    torch_dtype: str = field(
        default_factory=lambda: os.getenv("TORCH_DTYPE", "float16")
    )
    # Maximum sequence length in tokens; override with MAX_LENGTH.
    max_length: int = field(default_factory=lambda: int(os.getenv("MAX_LENGTH", "8192")))
    # Needed for models that ship custom modeling code with their weights.
    trust_remote_code: bool = True
|
||||
@dataclass
class BenchmarkConfig:
    """Benchmark configuration.

    WARMUP_RUNS and BENCHMARK_RUNS may be overridden through environment
    variables (see .env.example); they are read at instantiation time,
    after load_dotenv(). Unset overrides fall back to the previous
    hard-coded defaults.
    """

    # Prompt lengths (tokens) to sweep over.
    input_lengths: List[int] = field(default_factory=lambda: [128, 512, 1024, 2048])
    # Generation lengths (tokens) to sweep over.
    output_lengths: List[int] = field(default_factory=lambda: [128, 256, 512, 1024])

    # Concurrent request counts for load testing.
    concurrency_levels: List[int] = field(default_factory=lambda: [1, 2, 4, 8, 16])

    # Untimed runs before measurement starts; override with WARMUP_RUNS.
    warmup_runs: int = field(default_factory=lambda: int(os.getenv("WARMUP_RUNS", "3")))

    # Timed runs per configuration; override with BENCHMARK_RUNS.
    benchmark_runs: int = field(
        default_factory=lambda: int(os.getenv("BENCHMARK_RUNS", "10"))
    )

    # Batch sizes for throughput testing.
    batch_sizes: List[int] = field(default_factory=lambda: [1, 2, 4, 8, 16])

    # Directory where result files are written.
    results_dir: str = "./results"
|
||||
@dataclass
class GPUConfig:
    """Settings for the GPU monitor that samples the device during runs."""

    # Sampling period of the monitor loop, in seconds.
    monitor_interval: float = 0.1
    # Whether to record GPU memory-usage samples.
    log_memory: bool = True
    # Whether to record GPU utilization samples.
    log_utilization: bool = True
|
||||
# Global configs
# Module-level default instances, created once at import time so callers
# can `from config import model_config` instead of constructing their own.
model_config = ModelConfig()
benchmark_config = BenchmarkConfig()
gpu_config = GPUConfig()
||||
28
requirements.txt
Normal file
28
requirements.txt
Normal file
@@ -0,0 +1,28 @@
|
||||
# Core dependencies
|
||||
torch>=2.0.0
|
||||
transformers>=4.40.0
|
||||
accelerate>=0.25.0
|
||||
modelscope>=1.11.0
|
||||
|
||||
# Inference optimization
|
||||
vllm>=0.4.0
|
||||
auto-gptq>=0.7.0
|
||||
optimum>=1.18.0
|
||||
|
||||
# Benchmarking
|
||||
pytest>=7.4.0
|
||||
pytest-asyncio>=0.21.0
|
||||
psutil>=5.9.0
|
||||
py3nvml>=0.2.7
|
||||
|
||||
# Utilities
|
||||
pydantic>=2.0.0
|
||||
python-dotenv>=1.0.0
|
||||
tqdm>=4.66.0
|
||||
pandas>=2.0.0
|
||||
matplotlib>=3.7.0
|
||||
seaborn>=0.12.0
|
||||
|
||||
# Optional for accuracy testing
|
||||
datasets>=2.14.0
|
||||
scikit-learn>=1.3.0
|
||||
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
Reference in New Issue
Block a user