"""Configuration for Qwen3.5-9B benchmark."""
|
|
import os
|
|
from dataclasses import dataclass, field
|
|
from typing import Optional, List
|
|
from dotenv import load_dotenv
|
|
|
|
load_dotenv()
|
|
|
|
@dataclass
|
|
class ModelConfig:
|
|
"""Model configuration."""
|
|
model_id: str = "Qwen/Qwen3.5-9B"
|
|
local_path: str = "./models/Qwen3.5-9B"
|
|
cache_dir: str = "./cache"
|
|
device: str = "cuda"
|
|
torch_dtype: str = "float16" # float16, bfloat16, int8, int4
|
|
max_length: int = 8192
|
|
trust_remote_code: bool = True
|
|
|
|
@dataclass
|
|
class BenchmarkConfig:
|
|
"""Benchmark configuration."""
|
|
# Test sequences of different lengths
|
|
input_lengths: List[int] = field(default_factory=lambda: [128, 512, 1024, 2048])
|
|
output_lengths: List[int] = field(default_factory=lambda: [128, 256, 512, 1024])
|
|
|
|
# Concurrency levels
|
|
concurrency_levels: List[int] = field(default_factory=lambda: [1, 2, 4, 8, 16])
|
|
|
|
# Number of warmup runs
|
|
warmup_runs: int = 3
|
|
|
|
# Number of benchmark runs
|
|
benchmark_runs: int = 10
|
|
|
|
# Batch sizes for throughput testing
|
|
batch_sizes: List[int] = field(default_factory=lambda: [1, 2, 4, 8, 16])
|
|
|
|
# Results output
|
|
results_dir: str = "./results"
|
|
|
|
@dataclass
|
|
class GPUConfig:
|
|
"""GPU monitoring configuration."""
|
|
monitor_interval: float = 0.1 # seconds
|
|
log_memory: bool = True
|
|
log_utilization: bool = True
|
|
|
|
# Global configs
|
|
model_config = ModelConfig()
|
|
benchmark_config = BenchmarkConfig()
|
|
gpu_config = GPUConfig() |