{ "timestamp": "2026-03-16T17:34:50.981411", "model": "Qwen3.5-9B", "quantization": "4-bit NF4", "note": "单GPU串行推理,并发测试主要体现请求排队效果", "concurrency_results": [ { "concurrency": 1, "total_time_s": 33.29, "total_tokens": 64, "throughput_tokens_per_sec": 1.9, "avg_latency_s": 33.18, "requests_completed": 1 }, { "concurrency": 2, "total_time_s": 65.01, "total_tokens": 128, "throughput_tokens_per_sec": 2.0, "avg_latency_s": 49.14, "requests_completed": 2 }, { "concurrency": 4, "total_time_s": 128.55, "total_tokens": 256, "throughput_tokens_per_sec": 2.0, "avg_latency_s": 80.09, "requests_completed": 4 }, { "concurrency": 8, "total_time_s": 275.44, "total_tokens": 512, "throughput_tokens_per_sec": 1.9, "avg_latency_s": 148.94, "requests_completed": 8 } ] }