Files
qwen-test/vsp/qwen3.5-9b/results/concurrency_results.json

40 lines
985 B
JSON
Raw Normal View History

{
"timestamp": "2026-03-16T17:34:50.981411",
"model": "Qwen3.5-9B",
"quantization": "4-bit NF4",
"note": "单GPU串行推理并发测试主要体现请求排队效果",
"concurrency_results": [
{
"concurrency": 1,
"total_time_s": 33.29,
"total_tokens": 64,
"throughput_tokens_per_sec": 1.9,
"avg_latency_s": 33.18,
"requests_completed": 1
},
{
"concurrency": 2,
"total_time_s": 65.01,
"total_tokens": 128,
"throughput_tokens_per_sec": 2.0,
"avg_latency_s": 49.14,
"requests_completed": 2
},
{
"concurrency": 4,
"total_time_s": 128.55,
"total_tokens": 256,
"throughput_tokens_per_sec": 2.0,
"avg_latency_s": 80.09,
"requests_completed": 4
},
{
"concurrency": 8,
"total_time_s": 275.44,
"total_tokens": 512,
"throughput_tokens_per_sec": 1.9,
"avg_latency_s": 148.94,
"requests_completed": 8
}
]
}