40 lines
985 B
JSON
40 lines
985 B
JSON
|
|
{
|
|||
|
|
"timestamp": "2026-03-16T17:34:50.981411",
|
|||
|
|
"model": "Qwen3.5-9B",
|
|||
|
|
"quantization": "4-bit NF4",
|
|||
|
|
"note": "单GPU串行推理,并发测试主要体现请求排队效果",
|
|||
|
|
"concurrency_results": [
|
|||
|
|
{
|
|||
|
|
"concurrency": 1,
|
|||
|
|
"total_time_s": 33.29,
|
|||
|
|
"total_tokens": 64,
|
|||
|
|
"throughput_tokens_per_sec": 1.9,
|
|||
|
|
"avg_latency_s": 33.18,
|
|||
|
|
"requests_completed": 1
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"concurrency": 2,
|
|||
|
|
"total_time_s": 65.01,
|
|||
|
|
"total_tokens": 128,
|
|||
|
|
"throughput_tokens_per_sec": 2.0,
|
|||
|
|
"avg_latency_s": 49.14,
|
|||
|
|
"requests_completed": 2
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"concurrency": 4,
|
|||
|
|
"total_time_s": 128.55,
|
|||
|
|
"total_tokens": 256,
|
|||
|
|
"throughput_tokens_per_sec": 2.0,
|
|||
|
|
"avg_latency_s": 80.09,
|
|||
|
|
"requests_completed": 4
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"concurrency": 8,
|
|||
|
|
"total_time_s": 275.44,
|
|||
|
|
"total_tokens": 512,
|
|||
|
|
"throughput_tokens_per_sec": 1.9,
|
|||
|
|
"avg_latency_s": 148.94,
|
|||
|
|
"requests_completed": 8
|
|||
|
|
}
|
|||
|
|
]
|
|||
|
|
}
|