| model | size | params | backend | ngl | ncpumoe | cpu_strict | n_batch | n_ubatch | fa | mmap | dio | test | t/s |
| --- | ---: | ---: | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | pp4096 | 518.25 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | tg128 | 19.79 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | pp4096 @ d5120 | 453.65 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | tg128 @ d5120 | 19.83 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | pp4096 @ d10240 | 405.45 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | tg128 @ d10240 | 19.43 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | pp4096 @ d15360 | 367.85 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | tg128 @ d15360 | 19.07 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | pp4096 @ d20480 | 334.75 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | tg128 @ d20480 | 18.30 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | pp4096 @ d25600 | 311.31 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | tg128 @ d25600 | 18.64 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | pp4096 @ d30720 | 287.13 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | tg128 @ d30720 | 17.87 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | pp4096 @ d35840 | 267.69 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | tg128 @ d35840 | 18.42 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | pp4096 @ d40960 | 250.68 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | tg128 @ d40960 | 17.50 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | pp4096 @ d46080 | 237.45 ± 0.00 |
| qwen35moe 35B.A3B IQ4_NL - 4.5 bpw | 16.79 GiB | 34.66 B | CUDA | 99 | 38 | 1 | 4096 | 1536 | 1 | 0 | 1 | tg128 @ d46080 | 17.19 ± 0.00 |
Bình luận