|
8 | 8 |
|
9 | 9 | | Model | Model Size | Dtype | Seqlen | New_tokens | TTFT(ms) | Tokens/s | memory(MB) | |
10 | 10 | | :-------- | :--------: | :---: | :----: | :--------: | :------: | :------: | :--------: | |
11 | | -| Qwen2 | 0.5B | w8a8 | 128 | 64 | 372.54 | 40.26 | 639.72 | |
12 | | -| MiniCPM4 | 0.5B | w8a8 | 128 | 64 | 156.01 | 43.93 | 507.45 | |
13 | | -| Qwen3 | 0.6B | w8a8 | 128 | 64 | 215.76 | 29.52 | 772.98 | |
14 | | -| TinyLLAMA | 1.1B | w8a8 | 128 | 64 | 294.6 | 24.26 | 1058.46 | |
| 11 | +| Qwen2 | 0.5B | w8a8 | 128 | 64 | 143.83 | 42.58 | 654.26 | |
| 12 | +| MiniCPM4 | 0.5B | w8a8 | 128 | 64 | 128.46 | 45.13 | 524.55 | |
| 13 | +| Qwen3 | 0.6B | w8a8 | 128 | 64 | 213.50 | 32.16 | 773.77 | |
| 14 | +| TinyLLAMA | 1.1B | w8a8 | 128 | 64 | 239.00 | 24.49 | 1085.21 | |
15 | 15 | | Qwen2.5 | 1.5B | w8a8 | 128 | 64 | 412.27 | 16.32 | 1659.15 | |
16 | | -| RWKV7 | 1.5B | w8a8 | 128 | 64 | 828.62 | 13.81 | 1460.15 | |
17 | | -| InternLM2 | 1.8B | w8a8 | 128 | 64 | 419.47 | 15.4 | 1764.16 | |
18 | | -| Gemma2 | 2B | w8a8 | 128 | 64 | 693.95 | 9.65 | 2764.42 | |
19 | | -| TeleChat2 | 3B | w8a8 | 128 | 64 | 657.6 | 10.12 | 2775.94 | |
20 | | -| Phi3 | 3.8B | w8a8 | 128 | 64 | 1049.66 | 7.49 | 3747.69 | |
21 | | -| MiniCPM3 | 4B | w8a8 | 128 | 64 | 1432.4 | 5.98 | 4337.68 | |
22 | | -| ChatGLM3 | 6B | w8a8 | 128 | 64 | 1451.8 | 4.94 | 5883.87 | |
| 16 | +| RWKV7 | 1.5B | w8a8 | 128 | 64 | 788.00 | 13.33 | 1450.29 | |
| 17 | +| InternLM2 | 1.8B | w8a8 | 128 | 64 | 374.00 | 15.58 | 1765.71 | |
| 18 | +| Gemma2 | 2B | w8a8 | 128 | 64 | 679.90 | 9.80 | 2765.30 | |
| 19 | +| Gemma3n | 2B | w8a8 | 128 | 64 | 1220.40 | 9.46 | 2709.25 | |
| 20 | +| TeleChat2 | 3B | w8a8 | 128 | 64 | 649.60 | 10.22 | 2777.00 | |
| 21 | +| Phi3 | 3.8B | w8a8 | 128 | 64 | 1022.00 | 7.50 | 3747.73 | |
| 22 | +| MiniCPM3 | 4B | w8a8 | 128 | 64 | 1385.92 | 5.99 | 4339.61 | |
| 23 | +| ChatGLM3 | 6B | w8a8 | 128 | 64 | 1395.34 | 4.94 | 5976.43 | |
23 | 24 |
|
24 | 25 | ### RK3576 |
25 | 26 |
|
26 | 27 | | Model | Model Size | Dtype | Seqlen | New_tokens | TTFT(ms) | Tokens/s | memory(MB) | |
27 | 28 | | :-------- | :--------: | :--------: | :----: | :--------: | :------: | :------: | :--------: | |
28 | | -| Qwen2 | 0.5B | w4a16 | 128 | 64 | 432.63 | 32.61 | 411.71 | |
29 | | -| | 0.5B | w4a16_g128 | 128 | 64 | 448.63 | 27.88 | 431.8 | |
30 | | -| | 0.5B | w8a8 | 128 | 64 | 379.46 | 21.72 | 647.03 | |
31 | | -| MiniCPM4 | 0.5B | w4a16 | 128 | 64 | 414.05 | 36.76 | 305.61 | |
32 | | -| | 0.5B | w4a16_g128 | 128 | 64 | 420.83 | 33.75 | 346.41 | |
33 | | -| | 0.5B | w8a8 | 128 | 64 | 377.83 | 23.94 | 511.98 | |
34 | | -| Qwen3 | 0.6B | w4a16 | 128 | 64 | 569.08 | 23.72 | 494.83 | |
35 | | -| | 0.6B | w4a16_g128 | 128 | 64 | 582 | 22.52 | 527.46 | |
36 | | -| | 0.6B | w8a8 | 128 | 64 | 525 | 16.92 | 778.37 | |
37 | | -| TinyLLAMA | 1.1B | w4a16 | 128 | 64 | 706 | 21.02 | 573.3 | |
38 | | -| | 1.1B | w4a16_g128 | 128 | 64 | 822 | 18.91 | 655.82 | |
39 | | -| | 1.1B | w8a8 | 128 | 64 | 619 | 12.59 | 1064.64 | |
40 | | -| Qwen2.5 | 1.5B | w4a16 | 128 | 64 | 959.23 | 14.45 | 932.98 | |
41 | | -| | 1.5B | w4a16_g128 | 128 | 64 | 1095.06 | 12.87 | 1015.79 | |
42 | | -| | 1.5B | w8a8 | 128 | 64 | 814.69 | 8.51 | 1665.71 | |
43 | | -| RWKV7 | 1.5B | w4a16 | 128 | 64 | 2127.31 | 10.36 | 810.59 | |
44 | | -| | 1.5B | w4a16_g128 | 128 | 64 | 2229.17 | 9.65 | 901.84 | |
45 | | -| | 1.5B | w8a8 | 128 | 64 | 1878.21 | 7.17 | 1469.38 | |
46 | | -| InternLM2 | 1.8B | w4a16 | 128 | 64 | 970.65 | 13.62 | 964.26 | |
47 | | -| | 1.8B | w4a16_g128 | 128 | 64 | 1150.83 | 12.06 | 1059.62 | |
48 | | -| | 1.8B | w8a8 | 128 | 64 | 820.58 | 7.9 | 1771.28 | |
49 | | -| Gemma2 | 2B | w4a16 | 128 | 64 | 1262.21 | 8.49 | 1527.76 | |
50 | | -| | 2B | w4a16_g128 | 128 | 64 | 1535 | 7.72 | 1615.4 | |
51 | | -| | 2B | w8a8 | 128 | 64 | 1126.28 | 4.92 | 2770.05 | |
52 | | -| TeleChat2 | 3B | w4a16 | 128 | 64 | 1356.45 | 8.98 | 1513.85 | |
53 | | -| | 3B | w4a16_g128 | 128 | 64 | 1585.8 | 7.82 | 1632.43 | |
54 | | -| | 3B | w8a8 | 128 | 64 | 1129.12 | 5.13 | 2782.64 | |
55 | | -| Phi3 | 3.8B | w4a16 | 128 | 64 | 1980.87 | 6.35 | 1985.4 | |
56 | | -| | 3.8B | w4a16_g128 | 128 | 64 | 2392.96 | 5.84 | 2141.5 | |
57 | | -| | 3.8B | w8a8 | 128 | 64 | 1641.84 | 3.75 | 3756.92 | |
58 | | -| MiniCPM3 | 4B | w4a16 | 128 | 64 | 2899.35 | 4.94 | 2334.24 | |
59 | | -| | 4B | w4a16_g128 | 128 | 64 | 3377.92 | 4.49 | 2615.88 | |
60 | | -| | 4B | w8a8 | 128 | 64 | 2621.17 | 3.03 | 4364.42 | |
61 | | -| ChatGLM3 | 6B | w4a16 | 128 | 64 | 2362.78 | 4.62 | 2983.15 | |
62 | | -| | 6B | w4a16_g128 | 128 | 64 | 3170.31 | 4 | 3196.36 | |
63 | | -| | 6B | w8a8 | 128 | 64 | 2037.96 | 2.29 | 5894.02 | |
| 29 | +| Qwen2 | 0.5B | w4a16 | 128 | 64 | 327.72 | 34.24 | 426.24 | |
| 30 | +| | 0.5B | w4a16_g128 | 128 | 64 | 363.58 | 33.22 | 445.95 | |
| 31 | +| | 0.5B | w8a8 | 128 | 64 | 334.26 | 22.95 | 661.1 | |
| 32 | +| MiniCPM4 | 0.5B | w4a16 | 128 | 64 | 348.87 | 35.8 | 322.41 | |
| 33 | +| | 0.5B | w4a16_g128 | 128 | 64 | 371.96 | 32.88 | 362.23 | |
| 34 | +| | 0.5B | w8a8 | 128 | 64 | 337.52 | 23.71 | 528.96 | |
| 35 | +| Qwen3 | 0.6B | w4a16 | 128 | 64 | 482.82 | 25.16 | 495.99 | |
| 36 | +| | 0.6B | w4a16_g128 | 128 | 64 | 512.36 | 24.3 | 528.48 | |
| 37 | +| | 0.6B | w8a8 | 128 | 64 | 448.94 | 17.09 | 779.62 | |
| 38 | +| TinyLLAMA | 1.1B | w4a16 | 128 | 64 | 517.82 | 21.32 | 591 | |
| 39 | +| | 1.1B | w4a16_g128 | 128 | 64 | 658.78 | 18.89 | 681 | |
| 40 | +| | 1.1B | w8a8 | 128 | 64 | 537.82 | 12.63 | 1082.83 | |
| 41 | +| RWKV7 | 1.5B | w4a16 | 128 | 64 | 1779.65 | 9.96 | 799.89 | |
| 42 | +| | 1.5B | w4a16_g128 | 128 | 64 | 1877.95 | 9.37 | 890.16 | |
| 43 | +| | 1.5B | w8a8 | 128 | 64 | 1718.8 | 6.96 | 1458.48 | |
| 44 | +| InternLM2 | 1.8B | w4a16 | 128 | 64 | 771.6 | 13.65 | 966.12 | |
| 45 | +| | 1.8B | w4a16_g128 | 128 | 64 | 1001.23 | 12.18 | 1061.57 | |
| 46 | +| | 1.8B | w8a8 | 128 | 64 | 777.86 | 7.91 | 1773.23 | |
| 47 | +| Gemma2 | 2B | w4a16 | 128 | 64 | 1119.51 | 8.45 | 1529.03 | |
| 48 | +| | 2B | w4a16_g128 | 128 | 64 | 1407.31 | 7.76 | 1616.45 | |
| 49 | +| | 2B | w8a8 | 128 | 64 | 1052.77 | 5.01 | 2771.54 | |
| 50 | +| Gemma3n | 2B | w4a16 | 128 | 64 | 3187 | 7.38 | 1574.34 | |
| 51 | +| | 2B | w8a8 | 128 | 64 | 3229.16 | 4.75 | 2722.76 | |
| 52 | +| TeleChat2 | 3B | w4a16 | 128 | 64 | 1143.73 | 9.05 | 1514.98 | |
| 53 | +| | 3B | w4a16_g128 | 128 | 64 | 1422.38 | 7.91 | 1633.54 | |
| 54 | +| | 3B | w8a8 | 128 | 64 | 1035.37 | 5.15 | 2783.73 | |
| 55 | +| Phi3 | 3.8B | w4a16 | 128 | 64 | 1800.92 | 6.52 | 1985.75 | |
| 56 | +| | 3.8B | w4a16_g128 | 128 | 64 | 2236.9 | 5.96 | 2141.89 | |
| 57 | +| | 3.8B | w8a8 | 128 | 64 | 1591.59 | 3.76 | 3757.22 | |
| 58 | +| MiniCPM3 | 4B | w4a16 | 128 | 64 | 2484.63 | 4.94 | 2336.73 | |
| 59 | +| | 4B | w4a16_g128 | 128 | 64 | 3053.52 | 4.49 | 2618.14 | |
| 60 | +| | 4B | w8a8 | 128 | 64 | 2509.27 | 3.04 | 4366.85 | |
| 61 | +| ChatGLM3 | 6B | w4a16 | 128 | 64 | 2121.26 | 4.7 | 3014.38 | |
| 62 | +| | 6B | w4a16_g128 | 128 | 64 | 2958.88 | 4.03 | 3244.15 | |
| 63 | +| | 6B | w8a8 | 128 | 64 | 1920.97 | 2.5 | 5958.65 | |
64 | 64 |
|
65 | 65 | ### RK3562 |
66 | 66 |
|
67 | 67 | | Model | Model Size | Dtype | Seqlen | New_tokens | TTFT(ms) | Tokens/s | memory(MB) | |
68 | 68 | | :------- | :--------: | :---: | :----: | :--------: | :------: | :------: | :--------: | |
69 | | -| Qwen2 | 0.5B | w8a8 | 128 | 64 | 946.58 | 11.46 | 625.14 | |
70 | | -| MiniCPM4 | 0.5B | w8a8 | 128 | 64 | 905.69 | 10.47 | 492.4 | |
71 | | -| Qwen3 | 0.6B | w8a8 | 128 | 64 | 1248.58 | 8.87 | 755.48 | |
| 69 | +| Qwen2 | 0.5B | w8a8 | 128 | 64 | 650.37 | 12.94 | 632.48 | |
| 70 | +| MiniCPM4 | 0.5B | w8a8 | 128 | 64 | 689.88 | 11.78 | 500.54 | |
| 71 | +| Qwen3 | 0.6B | w8a8 | 128 | 64 | 901.09 | 10.00 | 756.72 | |
72 | 72 |
|
73 | 73 | ### RV1126B |
74 | 74 |
|
75 | 75 | | Model | Model Size | Dtype | Seqlen | New_tokens | TTFT(ms) | Tokens/s | |
76 | 76 | | :------- | :--------: | :--------: | :----: | :--------: | :------: | :------: | |
77 | | -| Qwen2 | 0.5B | w4a16 | 128 | 64 | 975.36 | 16.98 | |
78 | | -| | 0.5B | w4a16_g128 | 128 | 64 | 831.39 | 15.3 | |
79 | | -| | 0.5B | w8a8 | 128 | 64 | 969.68 | 11.7 | |
80 | | -| MiniCPM4 | 0.6B | w4a16 | 128 | 64 | 941.48 | 20.37 | |
81 | | -| | 0.6B | w4a16_g128 | 128 | 64 | 862.57 | 17.73 | |
82 | | -| | 0.6B | w8a8 | 128 | 64 | 955.15 | 13.47 | |
| 77 | +| Qwen2 | 0.5B | w4a16 | 128 | 64 | 650.69 | 21.43 | |
| 78 | +| | 0.5B | w4a16_g128 | 128 | 64 | 679.78 | 18.18 | |
| 79 | +| | 0.5B | w8a8 | 128 | 64 | 636.90 | 13.91 | |
| 80 | +| MiniCPM4 | 0.5B | w4a16 | 128 | 64 | 654.20 | 22.97 | |
| 81 | +| | 0.5B | w4a16_g128 | 128 | 64 | 691.57 | 18.78 | |
| 82 | +| | 0.5B | w8a8 | 128 | 64 | 663.41 | 15.12 | |
83 | 83 |
|
84 | 84 | ### Multimodal |
85 | 85 |
|
86 | | -| model | Stage | RK3588(w8a8) | RK3576(w4a16) | |
87 | | -| :------------ | :------------------: | :------------: | :-----------: | |
88 | | -| Qwen2-VL-2B | img-encoder(392*392) | 3.28s | 3.55s | |
89 | | -| | Prefill(len=196) | 693.4ms | 1533.8ms | |
90 | | -| | Decode | 16.29 tokens/s | 14.1 tokens/s | |
91 | | -| Qwen2.5-VL-3B | img-encoder(392*392) | 2.93s | 2.87s | |
92 | | -| | Prefill(len=196) | 1262ms | 2656ms | |
93 | | -| | Decode | 8.66 tokens/s | 7.82 tokens/s | |
94 | | -| MiniCPM-V-2_6 | img-encoder(448*448) | 3.27s | 2.4s | |
95 | | -| | Prefill(len=64) | 869.5ms | 1415ms | |
96 | | -| | Decode | 4.04 tokens/s | 3.94 tokens/s | |
97 | | -| SmolVLM-256M | Img-encoder(512*512) | 842ms | 768ms | |
98 | | -| | Prefill(len=128) | 87.2ms | 251ms | |
99 | | -| | Decode | 77.7 tokens/s | 54.8 tokens/s | |
| 86 | +| model | Stage | RK3588(w8a8) | RK3576(w4a16) | |
| 87 | +| :------------ | :------------------: | :-----------: | :------------: | |
| 88 | +| Qwen2-VL-2B | img-encoder(392*392) | 3.28s | 3.55s | |
| 89 | +| | Prefill(len=196) | 632.6ms | 1234.9ms | |
| 90 | +| | Decode | 16.6 tokens/s | 14.57 tokens/s | |
| 91 | +| Qwen2.5-VL-3B | img-encoder(392*392) | 2.93s | 2.87s | |
| 92 | +| | Prefill(len=196) | 1120ms | 2130ms | |
| 93 | +| | Decode | 8.66 tokens/s | 7.87 tokens/s | |
| 94 | +| MiniCPM-V-2_6 | img-encoder(448*448) | 3.27s | 2.4s | |
| 95 | +| | Prefill(len=64) | 826ms | 1230ms | |
| 96 | +| | Decode | 4.18 tokens/s | 3.85 tokens/s | |
| 97 | +| SmolVLM-256M | img-encoder(512*512) | 842ms | 768ms | |
| 98 | +| | Prefill(len=128) | 77.3ms | 180ms | |
| 99 | +| | Decode | 78 tokens/s | 57.73 tokens/s | |
100 | 100 |
|
101 | 101 | - The img-encoder runs inference on RKNN with FP16, tested using all NPU cores. |
0 commit comments