Skip to content

Commit 29a9fb9

Browse files
author
will.yang
committed
update documents and demo
1 parent 8623edd commit 29a9fb9

File tree

522 files changed

+419
-409
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

522 files changed

+419
-409
lines changed

README.md

Lines changed: 48 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -32,55 +32,65 @@
3232
- [x] [TeleChat models](https://huggingface.co/Tele-AI)
3333
- [x] [Qwen2-VL](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)
3434
- [x] [MiniCPM-V](https://huggingface.co/openbmb/MiniCPM-V-2_6)
35+
- [x] [DeepSeek-R1-Distill](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d)
3536

3637
# Model Performance Benchmark
3738

38-
| model | dtype | seqlen | max_context | new_tokens | TTFT(ms) | Tokens/s | memory(G) | platform |
39-
|:-------------- |:---------- |:------:|:-----------:|:----------:|:--------:|:--------:|:---------:|:--------:|
40-
| TinyLLAMA-1.1B | w4a16 | 64 | 320 | 256 | 345.00 | 21.10 | 0.77 | RK3576 |
41-
| | w4a16_g128 | 64 | 320 | 256 | 410.00 | 18.50 | 0.8 | RK3576 |
42-
| | w8a8 | 64 | 320 | 256 | 140.46 | 24.21 | 1.25 | RK3588 |
43-
| | w8a8_g512 | 64 | 320 | 256 | 195.00 | 20.08 | 1.29 | RK3588 |
44-
| Qwen2-1.5B | w4a16 | 64 | 320 | 256 | 512.00 | 14.40 | 1.75 | RK3576 |
45-
| | w4a16_g128 | 64 | 320 | 256 | 550.00 | 12.75 | 1.76 | RK3576 |
46-
| | w8a8 | 64 | 320 | 256 | 206.00 | 16.46 | 2.47 | RK3588 |
47-
| | w8a8_g128 | 64 | 320 | 256 | 725.00 | 7.00 | 2.65 | RK3588 |
48-
| Phi-3-3.8B | w4a16 | 64 | 320 | 256 | 975.00 | 6.60 | 2.16 | RK3576 |
49-
| | w4a16_g128 | 64 | 320 | 256 | 1180.00 | 5.85 | 2.23 | RK3576 |
50-
| | w8a8 | 64 | 320 | 256 | 516.00 | 7.44 | 3.88 | RK3588 |
51-
| | w8a8_g512 | 64 | 320 | 256 | 610.00 | 6.13 | 3.95 | RK3588 |
52-
| ChatGLM3-6B | w4a16 | 64 | 320 | 256 | 1168.00 | 4.62 | 3.86 | RK3576 |
53-
| | w4a16_g128 | 64 | 320 | 256 | 1582.56 | 3.82 | 3.96 | RK3576 |
54-
| | w8a8 | 64 | 320 | 256 | 800.00 | 4.95 | 6.69 | RK3588 |
55-
| | w8a8_g128 | 64 | 320 | 256 | 2190.00 | 2.70 | 7.18 | RK3588 |
56-
| Gemma2-2B | w4a16 | 64 | 320 | 256 | 628.00 | 8.00 | 3.63 | RK3576 |
57-
| | w4a16_g128 | 64 | 320 | 256 | 776.20 | 7.40 | 3.63 | RK3576 |
58-
| | w8a8 | 64 | 320 | 256 | 342.29 | 9.67 | 4.84 | RK3588 |
59-
| | w8a8_g128 | 64 | 320 | 256 | 1055.00 | 5.49 | 5.14 | RK3588 |
60-
| InternLM2-1.8B | w4a16 | 64 | 320 | 256 | 475.00 | 13.30 | 1.59 | RK3576 |
61-
| | w4a16_g128 | 64 | 320 | 256 | 572.00 | 11.95 | 1.62 | RK3576 |
62-
| | w8a8 | 64 | 320 | 256 | 205.97 | 15.66 | 2.38 | RK3588 |
63-
| | w8a8_g512 | 64 | 320 | 256 | 298.00 | 12.66 | 2.45 | RK3588 |
64-
| MiniCPM3-4B | w4a16 | 64 | 320 | 256 | 1397.00 | 4.80 | 2.7 | RK3576 |
65-
| | w4a16_g128 | 64 | 320 | 256 | 1645.00 | 4.39 | 2.8 | RK3576 |
66-
| | w8a8 | 64 | 320 | 256 | 702.18 | 6.15 | 4.65 | RK3588 |
67-
| | w8a8_g128 | 64 | 320 | 256 | 1691.00 | 3.42 | 5.06 | RK3588 |
68-
| llama3-8B | w4a16 | 64 | 320 | 256 | 1607.98 | 3.60 | 5.63 | RK3576 |
69-
| | w4a16_g128 | 64 | 320 | 256 | 2010.00 | 3.00 | 5.76 | RK3576 |
70-
| | w8a8 | 64 | 320 | 256 | 1128.00 | 3.79 | 9.21 | RK3588 |
71-
| | w8a8_g512 | 64 | 320 | 256 | 1281.35 | 3.05 | 9.45 | RK3588 |
39+
| llm model | dtype | seqlen | max_context | new_tokens | TTFT(ms) | Tokens/s | memory(G) | platform |
40+
| :------------- | :--------- | :----: | :---------: | :--------: | :------: | :------: | :-------: | :------: |
41+
| TinyLLAMA-1.1B | w4a16 | 64 | 320 | 256 | 345.00 | 21.10 | 0.77 | RK3576 |
42+
| | w4a16_g128 | 64 | 320 | 256 | 410.00 | 18.50 | 0.8 | RK3576 |
43+
| | w8a8 | 64 | 320 | 256 | 140.46 | 24.21 | 1.25 | RK3588 |
44+
| | w8a8_g512 | 64 | 320 | 256 | 195.00 | 20.08 | 1.29 | RK3588 |
45+
| Qwen2-1.5B | w4a16 | 64 | 320 | 256 | 512.00 | 14.40 | 1.75 | RK3576 |
46+
| | w4a16_g128 | 64 | 320 | 256 | 550.00 | 12.75 | 1.76 | RK3576 |
47+
| | w8a8 | 64 | 320 | 256 | 206.00 | 16.46 | 2.47 | RK3588 |
48+
| | w8a8_g128 | 64 | 320 | 256 | 725.00 | 7.00 | 2.65 | RK3588 |
49+
| Phi-3-3.8B | w4a16 | 64 | 320 | 256 | 975.00 | 6.60 | 2.16 | RK3576 |
50+
| | w4a16_g128 | 64 | 320 | 256 | 1180.00 | 5.85 | 2.23 | RK3576 |
51+
| | w8a8 | 64 | 320 | 256 | 516.00 | 7.44 | 3.88 | RK3588 |
52+
| | w8a8_g512 | 64 | 320 | 256 | 610.00 | 6.13 | 3.95 | RK3588 |
53+
| ChatGLM3-6B | w4a16 | 64 | 320 | 256 | 1168.00 | 4.62 | 3.86 | RK3576 |
54+
| | w4a16_g128 | 64 | 320 | 256 | 1582.56 | 3.82 | 3.96 | RK3576 |
55+
| | w8a8 | 64 | 320 | 256 | 800.00 | 4.95 | 6.69 | RK3588 |
56+
| | w8a8_g128 | 64 | 320 | 256 | 2190.00 | 2.70 | 7.18 | RK3588 |
57+
| Gemma2-2B | w4a16 | 64 | 320 | 256 | 628.00 | 8.00 | 3.63 | RK3576 |
58+
| | w4a16_g128 | 64 | 320 | 256 | 776.20 | 7.40 | 3.63 | RK3576 |
59+
| | w8a8 | 64 | 320 | 256 | 342.29 | 9.67 | 4.84 | RK3588 |
60+
| | w8a8_g128 | 64 | 320 | 256 | 1055.00 | 5.49 | 5.14 | RK3588 |
61+
| InternLM2-1.8B | w4a16 | 64 | 320 | 256 | 475.00 | 13.30 | 1.59 | RK3576 |
62+
| | w4a16_g128 | 64 | 320 | 256 | 572.00 | 11.95 | 1.62 | RK3576 |
63+
| | w8a8 | 64 | 320 | 256 | 205.97 | 15.66 | 2.38 | RK3588 |
64+
| | w8a8_g512 | 64 | 320 | 256 | 298.00 | 12.66 | 2.45 | RK3588 |
65+
| MiniCPM3-4B | w4a16 | 64 | 320 | 256 | 1397.00 | 4.80 | 2.7 | RK3576 |
66+
| | w4a16_g128 | 64 | 320 | 256 | 1645.00 | 4.39 | 2.8 | RK3576 |
67+
| | w8a8 | 64 | 320 | 256 | 702.18 | 6.15 | 4.65 | RK3588 |
68+
| | w8a8_g128 | 64 | 320 | 256 | 1691.00 | 3.42 | 5.06 | RK3588 |
69+
| llama3-8B | w4a16 | 64 | 320 | 256 | 1607.98 | 3.60 | 5.63 | RK3576 |
70+
| | w4a16_g128 | 64 | 320 | 256 | 2010.00 | 3.00 | 5.76 | RK3576 |
71+
| | w8a8 | 64 | 320 | 256 | 1128.00 | 3.79 | 9.21 | RK3588 |
72+
| | w8a8_g512 | 64 | 320 | 256 | 1281.35 | 3.05 | 9.45 | RK3588 |
73+
74+
| multimodal model | image input size | vision model dtype | vision infer time(s) | vision memory(MB) | llm model dtype | seqlen | max_context | new_tokens | TTFT(ms) | Tokens/s | llm memory(G) | platform |
75+
|:-------------- |:---------- |:------:|:-----------:|:----------:|:--------:|:--------:|:---------:|:--------:|:---------:|:---------:|:---------:|:---------:|
76+
| Qwen2-VL-2B | (1, 3, 392, 392) | fp16 | 3.55 | 1436.52 | w4a16 | 256 | 384 | 128 | 2094.17 | 13.23 | 1.75 | RK3576 |
77+
| | | fp16 | 3.28 | 1436.52 | w8a8 | 256 | 384 | 128 | 856.86 | 16.19 | 2.47 | RK3588 |
78+
| MiniCPM-V-2_6 | (1, 3, 448, 448) | fp16 | 2.40 | 1031.30 | w4a16 | 128 | 256 | 128 | 2997.70 | 3.84 | 5.50 | RK3576 |
79+
| | | fp16 | 3.27 | 976.98 | w8a8 | 128 | 256 | 128 | 1720.60 | 4.13 | 8.88 | RK3588 |
7280

7381
- This performance data was collected based on the maximum CPU and NPU frequencies of each platform with version 1.1.0.
7482
- The script for setting the frequencies is located in the scripts directory.
83+
- The vision models were tested using all NPU cores with rknn-toolkit2 version 2.2.0.
7584

7685
# Download
7786

78-
You can download the latest package, docker image, example, documentation, and platform-tool from [RKLLM_SDK](https://console.zbox.filez.com/l/RJJDmB), fetch code: rkllm
87+
1. You can download the **latest package** from [RKLLM_SDK](https://console.zbox.filez.com/l/RJJDmB), fetch code: rkllm
88+
2. You can download the **converted rkllm model** from [rkllm_model_zoo](https://console.box.lenovo.com/l/l0tXb8), fetch code: rkllm
7989

8090
# Examples
8191

82-
1. Multimodel deployment demo: [rkllm_multimodel_demo](https://github.com/airockchip/rknn-llm/tree/main/examples/rkllm_multimodel_demo)
83-
2. API usage demo: [rkllm_api_demo](https://github.com/airockchip/rknn-llm/tree/main/examples/rkllm_api_demo)
92+
1. Multimodal deployment demo: [Qwen2-VL-2B_Demo](https://github.com/airockchip/rknn-llm/tree/main/examples/Qwen2-VL-2B_Demo)
93+
2. API usage demo: [DeepSeek-R1-Distill-Qwen-1.5B_Demo](https://github.com/airockchip/rknn-llm/tree/main/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo)
8494
3. API server demo: [rkllm_server_demo](https://github.com/airockchip/rknn-llm/tree/main/examples/rkllm_server_demo)
8595

8696
# Note
@@ -117,4 +127,4 @@ https://github.com/airockchip/rknn-toolkit2
117127
- Add support for models such as Llama3, Gemma2, and MiniCPM3.
118128
- Resolve catastrophic forgetting issue when the number of tokens exceeds max_context.
119129

120-
for older version, please refer [CHANGELOG](CHANGELOG.md)
130+
For older versions, please refer to the [CHANGELOG](CHANGELOG.md)
-1.41 MB
Binary file not shown.
1.46 MB
Binary file not shown.
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
# DeepSeek-R1-Distill-Qwen-1.5B Demo
2+
1. This demo demonstrates how to deploy the DeepSeek-R1-Distill-Qwen-1.5B model.
3+
2. The open-source model used in this demo is available at: [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B)
4+
5+
## 1. Requirements
6+
7+
```
8+
rkllm-toolkit==1.1.4
9+
rkllm-runtime==1.1.4
10+
python==3.8 or python==3.10
11+
```
12+
13+
## 2. Model Conversion
14+
15+
1. Firstly, you need to create `data_quant.json` for quantizing the rkllm model; we use the fp16 model's generation results as the quantization calibration data.
16+
2. Secondly, run the following commands to generate `data_quant.json` and export the rkllm model.
17+
3. You can also download the **converted rkllm model** from [rkllm_model_zoo](https://console.box.lenovo.com/l/l0tXb8), fetch code: rkllm
18+
19+
```bash
20+
cd export
21+
python generate_data_quant.py -m /path/to/DeepSeek-R1-Distill-Qwen-1.5B
22+
python export_rkllm.py
23+
```
24+
25+
## 3. C++ Demo
26+
27+
In the `deploy` directory, we provide example code for board-side inference.
28+
29+
### 1. Compile and Build
30+
31+
Users can directly compile the example code by running the `deploy/build-linux.sh` or `deploy/build-android.sh` script (replacing the cross-compiler path with the actual path). This will generate an `install/demo_Linux_aarch64` folder in the `deploy` directory, containing the executable `llm_demo` and the `lib` folder.
32+
33+
```bash
34+
cd deploy
35+
# for linux
36+
./build-linux.sh
37+
# for android
38+
./build-android.sh
39+
# push install dir to device
40+
adb push install/demo_Linux_aarch64 /data
41+
# push model file to device
42+
adb push DeepSeek-R1-Distill-Qwen-1.5B.rkllm /data/demo_Linux_aarch64
43+
```
44+
45+
### 2. Run Demo
46+
47+
Enter the `/data/demo_Linux_aarch64` directory on the board and run the example using the following code
48+
49+
```bash
50+
adb shell
51+
cd /data/demo_Linux_aarch64
52+
# export lib path
53+
export LD_LIBRARY_PATH=./lib
54+
taskset f0 ./llm_demo /path/to/your/rkllm/model 2048 4096
55+
56+
# Running result
57+
rkllm init start
58+
rkllm init success
59+
60+
**********************可输入以下问题对应序号获取回答/或自定义输入********************
61+
62+
[0] 现有一笼子,里面有鸡和兔子若干只,数一数,共有头14个,腿38条,求鸡和兔子各有多少只?
63+
[1] 有28位小朋友排成一行,从左边开始数第10位是学豆,从右边开始数他是第几位?
64+
65+
*************************************************************************
66+
67+
68+
user:
69+
```
70+
71+
example 1 (DeepSeek-R1-Distill-Qwen-1.5B_W8A8_RK3588.rkllm)
72+
73+
```
74+
user: 0
75+
现有一笼子,里面有鸡和兔子若干只,数一数,共有头14个,腿38条,求鸡和兔子各有多少只?
76+
robot: <think>
77+
首先,设鸡的数量为x,兔子的数量为y。
78+
79+
根据题目中的条件,我们知道:
80+
81+
1. 鸡和兔子的总数是14,因此有方程:
82+
x + y = 14
83+
84+
2. 鸡有两条腿,兔子有四条腿,总腿数是38,所以有另一个方程:
85+
2x + 4y = 38
86+
87+
接下来,通过代入法或消元法来解这两个方程。假设我们用代入法:
88+
89+
从第一个方程中,可以得到:
90+
x = 14 - y
91+
92+
将这个表达式代入第二个方程:
93+
2(14 - y) + 4y = 38
94+
展开计算后得到:
95+
28 - 2y + 4y = 38
96+
合并同类项:
97+
2y = 10
98+
解得:
99+
y = 5
100+
101+
然后,将y的值代入x = 14 - y中:
102+
x = 14 - 5 = 9
103+
104+
因此,鸡有9只,兔子有5只。
105+
</think>
106+
107+
要解决这个问题,我们可以设鸡的数量为 \( x \),兔子的数量为 \( y \)。根据题目给出的条件:
108+
109+
1. **头的总数**:每只鸡和兔子都有一个头,所以:
110+
\[
111+
x + y = 14
112+
\]
113+
114+
2. **腿的总数**:鸡有两条腿,兔子有四条腿,总腿数为38条,因此:
115+
\[
116+
2x + 4y = 38
117+
\]
118+
119+
接下来,我们可以通过解这两个方程来找到 \( x \) 和 \( y \) 的值。
120+
121+
**步骤一:简化第二个方程**
122+
123+
将第二个方程两边同时除以2:
124+
\[
125+
x + 2y = 19
126+
\]
127+
128+
现在,我们有两个方程:
129+
\[
130+
\begin{cases}
131+
x + y = 14 \\
132+
x + 2y = 19
133+
\end{cases}
134+
\]
135+
136+
**步骤二:消元法**
137+
138+
用第二个方程减去第一个方程:
139+
\[
140+
(x + 2y) - (x + y) = 19 - 14 \\
141+
y = 5
142+
\]
143+
144+
**步骤三:代入求 \( x \)**
145+
146+
将 \( y = 5 \) 代入第一个方程:
147+
\[
148+
x + 5 = 14 \\
149+
x = 14 - 5 \\
150+
x = 9
151+
\]
152+
153+
因此,鸡的数量是 **9只**,兔子的数量是 **5只**。
154+
155+
**最终答案:**
156+
鸡有 \(\boxed{9}\) 只,兔子有 \(\boxed{5}\) 只。
157+
```
158+
159+
example 2 (DeepSeek-R1-Distill-Qwen-1.5B_W8A8_RK3588.rkllm)
160+
161+
```
162+
163+
user: 1
164+
有28位小朋友排成一行,从左边开始数第10位是学豆,从右边开始数他是第几位?
165+
robot: <think>
166+
首先,总共有28位小朋友。
167+
168+
从左边开始数,第10位是学豆的位置。
169+
170+
因此,从右边开始数,学豆的位置是从右边数的第(28 - 10 + 1) = 第19位。
171+
</think>
172+
173+
**解答:**
174+
175+
我们有28位小朋友排成一行。题目要求确定从右边开始数时,第10位是学豆的位置。
176+
177+
**步骤如下:**
178+
179+
1. **总人数**:共有28位小朋友。
180+
2. **左边数的顺序**:从左边开始数,第10位是学豆。
181+
3. **右边数的计算**:
182+
- 从右边数时,第1位对应左边数的第28位。
183+
- 因此,第n位在左边对应的是第(28 - n + 1)位在右边。
184+
185+
4. **具体计算**:
186+
\[
187+
第10位在左边 = 第(28 - 10 + 1) = 第19位在右边
188+
\]
189+
190+
**最终答案:**
191+
192+
\boxed{19}
193+
```
194+
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
cmake_minimum_required(VERSION 3.10)
2+
project(rkllm_demo)
3+
4+
set(CMAKE_CXX_STANDARD 11)
5+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
6+
7+
if (CMAKE_SYSTEM_NAME STREQUAL "Android")
8+
set (TARGET_LIB_ARCH ${CMAKE_ANDROID_ARCH_ABI})
9+
else()
10+
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
11+
set (TARGET_LIB_ARCH aarch64)
12+
else()
13+
set (TARGET_LIB_ARCH armhf)
14+
endif()
15+
if (CMAKE_C_COMPILER MATCHES "uclibc")
16+
set (TARGET_LIB_ARCH ${TARGET_LIB_ARCH}_uclibc)
17+
endif()
18+
endif()
19+
20+
set(SOURCE_FILES_1 src/llm_demo.cpp)
21+
add_executable(llm_demo ${SOURCE_FILES_1})
22+
23+
set(RKLLM_API_PATH "${CMAKE_SOURCE_DIR}/../../../rkllm-runtime/${CMAKE_SYSTEM_NAME}/librkllm_api")
24+
include_directories(${RKLLM_API_PATH}/include)
25+
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
26+
set(RKLLM_RT_LIB ${RKLLM_API_PATH}/${CMAKE_ANDROID_ARCH_ABI}/librkllmrt.so)
27+
find_package(OpenMP REQUIRED)
28+
target_link_libraries(llm_demo ${RKLLM_RT_LIB} log OpenMP::OpenMP_CXX)
29+
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
30+
set(RKLLM_RT_LIB ${RKLLM_API_PATH}/aarch64/librkllmrt.so)
31+
target_link_libraries(llm_demo ${RKLLM_RT_LIB})
32+
endif()
33+
34+
# Install the executable file to the specified directory
35+
set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/install/demo_${CMAKE_SYSTEM_NAME}_${TARGET_LIB_ARCH})
36+
install(TARGETS llm_demo DESTINATION ./)
37+
install(PROGRAMS ${RKLLM_RT_LIB} DESTINATION lib)

examples/rkllm_api_demo/build-android.sh renamed to examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/deploy/build-android.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ if [[ -z ${BUILD_TYPE} ]];then
44
BUILD_TYPE=Release
55
fi
66

7-
ANDROID_NDK_PATH=~/opts/ndk/android-ndk-r21e
7+
ANDROID_NDK_PATH=~/opts/android-ndk-r21e
88
TARGET_ARCH=arm64-v8a
99

1010
TARGET_PLATFORM=android
@@ -30,4 +30,5 @@ cmake ../.. \
3030
-DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \
3131
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
3232

33-
make -j4
33+
make -j4
34+
make install

examples/rkllm_api_demo/build-linux.sh renamed to examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/deploy/build-linux.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,5 @@ cmake ../.. \
3131
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
3232
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
3333

34-
make -j4
34+
make -j4
35+
make install

0 commit comments

Comments
 (0)