airockchip
diff --git a/‎CHANGELOG.md‎
Lines changed: 17 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 64 additions & 0 deletions b/‎LICENSE‎
Lines changed: 64 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 24 additions & 12 deletions b/‎README.md‎
Lines changed: 24 additions & 12 deletions
diff --git a/‎doc/Rockchip_RKLLM_SDK_CN.pdf‎
457 KB b/‎doc/Rockchip_RKLLM_SDK_CN.pdf‎
457 KB
diff --git a/‎doc/Rockchip_RKLLM_SDK_EN.pdf‎
957 KB b/‎doc/Rockchip_RKLLM_SDK_EN.pdf‎
957 KB
diff --git a/‎rkllm-runtime/example/CMakeLists.txt‎ ‎…e/examples/rkllm_api_demo/CMakeLists.txt‎rkllm-runtime/example/CMakeLists.txt renamed to rkllm-runtime/examples/rkllm_api_demo/CMakeLists.txt
Lines changed: 3 additions & 2 deletions b/‎rkllm-runtime/example/CMakeLists.txt‎ ‎…e/examples/rkllm_api_demo/CMakeLists.txt‎rkllm-runtime/example/CMakeLists.txt renamed to rkllm-runtime/examples/rkllm_api_demo/CMakeLists.txt
Lines changed: 3 additions & 2 deletions
diff --git a/‎rkllm-runtime/example/Readme.md‎ ‎…untime/examples/rkllm_api_demo/Readme.md‎rkllm-runtime/example/Readme.md renamed to rkllm-runtime/examples/rkllm_api_demo/Readme.md
Lines changed: 2 additions & 2 deletions b/‎rkllm-runtime/example/Readme.md‎ ‎…untime/examples/rkllm_api_demo/Readme.md‎rkllm-runtime/example/Readme.md renamed to rkllm-runtime/examples/rkllm_api_demo/Readme.md
Lines changed: 2 additions & 2 deletions
diff --git a/‎rkllm-runtime/example/build-android.sh‎ ‎…examples/rkllm_api_demo/build-android.sh‎rkllm-runtime/example/build-android.sh renamed to rkllm-runtime/examples/rkllm_api_demo/build-android.sh
Lines changed: 1 addition & 1 deletion b/‎rkllm-runtime/example/build-android.sh‎ ‎…examples/rkllm_api_demo/build-android.sh‎rkllm-runtime/example/build-android.sh renamed to rkllm-runtime/examples/rkllm_api_demo/build-android.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎rkllm-runtime/example/build-linux.sh‎ ‎…e/examples/rkllm_api_demo/build-linux.sh‎rkllm-runtime/example/build-linux.sh renamed to rkllm-runtime/examples/rkllm_api_demo/build-linux.sh b/‎rkllm-runtime/example/build-linux.sh‎ ‎…e/examples/rkllm_api_demo/build-linux.sh‎rkllm-runtime/example/build-linux.sh renamed to rkllm-runtime/examples/rkllm_api_demo/build-linux.sh
diff --git a/‎rkllm-runtime/example/src/main.cpp‎ ‎…ime/examples/rkllm_api_demo/src/main.cpp‎rkllm-runtime/example/src/main.cpp renamed to rkllm-runtime/examples/rkllm_api_demo/src/main.cpp
Lines changed: 11 additions & 7 deletions b/‎rkllm-runtime/example/src/main.cpp‎ ‎…ime/examples/rkllm_api_demo/src/main.cpp‎rkllm-runtime/example/src/main.cpp renamed to rkllm-runtime/examples/rkllm_api_demo/src/main.cpp
Lines changed: 11 additions & 7 deletions
@@ -0,0 +1,17 @@
+# CHANGELOG
+## v1.0.1
+ - Optimize memory usage during model conversion
+ - Optimize memory usage during inference
+ - Increase prefill speed
+ - Reduce initialization time
+ - Improve quantization accuracy
+ - Add support for Gemma, ChatGLM3, MiniCPM, InternLM2, and Phi-3
+ - Add Server invocation
+ - Add inference interruption interface
+ - Add logprob and token_id to the return value
+
+## v1.0.0
+ - Supports the conversion and deployment of LLM models on RK3588/RK3576 platforms
+ - Compatible with Hugging Face model architectures
+ - Currently supports the models Llama, Qwen, Qwen2, and Phi-2
+ - Supports quantization with w8a8 and w4a16 precision
@@ -0,0 +1,64 @@
+Copyright (c) Rockchip Electronics Co., Ltd.
+All rights reserved.
+
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// 4. This Software may contain some Open Source Software. You may not redistribute 
+// and/or modify such Open Source Software except in compliance with the applicable 
+// Open Source License.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+The following Open Source Software have been modified by Rockchip Electronics Co., Ltd. 
+----------------------------------------------------------------------------------------
+1. ggml  master
+Copyright (c) 2023-2024 The ggml authors
+All rights reserved.
+Licensed under the terms of the MIT License
+
+2. llama.cpp  master
+Copyright (c) 2023-2024 The ggml authors
+All rights reserved.
+Licensed under the terms of the MIT License 
+
+The terms of the MIT License:
+--------------------------------------------------------------------
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -17,23 +17,35 @@
   - RK3588 Series
   - RK3576 Series
 
+# Supported Models
+  - [X] [TinyLLAMA 1.1B](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0/tree/fe8a4ea1ffedaf415f4da2f062534de366a451e6) 
+  - [X] [Qwen 1.8B](https://huggingface.co/Qwen/Qwen-1_8B-Chat/tree/1d0f68de57b88cfde81f3c3e537f24464d889081)
+  - [X] [Qwen2 0.5B](https://huggingface.co/Qwen/Qwen1.5-0.5B/tree/8f445e3628f3500ee69f24e1303c9f10f5342a39)
+  - [X] [Phi-2 2.7B](https://hf-mirror.com/microsoft/phi-2/tree/834565c23f9b28b96ccbeabe614dd906b6db551a)
+  - [X] [Phi-3 3.8B](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/tree/291e9e30e38030c23497afa30f3af1f104837aa6)
+  - [X] [ChatGLM3 6B](https://huggingface.co/THUDM/chatglm3-6b/tree/103caa40027ebfd8450289ca2f278eac4ff26405)
+  - [X] [Gemma 2B](https://huggingface.co/google/gemma-2b-it/tree/de144fb2268dee1066f515465df532c05e699d48)
+  - [X] [InternLM2 1.8B](https://huggingface.co/internlm/internlm2-chat-1_8b/tree/ecccbb5c87079ad84e5788baa55dd6e21a9c614d)
+  - [X] [MiniCPM 2B](https://huggingface.co/openbmb/MiniCPM-2B-sft-bf16/tree/79fbb1db171e6d8bf77cdb0a94076a43003abd9e)
+
 # Download
 - You can also download all packages, docker image, examples, docs and platform-tools from [RKLLM_SDK](https://console.zbox.filez.com/l/RJJDmB), fetch code: rkllm
 
 # RKNN Toolkit2
-If you want to deploy additional AI model, we have introduced a new SDK called RKNN-Toolkit2. For details, please refer to:
+If you want to deploy additional AI models, we have introduced an SDK called RKNN-Toolkit2. For details, please refer to:
 
 https://github.com/airockchip/rknn-toolkit2
 
-# Notes
-
-Due to recent updates to the Phi2 model, the current version of the RKLLM SDK does not yet support these changes. 
-Please ensure to download a version of the [Phi2](https://hf-mirror.com/microsoft/phi-2/tree/834565c23f9b28b96ccbeabe614dd906b6db551a) model that is supported. 
-
 # CHANGELOG
-
-## v1.0.0-beta
- - Supports the conversion and deployment of LLM models on RK3588/RK3576 platforms
- - Compatible with Hugging Face model architectures
- - Currently supports the models LLaMA, Qwen, Qwen2, and Phi-2
- - Supports quantization with w8a8 and w4a16 precision
+## v1.0.1
+ - Optimize memory usage during model conversion
+ - Optimize memory usage during inference
+ - Increase prefill speed
+ - Reduce initialization time
+ - Improve quantization accuracy
+ - Add support for Gemma, ChatGLM3, MiniCPM, InternLM2, and Phi-3
+ - Add Server invocation
+ - Add inference interruption interface
+ - Add logprob and token_id to the return value
+
+For older versions, please refer to [CHANGELOG](CHANGELOG.md)
@@ -8,13 +8,14 @@ set(SOURCE_FILES src/main.cpp)
 
 add_executable(${PROJECT_NAME} ${SOURCE_FILES})
 
-set(RKLLM_API_PATH "${CMAKE_SOURCE_DIR}/../runtime/${CMAKE_SYSTEM_NAME}/librkllm_api")
+set(RKLLM_API_PATH "${CMAKE_SOURCE_DIR}/../../runtime/${CMAKE_SYSTEM_NAME}/librkllm_api")
 include_directories(${RKLLM_API_PATH}/include)
 if(CMAKE_SYSTEM_NAME STREQUAL "Android")
     set(RKLLM_RT_LIB ${RKLLM_API_PATH}/${CMAKE_ANDROID_ARCH_ABI}/librkllmrt.so)
+    target_link_libraries(${PROJECT_NAME}  ${RKLLM_RT_LIB} log)
 elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
     set(RKLLM_RT_LIB ${RKLLM_API_PATH}/aarch64/librkllmrt.so)
+    target_link_libraries(${PROJECT_NAME}  ${RKLLM_RT_LIB})
 endif()
 
 
-target_link_libraries(${PROJECT_NAME}  ${RKLLM_RT_LIB})
 
@@ -13,7 +13,7 @@ bash build-linux.sh
 Push the compiled `llm_demo` file and `librkllmrt.so` file to the device:
 ```bash
 adb push build/build_linux_aarch64_Release/llm_demo /userdata/llm
-adb push ../runtime/Linux/librkllm_api/aarch64/librkllmrt.so /userdata/llm/lib
+adb push ../../runtime/Linux/librkllm_api/aarch64/librkllmrt.so /userdata/llm/lib
 ```
 
 ## Run
@@ -39,7 +39,7 @@ bash build-android.sh
 Push the compiled `llm_demo` file and `librkllmrt.so` file to the device:
 ```bash
 adb push build/build_android_arm64-v8a_Release/llm_demo /userdata/llm
-adb push ../runtime/Android/librkllm_api/arm64-v8a/librkllmrt.so /userdata/llm/lib
+adb push ../../runtime/Android/librkllm_api/arm64-v8a/librkllmrt.so /userdata/llm/lib
 ```
 
 ## Run
 
@@ -4,7 +4,7 @@ if [[ -z ${BUILD_TYPE} ]];then
     BUILD_TYPE=Release
 fi
 
-ANDROID_NDK_PATH=~/android-ndk-r18b
+ANDROID_NDK_PATH=~/android-ndk-r21e
 TARGET_ARCH=arm64-v8a
 
 TARGET_PLATFORM=android
 
@@ -41,9 +41,8 @@ void exit_handler(int signal)
     }
 }
 
-void callback(const char *text, void *userdata, LLMCallState state)
+void callback(RKLLMResult *result, void *userdata, LLMCallState state)
 {
-    
     if (state == LLM_RUN_FINISH)
     {
         printf("\n");
@@ -52,8 +51,9 @@ void callback(const char *text, void *userdata, LLMCallState state)
     {
         printf("\\run error\n");
     }
-    else{
-        printf("%s", text);
+    else
+    {
+        printf("%s", result->text);
     }
 }
 
@@ -70,12 +70,14 @@ int main(int argc, char **argv)
 
     //设置参数及初始化
     RKLLMParam param = rkllm_createDefaultParam();
-    param.modelPath = rkllm_model.c_str();
-    param.target_platform = "rk3588";
+    param.model_path = rkllm_model.c_str();
     param.num_npu_core = 2;
     param.top_k = 1;
     param.max_new_tokens = 256;
     param.max_context_len = 512;
+    param.logprobs = false;
+    param.top_logprobs = 5;
+    param.use_gpu = false;
     rkllm_init(&llmHandle, param, callback);
     printf("rkllm init success\n");
 
@@ -113,7 +115,9 @@ int main(int argc, char **argv)
                 cout << input_str << endl;
             }
         }
-        string text = PROMPT_TEXT_PREFIX + input_str + PROMPT_TEXT_POSTFIX;
+        // string text = PROMPT_TEXT_PREFIX + input_str + PROMPT_TEXT_POSTFIX;
+        string text = input_str;
+
         printf("robot: ");
         rkllm_run(llmHandle, text.c_str(), NULL);
     }
Original file line number	Diff line number	Diff line change
`@@ -41,9 +41,8 @@ void exit_handler(int signal)`
`41`	`41`	`}`
`42`	`42`	`}`
`43`	`43`
`44`		`-void callback(const char *text, void *userdata, LLMCallState state)`
	`44`	`+void callback(RKLLMResult *result, void *userdata, LLMCallState state)`
`45`	`45`	`{`
`46`		`-`
`47`	`46`	`if (state == LLM_RUN_FINISH)`
`48`	`47`	`{`
`49`	`48`	`printf("\n");`
`@@ -52,8 +51,9 @@ void callback(const char *text, void *userdata, LLMCallState state)`
`52`	`51`	`{`
`53`	`52`	`printf("\\run error\n");`
`54`	`53`	`}`
`55`		`- else{`
`56`		`- printf("%s", text);`
	`54`	`+ else`
	`55`	`+ {`
	`56`	`+ printf("%s", result->text);`
`57`	`57`	`}`
`58`	`58`	`}`
`59`	`59`
`@@ -70,12 +70,14 @@ int main(int argc, char **argv)`
`70`	`70`
`71`	`71`	`//设置参数及初始化`
`72`	`72`	`RKLLMParam param = rkllm_createDefaultParam();`
`73`		`- param.modelPath = rkllm_model.c_str();`
`74`		`- param.target_platform = "rk3588";`
	`73`	`+ param.model_path = rkllm_model.c_str();`
`75`	`74`	`param.num_npu_core = 2;`
`76`	`75`	`param.top_k = 1;`
`77`	`76`	`param.max_new_tokens = 256;`
`78`	`77`	`param.max_context_len = 512;`
	`78`	`+ param.logprobs = false;`
	`79`	`+ param.top_logprobs = 5;`
	`80`	`+ param.use_gpu = false;`
`79`	`81`	`rkllm_init(&llmHandle, param, callback);`
`80`	`82`	`printf("rkllm init success\n");`
`81`	`83`
`@@ -113,7 +115,9 @@ int main(int argc, char **argv)`
`113`	`115`	`cout << input_str << endl;`
`114`	`116`	`}`
`115`	`117`	`}`
`116`		`- string text = PROMPT_TEXT_PREFIX + input_str + PROMPT_TEXT_POSTFIX;`
	`118`	`+ // string text = PROMPT_TEXT_PREFIX + input_str + PROMPT_TEXT_POSTFIX;`
	`119`	`+ string text = input_str;`
	`120`	`+`
`117`	`121`	`printf("robot: ");`
`118`	`122`	`rkllm_run(llmHandle, text.c_str(), NULL);`
`119`	`123`	`}`