wasi-nn: Add external delegation to support several NPU/GPU (#2162)

Add the VX delegate as a TFLite external delegate, so that several NPUs/GPUs
(from VeriSilicon, NXP, Amlogic) can be driven via WASI-NN.

The test code works with the x86 simulator.
ayakoakasaka 2023-05-05 10:29:36 +02:00 committed by GitHub
parent 5a23ae465c
commit 89be5622a5
4 changed files with 159 additions and 3 deletions

build-scripts/config_common.cmake

@@ -341,6 +341,13 @@ if (WAMR_BUILD_WASI_NN EQUAL 1)
     message (" WASI-NN: GPU enabled")
     add_definitions (-DWASI_NN_ENABLE_GPU=1)
   endif ()
+  if (WAMR_BUILD_WASI_NN_ENABLE_EXT EQUAL 1)
+    message (" WASI-NN: External Delegation enabled")
+    add_definitions (-DWASI_NN_ENABLE_EXTERNAL_DELEGATE=1)
+  endif ()
+  if (DEFINED WASI_NN_EXT_DELEGATE_PATH)
+    add_definitions (-DWASI_NN_EXT_DELEGATE_PATH="${WASI_NN_EXT_DELEGATE_PATH}")
+  endif ()
 endif ()
 if (WAMR_BUILD_ALLOC_WITH_USER_DATA EQUAL 1)
   add_definitions(-DWASM_MEM_ALLOC_WITH_USER_DATA=1)

core/iwasm/libraries/wasi-nn/README.md

@@ -24,6 +24,7 @@ Build the runtime image for your execution target type.
 `EXECUTION_TYPE` can be:
 * `cpu`
 * `nvidia-gpu`
+* `vx-delegate`

 ```
 EXECUTION_TYPE=cpu
@@ -71,6 +72,18 @@ docker run \
     /assets/test_tensorflow.wasm
 ```
+
+* vx-delegate for NPU (x86 simulator)
+
+```
+docker run \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-vx-delegate \
+    --dir=/assets \
+    --env="TARGET=gpu" \
+    /assets/test_tensorflow.wasm
+```
+
 Requirements:
 * [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).

wasi_nn_tensorflowlite.cpp

@@ -21,6 +21,10 @@
 #include <tensorflow/lite/delegates/gpu/delegate.h>
 #endif
+#if defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+#include <tensorflow/lite/delegates/external/external_delegate.h>
+#endif
+
 /* Maximum number of graphs per WASM instance */
 #define MAX_GRAPHS_PER_INST 10
 /* Maximum number of graph execution context per WASM instance*/
@@ -42,6 +46,7 @@ typedef struct {
     uint32_t current_interpreters;
     Interpreter interpreters[MAX_GRAPH_EXEC_CONTEXTS_PER_INST];
     korp_mutex g_lock;
+    TfLiteDelegate *delegate;
 } TFLiteContext;

 /* Utils */
@@ -194,18 +199,40 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
 #if defined(WASI_NN_ENABLE_GPU)
     NN_WARN_PRINTF("GPU enabled.");
     // https://www.tensorflow.org/lite/performance/gpu
-    auto options = TfLiteGpuDelegateOptionsV2Default();
+    TfLiteGpuDelegateOptionsV2 options =
+        TfLiteGpuDelegateOptionsV2Default();
     options.inference_preference =
         TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
     options.inference_priority1 =
         TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
-    auto *delegate = TfLiteGpuDelegateV2Create(&options);
+    tfl_ctx->delegate = TfLiteGpuDelegateV2Create(&options);
+    if (tfl_ctx->delegate == NULL) {
+        NN_ERR_PRINTF("Error when generating GPU delegate.");
+        use_default = true;
+        return missing_memory;
+    }
     if (tfl_ctx->interpreters[*ctx]
-            .interpreter->ModifyGraphWithDelegate(delegate)
+            .interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate)
         != kTfLiteOk) {
         NN_ERR_PRINTF("Error when enabling GPU delegate.");
         use_default = true;
     }
+#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+    NN_WARN_PRINTF("external delegation enabled.");
+    TfLiteExternalDelegateOptions options =
+        TfLiteExternalDelegateOptionsDefault(WASI_NN_EXT_DELEGATE_PATH);
+    tfl_ctx->delegate = TfLiteExternalDelegateCreate(&options);
+    if (tfl_ctx->delegate == NULL) {
+        NN_ERR_PRINTF("Error when generating External delegate.");
+        use_default = true;
+        return missing_memory;
+    }
+    if (tfl_ctx->interpreters[*ctx]
+            .interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate)
+        != kTfLiteOk) {
+        NN_ERR_PRINTF("Error when enabling External delegate.");
+        use_default = true;
+    }
 #else
     NN_WARN_PRINTF("GPU not enabled.");
     use_default = true;
@@ -350,6 +377,8 @@ tensorflowlite_initialize(void **tflite_ctx)
         NN_ERR_PRINTF("Error while initializing the lock");
     }

+    tfl_ctx->delegate = NULL;
+
     *tflite_ctx = (void *)tfl_ctx;
 }
@@ -364,6 +393,14 @@ tensorflowlite_destroy(void *tflite_ctx)
     */
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;

+    if (tfl_ctx->delegate != NULL) {
+#if defined(WASI_NN_ENABLE_GPU)
+        TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
+#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+        TfLiteExternalDelegateDelete(tfl_ctx->delegate);
+#endif
+    }
+
     NN_DBG_PRINTF("Freeing memory.");
     for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
         tfl_ctx->models[i].model.reset();

core/iwasm/libraries/wasi-nn/test/Dockerfile.vx-delegate

@@ -0,0 +1,99 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
FROM ubuntu:20.04 AS base
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
cmake build-essential git curl libssl-dev python3
# Build the TensorFlow Lite VX delegate (defaults to the x86-64 simulator)
WORKDIR /tmp
RUN git clone https://github.com/VeriSilicon/TIM-VX.git tim-vx
RUN git clone https://github.com/VeriSilicon/tflite-vx-delegate.git
RUN git clone https://github.com/tensorflow/tensorflow.git
# Build TIM-VX
WORKDIR /tmp/tim-vx/host_build
RUN cmake -DCMAKE_INSTALL_PREFIX=/usr/local ../
RUN make -j$(grep -c ^processor /proc/cpuinfo)
RUN make install
WORKDIR /tmp/tim-vx
#RUN mkdir -p prebuilt-sdk/x86_64_linux/lib/include
#RUN cp prebuilt-sdk/x86_64_linux/include/CL prebuilt-sdk/x86_64_linux/lib/include -fr
# Build TensorFlow Lite
WORKDIR /tmp/tensorflow/build
RUN cmake \
-DBUILD_SHARED_LIBS=ON \
-DTFLITE_ENABLE_RUY=on \
-DTFLITE_ENABLE_NNAPI=off \
-DTFLITE_ENABLE_XNNPACK=on \
-DTFLITE_ENABLE_EXTERNAL_DELEGATE=on \
../tensorflow/lite/
RUN make -j$(grep -c ^processor /proc/cpuinfo)
RUN make install
RUN cp --no-preserve=ownership -d lib*.so* /usr/local/lib
RUN cp -r --no-preserve=ownership -d flatbuffers/include/flatbuffers /usr/local/include
# install header files
RUN install -d /usr/local/include/tensorflow/lite && \
cd /tmp/tensorflow/tensorflow/lite && \
cp --parents \
$(find . -name "*.h*") \
/usr/local/include/tensorflow/lite
# install version.h from core
RUN install -d /usr/local/include/tensorflow/core/public && \
cp /tmp/tensorflow/tensorflow/core/public/version.h /usr/local/include/tensorflow/core/public
# Build the VX delegate (defaults to the x86-64 simulator)
WORKDIR /tmp/tflite-vx-delegate/build
RUN cmake \
-DBUILD_SHARED_LIBS=ON \
-DFETCHCONTENT_SOURCE_DIR_TENSORFLOW=/tmp/tensorflow \
-DTFLITE_LIB_LOC=/usr/local/lib/libtensorflow-lite.so \
-DTIM_VX_INSTALL=/usr/local \
-DCMAKE_INSTALL_PREFIX=/usr/ \
../
RUN make vx_delegate -j$(grep -c ^processor /proc/cpuinfo)
RUN make install
RUN cp --no-preserve=ownership -d lib*.so* /usr/lib
# install header files
RUN install -d /usr/local/include/tensorflow-lite-vx-delegate && \
cd /tmp/tflite-vx-delegate/ && \
cp --parents \
$(find . -name "*.h*") \
/usr/local/include/tensorflow-lite-vx-delegate
ENV VIVANTE_SDK_DIR=/tmp/tim-vx/prebuilt-sdk/x86_64_linux/
ENV VSIMULATOR_CONFIG=czl
ENV LD_LIBRARY_PATH=/tmp/tim-vx/prebuilt-sdk/x86_64_linux/lib:/usr/local/lib:/lib/x86_64-linux-gnu/:/lib64/:/usr/lib:$LD_LIBRARY_PATH
# Build WASI-NN
WORKDIR /home/wamr
COPY . .
WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
RUN cmake \
-DCMAKE_LIBRARY_PATH=${CMAKE_LIBRARY_PATH}:/usr/local/lib/ \
-DCMAKE_INCLUDE_PATH=${CMAKE_INCLUDE_PATH}:/usr/local/include/ \
-DWAMR_BUILD_WASI_NN=1 \
-DWAMR_BUILD_WASI_NN_ENABLE_EXT=1 \
-DWASI_NN_EXT_DELEGATE_PATH="/usr/lib/libvx_delegate.so" \
..
RUN make -j $(grep -c ^processor /proc/cpuinfo)
RUN cp /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
ENTRYPOINT [ "/run/iwasm" ]