mirror of
https://github.com/bytecodealliance/wasm-micro-runtime.git
synced 2025-02-06 06:55:07 +00:00
wasi-nn: Add external delegation to support several NPU/GPU (#2162)
Add VX delegation as an external delegation of TFLite, so that several NPU/GPU (from VeriSilicon, NXP, Amlogic) can be controlled via WASI-NN. Test Code can work with the X86 simulator.
This commit is contained in:
parent
5a23ae465c
commit
89be5622a5
|
@ -341,6 +341,13 @@ if (WAMR_BUILD_WASI_NN EQUAL 1)
|
||||||
message (" WASI-NN: GPU enabled")
|
message (" WASI-NN: GPU enabled")
|
||||||
add_definitions (-DWASI_NN_ENABLE_GPU=1)
|
add_definitions (-DWASI_NN_ENABLE_GPU=1)
|
||||||
endif ()
|
endif ()
|
||||||
|
if (WAMR_BUILD_WASI_NN_ENABLE_EXT EQUAL 1)
|
||||||
|
message (" WASI-NN: External Delegation enabled")
|
||||||
|
add_definitions (-DWASI_NN_ENABLE_EXTERNAL_DELEGATE=1)
|
||||||
|
endif ()
|
||||||
|
if (DEFINED WASI_NN_EXT_DELEGATE_PATH)
|
||||||
|
add_definitions (-DWASI_NN_EXT_DELEGATE_PATH="${WASI_NN_EXT_DELEGATE_PATH}")
|
||||||
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
if (WAMR_BUILD_ALLOC_WITH_USER_DATA EQUAL 1)
|
if (WAMR_BUILD_ALLOC_WITH_USER_DATA EQUAL 1)
|
||||||
add_definitions(-DWASM_MEM_ALLOC_WITH_USER_DATA=1)
|
add_definitions(-DWASM_MEM_ALLOC_WITH_USER_DATA=1)
|
||||||
|
|
|
@ -24,6 +24,7 @@ Build the runtime image for your execution target type.
|
||||||
`EXECUTION_TYPE` can be:
|
`EXECUTION_TYPE` can be:
|
||||||
* `cpu`
|
* `cpu`
|
||||||
* `nvidia-gpu`
|
* `nvidia-gpu`
|
||||||
|
* `vx-delegate`
|
||||||
|
|
||||||
```
|
```
|
||||||
EXECUTION_TYPE=cpu
|
EXECUTION_TYPE=cpu
|
||||||
|
@ -71,6 +72,18 @@ docker run \
|
||||||
/assets/test_tensorflow.wasm
|
/assets/test_tensorflow.wasm
|
||||||
```
|
```
|
||||||
|
|
||||||
|
* vx-delegate for NPU (x86 simulator)
|
||||||
|
|
||||||
|
```
|
||||||
|
docker run \
|
||||||
|
-v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-vx-delegate \
|
||||||
|
--dir=/assets \
|
||||||
|
--env="TARGET=gpu" \
|
||||||
|
/assets/test_tensorflow.wasm
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Requirements:
|
Requirements:
|
||||||
* [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).
|
* [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,10 @@
|
||||||
#include <tensorflow/lite/delegates/gpu/delegate.h>
|
#include <tensorflow/lite/delegates/gpu/delegate.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
|
||||||
|
#include <tensorflow/lite/delegates/external/external_delegate.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Maximum number of graphs per WASM instance */
|
/* Maximum number of graphs per WASM instance */
|
||||||
#define MAX_GRAPHS_PER_INST 10
|
#define MAX_GRAPHS_PER_INST 10
|
||||||
/* Maximum number of graph execution context per WASM instance*/
|
/* Maximum number of graph execution context per WASM instance*/
|
||||||
|
@ -42,6 +46,7 @@ typedef struct {
|
||||||
uint32_t current_interpreters;
|
uint32_t current_interpreters;
|
||||||
Interpreter interpreters[MAX_GRAPH_EXEC_CONTEXTS_PER_INST];
|
Interpreter interpreters[MAX_GRAPH_EXEC_CONTEXTS_PER_INST];
|
||||||
korp_mutex g_lock;
|
korp_mutex g_lock;
|
||||||
|
TfLiteDelegate *delegate;
|
||||||
} TFLiteContext;
|
} TFLiteContext;
|
||||||
|
|
||||||
/* Utils */
|
/* Utils */
|
||||||
|
@ -194,18 +199,40 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
|
||||||
#if defined(WASI_NN_ENABLE_GPU)
|
#if defined(WASI_NN_ENABLE_GPU)
|
||||||
NN_WARN_PRINTF("GPU enabled.");
|
NN_WARN_PRINTF("GPU enabled.");
|
||||||
// https://www.tensorflow.org/lite/performance/gpu
|
// https://www.tensorflow.org/lite/performance/gpu
|
||||||
auto options = TfLiteGpuDelegateOptionsV2Default();
|
TfLiteGpuDelegateOptionsV2 options =
|
||||||
|
TfLiteGpuDelegateOptionsV2Default();
|
||||||
options.inference_preference =
|
options.inference_preference =
|
||||||
TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
|
TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
|
||||||
options.inference_priority1 =
|
options.inference_priority1 =
|
||||||
TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
|
TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
|
||||||
auto *delegate = TfLiteGpuDelegateV2Create(&options);
|
tfl_ctx->delegate = TfLiteGpuDelegateV2Create(&options);
|
||||||
|
if (tfl_ctx->delegate == NULL) {
|
||||||
|
NN_ERR_PRINTF("Error when generating GPU delegate.");
|
||||||
|
use_default = true;
|
||||||
|
return missing_memory;
|
||||||
|
}
|
||||||
if (tfl_ctx->interpreters[*ctx]
|
if (tfl_ctx->interpreters[*ctx]
|
||||||
.interpreter->ModifyGraphWithDelegate(delegate)
|
.interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate)
|
||||||
!= kTfLiteOk) {
|
!= kTfLiteOk) {
|
||||||
NN_ERR_PRINTF("Error when enabling GPU delegate.");
|
NN_ERR_PRINTF("Error when enabling GPU delegate.");
|
||||||
use_default = true;
|
use_default = true;
|
||||||
}
|
}
|
||||||
|
#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
|
||||||
|
NN_WARN_PRINTF("external delegation enabled.");
|
||||||
|
TfLiteExternalDelegateOptions options =
|
||||||
|
TfLiteExternalDelegateOptionsDefault(WASI_NN_EXT_DELEGATE_PATH);
|
||||||
|
tfl_ctx->delegate = TfLiteExternalDelegateCreate(&options);
|
||||||
|
if (tfl_ctx->delegate == NULL) {
|
||||||
|
NN_ERR_PRINTF("Error when generating External delegate.");
|
||||||
|
use_default = true;
|
||||||
|
return missing_memory;
|
||||||
|
}
|
||||||
|
if (tfl_ctx->interpreters[*ctx]
|
||||||
|
.interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate)
|
||||||
|
!= kTfLiteOk) {
|
||||||
|
NN_ERR_PRINTF("Error when enabling External delegate.");
|
||||||
|
use_default = true;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
NN_WARN_PRINTF("GPU not enabled.");
|
NN_WARN_PRINTF("GPU not enabled.");
|
||||||
use_default = true;
|
use_default = true;
|
||||||
|
@ -350,6 +377,8 @@ tensorflowlite_initialize(void **tflite_ctx)
|
||||||
NN_ERR_PRINTF("Error while initializing the lock");
|
NN_ERR_PRINTF("Error while initializing the lock");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tfl_ctx->delegate = NULL;
|
||||||
|
|
||||||
*tflite_ctx = (void *)tfl_ctx;
|
*tflite_ctx = (void *)tfl_ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -364,6 +393,14 @@ tensorflowlite_destroy(void *tflite_ctx)
|
||||||
*/
|
*/
|
||||||
TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
|
TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
|
||||||
|
|
||||||
|
if (tfl_ctx->delegate != NULL) {
|
||||||
|
#if defined(WASI_NN_ENABLE_GPU)
|
||||||
|
TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
|
||||||
|
#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
|
||||||
|
TfLiteExternalDelegateDelete(tfl_ctx->delegate);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
NN_DBG_PRINTF("Freeing memory.");
|
NN_DBG_PRINTF("Freeing memory.");
|
||||||
for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
|
for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
|
||||||
tfl_ctx->models[i].model.reset();
|
tfl_ctx->models[i].model.reset();
|
||||||
|
|
99
core/iwasm/libraries/wasi-nn/test/Dockerfile.vx-delegate
Normal file
99
core/iwasm/libraries/wasi-nn/test/Dockerfile.vx-delegate
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
# Copyright (C) 2019 Intel Corporation. All rights reserved.
|
||||||
|
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
|
||||||
|
FROM ubuntu:20.04 AS base
|
||||||
|
|
||||||
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y \
|
||||||
|
cmake build-essential git curl libssl-dev python3
|
||||||
|
|
||||||
|
|
||||||
|
# Build TensorFlow Lite VX delegate default built for x86-64 simulator
|
||||||
|
WORKDIR /tmp
|
||||||
|
RUN git clone https://github.com/VeriSilicon/TIM-VX.git tim-vx
|
||||||
|
RUN git clone https://github.com/VeriSilicon/tflite-vx-delegate.git
|
||||||
|
RUN git clone https://github.com/tensorflow/tensorflow.git
|
||||||
|
|
||||||
|
|
||||||
|
# Build TIM-VX
|
||||||
|
WORKDIR /tmp/tim-vx/host_build
|
||||||
|
RUN cmake -DCMAKE_INSTALL_PREFIX=/usr/local ../
|
||||||
|
RUN make -j$(grep -c ^processor /proc/cpuinfo)
|
||||||
|
RUN make install
|
||||||
|
|
||||||
|
WORKDIR /tmp/tim-vx
|
||||||
|
#RUN mkdir -p prebuilt-sdk/x86_64_linux/lib/include
|
||||||
|
#RUN cp prebuilt-sdk/x86_64_linux/include/CL prebuilt-sdk/x86_64_linux/lib/include -fr
|
||||||
|
|
||||||
|
|
||||||
|
# Build TensorFlow Lite
|
||||||
|
WORKDIR /tmp/tensorflow/build
|
||||||
|
RUN cmake \
|
||||||
|
-DBUILD_SHARED_LIBS=ON \
|
||||||
|
-DTFLITE_ENABLE_RUY=on \
|
||||||
|
-DTFLITE_ENABLE_NNAPI=off \
|
||||||
|
-DTFLITE_ENABLE_XNNPACK=on \
|
||||||
|
-DTFLITE_ENABLE_EXTERNAL_DELEGATE=on \
|
||||||
|
../tensorflow/lite/
|
||||||
|
RUN make -j$(grep -c ^processor /proc/cpuinfo)
|
||||||
|
RUN make install
|
||||||
|
RUN cp --no-preserve=ownership -d lib*.so* /usr/local/lib
|
||||||
|
RUN cp -r --no-preserve=ownership -d flatbuffers/include/flatbuffers /usr/local/include
|
||||||
|
# install header files
|
||||||
|
RUN install -d /usr/local/include/tensorflow/lite && \
|
||||||
|
cd /tmp/tensorflow/tensorflow/lite && \
|
||||||
|
cp --parents \
|
||||||
|
$(find . -name "*.h*") \
|
||||||
|
/usr/local/include/tensorflow/lite
|
||||||
|
# install version.h from core
|
||||||
|
RUN install -d /usr/local/include/tensorflow/core/public && \
|
||||||
|
cp /tmp/tensorflow/tensorflow/core/public/version.h /usr/local/include/tensorflow/core/public
|
||||||
|
|
||||||
|
|
||||||
|
# Build Vx Delegate default built for x86-64 simulator
|
||||||
|
WORKDIR /tmp/tflite-vx-delegate/build
|
||||||
|
RUN cmake \
|
||||||
|
-DBUILD_SHARED_LIBS=ON \
|
||||||
|
-DFETCHCONTENT_SOURCE_DIR_TENSORFLOW=/tmp/tensorflow \
|
||||||
|
-DTFLITE_LIB_LOC=/usr/local/lib/libtensorflow-lite.so \
|
||||||
|
-DTIM_VX_INSTALL=/usr/local \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=/usr/ \
|
||||||
|
../
|
||||||
|
RUN make vx_delegate -j$(grep -c ^processor /proc/cpuinfo)
|
||||||
|
RUN make install
|
||||||
|
RUN cp --no-preserve=ownership -d lib*.so* /usr/lib
|
||||||
|
# install header files
|
||||||
|
RUN install -d /usr/local/include/tensorflow-lite-vx-delegate && \
|
||||||
|
cd /tmp/tflite-vx-delegate/ && \
|
||||||
|
cp --parents \
|
||||||
|
$(find . -name "*.h*") \
|
||||||
|
/usr/local/include/tensorflow-lite-vx-delegate
|
||||||
|
|
||||||
|
ENV VIVANTE_SDK_DIR=/tmp/tim-vx/prebuilt-sdk/x86_64_linux/
|
||||||
|
ENV VSIMULATOR_CONFIG=czl
|
||||||
|
|
||||||
|
ENV LD_LIBRARY_PATH=/tmp/tim-vx/prebuilt-sdk/x86_64_linux/lib:/usr/local/lib:/lib/x86_64-linux-gnu/:/lib64/:/usr/lib:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
|
|
||||||
|
# Build WASI-NN
|
||||||
|
WORKDIR /home/wamr
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
|
||||||
|
|
||||||
|
RUN cmake \
|
||||||
|
-DCMAKE_LIBRARY_PATH=${CMAKE_LIBRARY_PATH}:/usr/local/lib/ \
|
||||||
|
-DCMAKE_INCLUDE_PATH=${CMAKE_INCLUDE_PATH}:/usr/local/include/ \
|
||||||
|
-DWAMR_BUILD_WASI_NN=1 \
|
||||||
|
-DWAMR_BUILD_WASI_NN_ENABLE_EXT=1 \
|
||||||
|
-DWASI_NN_EXT_DELEGATE_PATH="/usr/lib/libvx_delegate.so" \
|
||||||
|
..
|
||||||
|
|
||||||
|
RUN make -j $(grep -c ^processor /proc/cpuinfo)
|
||||||
|
|
||||||
|
RUN cp /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
|
||||||
|
|
||||||
|
ENTRYPOINT [ "/run/iwasm" ]
|
Loading…
Reference in New Issue
Block a user