wasi-nn: Add external delegation to support several NPU/GPU (#2162)

Add the VX delegate as a TFLite external delegate, so that several NPUs/GPUs
(from VeriSilicon, NXP, Amlogic) can be driven via WASI-NN.

The test code works with the x86 simulator.
ayakoakasaka 2023-05-05 10:29:36 +02:00 committed by GitHub
parent 5a23ae465c
commit 89be5622a5
4 changed files with 159 additions and 3 deletions

build-scripts/config_common.cmake

@@ -341,6 +341,13 @@ if (WAMR_BUILD_WASI_NN EQUAL 1)
     message (" WASI-NN: GPU enabled")
     add_definitions (-DWASI_NN_ENABLE_GPU=1)
   endif ()
+  if (WAMR_BUILD_WASI_NN_ENABLE_EXT EQUAL 1)
+    message (" WASI-NN: External Delegation enabled")
+    add_definitions (-DWASI_NN_ENABLE_EXTERNAL_DELEGATE=1)
+  endif ()
+  if (DEFINED WASI_NN_EXT_DELEGATE_PATH)
+    add_definitions (-DWASI_NN_EXT_DELEGATE_PATH="${WASI_NN_EXT_DELEGATE_PATH}")
+  endif ()
 endif ()
 if (WAMR_BUILD_ALLOC_WITH_USER_DATA EQUAL 1)
   add_definitions(-DWASM_MEM_ALLOC_WITH_USER_DATA=1)

core/iwasm/libraries/wasi-nn/README.md

@@ -24,6 +24,7 @@ Build the runtime image for your execution target type.
 `EXECUTION_TYPE` can be:
 * `cpu`
 * `nvidia-gpu`
+* `vx-delegate`

 ```
 EXECUTION_TYPE=cpu
@@ -71,6 +72,18 @@ docker run \
     /assets/test_tensorflow.wasm
 ```
+
+* vx-delegate for NPU (x86 simulator)
+
+```
+docker run \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-vx-delegate \
+    --dir=/assets \
+    --env="TARGET=gpu" \
+    /assets/test_tensorflow.wasm
+```
+
 Requirements:
 * [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).

wasi_nn_tensorflowlite.cpp

@@ -21,6 +21,10 @@
 #include <tensorflow/lite/delegates/gpu/delegate.h>
 #endif
+#if defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+#include <tensorflow/lite/delegates/external/external_delegate.h>
+#endif
+
 /* Maximum number of graphs per WASM instance */
 #define MAX_GRAPHS_PER_INST 10
 /* Maximum number of graph execution context per WASM instance*/
@@ -42,6 +46,7 @@ typedef struct {
     uint32_t current_interpreters;
     Interpreter interpreters[MAX_GRAPH_EXEC_CONTEXTS_PER_INST];
     korp_mutex g_lock;
+    TfLiteDelegate *delegate;
 } TFLiteContext;

 /* Utils */
@@ -194,18 +199,40 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
 #if defined(WASI_NN_ENABLE_GPU)
     NN_WARN_PRINTF("GPU enabled.");
     // https://www.tensorflow.org/lite/performance/gpu
-    auto options = TfLiteGpuDelegateOptionsV2Default();
+    TfLiteGpuDelegateOptionsV2 options =
+        TfLiteGpuDelegateOptionsV2Default();
     options.inference_preference =
         TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
     options.inference_priority1 =
         TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
-    auto *delegate = TfLiteGpuDelegateV2Create(&options);
+    tfl_ctx->delegate = TfLiteGpuDelegateV2Create(&options);
+    if (tfl_ctx->delegate == NULL) {
+        NN_ERR_PRINTF("Error when generating GPU delegate.");
+        use_default = true;
+        return missing_memory;
+    }
     if (tfl_ctx->interpreters[*ctx]
-            .interpreter->ModifyGraphWithDelegate(delegate)
+            .interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate)
         != kTfLiteOk) {
         NN_ERR_PRINTF("Error when enabling GPU delegate.");
         use_default = true;
     }
+#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+    NN_WARN_PRINTF("external delegation enabled.");
+    TfLiteExternalDelegateOptions options =
+        TfLiteExternalDelegateOptionsDefault(WASI_NN_EXT_DELEGATE_PATH);
+    tfl_ctx->delegate = TfLiteExternalDelegateCreate(&options);
+    if (tfl_ctx->delegate == NULL) {
+        NN_ERR_PRINTF("Error when generating External delegate.");
+        use_default = true;
+        return missing_memory;
+    }
+    if (tfl_ctx->interpreters[*ctx]
+            .interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate)
+        != kTfLiteOk) {
+        NN_ERR_PRINTF("Error when enabling External delegate.");
+        use_default = true;
+    }
 #else
     NN_WARN_PRINTF("GPU not enabled.");
     use_default = true;
@@ -350,6 +377,8 @@ tensorflowlite_initialize(void **tflite_ctx)
         NN_ERR_PRINTF("Error while initializing the lock");
     }

+    tfl_ctx->delegate = NULL;
+
     *tflite_ctx = (void *)tfl_ctx;
 }
@@ -364,6 +393,14 @@ tensorflowlite_destroy(void *tflite_ctx)
     */
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;

+    if (tfl_ctx->delegate != NULL) {
+#if defined(WASI_NN_ENABLE_GPU)
+        TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
+#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+        TfLiteExternalDelegateDelete(tfl_ctx->delegate);
+#endif
+    }
+
     NN_DBG_PRINTF("Freeing memory.");
     for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
         tfl_ctx->models[i].model.reset();

core/iwasm/libraries/wasi-nn/test/Dockerfile.vx-delegate

@@ -0,0 +1,99 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
FROM ubuntu:20.04 AS base
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
cmake build-essential git curl libssl-dev python3
# Build the TensorFlow Lite VX delegate (defaults to the x86-64 simulator)
WORKDIR /tmp
RUN git clone https://github.com/VeriSilicon/TIM-VX.git tim-vx
RUN git clone https://github.com/VeriSilicon/tflite-vx-delegate.git
RUN git clone https://github.com/tensorflow/tensorflow.git
# Build TIM-VX
WORKDIR /tmp/tim-vx/host_build
RUN cmake -DCMAKE_INSTALL_PREFIX=/usr/local ../
RUN make -j$(grep -c ^processor /proc/cpuinfo)
RUN make install
WORKDIR /tmp/tim-vx
#RUN mkdir -p prebuilt-sdk/x86_64_linux/lib/include
#RUN cp prebuilt-sdk/x86_64_linux/include/CL prebuilt-sdk/x86_64_linux/lib/include -fr
# Build TensorFlow Lite
WORKDIR /tmp/tensorflow/build
RUN cmake \
-DBUILD_SHARED_LIBS=ON \
-DTFLITE_ENABLE_RUY=on \
-DTFLITE_ENABLE_NNAPI=off \
-DTFLITE_ENABLE_XNNPACK=on \
-DTFLITE_ENABLE_EXTERNAL_DELEGATE=on \
../tensorflow/lite/
RUN make -j$(grep -c ^processor /proc/cpuinfo)
RUN make install
RUN cp --no-preserve=ownership -d lib*.so* /usr/local/lib
RUN cp -r --no-preserve=ownership -d flatbuffers/include/flatbuffers /usr/local/include
# install header files
RUN install -d /usr/local/include/tensorflow/lite && \
cd /tmp/tensorflow/tensorflow/lite && \
cp --parents \
$(find . -name "*.h*") \
/usr/local/include/tensorflow/lite
# install version.h from core
RUN install -d /usr/local/include/tensorflow/core/public && \
cp /tmp/tensorflow/tensorflow/core/public/version.h /usr/local/include/tensorflow/core/public
# Build the VX delegate (defaults to the x86-64 simulator)
WORKDIR /tmp/tflite-vx-delegate/build
RUN cmake \
-DBUILD_SHARED_LIBS=ON \
-DFETCHCONTENT_SOURCE_DIR_TENSORFLOW=/tmp/tensorflow \
-DTFLITE_LIB_LOC=/usr/local/lib/libtensorflow-lite.so \
-DTIM_VX_INSTALL=/usr/local \
-DCMAKE_INSTALL_PREFIX=/usr/ \
../
RUN make vx_delegate -j$(grep -c ^processor /proc/cpuinfo)
RUN make install
RUN cp --no-preserve=ownership -d lib*.so* /usr/lib
# install header files
RUN install -d /usr/local/include/tensorflow-lite-vx-delegate && \
cd /tmp/tflite-vx-delegate/ && \
cp --parents \
$(find . -name "*.h*") \
/usr/local/include/tensorflow-lite-vx-delegate
ENV VIVANTE_SDK_DIR=/tmp/tim-vx/prebuilt-sdk/x86_64_linux/
ENV VSIMULATOR_CONFIG=czl
ENV LD_LIBRARY_PATH=/tmp/tim-vx/prebuilt-sdk/x86_64_linux/lib:/usr/local/lib:/lib/x86_64-linux-gnu/:/lib64/:/usr/lib:$LD_LIBRARY_PATH
# Build WASI-NN
WORKDIR /home/wamr
COPY . .
WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
RUN cmake \
-DCMAKE_LIBRARY_PATH=${CMAKE_LIBRARY_PATH}:/usr/local/lib/ \
-DCMAKE_INCLUDE_PATH=${CMAKE_INCLUDE_PATH}:/usr/local/include/ \
-DWAMR_BUILD_WASI_NN=1 \
-DWAMR_BUILD_WASI_NN_ENABLE_EXT=1 \
-DWASI_NN_EXT_DELEGATE_PATH="/usr/lib/libvx_delegate.so" \
..
RUN make -j $(grep -c ^processor /proc/cpuinfo)
RUN cp /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
ENTRYPOINT [ "/run/iwasm" ]