From 89be5622a5776e5240253c7acde39a1794ae31bb Mon Sep 17 00:00:00 2001
From: ayakoakasaka <98828539+ayakoakasaka@users.noreply.github.com>
Date: Fri, 5 May 2023 10:29:36 +0200
Subject: [PATCH] wasi-nn: Add external delegation to support several NPU/GPU
 (#2162)

Add the VX delegate as a TFLite external delegate, so that several NPUs/GPUs
(from VeriSilicon, NXP, Amlogic) can be controlled via WASI-NN.

The test code can run on the x86 simulator.
---
 build-scripts/config_common.cmake             |  7 ++
 core/iwasm/libraries/wasi-nn/README.md        | 13 +++
 .../wasi-nn/src/wasi_nn_tensorflowlite.cpp    | 43 +++++++-
 .../wasi-nn/test/Dockerfile.vx-delegate       | 99 +++++++++++++++++++
 4 files changed, 159 insertions(+), 3 deletions(-)
 create mode 100644 core/iwasm/libraries/wasi-nn/test/Dockerfile.vx-delegate

diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake
index 4b29ec869..ea8ad1f32 100644
--- a/build-scripts/config_common.cmake
+++ b/build-scripts/config_common.cmake
@@ -341,6 +341,13 @@ if (WAMR_BUILD_WASI_NN EQUAL 1)
       message ("     WASI-NN: GPU enabled")
       add_definitions (-DWASI_NN_ENABLE_GPU=1)
   endif ()
+  if (WAMR_BUILD_WASI_NN_ENABLE_EXT EQUAL 1)
+      message ("     WASI-NN: External Delegation enabled")
+      add_definitions (-DWASI_NN_ENABLE_EXTERNAL_DELEGATE=1)
+  endif ()
+  if (DEFINED WASI_NN_EXT_DELEGATE_PATH)
+      add_definitions (-DWASI_NN_EXT_DELEGATE_PATH="${WASI_NN_EXT_DELEGATE_PATH}")
+  endif ()
 endif ()
 if (WAMR_BUILD_ALLOC_WITH_USER_DATA EQUAL 1)
   add_definitions(-DWASM_MEM_ALLOC_WITH_USER_DATA=1)
diff --git a/core/iwasm/libraries/wasi-nn/README.md b/core/iwasm/libraries/wasi-nn/README.md
index c5762618d..ac737c281 100644
--- a/core/iwasm/libraries/wasi-nn/README.md
+++ b/core/iwasm/libraries/wasi-nn/README.md
@@ -24,6 +24,7 @@ Build the runtime image for your execution target type.
 `EXECUTION_TYPE` can be:
 * `cpu`
 * `nvidia-gpu`
+* `vx-delegate`
 
 ```
 EXECUTION_TYPE=cpu
@@ -71,6 +72,18 @@ docker run \
     /assets/test_tensorflow.wasm
 ```
 
+* vx-delegate for NPU (x86 simulator)
+
+```
+docker run \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-vx-delegate \
+    --dir=/assets \
+    --env="TARGET=gpu" \
+    /assets/test_tensorflow.wasm
+```
+
+
+
 Requirements:
 * [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).
 
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
index 0fe156381..dfd21787c 100644
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
@@ -21,6 +21,10 @@
 #include <tensorflow/lite/delegates/gpu/delegate.h>
 #endif
 
+#if defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+#include <tensorflow/lite/delegates/external/external_delegate.h>
+#endif
+
 /* Maximum number of graphs per WASM instance */
 #define MAX_GRAPHS_PER_INST 10
 /* Maximum number of graph execution context per WASM instance*/
@@ -42,6 +46,7 @@ typedef struct {
     uint32_t current_interpreters;
     Interpreter interpreters[MAX_GRAPH_EXEC_CONTEXTS_PER_INST];
     korp_mutex g_lock;
+    TfLiteDelegate *delegate;
 } TFLiteContext;
 
 /* Utils */
@@ -194,18 +199,40 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
 #if defined(WASI_NN_ENABLE_GPU)
             NN_WARN_PRINTF("GPU enabled.");
             // https://www.tensorflow.org/lite/performance/gpu
-            auto options = TfLiteGpuDelegateOptionsV2Default();
+            TfLiteGpuDelegateOptionsV2 options =
+                TfLiteGpuDelegateOptionsV2Default();
             options.inference_preference =
                 TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
             options.inference_priority1 =
                 TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
-            auto *delegate = TfLiteGpuDelegateV2Create(&options);
+            tfl_ctx->delegate = TfLiteGpuDelegateV2Create(&options);
+            if (tfl_ctx->delegate == NULL) {
+                NN_ERR_PRINTF("Error when generating GPU delegate.");
+                use_default = true;
+                return missing_memory;
+            }
             if (tfl_ctx->interpreters[*ctx]
-                    .interpreter->ModifyGraphWithDelegate(delegate)
+                    .interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate)
                 != kTfLiteOk) {
                 NN_ERR_PRINTF("Error when enabling GPU delegate.");
                 use_default = true;
             }
+#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+            NN_WARN_PRINTF("external delegation enabled.");
+            TfLiteExternalDelegateOptions options =
+                TfLiteExternalDelegateOptionsDefault(WASI_NN_EXT_DELEGATE_PATH);
+            tfl_ctx->delegate = TfLiteExternalDelegateCreate(&options);
+            if (tfl_ctx->delegate == NULL) {
+                NN_ERR_PRINTF("Error when generating External delegate.");
+                use_default = true;
+                return missing_memory;
+            }
+            if (tfl_ctx->interpreters[*ctx]
+                    .interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate)
+                != kTfLiteOk) {
+                NN_ERR_PRINTF("Error when enabling External delegate.");
+                use_default = true;
+            }
 #else
             NN_WARN_PRINTF("GPU not enabled.");
             use_default = true;
@@ -350,6 +377,8 @@ tensorflowlite_initialize(void **tflite_ctx)
         NN_ERR_PRINTF("Error while initializing the lock");
     }
 
+    tfl_ctx->delegate = NULL;
+
     *tflite_ctx = (void *)tfl_ctx;
 }
 
@@ -364,6 +393,14 @@ tensorflowlite_destroy(void *tflite_ctx)
     */
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
+    if (tfl_ctx->delegate != NULL) {
+#if defined(WASI_NN_ENABLE_GPU)
+        TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
+#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+        TfLiteExternalDelegateDelete(tfl_ctx->delegate);
+#endif
+    }
+
     NN_DBG_PRINTF("Freeing memory.");
     for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
         tfl_ctx->models[i].model.reset();
diff --git a/core/iwasm/libraries/wasi-nn/test/Dockerfile.vx-delegate b/core/iwasm/libraries/wasi-nn/test/Dockerfile.vx-delegate
new file mode 100644
index 000000000..89cc1a9de
--- /dev/null
+++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.vx-delegate
@@ -0,0 +1,99 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Builds iwasm with WASI-NN and the TensorFlow Lite external (VX) delegate,
+# using the x86-64 simulator SDK from TIM-VX's prebuilt-sdk directory.
+
+FROM ubuntu:20.04 AS base
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    cmake build-essential git curl libssl-dev python3
+
+# Fetch the sources of TIM-VX, the TFLite VX delegate and TensorFlow
+WORKDIR /tmp
+RUN git clone https://github.com/VeriSilicon/TIM-VX.git tim-vx
+RUN git clone https://github.com/VeriSilicon/tflite-vx-delegate.git
+RUN git clone https://github.com/tensorflow/tensorflow.git
+
+# Build TIM-VX
+WORKDIR /tmp/tim-vx/host_build
+RUN cmake -DCMAKE_INSTALL_PREFIX=/usr/local ../
+RUN make -j$(grep -c ^processor /proc/cpuinfo)
+RUN make install
+
+WORKDIR /tmp/tim-vx
+# NOTE(review): the two steps below are disabled; confirm whether they can be dropped.
+#RUN mkdir -p prebuilt-sdk/x86_64_linux/lib/include
+#RUN cp prebuilt-sdk/x86_64_linux/include/CL prebuilt-sdk/x86_64_linux/lib/include -fr
+
+# Build TensorFlow Lite as shared libraries with external delegate support
+WORKDIR /tmp/tensorflow/build
+RUN cmake \
+  -DBUILD_SHARED_LIBS=ON \
+  -DTFLITE_ENABLE_RUY=on \
+  -DTFLITE_ENABLE_NNAPI=off \
+  -DTFLITE_ENABLE_XNNPACK=on \
+  -DTFLITE_ENABLE_EXTERNAL_DELEGATE=on \
+  ../tensorflow/lite/
+RUN make -j$(grep -c ^processor /proc/cpuinfo)
+RUN make install
+# copy the shared libraries and flatbuffers headers out of the build directory
+RUN cp --no-preserve=ownership -d lib*.so* /usr/local/lib
+RUN cp -r --no-preserve=ownership -d flatbuffers/include/flatbuffers /usr/local/include
+# install header files
+RUN install -d /usr/local/include/tensorflow/lite && \
+    cd /tmp/tensorflow/tensorflow/lite && \
+    cp --parents \
+        $(find . -name "*.h*") \
+        /usr/local/include/tensorflow/lite
+# install version.h from core
+RUN install -d /usr/local/include/tensorflow/core/public && \
+    cp /tmp/tensorflow/tensorflow/core/public/version.h /usr/local/include/tensorflow/core/public
+
+# Build the VX delegate (the default build targets the x86-64 simulator)
+WORKDIR /tmp/tflite-vx-delegate/build
+RUN cmake \
+   -DBUILD_SHARED_LIBS=ON \
+   -DFETCHCONTENT_SOURCE_DIR_TENSORFLOW=/tmp/tensorflow \
+   -DTFLITE_LIB_LOC=/usr/local/lib/libtensorflow-lite.so \
+   -DTIM_VX_INSTALL=/usr/local \
+   -DCMAKE_INSTALL_PREFIX=/usr/ \
+   ../
+RUN make vx_delegate -j$(grep -c ^processor /proc/cpuinfo)
+RUN make install
+RUN cp --no-preserve=ownership -d lib*.so* /usr/lib
+# install header files
+RUN install -d /usr/local/include/tensorflow-lite-vx-delegate && \
+    cd /tmp/tflite-vx-delegate/ && \
+    cp --parents \
+        $(find . -name "*.h*") \
+        /usr/local/include/tensorflow-lite-vx-delegate
+
+# Point the SDK variables at TIM-VX's prebuilt x86-64 SDK and extend the library search path
+ENV VIVANTE_SDK_DIR=/tmp/tim-vx/prebuilt-sdk/x86_64_linux/
+ENV VSIMULATOR_CONFIG=czl
+
+ENV LD_LIBRARY_PATH=/tmp/tim-vx/prebuilt-sdk/x86_64_linux/lib:/usr/local/lib:/lib/x86_64-linux-gnu/:/lib64/:/usr/lib:$LD_LIBRARY_PATH
+
+# Build WASI-NN
+WORKDIR /home/wamr
+
+COPY . .
+
+WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
+
+RUN cmake \
+    -DCMAKE_LIBRARY_PATH=${CMAKE_LIBRARY_PATH}:/usr/local/lib/ \
+    -DCMAKE_INCLUDE_PATH=${CMAKE_INCLUDE_PATH}:/usr/local/include/ \
+    -DWAMR_BUILD_WASI_NN=1 \
+    -DWAMR_BUILD_WASI_NN_ENABLE_EXT=1 \
+    -DWASI_NN_EXT_DELEGATE_PATH="/usr/lib/libvx_delegate.so" \
+    ..
+
+RUN make -j $(grep -c ^processor /proc/cpuinfo)
+
+RUN cp /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
+
+ENTRYPOINT [ "/run/iwasm" ]