wasi-nn: Improve TPU support (#2447)

1. Allow TPU and GPU support at the same time.
2. Add Dockerfile to run example with [Coral USB](https://coral.ai/products/accelerator/).
This commit is contained in:
tonibofarull 2023-08-14 14:03:56 +02:00 committed by GitHub
parent 923e8d6471
commit b45d014112
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 130 additions and 37 deletions

View File

@@ -359,16 +359,16 @@ endif ()
if (WAMR_BUILD_WASI_NN EQUAL 1) if (WAMR_BUILD_WASI_NN EQUAL 1)
message (" WASI-NN enabled") message (" WASI-NN enabled")
add_definitions (-DWASM_ENABLE_WASI_NN=1) add_definitions (-DWASM_ENABLE_WASI_NN=1)
if (WASI_NN_ENABLE_GPU EQUAL 1) if (WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1)
message (" WASI-NN: GPU enabled") message (" WASI-NN: GPU enabled")
add_definitions (-DWASI_NN_ENABLE_GPU=1) add_definitions (-DWASM_ENABLE_WASI_NN_GPU=1)
endif () endif ()
if (WAMR_BUILD_WASI_NN_ENABLE_EXT EQUAL 1) if (WAMR_BUILD_WASI_NN_ENABLE_EXTERNAL_DELEGATE EQUAL 1)
message (" WASI-NN: External Delegation enabled") message (" WASI-NN: External Delegation enabled")
add_definitions (-DWASI_NN_ENABLE_EXTERNAL_DELEGATE=1) add_definitions (-DWASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE=1)
endif () endif ()
if (DEFINED WASI_NN_EXT_DELEGATE_PATH) if (DEFINED WAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH)
add_definitions (-DWASI_NN_EXT_DELEGATE_PATH="${WASI_NN_EXT_DELEGATE_PATH}") add_definitions (-DWASM_WASI_NN_EXTERNAL_DELEGATE_PATH="${WAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH}")
endif () endif ()
endif () endif ()
if (WAMR_BUILD_ALLOC_WITH_USER_DATA EQUAL 1) if (WAMR_BUILD_ALLOC_WITH_USER_DATA EQUAL 1)

View File

@@ -144,6 +144,14 @@
#define WASM_ENABLE_WASI_NN 0 #define WASM_ENABLE_WASI_NN 0
#endif #endif
#ifndef WASM_ENABLE_WASI_NN_GPU
#define WASM_ENABLE_WASI_NN_GPU 0
#endif
#ifndef WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE
#define WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE 0
#endif
/* Default disable libc emcc */ /* Default disable libc emcc */
#ifndef WASM_ENABLE_LIBC_EMCC #ifndef WASM_ENABLE_LIBC_EMCC
#define WASM_ENABLE_LIBC_EMCC 0 #define WASM_ENABLE_LIBC_EMCC 0

View File

@@ -25,6 +25,7 @@ Build the runtime image for your execution target type.
* `cpu` * `cpu`
* `nvidia-gpu` * `nvidia-gpu`
* `vx-delegate` * `vx-delegate`
* `tpu`
``` ```
EXECUTION_TYPE=cpu EXECUTION_TYPE=cpu
@@ -64,6 +65,8 @@ docker run \
``` ```
* (NVIDIA) GPU * (NVIDIA) GPU
* Requirements:
* [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).
``` ```
docker run \ docker run \
@@ -76,25 +79,36 @@ docker run \
/assets/test_tensorflow.wasm /assets/test_tensorflow.wasm
``` ```
* vx-delegate for NPU (x86 simulater) * vx-delegate for NPU (x86 simulator)
``` ```
docker run \ docker run \
-v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-vx-delegate \ -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets \
--dir=/assets \ wasi-nn-vx-delegate \
--dir=/ \
--env="TARGET=gpu" \ --env="TARGET=gpu" \
/assets/test_tensorflow.wasm /assets/test_tensorflow_quantized.wasm
``` ```
* (Coral) TPU
* Requirements:
* [Coral USB](https://coral.ai/products/accelerator/).
```
Requirements: docker run \
* [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker). --privileged \
--device=/dev/bus/usb:/dev/bus/usb \
-v $PWD/core/iwasm/libraries/wasi-nn/test:/assets \
wasi-nn-tpu \
--dir=/ \
--env="TARGET=tpu" \
/assets/test_tensorflow_quantized.wasm
```
## What is missing ## What is missing
Supported: Supported:
* Graph encoding: `tensorflowlite`. * Graph encoding: `tensorflowlite`.
* Execution target: `cpu` and `gpu`. * Execution target: `cpu`, `gpu` and `tpu`.
* Tensor type: `fp32`. * Tensor type: `fp32`.

View File

@@ -18,12 +18,16 @@ if(NOT EXISTS ${TENSORFLOW_LITE})
set(TENSORFLOW_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src") set(TENSORFLOW_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src")
if(WASI_NN_ENABLE_GPU EQUAL 1) if(WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1)
# Tensorflow specific: # Tensorflow specific:
# * https://www.tensorflow.org/lite/guide/build_cmake#available_options_to_build_tensorflow_lite # * https://www.tensorflow.org/lite/guide/build_cmake#available_options_to_build_tensorflow_lite
set (TFLITE_ENABLE_GPU ON) set (TFLITE_ENABLE_GPU ON)
endif() endif()
if (CMAKE_SIZEOF_VOID_P EQUAL 4)
set (TFLITE_ENABLE_XNNPACK OFF)
endif()
add_subdirectory( add_subdirectory(
"${TENSORFLOW_SOURCE_DIR}/tensorflow/lite" "${TENSORFLOW_SOURCE_DIR}/tensorflow/lite"
"${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite" "${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite"

View File

@@ -16,11 +16,11 @@
#include <tensorflow/lite/optional_debug_tools.h> #include <tensorflow/lite/optional_debug_tools.h>
#include <tensorflow/lite/error_reporter.h> #include <tensorflow/lite/error_reporter.h>
#if defined(WASI_NN_ENABLE_GPU) #if WASM_ENABLE_WASI_NN_GPU != 0
#include <tensorflow/lite/delegates/gpu/delegate.h> #include <tensorflow/lite/delegates/gpu/delegate.h>
#endif #endif
#if defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE) #if WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE != 0
#include <tensorflow/lite/delegates/external/external_delegate.h> #include <tensorflow/lite/delegates/external/external_delegate.h>
#endif #endif
@@ -130,8 +130,8 @@ tensorflowlite_load(void *tflite_ctx, graph_builder_array *builder,
return invalid_argument; return invalid_argument;
} }
if (target != cpu && target != gpu) { if (target != cpu && target != gpu && target != tpu) {
NN_ERR_PRINTF("Only CPU and GPU target is supported."); NN_ERR_PRINTF("Only CPU, GPU and TPU target is supported.");
return invalid_argument; return invalid_argument;
} }
@@ -195,7 +195,7 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
switch (tfl_ctx->models[g].target) { switch (tfl_ctx->models[g].target) {
case gpu: case gpu:
{ {
#if defined(WASI_NN_ENABLE_GPU) #if WASM_ENABLE_WASI_NN_GPU != 0
NN_WARN_PRINTF("GPU enabled."); NN_WARN_PRINTF("GPU enabled.");
// https://www.tensorflow.org/lite/performance/gpu // https://www.tensorflow.org/lite/performance/gpu
TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2 options =
@@ -216,10 +216,19 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
NN_ERR_PRINTF("Error when enabling GPU delegate."); NN_ERR_PRINTF("Error when enabling GPU delegate.");
use_default = true; use_default = true;
} }
#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE) #else
NN_WARN_PRINTF("GPU not enabled.");
use_default = true;
#endif
break;
}
case tpu:
{
#if WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE != 0
NN_WARN_PRINTF("external delegation enabled."); NN_WARN_PRINTF("external delegation enabled.");
TfLiteExternalDelegateOptions options = TfLiteExternalDelegateOptions options =
TfLiteExternalDelegateOptionsDefault(WASI_NN_EXT_DELEGATE_PATH); TfLiteExternalDelegateOptionsDefault(
WASM_WASI_NN_EXTERNAL_DELEGATE_PATH);
tfl_ctx->delegate = TfLiteExternalDelegateCreate(&options); tfl_ctx->delegate = TfLiteExternalDelegateCreate(&options);
if (tfl_ctx->delegate == NULL) { if (tfl_ctx->delegate == NULL) {
NN_ERR_PRINTF("Error when generating External delegate."); NN_ERR_PRINTF("Error when generating External delegate.");
@@ -233,7 +242,7 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
use_default = true; use_default = true;
} }
#else #else
NN_WARN_PRINTF("GPU not enabled."); NN_WARN_PRINTF("External delegate not enabled.");
use_default = true; use_default = true;
#endif #endif
break; break;
@@ -438,19 +447,35 @@ tensorflowlite_destroy(void *tflite_ctx)
*/ */
TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx; TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
if (tfl_ctx->delegate != NULL) {
#if defined(WASI_NN_ENABLE_GPU)
TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
TfLiteExternalDelegateDelete(tfl_ctx->delegate);
#endif
}
NN_DBG_PRINTF("Freeing memory."); NN_DBG_PRINTF("Freeing memory.");
for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) { for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
tfl_ctx->models[i].model.reset(); tfl_ctx->models[i].model.reset();
if (tfl_ctx->models[i].model_pointer) if (tfl_ctx->models[i].model_pointer) {
if (tfl_ctx->delegate) {
switch (tfl_ctx->models[i].target) {
case gpu:
{
#if WASM_ENABLE_WASI_NN_GPU != 0
TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
#else
NN_ERR_PRINTF("GPU delegate delete but not enabled.");
#endif
break;
}
case tpu:
{
#if WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE != 0
TfLiteExternalDelegateDelete(tfl_ctx->delegate);
#else
NN_ERR_PRINTF(
"External delegate delete but not enabled.");
#endif
break;
}
}
}
wasm_runtime_free(tfl_ctx->models[i].model_pointer); wasm_runtime_free(tfl_ctx->models[i].model_pointer);
}
tfl_ctx->models[i].model_pointer = NULL; tfl_ctx->models[i].model_pointer = NULL;
} }
for (int i = 0; i < MAX_GRAPH_EXEC_CONTEXTS_PER_INST; ++i) { for (int i = 0; i < MAX_GRAPH_EXEC_CONTEXTS_PER_INST; ++i) {

View File

@@ -24,7 +24,7 @@ RUN apt-get install -y wget ca-certificates --no-install-recommends \
RUN cmake \ RUN cmake \
-DWAMR_BUILD_WASI_NN=1 \ -DWAMR_BUILD_WASI_NN=1 \
-DWASI_NN_ENABLE_GPU=1 \ -DWAMR_BUILD_WASI_NN_ENABLE_GPU=1 \
.. ..
RUN make -j "$(grep -c ^processor /proc/cpuinfo)" RUN make -j "$(grep -c ^processor /proc/cpuinfo)"

View File

@@ -0,0 +1,37 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
FROM ubuntu:20.04 AS base
ENV DEBIAN_FRONTEND=noninteractive
# hadolint ignore=DL3008
RUN apt-get update && apt-get install -y \
cmake build-essential git curl gnupg --no-install-recommends && \
rm -rf /var/lib/apt/lists/*
# hadolint ignore=DL3008,DL4006
RUN echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
apt-get update && apt-get install -y libedgetpu1-std --no-install-recommends && \
rm -rf /var/lib/apt/lists/*
WORKDIR /home/wamr
COPY . .
WORKDIR /home/wamr/product-mini/platforms/linux/build
RUN cmake \
-DWAMR_BUILD_WASI_NN=1 \
-DWAMR_BUILD_WASI_NN_ENABLE_EXTERNAL_DELEGATE=1 \
-DWAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH="libedgetpu.so.1.0" \
-DWAMR_BUILD_WASI_NN_ENABLE_GPU=1 \
..
RUN make -j "$(grep -c ^processor /proc/cpuinfo)" && \
cp /home/wamr/product-mini/platforms/linux/build/iwasm /iwasm
WORKDIR /assets
ENTRYPOINT [ "/iwasm" ]

View File

@@ -132,8 +132,8 @@ run_inference(execution_target target, float *input, uint32_t *input_size,
*output_size = MAX_OUTPUT_TENSOR_SIZE - *output_size; *output_size = MAX_OUTPUT_TENSOR_SIZE - *output_size;
if (wasm_get_output(ctx, i, &out_tensor[offset], output_size) if (wasm_get_output(ctx, i, &out_tensor[offset], output_size)
!= success) { != success) {
NN_ERR_PRINTF("Error when getting output."); NN_ERR_PRINTF("Error when getting index %d.", i);
exit(1); break;
} }
offset += *output_size; offset += *output_size;

View File

@@ -11,7 +11,7 @@
#include "wasi_nn.h" #include "wasi_nn.h"
#define MAX_MODEL_SIZE 85000000 #define MAX_MODEL_SIZE 85000000
#define MAX_OUTPUT_TENSOR_SIZE 200 #define MAX_OUTPUT_TENSOR_SIZE 1000000
#define INPUT_TENSOR_DIMS 4 #define INPUT_TENSOR_DIMS 4
#define EPSILON 1e-8 #define EPSILON 1e-8

View File

@@ -94,7 +94,12 @@ cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM
- **WAMR_BUILD_WASI_NN**=1/0, default to disable if not set - **WAMR_BUILD_WASI_NN**=1/0, default to disable if not set
#### **Enable lib wasi-nn GPU mode** #### **Enable lib wasi-nn GPU mode**
- **WASI_NN_ENABLE_GPU**=1/0, default to disable if not set - **WAMR_BUILD_WASI_NN_ENABLE_GPU**=1/0, default to disable if not set
#### **Enable lib wasi-nn external delegate mode**
- **WAMR_BUILD_WASI_NN_ENABLE_EXTERNAL_DELEGATE**=1/0, default to disable if not set
- **WAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH**=Path to the external delegate shared library (e.g. `libedgetpu.so.1.0` for Coral USB)
#### **Disable boundary check with hardware trap** #### **Disable boundary check with hardware trap**
- **WAMR_DISABLE_HW_BOUND_CHECK**=1/0, default to enable if not set and supported by platform - **WAMR_DISABLE_HW_BOUND_CHECK**=1/0, default to enable if not set and supported by platform