wasi-nn: Improve TPU support (#2447)
1. Allow TPU and GPU support at the same time. 2. Add Dockerfile to run example with [Coral USB](https://coral.ai/products/accelerator/).
commit b45d014112 (parent 923e8d6471)
@@ -359,16 +359,16 @@ endif ()
 if (WAMR_BUILD_WASI_NN EQUAL 1)
   message (" WASI-NN enabled")
   add_definitions (-DWASM_ENABLE_WASI_NN=1)
-  if (WASI_NN_ENABLE_GPU EQUAL 1)
+  if (WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1)
     message (" WASI-NN: GPU enabled")
-    add_definitions (-DWASI_NN_ENABLE_GPU=1)
+    add_definitions (-DWASM_ENABLE_WASI_NN_GPU=1)
   endif ()
-  if (WAMR_BUILD_WASI_NN_ENABLE_EXT EQUAL 1)
+  if (WAMR_BUILD_WASI_NN_ENABLE_EXTERNAL_DELEGATE EQUAL 1)
     message (" WASI-NN: External Delegation enabled")
-    add_definitions (-DWASI_NN_ENABLE_EXTERNAL_DELEGATE=1)
+    add_definitions (-DWASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE=1)
   endif ()
-  if (DEFINED WASI_NN_EXT_DELEGATE_PATH)
-    add_definitions (-DWASI_NN_EXT_DELEGATE_PATH="${WASI_NN_EXT_DELEGATE_PATH}")
+  if (DEFINED WAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH)
+    add_definitions (-DWASM_WASI_NN_EXTERNAL_DELEGATE_PATH="${WAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH}")
   endif ()
 endif ()
 if (WAMR_BUILD_ALLOC_WITH_USER_DATA EQUAL 1)
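These renamed options are what an embedder now passes at configure time. A minimal configure sketch (build directory assumed, GPU-only, using the renamed flag that also appears in the GPU Dockerfile hunk later in this commit):

```
# Sketch: configure a Linux build with wasi-nn and the TFLite GPU
# delegate, using the renamed option (previously WASI_NN_ENABLE_GPU).
cmake \
  -DWAMR_BUILD_WASI_NN=1 \
  -DWAMR_BUILD_WASI_NN_ENABLE_GPU=1 \
  ..
make -j "$(nproc)"
```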
@@ -144,6 +144,14 @@
 #define WASM_ENABLE_WASI_NN 0
 #endif
 
+#ifndef WASM_ENABLE_WASI_NN_GPU
+#define WASM_ENABLE_WASI_NN_GPU 0
+#endif
+
+#ifndef WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE
+#define WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE 0
+#endif
+
 /* Default disable libc emcc */
 #ifndef WASM_ENABLE_LIBC_EMCC
 #define WASM_ENABLE_LIBC_EMCC 0
@@ -25,6 +25,7 @@ Build the runtime image for your execution target type.
 * `cpu`
 * `nvidia-gpu`
 * `vx-delegate`
+* `tpu`
 
 ```
 EXECUTION_TYPE=cpu
@@ -64,6 +65,8 @@ docker run \
 ```
 
 * (NVIDIA) GPU
+  * Requirements:
+    * [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).
 
 ```
 docker run \
@@ -76,25 +79,36 @@ docker run \
     /assets/test_tensorflow.wasm
 ```
 
-* vx-delegate for NPU (x86 simulater)
+* vx-delegate for NPU (x86 simulator)
 
 ```
 docker run \
-    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-vx-delegate \
-    --dir=/assets \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets \
+    wasi-nn-vx-delegate \
+    --dir=/ \
     --env="TARGET=gpu" \
-    /assets/test_tensorflow.wasm
+    /assets/test_tensorflow_quantized.wasm
 ```
 
+* (Coral) TPU
+  * Requirements:
+    * [Coral USB](https://coral.ai/products/accelerator/).
+
-Requirements:
-* [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).
+```
+docker run \
+    --privileged \
+    --device=/dev/bus/usb:/dev/bus/usb \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets \
+    wasi-nn-tpu \
+    --dir=/ \
+    --env="TARGET=tpu" \
+    /assets/test_tensorflow_quantized.wasm
+```
 
 ## What is missing
 
 Supported:
 
 * Graph encoding: `tensorflowlite`.
-* Execution target: `cpu` and `gpu`.
+* Execution target: `cpu`, `gpu` and `tpu`.
 * Tensor type: `fp32`.
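Before launching the `wasi-nn-tpu` container above, it can help to confirm that the host actually sees the Coral accelerator; a quick host-side check (not part of the README) might be:

```
# The Coral USB accelerator should appear on the host USB bus; it
# typically enumerates as "Global Unichip Corp." before first use and
# re-enumerates as "Google Inc." once the Edge TPU runtime has used it.
lsusb
```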
@@ -18,12 +18,16 @@ if(NOT EXISTS ${TENSORFLOW_LITE})
 
   set(TENSORFLOW_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src")
 
-  if(WASI_NN_ENABLE_GPU EQUAL 1)
+  if(WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1)
     # Tensorflow specific:
     # * https://www.tensorflow.org/lite/guide/build_cmake#available_options_to_build_tensorflow_lite
     set (TFLITE_ENABLE_GPU ON)
   endif()
 
+  if (CMAKE_SIZEOF_VOID_P EQUAL 4)
+    set (TFLITE_ENABLE_XNNPACK OFF)
+  endif()
+
   add_subdirectory(
     "${TENSORFLOW_SOURCE_DIR}/tensorflow/lite"
     "${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite"
@@ -16,11 +16,11 @@
 #include <tensorflow/lite/optional_debug_tools.h>
 #include <tensorflow/lite/error_reporter.h>
 
-#if defined(WASI_NN_ENABLE_GPU)
+#if WASM_ENABLE_WASI_NN_GPU != 0
 #include <tensorflow/lite/delegates/gpu/delegate.h>
 #endif
 
-#if defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+#if WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE != 0
 #include <tensorflow/lite/delegates/external/external_delegate.h>
 #endif
 
@@ -130,8 +130,8 @@ tensorflowlite_load(void *tflite_ctx, graph_builder_array *builder,
         return invalid_argument;
     }
 
-    if (target != cpu && target != gpu) {
-        NN_ERR_PRINTF("Only CPU and GPU target is supported.");
+    if (target != cpu && target != gpu && target != tpu) {
+        NN_ERR_PRINTF("Only CPU, GPU and TPU target is supported.");
         return invalid_argument;
     }
 
@@ -195,7 +195,7 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
     switch (tfl_ctx->models[g].target) {
         case gpu:
         {
-#if defined(WASI_NN_ENABLE_GPU)
+#if WASM_ENABLE_WASI_NN_GPU != 0
             NN_WARN_PRINTF("GPU enabled.");
             // https://www.tensorflow.org/lite/performance/gpu
             TfLiteGpuDelegateOptionsV2 options =
@@ -216,10 +216,19 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
                 NN_ERR_PRINTF("Error when enabling GPU delegate.");
                 use_default = true;
             }
-#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+#else
+            NN_WARN_PRINTF("GPU not enabled.");
+            use_default = true;
+#endif
+            break;
+        }
+        case tpu:
+        {
+#if WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE != 0
             NN_WARN_PRINTF("external delegation enabled.");
             TfLiteExternalDelegateOptions options =
-                TfLiteExternalDelegateOptionsDefault(WASI_NN_EXT_DELEGATE_PATH);
+                TfLiteExternalDelegateOptionsDefault(
+                    WASM_WASI_NN_EXTERNAL_DELEGATE_PATH);
             tfl_ctx->delegate = TfLiteExternalDelegateCreate(&options);
             if (tfl_ctx->delegate == NULL) {
                 NN_ERR_PRINTF("Error when generating External delegate.");
@@ -233,7 +242,7 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
                 use_default = true;
             }
 #else
-            NN_WARN_PRINTF("GPU not enabled.");
+            NN_WARN_PRINTF("External delegate not enabled.");
             use_default = true;
 #endif
             break;
@@ -438,19 +447,35 @@ tensorflowlite_destroy(void *tflite_ctx)
     */
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
-    if (tfl_ctx->delegate != NULL) {
-#if defined(WASI_NN_ENABLE_GPU)
-        TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
-#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
-        TfLiteExternalDelegateDelete(tfl_ctx->delegate);
-#endif
-    }
-
     NN_DBG_PRINTF("Freeing memory.");
     for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
         tfl_ctx->models[i].model.reset();
-        if (tfl_ctx->models[i].model_pointer)
+        if (tfl_ctx->models[i].model_pointer) {
+            if (tfl_ctx->delegate) {
+                switch (tfl_ctx->models[i].target) {
+                    case gpu:
+                    {
+#if WASM_ENABLE_WASI_NN_GPU != 0
+                        TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
+#else
+                        NN_ERR_PRINTF("GPU delegate delete but not enabled.");
+#endif
+                        break;
+                    }
+                    case tpu:
+                    {
+#if WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE != 0
+                        TfLiteExternalDelegateDelete(tfl_ctx->delegate);
+#else
+                        NN_ERR_PRINTF(
+                            "External delegate delete but not enabled.");
+#endif
+                        break;
+                    }
+                }
+            }
             wasm_runtime_free(tfl_ctx->models[i].model_pointer);
+        }
         tfl_ctx->models[i].model_pointer = NULL;
     }
     for (int i = 0; i < MAX_GRAPH_EXEC_CONTEXTS_PER_INST; ++i) {
@@ -24,7 +24,7 @@ RUN apt-get install -y wget ca-certificates --no-install-recommends \
 
 RUN cmake \
     -DWAMR_BUILD_WASI_NN=1 \
-    -DWASI_NN_ENABLE_GPU=1 \
+    -DWAMR_BUILD_WASI_NN_ENABLE_GPU=1 \
     ..
 
 RUN make -j "$(grep -c ^processor /proc/cpuinfo)"
core/iwasm/libraries/wasi-nn/test/Dockerfile.tpu (new file, 37 lines)
@@ -0,0 +1,37 @@
+# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+FROM ubuntu:20.04 AS base
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# hadolint ignore=DL3008
+RUN apt-get update && apt-get install -y \
+    cmake build-essential git curl gnupg --no-install-recommends && \
+    rm -rf /var/lib/apt/lists/*
+
+# hadolint ignore=DL3008,DL4006
+RUN echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list && \
+    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
+    apt-get update && apt-get install -y libedgetpu1-std --no-install-recommends && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /home/wamr
+
+COPY . .
+
+WORKDIR /home/wamr/product-mini/platforms/linux/build
+
+RUN cmake \
+    -DWAMR_BUILD_WASI_NN=1 \
+    -DWAMR_BUILD_WASI_NN_ENABLE_EXTERNAL_DELEGATE=1 \
+    -DWAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH="libedgetpu.so.1.0" \
+    -DWAMR_BUILD_WASI_NN_ENABLE_GPU=1 \
+    ..
+
+RUN make -j "$(grep -c ^processor /proc/cpuinfo)" && \
+    cp /home/wamr/product-mini/platforms/linux/build/iwasm /iwasm
+
+WORKDIR /assets
+
+ENTRYPOINT [ "/iwasm" ]
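The README's `docker run ... wasi-nn-tpu ...` example assumes an image with that tag already exists; a plausible build command for it, run from the repository root so the Dockerfile's `COPY . .` picks up the whole tree, would be:

```
# Sketch: build the TPU-enabled runtime image from the new Dockerfile.
# The wasi-nn-tpu tag is assumed to match the README run example.
docker build \
  -t wasi-nn-tpu \
  -f core/iwasm/libraries/wasi-nn/test/Dockerfile.tpu \
  .
```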
@@ -132,8 +132,8 @@ run_inference(execution_target target, float *input, uint32_t *input_size,
         *output_size = MAX_OUTPUT_TENSOR_SIZE - *output_size;
         if (wasm_get_output(ctx, i, &out_tensor[offset], output_size)
             != success) {
-            NN_ERR_PRINTF("Error when getting output.");
-            exit(1);
+            NN_ERR_PRINTF("Error when getting index %d.", i);
+            break;
         }
 
         offset += *output_size;
@@ -11,7 +11,7 @@
 #include "wasi_nn.h"
 
 #define MAX_MODEL_SIZE 85000000
-#define MAX_OUTPUT_TENSOR_SIZE 200
+#define MAX_OUTPUT_TENSOR_SIZE 1000000
 #define INPUT_TENSOR_DIMS 4
 #define EPSILON 1e-8
 
@@ -94,7 +94,12 @@ cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM
 - **WAMR_BUILD_WASI_NN**=1/0, default to disable if not set
 
 #### **Enable lib wasi-nn GPU mode**
-- **WASI_NN_ENABLE_GPU**=1/0, default to disable if not set
+- **WAMR_BUILD_WASI_NN_ENABLE_GPU**=1/0, default to disable if not set
+
+#### **Enable lib wasi-nn external delegate mode**
+- **WAMR_BUILD_WASI_NN_ENABLE_EXTERNAL_DELEGATE**=1/0, default to disable if not set
+
+- **WAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH**=Path to the external delegate shared library (e.g. `libedgetpu.so.1.0` for Coral USB)
 
 #### **Disable boundary check with hardware trap**
 - **WAMR_DISABLE_HW_BOUND_CHECK**=1/0, default to enable if not set and supported by platform
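As a usage sketch of the documented options, the combination this commit is about (GPU delegate and Coral external delegate compiled into the same build, flag values mirroring the new Dockerfile.tpu) would be:

```
# Sketch: enable the GPU delegate and the external (Edge TPU) delegate
# in the same build. libedgetpu.so.1.0 assumes the Debian
# libedgetpu1-std package is installed; substitute any other TFLite
# external delegate library as needed.
cmake \
  -DWAMR_BUILD_WASI_NN=1 \
  -DWAMR_BUILD_WASI_NN_ENABLE_GPU=1 \
  -DWAMR_BUILD_WASI_NN_ENABLE_EXTERNAL_DELEGATE=1 \
  -DWAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH="libedgetpu.so.1.0" \
  ..
```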