wasi-nn: Enable GPU support (#1922)

- Split the logic into several Dockerfiles:
  - runtime: wasi-nn-cpu and wasi-nn-nvidia-gpu.
  - compilation: wasi-nn-compile. Prepares the testing WASM module and generates the TFLite models.
- Implement GPU support for TFLite with OpenCL.
tonibofarull 2023-02-02 01:09:46 +01:00 committed by GitHub
parent fe3347d5d2
commit 1614ce12fa
10 changed files with 178 additions and 56 deletions

core/iwasm/libraries/wasi-nn/README.md

@@ -17,24 +17,69 @@ By only including this file in your WASM application you will bind WASI-NN into
 To run the tests we assume that the current directory is the root of the repository.
 
-1. Build the docker image,
+### Build the runtime
+
+Build the runtime base image,
 ```
-docker build -t wasi-nn -f core/iwasm/libraries/wasi-nn/test/Dockerfile .
+docker build -t wasi-nn-base -f core/iwasm/libraries/wasi-nn/test/Dockerfile.base .
 ```
-2. Run the container
+
+Build the runtime image for your execution target type.
+
+`EXECUTION_TYPE` can be:
+* `cpu`
+* `nvidia-gpu`
+
 ```
-docker run wasi-nn
+EXECUTION_TYPE=cpu
+docker build -t wasi-nn-${EXECUTION_TYPE} -f core/iwasm/libraries/wasi-nn/test/Dockerfile.${EXECUTION_TYPE} .
 ```
+
+### Build wasm app
+
+```
+docker build -t wasi-nn-compile -f core/iwasm/libraries/wasi-nn/test/Dockerfile.compile .
+```
+
+```
+docker run -v $PWD/core/iwasm/libraries/wasi-nn:/wasi-nn wasi-nn-compile
+```
+
+### Run wasm app
+
 If all the tests have run properly you will see the following message in the terminal,
+
 ```
 Tests: passed!
 ```
+
+* CPU
+```
+docker run \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-cpu \
+    --dir=/assets \
+    --env="TARGET=cpu" \
+    /assets/test_tensorflow.wasm
+```
+
+* (NVIDIA) GPU
+```
+docker run \
+    --runtime=nvidia \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-nvidia-gpu \
+    --dir=/assets \
+    --env="TARGET=gpu" \
+    /assets/test_tensorflow.wasm
+```
+
+Requirements:
+* [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).
 
 ## What is missing
 
 Supported:
@@ -43,5 +88,5 @@ Supported:
 * Only 1 model at a time.
 * `graph` and `graph-execution-context` are ignored.
 * Graph encoding: `tensorflowlite`.
-* Execution target: `cpu`.
+* Execution target: `cpu` and `gpu`.
 * Tensor type: `fp32`.
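Note: `--dir=/assets` preopens the mounted volume for WASI file access (the models are loaded from `/assets/models/`), and `--env="TARGET=[cpu|gpu]"` forwards the environment variable that the test program reads to pick its execution target; see `main()` in `test_tensorflow.c` below.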

wasi_nn_tensorflowlite.cpp

@@ -16,6 +16,7 @@
 #include <tensorflow/lite/model.h>
 #include <tensorflow/lite/optional_debug_tools.h>
 #include <tensorflow/lite/error_reporter.h>
+#include <tensorflow/lite/delegates/gpu/delegate.h>
 
 /* Global variables */
@@ -45,8 +46,8 @@ tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding,
         return invalid_argument;
     }
 
-    if (target != cpu) {
-        NN_ERR_PRINTF("Only CPU target is supported.");
+    if (target != cpu && target != gpu) {
+        NN_ERR_PRINTF("Only CPU and GPU targets are supported.");
         return invalid_argument;
     }
@@ -79,6 +80,29 @@ tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding,
         return missing_memory;
     }
 
+    bool use_default = false;
+    switch (target) {
+        case gpu:
+        {
+            // https://www.tensorflow.org/lite/performance/gpu
+            auto options = TfLiteGpuDelegateOptionsV2Default();
+            options.inference_preference =
+                TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
+            options.inference_priority1 =
+                TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
+            auto *delegate = TfLiteGpuDelegateV2Create(&options);
+            if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
+                NN_ERR_PRINTF("Error when enabling GPU delegate.");
+                use_default = true;
+            }
+            break;
+        }
+        default:
+            use_default = true;
+    }
+    if (use_default)
+        NN_WARN_PRINTF("Default encoding is CPU.");
+
     return success;
 }
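The GPU path above is the stock TFLite GPU-delegate pattern (OpenCL-backed on Linux). A minimal standalone sketch of the same pattern, assuming an already-built `tflite::Interpreter` and a hypothetical `try_enable_gpu` helper; unlike the diff, it also releases the delegate when the interpreter does not adopt it:

```
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/delegates/gpu/delegate.h>

// Sketch: try to offload the graph to the GPU, falling back to CPU.
// `interpreter` is assumed to be a fully built tflite::Interpreter.
static bool
try_enable_gpu(tflite::Interpreter *interpreter)
{
    TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
    options.inference_preference =
        TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
    options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;

    TfLiteDelegate *delegate = TfLiteGpuDelegateV2Create(&options);
    if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
        // Delegate was not adopted; release it and keep the
        // default (CPU) execution plan.
        TfLiteGpuDelegateV2Delete(delegate);
        return false;
    }
    return true;
}
```

`SUSTAINED_SPEED` favors steady-state throughput over first-inference latency, which fits a long-lived graph execution context.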


@@ -1 +0,0 @@
-Dockerfile

core/iwasm/libraries/wasi-nn/test/CMakeLists.txt

@@ -7,10 +7,10 @@ project (iwasm)
 set (CMAKE_VERBOSE_MAKEFILE OFF)
 
 # Reset default linker flags
-set (CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
-set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
 set (CMAKE_C_STANDARD 99)
 set (CMAKE_CXX_STANDARD 14)
+set (CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
+set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
 
 if (NOT DEFINED WAMR_BUILD_PLATFORM)
     set (WAMR_BUILD_PLATFORM "linux")

core/iwasm/libraries/wasi-nn/test/Dockerfile.base

@@ -0,0 +1,22 @@
+# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+FROM ubuntu:20.04 AS base
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    cmake build-essential git
+
+WORKDIR /home/wamr
+COPY . .
+
+WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
+RUN cmake \
+    -DWAMR_BUILD_WASI_NN=1 \
+    -DTFLITE_ENABLE_GPU=ON \
+    ..
+RUN make -j $(grep -c ^processor /proc/cpuinfo)
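The runtime images below (`Dockerfile.cpu` and `Dockerfile.nvidia-gpu`) do not rebuild anything: they copy the `iwasm` binary produced here via `COPY --from=wasi-nn-base`, so this image must be built and tagged `wasi-nn-base` first, as in the README section above.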

core/iwasm/libraries/wasi-nn/test/Dockerfile.compile

@@ -1,38 +1,23 @@
 # Copyright (C) 2019 Intel Corporation. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-FROM ubuntu:22.04
+FROM ubuntu:20.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get update && apt-get install -y \
     cmake build-essential git wget python3.10 python3-pip
 
-ARG WASI_SDK_VER=16
+ARG WASI_SDK_VER=19
 RUN wget -c --progress=dot:giga https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VER}/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -P /opt \
     && tar xf /opt/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -C /opt \
     && ln -fs /opt/wasi-sdk-${WASI_SDK_VER}.0 /opt/wasi-sdk \
     && rm /opt/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz
 
-WORKDIR /home/wamr
+WORKDIR /wasi-nn/test
 
-COPY core/deps/install_tensorflow.sh core/deps/install_tensorflow.sh
-RUN ./core/deps/install_tensorflow.sh
-
 COPY core/iwasm/libraries/wasi-nn/test/requirements.txt .
-RUN pip3 install -r requirements.txt
-
-COPY core core
-COPY build-scripts build-scripts
-COPY product-mini product-mini
+RUN pip3 install -r requirements.txt && rm requirements.txt
 
-WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
-RUN cmake -DWAMR_BUILD_WASI_NN=1 ..
-RUN make -j $(grep -c ^processor /proc/cpuinfo)
-
-WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test
-RUN ./build.sh
-
-ENTRYPOINT [ "./build/iwasm", "--dir=.", "test_tensorflow.wasm" ]
+ENTRYPOINT [ "bash", "./build.sh" ]

core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu

@@ -0,0 +1,8 @@
+# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+FROM ubuntu:20.04
+
+COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
+
+ENTRYPOINT [ "/run/iwasm" ]

core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu

@@ -0,0 +1,20 @@
+# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+FROM nvidia/cuda:11.3.0-runtime-ubuntu20.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ocl-icd-libopencl1 \
+    ocl-icd-opencl-dev \
+    clinfo && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN mkdir -p /etc/OpenCL/vendors && \
+    echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
+
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
+
+ENTRYPOINT [ "/run/iwasm" ]
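The OpenCL packages are what the TFLite GPU delegate needs at run time: `ocl-icd-libopencl1` provides the ICD loader, and the generated `/etc/OpenCL/vendors/nvidia.icd` entry points the loader at the NVIDIA OpenCL implementation (`libnvidia-opencl.so.1`) exposed by the CUDA runtime image; `clinfo` is included for diagnosing device visibility.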

core/iwasm/libraries/wasi-nn/test/build.sh

@@ -7,7 +7,7 @@
     -Wl,--allow-undefined \
     -Wl,--strip-all,--no-entry \
     --sysroot=/opt/wasi-sdk/share/wasi-sysroot \
-    -I/home/wamr/core/iwasm/libraries/wasi-nn \
+    -I.. \
     -o test_tensorflow.wasm test_tensorflow.c
 
 # TFLite models to use in the tests

core/iwasm/libraries/wasi-nn/test/test_tensorflow.c Executable file → Normal file

@@ -28,7 +28,7 @@ typedef struct {
 // WASI-NN wrappers
 
 error
-wasm_load(char *model_name, graph *g)
+wasm_load(char *model_name, graph *g, execution_target target)
 {
     FILE *pFile = fopen(model_name, "r");
     if (pFile == NULL)
@@ -64,7 +64,7 @@ wasm_load(char *model_name, graph *g)
     arr.buf[0].size = result;
     arr.buf[0].buf = buffer;
 
-    error res = load(&arr, tensorflowlite, cpu, g);
+    error res = load(&arr, tensorflowlite, target, g);
 
     fclose(pFile);
     free(buffer);
@@ -115,11 +115,12 @@ wasm_get_output(graph_execution_context ctx, uint32_t index, float *out_tensor,
 // Inference
 
 float *
-run_inference(float *input, uint32_t *input_size, uint32_t *output_size,
-              char *model_name, uint32_t num_output_tensors)
+run_inference(execution_target target, float *input, uint32_t *input_size,
+              uint32_t *output_size, char *model_name,
+              uint32_t num_output_tensors)
 {
     graph graph;
-    if (wasm_load(model_name, &graph) != success) {
+    if (wasm_load(model_name, &graph, target) != success) {
         fprintf(stderr, "Error when loading model.");
         exit(1);
     }
@@ -185,14 +186,14 @@ create_input(int *dims)
 // TESTS
 
 void
-test_sum()
+test_sum(execution_target target)
 {
     int dims[] = { 1, 5, 5, 1 };
     input_info input = create_input(dims);
 
     uint32_t output_size = 0;
-    float *output = run_inference(input.input_tensor, input.dim, &output_size,
-                                  "models/sum.tflite", 1);
+    float *output = run_inference(target, input.input_tensor, input.dim,
+                                  &output_size, "/assets/models/sum.tflite", 1);
 
     assert(output_size == 1);
     assert(fabs(output[0] - 300.0) < EPSILON);
@@ -203,14 +204,14 @@ test_sum()
 }
 
 void
-test_max()
+test_max(execution_target target)
 {
     int dims[] = { 1, 5, 5, 1 };
     input_info input = create_input(dims);
 
     uint32_t output_size = 0;
-    float *output = run_inference(input.input_tensor, input.dim, &output_size,
-                                  "models/max.tflite", 1);
+    float *output = run_inference(target, input.input_tensor, input.dim,
+                                  &output_size, "/assets/models/max.tflite", 1);
 
     assert(output_size == 1);
     assert(fabs(output[0] - 24.0) < EPSILON);
@@ -222,14 +223,15 @@ test_max()
 }
 
 void
-test_average()
+test_average(execution_target target)
 {
     int dims[] = { 1, 5, 5, 1 };
     input_info input = create_input(dims);
 
     uint32_t output_size = 0;
-    float *output = run_inference(input.input_tensor, input.dim, &output_size,
-                                  "models/average.tflite", 1);
+    float *output =
+        run_inference(target, input.input_tensor, input.dim, &output_size,
+                      "/assets/models/average.tflite", 1);
 
     assert(output_size == 1);
     assert(fabs(output[0] - 12.0) < EPSILON);
@@ -241,14 +243,15 @@ test_average()
 }
 
 void
-test_mult_dimensions()
+test_mult_dimensions(execution_target target)
 {
     int dims[] = { 1, 3, 3, 1 };
     input_info input = create_input(dims);
 
     uint32_t output_size = 0;
-    float *output = run_inference(input.input_tensor, input.dim, &output_size,
-                                  "models/mult_dim.tflite", 1);
+    float *output =
+        run_inference(target, input.input_tensor, input.dim, &output_size,
+                      "/assets/models/mult_dim.tflite", 1);
 
     assert(output_size == 9);
     for (int i = 0; i < 9; i++)
@@ -260,14 +263,15 @@ test_mult_dimensions()
 }
 
 void
-test_mult_outputs()
+test_mult_outputs(execution_target target)
 {
     int dims[] = { 1, 4, 4, 1 };
     input_info input = create_input(dims);
 
     uint32_t output_size = 0;
-    float *output = run_inference(input.input_tensor, input.dim, &output_size,
-                                  "models/mult_out.tflite", 2);
+    float *output =
+        run_inference(target, input.input_tensor, input.dim, &output_size,
+                      "/assets/models/mult_out.tflite", 2);
 
     assert(output_size == 8);
 
     // first tensor check
@@ -285,16 +289,31 @@ test_mult_outputs()
 int
 main()
 {
+    char *env = getenv("TARGET");
+    if (env == NULL) {
+        printf("Usage:\n--env=\"TARGET=[cpu|gpu]\"\n");
+        return 1;
+    }
+    execution_target target;
+    if (strcmp(env, "cpu") == 0)
+        target = cpu;
+    else if (strcmp(env, "gpu") == 0)
+        target = gpu;
+    else {
+        printf("Wrong target!");
+        return 1;
+    }
+
     printf("################### Testing sum...\n");
-    test_sum();
+    test_sum(target);
     printf("################### Testing max...\n");
-    test_max();
+    test_max(target);
     printf("################### Testing average...\n");
-    test_average();
+    test_average(target);
     printf("################### Testing multiple dimensions...\n");
-    test_mult_dimensions();
+    test_mult_dimensions(target);
     printf("################### Testing multiple outputs...\n");
-    test_mult_outputs();
+    test_mult_outputs(target);
 
     printf("Tests: passed!\n");
     return 0;
 }
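Note that the model paths are now absolute (`/assets/models/*.tflite`), matching the `-v ...:/assets` bind mount and the `--dir=/assets` preopen used by the runtime containers above.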