wasm-micro-runtime/core/iwasm/libraries/wasi-nn/wasi_nn.h

/*
 * Copyright (C) 2019 Intel Corporation.  All rights reserved.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

#ifndef WASI_NN_WASM_H
#define WASI_NN_WASM_H

#include "wasi_nn_common.h"

/**
 * Following definition from:
 * [Aug 10th, 2022]
 * https://github.com/WebAssembly/wasi-nn/blob/e5e1a6c31f424c7cd63026cd270e9746775675a0/wasi-nn.wit.md
 */

/* The graph initialization data. */

// This consists of an array of buffers because implementing backends may encode
// their graph IR in parts (e.g., OpenVINO stores its IR and weights
// separately).
typedef struct {
    uint8_t *buf;
    uint32_t size;
} graph_builder;

typedef struct {
    graph_builder *buf;
    uint32_t size;
} graph_builder_array;

/* The dimensions of a tensor. */

// The array length matches the tensor rank and each element in the array
// describes the size of each dimension.
typedef struct {
    uint32_t *buf;
    uint32_t size;
} tensor_dimensions;

/* The tensor data. */

// Initially conceived as a sparse representation, each empty cell would be
// filled with zeros and the array length must match the product of all of the
// dimensions and the number of bytes in the type (e.g., a 2x2 tensor with
// 4-byte f32 elements would have a data array of length 16). Naturally, this
// representation requires some knowledge of how to lay out data in
// memory--e.g., using row-major ordering--and could perhaps be improved.
typedef uint8_t *tensor_data;

/* A tensor. */

typedef struct {
    // Describe the size of the tensor (e.g., 2x2x2x2 -> [2, 2, 2, 2]). To
    // represent a tensor containing a single value, use `[1]` for the tensor
    // dimensions.
    tensor_dimensions *dimensions;
    // Describe the type of element in the tensor (e.g., f32).
    tensor_type type;
    // Contains the tensor data.
    tensor_data data;
} tensor;

/**
 * @brief Load an opaque sequence of bytes to use for inference.
 *
 * @param builder   Model builder.
 * @param encoding  Model encoding.
 * @param target    Execution target.
 * @param graph     Graph.
 * @return error    Execution status.
 */
error
load(graph_builder_array *builder, graph_encoding encoding,
     execution_target target, graph *graph)
    __attribute__((export_module("wasi_nn")))
    __attribute__((import_module("wasi_nn")));

/**
 * @brief Create an execution instance of a loaded graph.
 *
 * @param graph     Graph.
 * @param ctx       Execution context.
 * @return error    Execution status.
 */
error
init_execution_context(graph graph, graph_execution_context *ctx)
    __attribute__((export_module("wasi_nn")))
    __attribute__((import_module("wasi_nn")));

/**
 * @brief Define the inputs to use for inference.
 *
 * @param ctx       Execution context.
 * @param index     Input tensor index.
 * @param tensor    Input tensor.
 * @return error    Execution status.
 */
error
set_input(graph_execution_context ctx, uint32_t index, tensor *tensor)
    __attribute__((export_module("wasi_nn")))
    __attribute__((import_module("wasi_nn")));

/**
 * @brief Compute the inference on the given inputs.
 *
 * @param ctx       Execution context.
 * @return error    Execution status.
 */
error
compute(graph_execution_context ctx) __attribute__((export_module("wasi_nn")))
__attribute__((import_module("wasi_nn")));

/**
 * @brief Extract the outputs after inference.
 *
 * @param ctx                   Execution context.
 * @param index                 Output tensor index.
 * @param output_tensor         Buffer where output tensor with index `index` is
 * copied.
 * @param output_tensor_size    Pointer to `output_tensor` maximum size.
 *                              After the function call it is updated with the
 * copied number of bytes.
 * @return error                Execution status.
 */
error
get_output(graph_execution_context ctx, uint32_t index,
           tensor_data output_tensor, uint32_t *output_tensor_size)
    __attribute__((export_module("wasi_nn")))
    __attribute__((import_module("wasi_nn")));

#endif