diff --git a/core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h b/core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h
index d2e3f4cac..952fb65e2 100644
--- a/core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h
+++ b/core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h
@@ -99,7 +99,7 @@ typedef enum {
 // 4-byte f32 elements would have a data array of length 16). Naturally, this
 // representation requires some knowledge of how to lay out data in
 // memory--e.g., using row-major ordering--and could perhaps be improved.
-#if WASM_ENABLE_WASI_EPHEMERAL_NN != 0 && defined(__wasm__)
+#if !defined(__wasm__) || WASM_ENABLE_WASI_EPHEMERAL_NN != 0
 typedef struct {
     uint8_t *buf;
     uint32_t size;
diff --git a/core/iwasm/libraries/wasi-nn/src/utils/wasi_nn_app_native.c b/core/iwasm/libraries/wasi-nn/src/utils/wasi_nn_app_native.c
index 6e91c949b..4d56fed93 100644
--- a/core/iwasm/libraries/wasi-nn/src/utils/wasi_nn_app_native.c
+++ b/core/iwasm/libraries/wasi-nn/src/utils/wasi_nn_app_native.c
@@ -99,7 +99,8 @@ graph_builder_array_app_native(wasm_module_inst_t instance,
 
 static wasi_nn_error
 tensor_data_app_native(wasm_module_inst_t instance, uint32_t total_elements,
-                       tensor_wasm *input_tensor_wasm, tensor_data *data)
+                       tensor_wasm *input_tensor_wasm, void **data,
+                       uint32_t *size)
 {
 #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
 #define data_size input_tensor_wasm->data_size
@@ -113,8 +114,9 @@ tensor_data_app_native(wasm_module_inst_t instance, uint32_t total_elements,
         NN_ERR_PRINTF("input_tensor_wasm->data_offset is invalid");
         return invalid_argument;
     }
-    *data = (tensor_data)wasm_runtime_addr_app_to_native(
+    *data = wasm_runtime_addr_app_to_native(
         instance, (uint64)input_tensor_wasm->data_offset);
+    *size = data_size;
     return success;
 #undef data_size
 }
@@ -188,16 +190,19 @@ tensor_app_native(wasm_module_inst_t instance, tensor_wasm *input_tensor_wasm,
     NN_DBG_PRINTF("Tensor type: %d", input_tensor_wasm->type);
     NN_DBG_PRINTF("Total number of elements: %d", total_elements);
 
-    tensor_data data = NULL;
+    void *data = NULL;
+    uint32_t datasize;
     if (success
-        != (res = tensor_data_app_native(instance, total_elements,
-                                         input_tensor_wasm, &data))) {
+        != (res =
+                tensor_data_app_native(instance, total_elements,
+                                       input_tensor_wasm, &data, &datasize))) {
         wasm_runtime_free(dimensions);
         return res;
     }
 
     input_tensor->type = input_tensor_wasm->type;
     input_tensor->dimensions = dimensions;
-    input_tensor->data = data;
+    input_tensor->data.buf = data;
+    input_tensor->data.size = datasize;
     return success;
 }
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn.c b/core/iwasm/libraries/wasi-nn/src/wasi_nn.c
index 25d70e6a9..5c865c5be 100644
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn.c
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn.c
@@ -720,12 +720,12 @@ fail:
 #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
 wasi_nn_error
 wasi_nn_get_output(wasm_exec_env_t exec_env, graph_execution_context ctx,
-                   uint32_t index, tensor_data output_tensor,
+                   uint32_t index, void *output_tensor,
                    uint32_t output_tensor_len, uint32_t *output_tensor_size)
 #else /* WASM_ENABLE_WASI_EPHEMERAL_NN == 0 */
 wasi_nn_error
 wasi_nn_get_output(wasm_exec_env_t exec_env, graph_execution_context ctx,
-                   uint32_t index, tensor_data output_tensor,
+                   uint32_t index, void *output_tensor,
                    uint32_t *output_tensor_size)
 #endif /* WASM_ENABLE_WASI_EPHEMERAL_NN != 0 */
 {
@@ -753,16 +753,17 @@ wasi_nn_get_output(wasm_exec_env_t exec_env, graph_execution_context ctx,
         goto fail;
     }
 
+    tensor_data tensor = {
+        .buf = output_tensor,
 #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
+        .size = output_tensor_len,
+#else
+        .size = *output_tensor_size,
+#endif
+    };
     call_wasi_nn_func(wasi_nn_ctx->backend, get_output, res,
-                      wasi_nn_ctx->backend_ctx, ctx, index, output_tensor,
-                      &output_tensor_len);
-    *output_tensor_size = output_tensor_len;
-#else /* WASM_ENABLE_WASI_EPHEMERAL_NN == 0 */
-    call_wasi_nn_func(wasi_nn_ctx->backend, get_output, res,
-                      wasi_nn_ctx->backend_ctx, ctx, index, output_tensor,
+                      wasi_nn_ctx->backend_ctx, ctx, index, &tensor,
                       output_tensor_size);
-#endif /* WASM_ENABLE_WASI_EPHEMERAL_NN != 0 */
 fail:
     unlock_ctx(wasi_nn_ctx);
     return res;
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c b/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c
index ff31e3adb..65ec83f8d 100644
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c
@@ -385,7 +385,7 @@ set_input(void *ctx, graph_execution_context exec_ctx, uint32_t index,
 {
     struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx;
     // tensor->data is the prompt string. ends with \0
-    char *prompt_text = (char *)wasi_nn_tensor->data;
+    char *prompt_text = (char *)wasi_nn_tensor->data.buf;
 
 #ifndef NDEBUG
     NN_DBG_PRINTF("--------------------------------------------------");
@@ -552,7 +552,7 @@ fail:
 
 __attribute__((visibility("default"))) wasi_nn_error
 get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index,
-           tensor_data output_tensor, uint32_t *output_tensor_size)
+           tensor_data *output_tensor, uint32_t *output_tensor_size)
 {
     struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx;
 
@@ -571,7 +571,7 @@ get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index,
             printf("%s\n", output_metadata);
         }
 
-        memcpy(output_tensor, output_metadata, strlen(output_metadata));
+        memcpy(output_tensor->buf, output_metadata, strlen(output_metadata));
         *output_tensor_size = strlen(output_metadata);
         return success;
     }
@@ -591,7 +591,7 @@ get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index,
             printf("%s", buf);
         }
 
-        memcpy(output_tensor + end_pos, buf, strlen(buf));
+        memcpy(output_tensor->buf + end_pos, buf, strlen(buf));
         end_pos += strlen(buf);
     }
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.c b/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.c
index 3c3b0ae6c..8c62ad689 100644
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.c
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.c
@@ -402,7 +402,7 @@ set_input(void *ctx, graph_execution_context exec_ctx, uint32_t index,
                     shape_info);
 
     CHECK_OV_STATUS(ov_tensor_create_from_host_ptr(input_type, input_shape,
-                                                   wasi_nn_tensor->data,
+                                                   wasi_nn_tensor->data.buf,
                                                    &input_tensor),
                     ret);
     }
@@ -441,7 +441,7 @@ fail:
 
 __attribute__((visibility("default"))) wasi_nn_error
 get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index,
-           tensor_data output_tensor, uint32_t *output_tensor_size)
+           tensor_data *output_tensor, uint32_t *output_tensor_size)
 {
     OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
     struct OpenVINOExecutionContext *exec;
@@ -460,14 +460,14 @@ get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index,
 
     CHECK_OV_STATUS(ov_tensor_get_byte_size(ov_tensor, &byte_size), ret);
 
-    if (byte_size > *output_tensor_size) {
+    if (byte_size > output_tensor->size) {
         ret = too_large;
         goto fail;
     }
 
     CHECK_OV_STATUS(ov_tensor_data(ov_tensor, &data), ret);
 
-    memcpy(output_tensor, data, byte_size);
+    memcpy(output_tensor->buf, data, byte_size);
 
     *output_tensor_size = (uint32_t)byte_size;
 
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.h b/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.h
index ea03a226f..0233568c0 100644
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.h
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.h
@@ -24,7 +24,7 @@ compute(void *ctx, graph_execution_context exec_ctx);
 
 __attribute__((visibility("default"))) wasi_nn_error
 get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index,
-           tensor_data output_tensor, uint32_t *output_tensor_size);
+           tensor_data *output_tensor, uint32_t *output_tensor_size);
 
 __attribute__((visibility("default"))) wasi_nn_error
 init_backend(void **ctx);
@@ -32,4 +32,4 @@ init_backend(void **ctx);
 
 __attribute__((visibility("default"))) wasi_nn_error
 deinit_backend(void *ctx);
-#endif /* WASI_NN_OPENVINO_HPP */
\ No newline at end of file
+#endif /* WASI_NN_OPENVINO_HPP */
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h b/core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h
index a20ad1718..466f2cef4 100644
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h
@@ -32,7 +32,7 @@ typedef wasi_nn_error (*SET_INPUT)(void *, graph_execution_context, uint32_t,
                                    tensor *);
 typedef wasi_nn_error (*COMPUTE)(void *, graph_execution_context);
 typedef wasi_nn_error (*GET_OUTPUT)(void *, graph_execution_context, uint32_t,
-                                    tensor_data, uint32_t *);
+                                    tensor_data *, uint32_t *);
 /* wasi-nn general APIs */
 typedef wasi_nn_error (*BACKEND_INITIALIZE)(void **);
 typedef wasi_nn_error (*BACKEND_DEINITIALIZE)(void *);
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
index c9064a5ec..0ca323b70 100644
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
@@ -324,7 +324,7 @@ set_input(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
                 index);
 
         int size = model_tensor_size * sizeof(float);
-        bh_memcpy_s(it, size, input_tensor->data, size);
+        bh_memcpy_s(it, size, input_tensor->data.buf, size);
     }
     else { // TODO: Assuming uint8 quantized networks.
         TfLiteAffineQuantization *quant_info =
@@ -342,7 +342,7 @@ set_input(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
         NN_DBG_PRINTF("input tensor: (scale, offset) = (%f, %f)", scale,
                       zero_point);
 
-        float *input_tensor_f = (float *)input_tensor->data;
+        float *input_tensor_f = (float *)input_tensor->data.buf;
         for (uint32_t i = 0; i < model_tensor_size; ++i) {
             it[i] = (uint8_t)(input_tensor_f[i] / scale + zero_point);
         }
@@ -366,7 +366,7 @@ compute(void *tflite_ctx, graph_execution_context ctx)
 
 __attribute__((visibility("default"))) wasi_nn_error
 get_output(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
-           tensor_data output_tensor, uint32_t *output_tensor_size)
+           tensor_data *output_tensor, uint32_t *output_tensor_size)
 {
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
@@ -392,7 +392,7 @@ get_output(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
     if (tensor->quantization.type == kTfLiteNoQuantization) {
         NN_DBG_PRINTF("No quantization information");
 #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
-        if (*output_tensor_size < tensor->bytes) {
+        if (output_tensor->size < tensor->bytes) {
             NN_ERR_PRINTF("Insufficient memory to copy tensor %d", index);
             return too_large;
         }
@@ -401,12 +401,12 @@ get_output(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
          * for now, maintain the bug-to-bug compatibility with the old abi,
         * where the size here is the number of fp32, not bytes.
          */
-        if (*output_tensor_size < tensor->bytes / sizeof(float)) {
+        if (output_tensor->size < tensor->bytes / sizeof(float)) {
             NN_ERR_PRINTF("Insufficient memory to copy tensor %d", index);
             return too_large;
         }
 #endif
-        bh_memcpy_s(output_tensor, *output_tensor_size, tensor->data.data,
+        bh_memcpy_s(output_tensor->buf, output_tensor->size, tensor->data.data,
                     tensor->bytes);
 #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
         *output_tensor_size = tensor->bytes;
@@ -431,7 +431,7 @@ get_output(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
             model_tensor_size *= (uint32_t)tensor->dims->data[i];
 
 #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
-        if (*output_tensor_size / sizeof(float) < model_tensor_size) {
+        if (output_tensor->size / sizeof(float) < model_tensor_size) {
             NN_ERR_PRINTF("Insufficient memory to copy tensor %d", index);
             return too_large;
         }
@@ -440,7 +440,7 @@ get_output(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
          * for now, maintain the bug-to-bug compatibility with the old abi,
          * where the size here is the number of fp32, not bytes.
          */
-        if (*output_tensor_size < model_tensor_size) {
+        if (output_tensor->size < model_tensor_size) {
             NN_ERR_PRINTF("Insufficient memory to copy tensor %d", index);
             return too_large;
         }
@@ -454,7 +454,7 @@ get_output(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
         NN_DBG_PRINTF("output tensor: (scale, offset) = (%f, %f)", scale,
                       zero_point);
 
-        float *output_tensor_f = (float *)output_tensor;
+        float *output_tensor_f = (float *)output_tensor->buf;
         for (uint32_t i = 0; i < model_tensor_size; ++i) {
             output_tensor_f[i] = (ot[i] - zero_point) * scale;
         }
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.hpp b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.hpp
index d6e04ab0e..4ded6e407 100644
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.hpp
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.hpp
@@ -32,7 +32,7 @@ compute(void *tflite_ctx, graph_execution_context ctx);
 
 __attribute__((visibility("default"))) wasi_nn_error
 get_output(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
-           tensor_data output_tensor, uint32_t *output_tensor_size);
+           tensor_data *output_tensor, uint32_t *output_tensor_size);
 
 __attribute__((visibility("default"))) wasi_nn_error
 init_backend(void **tflite_ctx);
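Note on the new calling convention (illustration only, not part of the patch): after this change a backend's get_output receives the destination buffer and its capacity together as a tensor_data, bounds-checks against output_tensor->size, and reports the bytes actually written through *output_tensor_size. A minimal standalone sketch of that pattern follows; example_backend_get_output and the trimmed-down wasi_nn_error enum are hypothetical.

/* Illustrative sketch only -- mirrors the pattern used by the OpenVINO and
 * TFLite backends in the patch above; names are hypothetical. */
#include <stdint.h>
#include <string.h>

typedef struct {
    uint8_t *buf;  /* destination buffer provided by the caller */
    uint32_t size; /* capacity of buf in bytes */
} tensor_data;

/* trimmed-down error codes for this sketch */
typedef enum { success = 0, too_large } wasi_nn_error;

static wasi_nn_error
example_backend_get_output(const uint8_t *result, uint32_t result_bytes,
                           tensor_data *output_tensor,
                           uint32_t *output_tensor_size)
{
    /* capacity travels with the buffer, so the backend can refuse an
     * oversized copy instead of overrunning the caller's memory */
    if (result_bytes > output_tensor->size)
        return too_large;
    memcpy(output_tensor->buf, result, result_bytes);
    *output_tensor_size = result_bytes; /* bytes actually written */
    return success;
}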