mirror of
https://github.com/bytecodealliance/wasm-micro-runtime.git
synced 2025-09-05 17:32:26 +00:00
wasi_nn_openvino.c: implement multiple models per instance (#4380)
tested with two models:

```
--load-graph=id=graph1,file=public/license-plate-recognition-barrier-0007/FP32/license-plate-recognition-barrier-0007.xml,file=public/license-plate-recognition-barrier-0007/FP32/license-plate-recognition-barrier-0007.bin \
--load-graph=id=graph2,file=classify/model.xml,file=classify/model.bin \
--init-execution-context=id=exec1,graph-id=graph1 \
--init-execution-context=id=exec2,graph-id=graph2 \
--set-input=context-id=exec1,dim=1,dim=24,dim=94,dim=3,file=out.bin \
--set-input=context-id=exec2,file=classify/banana-3x224x224-bgr.bin,dim=1,dim=3,dim=224,dim=224 \
--compute=context-id=exec1 \
--compute=context-id=exec2 \
--get-output=context-id=exec1,file=exec1-result.bin \
--get-output=context-id=exec2,file=exec2-result.bin
```

a detailed HOWTO: https://github.com/bytecodealliance/wasm-micro-runtime/pull/4380#issuecomment-2986882718
This commit is contained in:
parent
64cafaff1e
commit
f449b79a31
|
@ -26,15 +26,25 @@
|
||||||
* from 4. to 6. is the Inference Loop
|
* from 4. to 6. is the Inference Loop
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* these limits are arbitrary. */
|
||||||
|
#define MAX_GRAPHS 4
|
||||||
|
#define MAX_EXECUTION_CONTEXTS 4
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
ov_core_t *core;
|
ov_core_t *core;
|
||||||
/* keep input model files */
|
/* keep input model files */
|
||||||
void *weight_data;
|
struct OpenVINOGraph {
|
||||||
ov_tensor_t *weights_tensor;
|
void *weight_data;
|
||||||
ov_model_t *model;
|
ov_tensor_t *weights_tensor;
|
||||||
ov_compiled_model_t *compiled_model;
|
ov_model_t *model;
|
||||||
ov_infer_request_t *infer_request;
|
ov_compiled_model_t *compiled_model;
|
||||||
ov_tensor_t *input_tensor;
|
} graphs[MAX_GRAPHS];
|
||||||
|
struct OpenVINOExecutionContext {
|
||||||
|
struct OpenVINOGraph *graph;
|
||||||
|
ov_infer_request_t *infer_request;
|
||||||
|
} execution_contexts[MAX_EXECUTION_CONTEXTS];
|
||||||
|
unsigned int n_graphs;
|
||||||
|
unsigned int n_execution_contexts;
|
||||||
} OpenVINOContext;
|
} OpenVINOContext;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -179,6 +189,29 @@ wasi_nn_tensor_type_to_openvino_element_type(tensor_type wasi_nn_type)
|
||||||
return UNDEFINED;
|
return UNDEFINED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
free_graph(struct OpenVINOGraph *graph)
|
||||||
|
{
|
||||||
|
if (graph->weight_data)
|
||||||
|
os_free(graph->weight_data);
|
||||||
|
|
||||||
|
if (graph->weights_tensor)
|
||||||
|
ov_tensor_free(graph->weights_tensor);
|
||||||
|
|
||||||
|
if (graph->model)
|
||||||
|
ov_model_free(graph->model);
|
||||||
|
|
||||||
|
if (graph->compiled_model)
|
||||||
|
ov_compiled_model_free(graph->compiled_model);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
free_execution_context(struct OpenVINOExecutionContext *c)
|
||||||
|
{
|
||||||
|
if (c->infer_request)
|
||||||
|
ov_infer_request_free(c->infer_request);
|
||||||
|
}
|
||||||
|
|
||||||
static wasi_nn_error
|
static wasi_nn_error
|
||||||
uint32_array_to_int64_array(uint32_t array_size, uint32_t *src, int64_t **dst)
|
uint32_array_to_int64_array(uint32_t array_size, uint32_t *src, int64_t **dst)
|
||||||
{
|
{
|
||||||
|
@ -198,6 +231,8 @@ load(void *ctx, graph_builder_array *builder, graph_encoding encoding,
|
||||||
execution_target target, graph *g)
|
execution_target target, graph *g)
|
||||||
{
|
{
|
||||||
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
||||||
|
struct OpenVINOGraph *graph;
|
||||||
|
unsigned int graph_idx;
|
||||||
wasi_nn_error ret = unsupported_operation;
|
wasi_nn_error ret = unsupported_operation;
|
||||||
|
|
||||||
if (encoding != openvino) {
|
if (encoding != openvino) {
|
||||||
|
@ -223,33 +258,47 @@ load(void *ctx, graph_builder_array *builder, graph_encoding encoding,
|
||||||
graph_builder xml = builder->buf[0];
|
graph_builder xml = builder->buf[0];
|
||||||
graph_builder weight = builder->buf[1];
|
graph_builder weight = builder->buf[1];
|
||||||
|
|
||||||
|
graph_idx = ov_ctx->n_graphs;
|
||||||
|
if (graph_idx >= MAX_GRAPHS) {
|
||||||
|
return runtime_error;
|
||||||
|
}
|
||||||
|
graph = &ov_ctx->graphs[graph_idx];
|
||||||
|
memset(graph, 0, sizeof(*graph));
|
||||||
|
|
||||||
/* transfer weight to an ov tensor */
|
/* transfer weight to an ov tensor */
|
||||||
{
|
{
|
||||||
ov_ctx->weight_data = os_malloc(weight.size);
|
graph->weight_data = os_malloc(weight.size);
|
||||||
if (!ov_ctx->weight_data)
|
if (!graph->weight_data)
|
||||||
goto fail;
|
goto fail;
|
||||||
memcpy(ov_ctx->weight_data, weight.buf, weight.size);
|
memcpy(graph->weight_data, weight.buf, weight.size);
|
||||||
|
|
||||||
ov_element_type_e type = U8;
|
ov_element_type_e type = U8;
|
||||||
int64_t dims[1] = { weight.size };
|
int64_t dims[1] = { weight.size };
|
||||||
ov_shape_t shape = { 1, dims };
|
ov_shape_t shape = { 1, dims };
|
||||||
CHECK_OV_STATUS(ov_tensor_create_from_host_ptr(type, shape,
|
CHECK_OV_STATUS(ov_tensor_create_from_host_ptr(type, shape,
|
||||||
ov_ctx->weight_data,
|
graph->weight_data,
|
||||||
&ov_ctx->weights_tensor),
|
&graph->weights_tensor),
|
||||||
ret);
|
ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* load model from buffer */
|
/* load model from buffer */
|
||||||
CHECK_OV_STATUS(ov_core_read_model_from_memory_buffer(
|
CHECK_OV_STATUS(ov_core_read_model_from_memory_buffer(
|
||||||
ov_ctx->core, (char *)xml.buf, xml.size,
|
ov_ctx->core, (char *)xml.buf, xml.size,
|
||||||
ov_ctx->weights_tensor, &ov_ctx->model),
|
graph->weights_tensor, &graph->model),
|
||||||
ret);
|
ret);
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
print_model_input_output_info(ov_ctx->model);
|
print_model_input_output_info(ov_ctx->model);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ret = success;
|
CHECK_OV_STATUS(ov_core_compile_model(ov_ctx->core, graph->model, "CPU", 0,
|
||||||
|
&graph->compiled_model),
|
||||||
|
ret);
|
||||||
|
|
||||||
|
*g = graph_idx;
|
||||||
|
ov_ctx->n_graphs++;
|
||||||
|
return success;
|
||||||
fail:
|
fail:
|
||||||
|
free_graph(graph);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -257,20 +306,62 @@ __attribute__((visibility("default"))) wasi_nn_error
|
||||||
load_by_name(void *ctx, const char *filename, uint32_t filename_len, graph *g)
|
load_by_name(void *ctx, const char *filename, uint32_t filename_len, graph *g)
|
||||||
{
|
{
|
||||||
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
||||||
|
struct OpenVINOGraph *graph;
|
||||||
|
unsigned int graph_idx;
|
||||||
wasi_nn_error ret = unsupported_operation;
|
wasi_nn_error ret = unsupported_operation;
|
||||||
|
|
||||||
CHECK_OV_STATUS(
|
graph_idx = ov_ctx->n_graphs;
|
||||||
ov_core_read_model(ov_ctx->core, filename, NULL, &ov_ctx->model), ret);
|
if (graph_idx >= MAX_GRAPHS) {
|
||||||
|
return runtime_error;
|
||||||
|
}
|
||||||
|
graph = &ov_ctx->graphs[graph_idx];
|
||||||
|
|
||||||
ret = success;
|
memset(graph, 0, sizeof(*graph));
|
||||||
|
CHECK_OV_STATUS(
|
||||||
|
ov_core_read_model(ov_ctx->core, filename, NULL, &graph->model), ret);
|
||||||
|
|
||||||
|
CHECK_OV_STATUS(ov_core_compile_model(ov_ctx->core, graph->model, "CPU", 0,
|
||||||
|
&graph->compiled_model),
|
||||||
|
ret);
|
||||||
|
|
||||||
|
*g = graph_idx;
|
||||||
|
ov_ctx->n_graphs++;
|
||||||
|
return success;
|
||||||
fail:
|
fail:
|
||||||
|
free_graph(graph);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((visibility("default"))) wasi_nn_error
|
__attribute__((visibility("default"))) wasi_nn_error
|
||||||
init_execution_context(void *ctx, graph g, graph_execution_context *exec_ctx)
|
init_execution_context(void *ctx, graph g, graph_execution_context *exec_ctx)
|
||||||
{
|
{
|
||||||
|
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
||||||
|
struct OpenVINOGraph *graph;
|
||||||
|
struct OpenVINOExecutionContext *exec;
|
||||||
|
unsigned int exec_idx;
|
||||||
|
wasi_nn_error ret;
|
||||||
|
|
||||||
|
if (g >= ov_ctx->n_graphs)
|
||||||
|
return runtime_error;
|
||||||
|
graph = &ov_ctx->graphs[g];
|
||||||
|
|
||||||
|
exec_idx = ov_ctx->n_execution_contexts;
|
||||||
|
if (exec_idx >= MAX_EXECUTION_CONTEXTS)
|
||||||
|
return runtime_error;
|
||||||
|
exec = &ov_ctx->execution_contexts[exec_idx];
|
||||||
|
|
||||||
|
memset(exec, 0, sizeof(*exec));
|
||||||
|
exec->graph = graph;
|
||||||
|
|
||||||
|
CHECK_OV_STATUS(ov_compiled_model_create_infer_request(
|
||||||
|
graph->compiled_model, &exec->infer_request),
|
||||||
|
ret);
|
||||||
|
|
||||||
|
*exec_ctx = exec_idx;
|
||||||
|
ov_ctx->n_execution_contexts++;
|
||||||
return success;
|
return success;
|
||||||
|
fail:
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((visibility("default"))) wasi_nn_error
|
__attribute__((visibility("default"))) wasi_nn_error
|
||||||
|
@ -278,10 +369,16 @@ set_input(void *ctx, graph_execution_context exec_ctx, uint32_t index,
|
||||||
tensor *wasi_nn_tensor)
|
tensor *wasi_nn_tensor)
|
||||||
{
|
{
|
||||||
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
||||||
|
struct OpenVINOExecutionContext *exec;
|
||||||
wasi_nn_error ret = unsupported_operation;
|
wasi_nn_error ret = unsupported_operation;
|
||||||
ov_shape_t input_shape = { 0 };
|
ov_shape_t input_shape = { 0 };
|
||||||
|
ov_tensor_t *input_tensor = NULL;
|
||||||
int64_t *ov_dims = NULL;
|
int64_t *ov_dims = NULL;
|
||||||
|
|
||||||
|
if (exec_ctx >= ov_ctx->n_execution_contexts)
|
||||||
|
return runtime_error;
|
||||||
|
exec = &ov_ctx->execution_contexts[exec_ctx];
|
||||||
|
|
||||||
/* wasi_nn_tensor -> ov_tensor */
|
/* wasi_nn_tensor -> ov_tensor */
|
||||||
{
|
{
|
||||||
ret = uint32_array_to_int64_array(wasi_nn_tensor->dimensions->size,
|
ret = uint32_array_to_int64_array(wasi_nn_tensor->dimensions->size,
|
||||||
|
@ -306,27 +403,20 @@ set_input(void *ctx, graph_execution_context exec_ctx, uint32_t index,
|
||||||
|
|
||||||
CHECK_OV_STATUS(ov_tensor_create_from_host_ptr(input_type, input_shape,
|
CHECK_OV_STATUS(ov_tensor_create_from_host_ptr(input_type, input_shape,
|
||||||
wasi_nn_tensor->data,
|
wasi_nn_tensor->data,
|
||||||
&ov_ctx->input_tensor),
|
&input_tensor),
|
||||||
ret);
|
ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
CHECK_OV_STATUS(ov_core_compile_model(ov_ctx->core, ov_ctx->model, "CPU", 0,
|
|
||||||
&ov_ctx->compiled_model),
|
|
||||||
ret);
|
|
||||||
|
|
||||||
CHECK_OV_STATUS(ov_compiled_model_create_infer_request(
|
|
||||||
ov_ctx->compiled_model, &ov_ctx->infer_request),
|
|
||||||
ret);
|
|
||||||
|
|
||||||
/* install ov_tensor -> infer_request */
|
/* install ov_tensor -> infer_request */
|
||||||
CHECK_OV_STATUS(ov_infer_request_set_input_tensor_by_index(
|
CHECK_OV_STATUS(ov_infer_request_set_input_tensor_by_index(
|
||||||
ov_ctx->infer_request, index, ov_ctx->input_tensor),
|
exec->infer_request, index, input_tensor),
|
||||||
ret);
|
ret);
|
||||||
ret = success;
|
ret = success;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
if (ov_dims)
|
if (ov_dims)
|
||||||
os_free(ov_dims);
|
os_free(ov_dims);
|
||||||
|
if (input_tensor)
|
||||||
|
ov_tensor_free(input_tensor);
|
||||||
ov_shape_free(&input_shape);
|
ov_shape_free(&input_shape);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -336,9 +426,14 @@ __attribute__((visibility("default"))) wasi_nn_error
|
||||||
compute(void *ctx, graph_execution_context exec_ctx)
|
compute(void *ctx, graph_execution_context exec_ctx)
|
||||||
{
|
{
|
||||||
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
||||||
|
struct OpenVINOExecutionContext *exec;
|
||||||
wasi_nn_error ret = unsupported_operation;
|
wasi_nn_error ret = unsupported_operation;
|
||||||
|
|
||||||
CHECK_OV_STATUS(ov_infer_request_infer(ov_ctx->infer_request), ret);
|
if (exec_ctx >= ov_ctx->n_execution_contexts)
|
||||||
|
return runtime_error;
|
||||||
|
exec = &ov_ctx->execution_contexts[exec_ctx];
|
||||||
|
|
||||||
|
CHECK_OV_STATUS(ov_infer_request_infer(exec->infer_request), ret);
|
||||||
ret = success;
|
ret = success;
|
||||||
fail:
|
fail:
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -349,13 +444,18 @@ get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index,
|
||||||
tensor_data output_tensor, uint32_t *output_tensor_size)
|
tensor_data output_tensor, uint32_t *output_tensor_size)
|
||||||
{
|
{
|
||||||
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
||||||
|
struct OpenVINOExecutionContext *exec;
|
||||||
wasi_nn_error ret = unsupported_operation;
|
wasi_nn_error ret = unsupported_operation;
|
||||||
ov_tensor_t *ov_tensor = NULL;
|
ov_tensor_t *ov_tensor = NULL;
|
||||||
void *data = NULL;
|
void *data = NULL;
|
||||||
size_t byte_size = 0;
|
size_t byte_size = 0;
|
||||||
|
|
||||||
|
if (exec_ctx >= ov_ctx->n_execution_contexts)
|
||||||
|
return runtime_error;
|
||||||
|
exec = &ov_ctx->execution_contexts[exec_ctx];
|
||||||
|
|
||||||
CHECK_OV_STATUS(ov_infer_request_get_output_tensor_by_index(
|
CHECK_OV_STATUS(ov_infer_request_get_output_tensor_by_index(
|
||||||
ov_ctx->infer_request, index, &ov_tensor),
|
exec->infer_request, index, &ov_tensor),
|
||||||
ret);
|
ret);
|
||||||
|
|
||||||
CHECK_OV_STATUS(ov_tensor_get_byte_size(ov_tensor, &byte_size), ret);
|
CHECK_OV_STATUS(ov_tensor_get_byte_size(ov_tensor, &byte_size), ret);
|
||||||
|
@ -421,27 +521,16 @@ __attribute__((visibility("default"))) wasi_nn_error
|
||||||
deinit_backend(void *ctx)
|
deinit_backend(void *ctx)
|
||||||
{
|
{
|
||||||
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
|
||||||
|
unsigned int i;
|
||||||
|
|
||||||
if (!ov_ctx)
|
if (!ov_ctx)
|
||||||
return invalid_argument;
|
return invalid_argument;
|
||||||
|
|
||||||
if (ov_ctx->weight_data)
|
for (i = 0; i < ov_ctx->n_execution_contexts; i++)
|
||||||
os_free(ov_ctx->weight_data);
|
free_execution_context(&ov_ctx->execution_contexts[i]);
|
||||||
|
|
||||||
if (ov_ctx->weights_tensor)
|
for (i = 0; i < ov_ctx->n_graphs; i++)
|
||||||
ov_tensor_free(ov_ctx->weights_tensor);
|
free_graph(&ov_ctx->graphs[i]);
|
||||||
|
|
||||||
if (ov_ctx->input_tensor)
|
|
||||||
ov_tensor_free(ov_ctx->input_tensor);
|
|
||||||
|
|
||||||
if (ov_ctx->infer_request)
|
|
||||||
ov_infer_request_free(ov_ctx->infer_request);
|
|
||||||
|
|
||||||
if (ov_ctx->compiled_model)
|
|
||||||
ov_compiled_model_free(ov_ctx->compiled_model);
|
|
||||||
|
|
||||||
if (ov_ctx->model)
|
|
||||||
ov_model_free(ov_ctx->model);
|
|
||||||
|
|
||||||
if (ov_ctx->core)
|
if (ov_ctx->core)
|
||||||
ov_core_free(ov_ctx->core);
|
ov_core_free(ov_ctx->core);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user