From 831e4bbfd51323e10e47db1d5f7200341e87a6bb Mon Sep 17 00:00:00 2001 From: Wenyong Huang Date: Mon, 20 Jan 2025 09:39:32 +0800 Subject: [PATCH] Refine getting const offsets in wasm loader of fast-interp (#4012) - Refine const offsets in loader for fast-interp - handle const cell num overflow - Use const array, remove list --- core/iwasm/interpreter/wasm_loader.c | 336 +++++++++++++--------- core/iwasm/interpreter/wasm_mini_loader.c | 336 +++++++++++++--------- 2 files changed, 412 insertions(+), 260 deletions(-) diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 4ee553e0b..6cd1ece4f 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -355,9 +355,14 @@ memory_realloc(void *mem_old, uint32 size_old, uint32 size_new, char *error_buf, { uint8 *mem_new; bh_assert(size_new > size_old); + + if ((mem_new = wasm_runtime_realloc(mem_old, size_new))) { + memset(mem_new + size_old, 0, size_new - size_old); + return mem_new; + } + if ((mem_new = loader_malloc(size_new, error_buf, error_buf_size))) { bh_memcpy_s(mem_new, size_new, mem_old, size_old); - memset(mem_new + size_old, 0, size_new - size_old); wasm_runtime_free(mem_old); } return mem_new; @@ -7949,11 +7954,16 @@ typedef struct WASMLoaderContext { /* preserved local offset */ int16 preserved_local_offset; - /* const buffer */ - uint8 *const_buf; - uint16 num_const; - uint16 const_cell_num; - uint32 const_buf_size; + /* const buffer for i64 and f64 consts, note that the raw bytes + * of i64 and f64 are the same, so we read an i64 value from an + * f64 const with its raw bytes, something like `*(int64 *)&f64 */ + int64 *i64_consts; + uint32 i64_const_max_num; + uint32 i64_const_num; + /* const buffer for i32 and f32 consts */ + int32 *i32_consts; + uint32 i32_const_max_num; + uint32 i32_const_num; /* processed code */ uint8 *p_code_compiled; @@ -7966,12 +7976,6 @@ typedef struct WASMLoaderContext { #endif } WASMLoaderContext; -typedef struct Const { - WASMValue value; - uint16 slot_index; - uint8 value_type; -} Const; - #define CHECK_CSP_PUSH() \ do { \ if (ctx->frame_csp >= ctx->frame_csp_boundary) { \ @@ -8189,8 +8193,10 @@ wasm_loader_ctx_destroy(WASMLoaderContext *ctx) #if WASM_ENABLE_FAST_INTERP != 0 if (ctx->frame_offset_bottom) wasm_runtime_free(ctx->frame_offset_bottom); - if (ctx->const_buf) - wasm_runtime_free(ctx->const_buf); + if (ctx->i64_consts) + wasm_runtime_free(ctx->i64_consts); + if (ctx->i32_consts) + wasm_runtime_free(ctx->i32_consts); #endif wasm_runtime_free(ctx); } @@ -8238,10 +8244,15 @@ wasm_loader_ctx_init(WASMFunction *func, char *error_buf, uint32 error_buf_size) goto fail; loader_ctx->frame_offset_boundary = loader_ctx->frame_offset_bottom + 32; - loader_ctx->num_const = 0; - loader_ctx->const_buf_size = sizeof(Const) * 8; - if (!(loader_ctx->const_buf = loader_malloc(loader_ctx->const_buf_size, - error_buf, error_buf_size))) + loader_ctx->i64_const_max_num = 8; + if (!(loader_ctx->i64_consts = + loader_malloc(sizeof(int64) * loader_ctx->i64_const_max_num, + error_buf, error_buf_size))) + goto fail; + loader_ctx->i32_const_max_num = 8; + if (!(loader_ctx->i32_consts = + loader_malloc(sizeof(int32) * loader_ctx->i32_const_max_num, + error_buf, error_buf_size))) goto fail; if (func->param_cell_num >= (int32)INT16_MAX - func->local_cell_num) { @@ -9489,108 +9500,116 @@ wasm_loader_push_pop_frame_ref_offset(WASMLoaderContext *ctx, uint8 pop_cnt, return true; } +static int +cmp_i64_const(const void *p_i64_const1, const void *p_i64_const2) +{ + int64 i64_const1 = *(int64 *)p_i64_const1; + int64 i64_const2 = *(int64 *)p_i64_const2; + + return (i64_const1 < i64_const2) ? -1 : (i64_const1 > i64_const2) ? 1 : 0; +} + +static int +cmp_i32_const(const void *p_i32_const1, const void *p_i32_const2) +{ + int32 i32_const1 = *(int32 *)p_i32_const1; + int32 i32_const2 = *(int32 *)p_i32_const2; + + return (i32_const1 < i32_const2) ? -1 : (i32_const1 > i32_const2) ? 1 : 0; +} + static bool wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, int16 *offset, char *error_buf, uint32 error_buf_size) { - int8 bytes_to_increase; - int16 operand_offset = 0; - Const *c; + if (!ctx->p_code_compiled) { + /* Treat i64 and f64 as the same by reading i64 value from + the raw bytes */ + if (type == VALUE_TYPE_I64 || type == VALUE_TYPE_F64) { + /* No slot left, emit const instead */ + if (ctx->i64_const_num * 2 + ctx->i32_const_num > INT16_MAX - 2) { + *offset = 0; + return true; + } - /* Search existing constant */ - for (c = (Const *)ctx->const_buf; - (uint8 *)c < ctx->const_buf + ctx->num_const * sizeof(Const); c++) { - /* TODO: handle v128 type? */ - if ((type == c->value_type) - && ((type == VALUE_TYPE_I64 && *(int64 *)value == c->value.i64) - || (type == VALUE_TYPE_I32 && *(int32 *)value == c->value.i32) -#if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0 - || (type == VALUE_TYPE_FUNCREF - && *(int32 *)value == c->value.i32) - || (type == VALUE_TYPE_EXTERNREF - && *(int32 *)value == c->value.i32) -#endif - || (type == VALUE_TYPE_F64 - && (0 == memcmp(value, &(c->value.f64), sizeof(float64)))) - || (type == VALUE_TYPE_F32 - && (0 - == memcmp(value, &(c->value.f32), sizeof(float32)))))) { - operand_offset = c->slot_index; - break; - } - if (is_32bit_type(c->value_type)) - operand_offset += 1; - else - operand_offset += 2; - } + /* Traverse the list if the const num is small */ + if (ctx->i64_const_num < 10) { + for (uint32 i = 0; i < ctx->i64_const_num; i++) { + if (ctx->i64_consts[i] == *(int64 *)value) { + *offset = -1; + return true; + } + } + } - if ((uint8 *)c == ctx->const_buf + ctx->num_const * sizeof(Const)) { - /* New constant, append to the const buffer */ - if ((type == VALUE_TYPE_F64) || (type == VALUE_TYPE_I64)) { - bytes_to_increase = 2; + if (ctx->i64_const_num >= ctx->i64_const_max_num) { + MEM_REALLOC(ctx->i64_consts, + sizeof(int64) * ctx->i64_const_max_num, + sizeof(int64) * (ctx->i64_const_max_num * 2)); + ctx->i64_const_max_num *= 2; + } + ctx->i64_consts[ctx->i64_const_num++] = *(int64 *)value; } else { - bytes_to_increase = 1; + /* Treat i32 and f32 as the same by reading i32 value from + the raw bytes */ + bh_assert(type == VALUE_TYPE_I32 || type == VALUE_TYPE_F32); + + /* No slot left, emit const instead */ + if (ctx->i64_const_num * 2 + ctx->i32_const_num > INT16_MAX - 1) { + *offset = 0; + return true; + } + + /* Traverse the list if the const num is small */ + if (ctx->i32_const_num < 10) { + for (uint32 i = 0; i < ctx->i32_const_num; i++) { + if (ctx->i32_consts[i] == *(int32 *)value) { + *offset = -1; + return true; + } + } + } + + if (ctx->i32_const_num >= ctx->i32_const_max_num) { + MEM_REALLOC(ctx->i32_consts, + sizeof(int32) * ctx->i32_const_max_num, + sizeof(int32) * (ctx->i32_const_max_num * 2)); + ctx->i32_const_max_num *= 2; + } + ctx->i32_consts[ctx->i32_const_num++] = *(int32 *)value; } - /* The max cell num of const buffer is 32768 since the valid index range - * is -32768 ~ -1. Return an invalid index 0 to indicate the buffer is - * full */ - if (ctx->const_cell_num > INT16_MAX - bytes_to_increase + 1) { - *offset = 0; - return true; - } - - if ((uint8 *)c == ctx->const_buf + ctx->const_buf_size) { - MEM_REALLOC(ctx->const_buf, ctx->const_buf_size, - ctx->const_buf_size + 4 * sizeof(Const)); - ctx->const_buf_size += 4 * sizeof(Const); - c = (Const *)(ctx->const_buf + ctx->num_const * sizeof(Const)); - } - c->value_type = type; - switch (type) { - case VALUE_TYPE_F64: - bh_memcpy_s(&(c->value.f64), sizeof(WASMValue), value, - sizeof(float64)); - ctx->const_cell_num += 2; - /* The const buf will be reversed, we use the second cell */ - /* of the i64/f64 const so the final offset is correct */ - operand_offset++; - break; - case VALUE_TYPE_I64: - c->value.i64 = *(int64 *)value; - ctx->const_cell_num += 2; - operand_offset++; - break; - case VALUE_TYPE_F32: - bh_memcpy_s(&(c->value.f32), sizeof(WASMValue), value, - sizeof(float32)); - ctx->const_cell_num++; - break; - case VALUE_TYPE_I32: - c->value.i32 = *(int32 *)value; - ctx->const_cell_num++; - break; -#if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0 - case VALUE_TYPE_EXTERNREF: - case VALUE_TYPE_FUNCREF: - c->value.i32 = *(int32 *)value; - ctx->const_cell_num++; - break; -#endif - default: - break; - } - c->slot_index = operand_offset; - ctx->num_const++; - LOG_OP("#### new const [%d]: %ld\n", ctx->num_const, - (int64)c->value.i64); + *offset = -1; + return true; + } + else { + if (type == VALUE_TYPE_I64 || type == VALUE_TYPE_F64) { + int64 key = *(int64 *)value, *i64_const; + i64_const = bsearch(&key, ctx->i64_consts, ctx->i64_const_num, + sizeof(int64), cmp_i64_const); + if (!i64_const) { /* not found, emit const instead */ + *offset = 0; + return true; + } + *offset = -(uint32)(ctx->i64_const_num * 2 + ctx->i32_const_num) + + (uint32)(i64_const - ctx->i64_consts) * 2; + } + else { + int32 key = *(int32 *)value, *i32_const; + i32_const = bsearch(&key, ctx->i32_consts, ctx->i32_const_num, + sizeof(int32), cmp_i32_const); + if (!i32_const) { /* not found, emit const instead */ + *offset = 0; + return true; + } + *offset = -(uint32)(ctx->i32_const_num) + + (uint32)(i32_const - ctx->i32_consts); + } + + return true; } - /* use negative index for const */ - operand_offset = -(operand_offset + 1); - *offset = operand_offset; - return true; fail: return false; } @@ -11028,7 +11047,6 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, bool need_ref_type_map; #endif #if WASM_ENABLE_FAST_INTERP != 0 - uint8 *func_const_end, *func_const = NULL; int16 operand_offset = 0; uint8 last_op = 0; bool disable_emit, preserve_local = false, if_condition_available = true; @@ -11095,6 +11113,68 @@ re_scan: p = func->code; func->code_compiled = loader_ctx->p_code_compiled; func->code_compiled_size = loader_ctx->code_compiled_size; + + if (loader_ctx->i64_const_num > 0) { + int64 *i64_consts_old = loader_ctx->i64_consts; + + /* Sort the i64 consts */ + qsort(i64_consts_old, loader_ctx->i64_const_num, sizeof(int64), + cmp_i64_const); + + /* Remove the duplicated i64 consts */ + uint32 k = 1; + for (i = 1; i < loader_ctx->i64_const_num; i++) { + if (i64_consts_old[i] != i64_consts_old[i - 1]) { + i64_consts_old[k++] = i64_consts_old[i]; + } + } + + if (k < loader_ctx->i64_const_num) { + int64 *i64_consts_new; + /* Try to reallocate memory with a smaller size */ + if ((i64_consts_new = + wasm_runtime_malloc((uint32)sizeof(int64) * k))) { + bh_memcpy_s(i64_consts_new, (uint32)sizeof(int64) * k, + i64_consts_old, (uint32)sizeof(int64) * k); + /* Free the old memory */ + wasm_runtime_free(i64_consts_old); + loader_ctx->i64_consts = i64_consts_new; + loader_ctx->i64_const_max_num = k; + } + loader_ctx->i64_const_num = k; + } + } + + if (loader_ctx->i32_const_num > 0) { + int32 *i32_consts_old = loader_ctx->i32_consts; + + /* Sort the i32 consts */ + qsort(i32_consts_old, loader_ctx->i32_const_num, sizeof(int32), + cmp_i32_const); + + /* Remove the duplicated i32 consts */ + uint32 k = 1; + for (i = 1; i < loader_ctx->i32_const_num; i++) { + if (i32_consts_old[i] != i32_consts_old[i - 1]) { + i32_consts_old[k++] = i32_consts_old[i]; + } + } + + if (k < loader_ctx->i32_const_num) { + int32 *i32_consts_new; + /* Try to reallocate memory with a smaller size */ + if ((i32_consts_new = + wasm_runtime_malloc((uint32)sizeof(int32) * k))) { + bh_memcpy_s(i32_consts_new, (uint32)sizeof(int32) * k, + i32_consts_old, (uint32)sizeof(int32) * k); + /* Free the old memory */ + wasm_runtime_free(i32_consts_old); + loader_ctx->i32_consts = i32_consts_new; + loader_ctx->i32_const_max_num = k; + } + loader_ctx->i32_const_num = k; + } + } } #endif @@ -16016,29 +16096,25 @@ re_scan: if (loader_ctx->p_code_compiled == NULL) goto re_scan; - func->const_cell_num = loader_ctx->const_cell_num; + func->const_cell_num = + loader_ctx->i64_const_num * 2 + loader_ctx->i32_const_num; if (func->const_cell_num > 0) { - int32 j; - - if (!(func->consts = func_const = loader_malloc( - func->const_cell_num * 4, error_buf, error_buf_size))) + if (!(func->consts = + loader_malloc((uint64)sizeof(uint32) * func->const_cell_num, + error_buf, error_buf_size))) goto fail; - - func_const_end = func->consts + func->const_cell_num * 4; - /* reverse the const buf */ - for (j = loader_ctx->num_const - 1; j >= 0; j--) { - Const *c = (Const *)(loader_ctx->const_buf + j * sizeof(Const)); - if (c->value_type == VALUE_TYPE_F64 - || c->value_type == VALUE_TYPE_I64) { - bh_memcpy_s(func_const, (uint32)(func_const_end - func_const), - &(c->value.f64), (uint32)sizeof(int64)); - func_const += sizeof(int64); - } - else { - bh_memcpy_s(func_const, (uint32)(func_const_end - func_const), - &(c->value.f32), (uint32)sizeof(int32)); - func_const += sizeof(int32); - } + if (loader_ctx->i64_const_num > 0) { + bh_memcpy_s(func->consts, + (uint32)sizeof(int64) * loader_ctx->i64_const_num, + loader_ctx->i64_consts, + (uint32)sizeof(int64) * loader_ctx->i64_const_num); + } + if (loader_ctx->i32_const_num > 0) { + bh_memcpy_s(func->consts + + sizeof(int64) * loader_ctx->i64_const_num, + (uint32)sizeof(int32) * loader_ctx->i32_const_num, + loader_ctx->i32_consts, + (uint32)sizeof(int32) * loader_ctx->i32_const_num); } } diff --git a/core/iwasm/interpreter/wasm_mini_loader.c b/core/iwasm/interpreter/wasm_mini_loader.c index fde969c94..1638459a5 100644 --- a/core/iwasm/interpreter/wasm_mini_loader.c +++ b/core/iwasm/interpreter/wasm_mini_loader.c @@ -208,9 +208,14 @@ memory_realloc(void *mem_old, uint32 size_old, uint32 size_new, char *error_buf, { uint8 *mem_new; bh_assert(size_new > size_old); + + if ((mem_new = wasm_runtime_realloc(mem_old, size_new))) { + memset(mem_new + size_old, 0, size_new - size_old); + return mem_new; + } + if ((mem_new = loader_malloc(size_new, error_buf, error_buf_size))) { bh_memcpy_s(mem_new, size_new, mem_old, size_old); - memset(mem_new + size_old, 0, size_new - size_old); wasm_runtime_free(mem_old); } return mem_new; @@ -4047,11 +4052,16 @@ typedef struct WASMLoaderContext { /* preserved local offset */ int16 preserved_local_offset; - /* const buffer */ - uint8 *const_buf; - uint16 num_const; - uint16 const_cell_num; - uint32 const_buf_size; + /* const buffer for i64 and f64 consts, note that the raw bytes + * of i64 and f64 are the same, so we read an i64 value from an + * f64 const with its raw bytes, something like `*(int64 *)&f64 */ + int64 *i64_consts; + uint32 i64_const_max_num; + uint32 i64_const_num; + /* const buffer for i32 and f32 consts */ + int32 *i32_consts; + uint32 i32_const_max_num; + uint32 i32_const_num; /* processed code */ uint8 *p_code_compiled; @@ -4064,12 +4074,6 @@ typedef struct WASMLoaderContext { #endif } WASMLoaderContext; -typedef struct Const { - WASMValue value; - uint16 slot_index; - uint8 value_type; -} Const; - #define CHECK_CSP_PUSH() \ do { \ if (ctx->frame_csp >= ctx->frame_csp_boundary) { \ @@ -4224,8 +4228,10 @@ wasm_loader_ctx_destroy(WASMLoaderContext *ctx) #if WASM_ENABLE_FAST_INTERP != 0 if (ctx->frame_offset_bottom) wasm_runtime_free(ctx->frame_offset_bottom); - if (ctx->const_buf) - wasm_runtime_free(ctx->const_buf); + if (ctx->i64_consts) + wasm_runtime_free(ctx->i64_consts); + if (ctx->i32_consts) + wasm_runtime_free(ctx->i32_consts); #endif wasm_runtime_free(ctx); } @@ -4259,10 +4265,15 @@ wasm_loader_ctx_init(WASMFunction *func, char *error_buf, uint32 error_buf_size) goto fail; loader_ctx->frame_offset_boundary = loader_ctx->frame_offset_bottom + 32; - loader_ctx->num_const = 0; - loader_ctx->const_buf_size = sizeof(Const) * 8; - if (!(loader_ctx->const_buf = loader_malloc(loader_ctx->const_buf_size, - error_buf, error_buf_size))) + loader_ctx->i64_const_max_num = 8; + if (!(loader_ctx->i64_consts = + loader_malloc(sizeof(int64) * loader_ctx->i64_const_max_num, + error_buf, error_buf_size))) + goto fail; + loader_ctx->i32_const_max_num = 8; + if (!(loader_ctx->i32_consts = + loader_malloc(sizeof(int32) * loader_ctx->i32_const_max_num, + error_buf, error_buf_size))) goto fail; if (func->param_cell_num >= (int32)INT16_MAX - func->local_cell_num) { @@ -5085,107 +5096,116 @@ wasm_loader_push_pop_frame_ref_offset(WASMLoaderContext *ctx, uint8 pop_cnt, return true; } +static int +cmp_i64_const(const void *p_i64_const1, const void *p_i64_const2) +{ + int64 i64_const1 = *(int64 *)p_i64_const1; + int64 i64_const2 = *(int64 *)p_i64_const2; + + return (i64_const1 < i64_const2) ? -1 : (i64_const1 > i64_const2) ? 1 : 0; +} + +static int +cmp_i32_const(const void *p_i32_const1, const void *p_i32_const2) +{ + int32 i32_const1 = *(int32 *)p_i32_const1; + int32 i32_const2 = *(int32 *)p_i32_const2; + + return (i32_const1 < i32_const2) ? -1 : (i32_const1 > i32_const2) ? 1 : 0; +} + static bool wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, int16 *offset, char *error_buf, uint32 error_buf_size) { - int8 bytes_to_increase; - int16 operand_offset = 0; - Const *c; + if (!ctx->p_code_compiled) { + /* Treat i64 and f64 as the same by reading i64 value from + the raw bytes */ + if (type == VALUE_TYPE_I64 || type == VALUE_TYPE_F64) { + /* No slot left, emit const instead */ + if (ctx->i64_const_num * 2 + ctx->i32_const_num > INT16_MAX - 2) { + *offset = 0; + return true; + } - /* Search existing constant */ - for (c = (Const *)ctx->const_buf; - (uint8 *)c < ctx->const_buf + ctx->num_const * sizeof(Const); c++) { - if ((type == c->value_type) - && ((type == VALUE_TYPE_I64 && *(int64 *)value == c->value.i64) - || (type == VALUE_TYPE_I32 && *(int32 *)value == c->value.i32) -#if WASM_ENABLE_REF_TYPES != 0 - || (type == VALUE_TYPE_FUNCREF - && *(int32 *)value == c->value.i32) - || (type == VALUE_TYPE_EXTERNREF - && *(int32 *)value == c->value.i32) -#endif - || (type == VALUE_TYPE_F64 - && (0 == memcmp(value, &(c->value.f64), sizeof(float64)))) - || (type == VALUE_TYPE_F32 - && (0 - == memcmp(value, &(c->value.f32), sizeof(float32)))))) { - operand_offset = c->slot_index; - break; - } - if (c->value_type == VALUE_TYPE_I64 || c->value_type == VALUE_TYPE_F64) - operand_offset += 2; - else - operand_offset += 1; - } + /* Traverse the list if the const num is small */ + if (ctx->i64_const_num < 10) { + for (uint32 i = 0; i < ctx->i64_const_num; i++) { + if (ctx->i64_consts[i] == *(int64 *)value) { + *offset = -1; + return true; + } + } + } - if ((uint8 *)c == ctx->const_buf + ctx->num_const * sizeof(Const)) { - /* New constant, append to the const buffer */ - if ((type == VALUE_TYPE_F64) || (type == VALUE_TYPE_I64)) { - bytes_to_increase = 2; + if (ctx->i64_const_num >= ctx->i64_const_max_num) { + MEM_REALLOC(ctx->i64_consts, + sizeof(int64) * ctx->i64_const_max_num, + sizeof(int64) * (ctx->i64_const_max_num * 2)); + ctx->i64_const_max_num *= 2; + } + ctx->i64_consts[ctx->i64_const_num++] = *(int64 *)value; } else { - bytes_to_increase = 1; + /* Treat i32 and f32 as the same by reading i32 value from + the raw bytes */ + bh_assert(type == VALUE_TYPE_I32 || type == VALUE_TYPE_F32); + + /* No slot left, emit const instead */ + if (ctx->i64_const_num * 2 + ctx->i32_const_num > INT16_MAX - 1) { + *offset = 0; + return true; + } + + /* Traverse the list if the const num is small */ + if (ctx->i32_const_num < 10) { + for (uint32 i = 0; i < ctx->i32_const_num; i++) { + if (ctx->i32_consts[i] == *(int32 *)value) { + *offset = -1; + return true; + } + } + } + + if (ctx->i32_const_num >= ctx->i32_const_max_num) { + MEM_REALLOC(ctx->i32_consts, + sizeof(int32) * ctx->i32_const_max_num, + sizeof(int32) * (ctx->i32_const_max_num * 2)); + ctx->i32_const_max_num *= 2; + } + ctx->i32_consts[ctx->i32_const_num++] = *(int32 *)value; } - /* The max cell num of const buffer is 32768 since the valid index range - * is -32768 ~ -1. Return an invalid index 0 to indicate the buffer is - * full */ - if (ctx->const_cell_num > INT16_MAX - bytes_to_increase + 1) { - *offset = 0; - return true; - } - - if ((uint8 *)c == ctx->const_buf + ctx->const_buf_size) { - MEM_REALLOC(ctx->const_buf, ctx->const_buf_size, - ctx->const_buf_size + 4 * sizeof(Const)); - ctx->const_buf_size += 4 * sizeof(Const); - c = (Const *)(ctx->const_buf + ctx->num_const * sizeof(Const)); - } - c->value_type = type; - switch (type) { - case VALUE_TYPE_F64: - bh_memcpy_s(&(c->value.f64), sizeof(WASMValue), value, - sizeof(float64)); - ctx->const_cell_num += 2; - /* The const buf will be reversed, we use the second cell */ - /* of the i64/f64 const so the finnal offset is corrent */ - operand_offset++; - break; - case VALUE_TYPE_I64: - c->value.i64 = *(int64 *)value; - ctx->const_cell_num += 2; - operand_offset++; - break; - case VALUE_TYPE_F32: - bh_memcpy_s(&(c->value.f32), sizeof(WASMValue), value, - sizeof(float32)); - ctx->const_cell_num++; - break; - case VALUE_TYPE_I32: - c->value.i32 = *(int32 *)value; - ctx->const_cell_num++; - break; -#if WASM_ENABLE_REF_TYPES != 0 - case VALUE_TYPE_EXTERNREF: - case VALUE_TYPE_FUNCREF: - c->value.i32 = *(int32 *)value; - ctx->const_cell_num++; - break; -#endif - default: - break; - } - c->slot_index = operand_offset; - ctx->num_const++; - LOG_OP("#### new const [%d]: %ld\n", ctx->num_const, - (int64)c->value.i64); + *offset = -1; + return true; + } + else { + if (type == VALUE_TYPE_I64 || type == VALUE_TYPE_F64) { + int64 key = *(int64 *)value, *i64_const; + i64_const = bsearch(&key, ctx->i64_consts, ctx->i64_const_num, + sizeof(int64), cmp_i64_const); + if (!i64_const) { /* not found, emit const instead */ + *offset = 0; + return true; + } + *offset = -(uint32)(ctx->i64_const_num * 2 + ctx->i32_const_num) + + (uint32)(i64_const - ctx->i64_consts) * 2; + } + else { + int32 key = *(int32 *)value, *i32_const; + i32_const = bsearch(&key, ctx->i32_consts, ctx->i32_const_num, + sizeof(int32), cmp_i32_const); + if (!i32_const) { /* not found, emit const instead */ + *offset = 0; + return true; + } + *offset = -(uint32)(ctx->i32_const_num) + + (uint32)(i32_const - ctx->i32_consts); + } + + return true; } - /* use negetive index for const */ - operand_offset = -(operand_offset + 1); - *offset = operand_offset; - return true; fail: return false; } @@ -6151,11 +6171,9 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, uint32 segment_index; #endif #if WASM_ENABLE_FAST_INTERP != 0 - uint8 *func_const_end, *func_const = NULL; int16 operand_offset = 0; uint8 last_op = 0; bool disable_emit, preserve_local = false, if_condition_available = true; - ; float32 f32_const; float64 f64_const; @@ -6206,6 +6224,68 @@ re_scan: p = func->code; func->code_compiled = loader_ctx->p_code_compiled; func->code_compiled_size = loader_ctx->code_compiled_size; + + if (loader_ctx->i64_const_num > 0) { + int64 *i64_consts_old = loader_ctx->i64_consts; + + /* Sort the i64 consts */ + qsort(i64_consts_old, loader_ctx->i64_const_num, sizeof(int64), + cmp_i64_const); + + /* Remove the duplicated i64 consts */ + uint32 k = 1; + for (i = 1; i < loader_ctx->i64_const_num; i++) { + if (i64_consts_old[i] != i64_consts_old[i - 1]) { + i64_consts_old[k++] = i64_consts_old[i]; + } + } + + if (k < loader_ctx->i64_const_num) { + int64 *i64_consts_new; + /* Try to reallocate memory with a smaller size */ + if ((i64_consts_new = + wasm_runtime_malloc((uint32)sizeof(int64) * k))) { + bh_memcpy_s(i64_consts_new, (uint32)sizeof(int64) * k, + i64_consts_old, (uint32)sizeof(int64) * k); + /* Free the old memory */ + wasm_runtime_free(i64_consts_old); + loader_ctx->i64_consts = i64_consts_new; + loader_ctx->i64_const_max_num = k; + } + loader_ctx->i64_const_num = k; + } + } + + if (loader_ctx->i32_const_num > 0) { + int32 *i32_consts_old = loader_ctx->i32_consts; + + /* Sort the i32 consts */ + qsort(i32_consts_old, loader_ctx->i32_const_num, sizeof(int32), + cmp_i32_const); + + /* Remove the duplicated i32 consts */ + uint32 k = 1; + for (i = 1; i < loader_ctx->i32_const_num; i++) { + if (i32_consts_old[i] != i32_consts_old[i - 1]) { + i32_consts_old[k++] = i32_consts_old[i]; + } + } + + if (k < loader_ctx->i32_const_num) { + int32 *i32_consts_new; + /* Try to reallocate memory with a smaller size */ + if ((i32_consts_new = + wasm_runtime_malloc((uint32)sizeof(int32) * k))) { + bh_memcpy_s(i32_consts_new, (uint32)sizeof(int32) * k, + i32_consts_old, (uint32)sizeof(int32) * k); + /* Free the old memory */ + wasm_runtime_free(i32_consts_old); + loader_ctx->i32_consts = i32_consts_new; + loader_ctx->i32_const_max_num = k; + } + loader_ctx->i32_const_num = k; + } + } } #endif @@ -8352,29 +8432,25 @@ re_scan: if (loader_ctx->p_code_compiled == NULL) goto re_scan; - func->const_cell_num = loader_ctx->const_cell_num; + func->const_cell_num = + loader_ctx->i64_const_num * 2 + loader_ctx->i32_const_num; if (func->const_cell_num > 0) { - int32 j; - - if (!(func->consts = func_const = loader_malloc( - func->const_cell_num * 4, error_buf, error_buf_size))) + if (!(func->consts = + loader_malloc((uint64)sizeof(uint32) * func->const_cell_num, + error_buf, error_buf_size))) goto fail; - - func_const_end = func->consts + func->const_cell_num * 4; - /* reverse the const buf */ - for (j = loader_ctx->num_const - 1; j >= 0; j--) { - Const *c = (Const *)(loader_ctx->const_buf + j * sizeof(Const)); - if (c->value_type == VALUE_TYPE_F64 - || c->value_type == VALUE_TYPE_I64) { - bh_memcpy_s(func_const, (uint32)(func_const_end - func_const), - &(c->value.f64), (uint32)sizeof(int64)); - func_const += sizeof(int64); - } - else { - bh_memcpy_s(func_const, (uint32)(func_const_end - func_const), - &(c->value.f32), (uint32)sizeof(int32)); - func_const += sizeof(int32); - } + if (loader_ctx->i64_const_num > 0) { + bh_memcpy_s(func->consts, + (uint32)sizeof(int64) * loader_ctx->i64_const_num, + loader_ctx->i64_consts, + (uint32)sizeof(int64) * loader_ctx->i64_const_num); + } + if (loader_ctx->i32_const_num > 0) { + bh_memcpy_s(func->consts + + sizeof(int64) * loader_ctx->i64_const_num, + (uint32)sizeof(int32) * loader_ctx->i32_const_num, + loader_ctx->i32_consts, + (uint32)sizeof(int32) * loader_ctx->i32_const_num); } }