From adaaf348edd327a1a89c7a4bd9417fbb0cc99532 Mon Sep 17 00:00:00 2001 From: Wenyong Huang Date: Sat, 23 Apr 2022 19:15:55 +0800 Subject: [PATCH] Refine opcode br_table for classic interpreter (#1112) Refine opcode br_table for classic interpreter as there may be a lot of leb128 decoding when the br count is big: 1. Use the bytecode itself to store the decoded leb br depths if each decoded depth can be stored with one byte 2. Create br_table cache to store the decoded leb br depths if the decoded depth cannot be stored with one byte After the optimization, the classic interpreter can access the br depths array by index, with no need to decode the leb128 again. Also fix the unchecked return value of record_fast_op() in the source debugging feature. --- core/iwasm/compilation/aot_compiler.c | 34 +++++ core/iwasm/interpreter/wasm.h | 13 ++ core/iwasm/interpreter/wasm_interp_classic.c | 27 +++- core/iwasm/interpreter/wasm_interp_fast.c | 1 + core/iwasm/interpreter/wasm_loader.c | 125 +++++++++++++++++-- core/iwasm/interpreter/wasm_mini_loader.c | 97 +++++++++++++- core/iwasm/interpreter/wasm_opcode.h | 12 +- 7 files changed, 292 insertions(+), 17 deletions(-) diff --git a/core/iwasm/compilation/aot_compiler.c b/core/iwasm/compilation/aot_compiler.c index e1516eebe..a0177be5e 100644 --- a/core/iwasm/compilation/aot_compiler.c +++ b/core/iwasm/compilation/aot_compiler.c @@ -276,8 +276,13 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) aot_set_last_error("allocate memory failed."); goto fail; } +#if WASM_ENABLE_FAST_INTERP != 0 for (i = 0; i <= br_count; i++) read_leb_uint32(frame_ip, frame_ip_end, br_depths[i]); +#else + for (i = 0; i <= br_count; i++) + br_depths[i] = *frame_ip++; +#endif if (!aot_compile_op_br_table(comp_ctx, func_ctx, br_depths, br_count, &frame_ip)) { @@ -288,6 +293,35 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) wasm_runtime_free(br_depths); break; +#if WASM_ENABLE_FAST_INTERP == 0 + case EXT_OP_BR_TABLE_CACHE: + { 
+ BrTableCache *node = bh_list_first_elem( + comp_ctx->comp_data->wasm_module->br_table_cache_list); + BrTableCache *node_next; + uint8 *p_opcode = frame_ip - 1; + + read_leb_uint32(frame_ip, frame_ip_end, br_count); + + while (node) { + node_next = bh_list_elem_next(node); + if (node->br_table_op_addr == p_opcode) { + br_depths = node->br_depths; + if (!aot_compile_op_br_table(comp_ctx, func_ctx, + br_depths, br_count, + &frame_ip)) { + return false; + } + break; + } + node = node_next; + } + bh_assert(node); + + break; + } +#endif + case WASM_OP_RETURN: if (!aot_compile_op_return(comp_ctx, func_ctx, &frame_ip)) return false; diff --git a/core/iwasm/interpreter/wasm.h b/core/iwasm/interpreter/wasm.h index ac7909249..553545276 100644 --- a/core/iwasm/interpreter/wasm.h +++ b/core/iwasm/interpreter/wasm.h @@ -319,6 +319,14 @@ typedef struct StringNode { char *str; } StringNode, *StringList; +typedef struct BrTableCache { + struct BrTableCache *next; + /* Address of br_table opcode */ + uint8 *br_table_op_addr; + uint32 br_count; + uint32 br_depths[1]; +} BrTableCache; + #if WASM_ENABLE_DEBUG_INTERP != 0 typedef struct WASMFastOPCodeNode { struct WASMFastOPCodeNode *next; @@ -326,6 +334,7 @@ typedef struct WASMFastOPCodeNode { uint8 orig_op; } WASMFastOPCodeNode; #endif + struct WASMModule { /* Module type, for module loaded from WASM bytecode binary, this field is Wasm_Module_Bytecode; @@ -403,6 +412,10 @@ struct WASMModule { bool possible_memory_grow; StringList const_str_list; +#if WASM_ENABLE_FAST_INTERP == 0 + bh_list br_table_cache_list_head; + bh_list *br_table_cache_list; +#endif #if WASM_ENABLE_LIBC_WASI != 0 WASIArguments wasi_args; diff --git a/core/iwasm/interpreter/wasm_interp_classic.c b/core/iwasm/interpreter/wasm_interp_classic.c index 5305c7758..6a644bc6d 100644 --- a/core/iwasm/interpreter/wasm_interp_classic.c +++ b/core/iwasm/interpreter/wasm_interp_classic.c @@ -1199,12 +1199,36 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, lidx = 
POP_I32(); if (lidx > count) lidx = count; - for (i = 0; i < lidx; i++) - skip_leb(frame_ip); - read_leb_uint32(frame_ip, frame_ip_end, depth); + depth = frame_ip[lidx]; goto label_pop_csp_n; } + HANDLE_OP(EXT_OP_BR_TABLE_CACHE) + { + BrTableCache *node = + bh_list_first_elem(module->module->br_table_cache_list); + BrTableCache *node_next; + +#if WASM_ENABLE_THREAD_MGR != 0 + CHECK_SUSPEND_FLAGS(); +#endif + lidx = POP_I32(); + + while (node) { + node_next = bh_list_elem_next(node); + if (node->br_table_op_addr == frame_ip - 1) { + /* An out-of-range index selects the default (last) label */ + if (lidx > node->br_count) + lidx = node->br_count; + depth = node->br_depths[lidx]; + goto label_pop_csp_n; + } + node = node_next; + } + bh_assert(0); + HANDLE_OP_END(); + } + HANDLE_OP(WASM_OP_RETURN) { frame_sp -= cur_func->ret_cell_num; diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 1c817b965..48dcb8f16 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -3539,6 +3539,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP(EXT_OP_BLOCK) HANDLE_OP(EXT_OP_LOOP) HANDLE_OP(EXT_OP_IF) + HANDLE_OP(EXT_OP_BR_TABLE_CACHE) { wasm_set_exception(module, "unsupported opcode"); goto got_exception; diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 63d2036e3..0b603ffb2 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -3245,6 +3245,9 @@ create_module(char *error_buf, uint32 error_buf_size) { WASMModule *module = loader_malloc(sizeof(WASMModule), error_buf, error_buf_size); +#if WASM_ENABLE_FAST_INTERP == 0 + bh_list_status ret; +#endif if (!module) { return NULL; @@ -3255,6 +3258,13 @@ create_module(char *error_buf, uint32 error_buf_size) /* Set start_function to -1, means no start function */ module->start_function = (uint32)-1; +#if WASM_ENABLE_FAST_INTERP == 0 + module->br_table_cache_list = &module->br_table_cache_list_head; + ret = bh_list_init(module->br_table_cache_list); + 
bh_assert(ret == BH_LIST_SUCCESS); + (void)ret; +#endif + #if WASM_ENABLE_MULTI_MODULE != 0 module->import_module_list = &module->import_module_list_head; #endif @@ -3269,16 +3279,18 @@ create_module(char *error_buf, uint32 error_buf_size) } #if WASM_ENABLE_DEBUG_INTERP != 0 -static void -record_fast_op(WASMModule *module, uint8 *pos, uint8 orig_op) +static bool +record_fast_op(WASMModule *module, uint8 *pos, uint8 orig_op, char *error_buf, + uint32 error_buf_size) { WASMFastOPCodeNode *fast_op = - loader_malloc(sizeof(WASMFastOPCodeNode), NULL, 0); + loader_malloc(sizeof(WASMFastOPCodeNode), error_buf, error_buf_size); if (fast_op) { fast_op->offset = pos - module->load_addr; fast_op->orig_op = orig_op; bh_list_insert(&module->fast_opcode_list, fast_op); } + return fast_op ? true : false; } #endif @@ -3695,6 +3707,18 @@ wasm_loader_unload(WASMModule *module) } } +#if WASM_ENABLE_FAST_INTERP == 0 + if (module->br_table_cache_list) { + BrTableCache *node = bh_list_first_elem(module->br_table_cache_list); + BrTableCache *node_next; + while (node) { + node_next = bh_list_elem_next(node); + wasm_runtime_free(node); + node = node_next; + } + } +#endif + #if WASM_ENABLE_MULTI_MODULE != 0 /* just release the sub module list */ if (module->import_module_list) { @@ -3854,10 +3878,24 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache, case WASM_OP_BR_TABLE: read_leb_uint32(p, p_end, count); /* lable num */ - for (i = 0; i <= count; i++) /* lableidxs */ +#if WASM_ENABLE_FAST_INTERP != 0 + for (i = 0; i <= count; i++) /* lableidxs */ skip_leb_uint32(p, p_end); +#else + p += count + 1; + while (*p == WASM_OP_NOP) + p++; +#endif break; +#if WASM_ENABLE_FAST_INTERP == 0 + case EXT_OP_BR_TABLE_CACHE: + read_leb_uint32(p, p_end, count); /* lable num */ + while (*p == WASM_OP_NOP) + p++; + break; +#endif + case WASM_OP_RETURN: break; @@ -6529,7 +6567,10 @@ re_scan: * the block quickly. 
*/ #if WASM_ENABLE_DEBUG_INTERP != 0 - record_fast_op(module, p_org, *p_org); + if (!record_fast_op(module, p_org, *p_org, error_buf, + error_buf_size)) { + goto fail; + } #endif *p_org = EXT_OP_BLOCK + (opcode - WASM_OP_BLOCK); #endif @@ -6778,6 +6819,13 @@ re_scan: { uint8 *ret_types = NULL; uint32 ret_count = 0; +#if WASM_ENABLE_FAST_INTERP == 0 + uint8 *p_depth_begin, *p_depth; + uint32 depth, j; + BrTableCache *br_table_cache = NULL; + + p_org = p - 1; +#endif read_leb_uint32(p, p_end, count); #if WASM_ENABLE_FAST_INTERP != 0 @@ -6785,6 +6833,9 @@ re_scan: #endif POP_I32(); +#if WASM_ENABLE_FAST_INTERP == 0 + p_depth_begin = p_depth = p; +#endif for (i = 0; i <= count; i++) { if (!(frame_csp_tmp = check_branch_block(loader_ctx, &p, p_end, @@ -6818,8 +6869,57 @@ re_scan: goto fail; } } + +#if WASM_ENABLE_FAST_INTERP == 0 + depth = (uint32)(loader_ctx->frame_csp - 1 - frame_csp_tmp); + if (br_table_cache) { + br_table_cache->br_depths[i] = depth; + } + else { + if (depth > 255) { + /* The depth cannot be stored in one byte, + create br_table cache to store each depth */ +#if WASM_ENABLE_DEBUG_INTERP != 0 + if (!record_fast_op(module, p_org, *p_org, + error_buf, error_buf_size)) { + goto fail; + } +#endif + if (!(br_table_cache = loader_malloc( + offsetof(BrTableCache, br_depths) + + sizeof(uint32) + * (uint64)(count + 1), + error_buf, error_buf_size))) { + goto fail; + } + *p_org = EXT_OP_BR_TABLE_CACHE; + br_table_cache->br_table_op_addr = p_org; + br_table_cache->br_count = count; + /* Copy previous depths which are one byte */ + for (j = 0; j < i; j++) { + br_table_cache->br_depths[j] = p_depth_begin[j]; + } + br_table_cache->br_depths[i] = depth; + bh_list_insert(module->br_table_cache_list, + br_table_cache); + } + else { + /* The depth can be stored in one byte, use the + byte of the leb to store it */ + *p_depth++ = (uint8)depth; + } + } +#endif } +#if WASM_ENABLE_FAST_INTERP == 0 + /* Set the tailing bytes to nop */ + if (br_table_cache) + p_depth = 
p_depth_begin; + while (p_depth < p) + *p_depth++ = WASM_OP_NOP; +#endif + RESET_STACK(); SET_CUR_BLOCK_STACK_POLYMORPHIC_STATE(true); break; @@ -7563,7 +7663,10 @@ re_scan: if (global_type == VALUE_TYPE_I64 || global_type == VALUE_TYPE_F64) { #if WASM_ENABLE_DEBUG_INTERP != 0 - record_fast_op(module, p_org, *p_org); + if (!record_fast_op(module, p_org, *p_org, error_buf, + error_buf_size)) { + goto fail; + } #endif *p_org = WASM_OP_GET_GLOBAL_64; } @@ -7617,14 +7720,20 @@ re_scan: if (global_type == VALUE_TYPE_I64 || global_type == VALUE_TYPE_F64) { #if WASM_ENABLE_DEBUG_INTERP != 0 - record_fast_op(module, p_org, *p_org); + if (!record_fast_op(module, p_org, *p_org, error_buf, + error_buf_size)) { + goto fail; + } #endif *p_org = WASM_OP_SET_GLOBAL_64; } else if (module->aux_stack_size > 0 && global_idx == module->aux_stack_top_global_index) { #if WASM_ENABLE_DEBUG_INTERP != 0 - record_fast_op(module, p_org, *p_org); + if (!record_fast_op(module, p_org, *p_org, error_buf, + error_buf_size)) { + goto fail; + } #endif *p_org = WASM_OP_SET_GLOBAL_AUX_STACK; } diff --git a/core/iwasm/interpreter/wasm_mini_loader.c b/core/iwasm/interpreter/wasm_mini_loader.c index a86a8bc16..97cf6b50d 100644 --- a/core/iwasm/interpreter/wasm_mini_loader.c +++ b/core/iwasm/interpreter/wasm_mini_loader.c @@ -2151,6 +2151,9 @@ create_module(char *error_buf, uint32 error_buf_size) { WASMModule *module = loader_malloc(sizeof(WASMModule), error_buf, error_buf_size); +#if WASM_ENABLE_FAST_INTERP == 0 + bh_list_status ret; +#endif if (!module) { return NULL; @@ -2161,6 +2164,13 @@ create_module(char *error_buf, uint32 error_buf_size) /* Set start_function to -1, means no start function */ module->start_function = (uint32)-1; +#if WASM_ENABLE_FAST_INTERP == 0 + module->br_table_cache_list = &module->br_table_cache_list_head; + ret = bh_list_init(module->br_table_cache_list); + bh_assert(ret == BH_LIST_SUCCESS); + (void)ret; +#endif + return module; } @@ -2432,6 +2442,18 @@ 
wasm_loader_unload(WASMModule *module) } } +#if WASM_ENABLE_FAST_INTERP == 0 + if (module->br_table_cache_list) { + BrTableCache *node = bh_list_first_elem(module->br_table_cache_list); + BrTableCache *node_next; + while (node) { + node_next = bh_list_elem_next(node); + wasm_runtime_free(node); + node = node_next; + } + } +#endif + wasm_runtime_free(module); } @@ -2557,10 +2579,24 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache, case WASM_OP_BR_TABLE: read_leb_uint32(p, p_end, count); /* lable num */ - for (i = 0; i <= count; i++) /* lableidxs */ +#if WASM_ENABLE_FAST_INTERP != 0 + for (i = 0; i <= count; i++) /* lableidxs */ skip_leb_uint32(p, p_end); +#else + p += count + 1; + while (*p == WASM_OP_NOP) + p++; +#endif break; +#if WASM_ENABLE_FAST_INTERP == 0 + case EXT_OP_BR_TABLE_CACHE: + read_leb_uint32(p, p_end, count); /* lable num */ + while (*p == WASM_OP_NOP) + p++; + break; +#endif + case WASM_OP_RETURN: break; @@ -5149,6 +5185,13 @@ re_scan: { uint8 *ret_types = NULL; uint32 ret_count = 0; +#if WASM_ENABLE_FAST_INTERP == 0 + uint8 *p_depth_begin, *p_depth; + uint32 depth, j; + BrTableCache *br_table_cache = NULL; + + p_org = p - 1; +#endif read_leb_uint32(p, p_end, count); #if WASM_ENABLE_FAST_INTERP != 0 @@ -5156,13 +5199,65 @@ re_scan: #endif POP_I32(); +#if WASM_ENABLE_FAST_INTERP == 0 + p_depth_begin = p_depth = p; +#endif for (i = 0; i <= count; i++) { if (!(frame_csp_tmp = check_branch_block(loader_ctx, &p, p_end, error_buf, error_buf_size))) goto fail; + +#if WASM_ENABLE_FAST_INTERP == 0 + depth = (uint32)(loader_ctx->frame_csp - 1 - frame_csp_tmp); + if (br_table_cache) { + br_table_cache->br_depths[i] = depth; + } + else { + if (depth > 255) { + /* The depth cannot be stored in one byte, + create br_table cache to store each depth */ +#if WASM_ENABLE_DEBUG_INTERP != 0 + if (!record_fast_op(module, p_org, *p_org, + error_buf, error_buf_size)) { + goto fail; + } +#endif + if (!(br_table_cache = loader_malloc( + 
offsetof(BrTableCache, br_depths) + + sizeof(uint32) + * (uint64)(count + 1), + error_buf, error_buf_size))) { + goto fail; + } + *p_org = EXT_OP_BR_TABLE_CACHE; + br_table_cache->br_table_op_addr = p_org; + br_table_cache->br_count = count; + /* Copy previous depths which are one byte */ + for (j = 0; j < i; j++) { + br_table_cache->br_depths[j] = p_depth_begin[j]; + } + br_table_cache->br_depths[i] = depth; + bh_list_insert(module->br_table_cache_list, + br_table_cache); + } + else { + /* The depth can be stored in one byte, use the + byte of the leb to store it */ + *p_depth++ = (uint8)depth; + } + } +#endif } +#if WASM_ENABLE_FAST_INTERP == 0 + /* Set the tailing bytes to nop */ + if (br_table_cache) + p_depth = p_depth_begin; + while (p_depth < p) + *p_depth++ = WASM_OP_NOP; +#endif + RESET_STACK(); SET_CUR_BLOCK_STACK_POLYMORPHIC_STATE(true); diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index f31d32626..cd7478a6b 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -263,12 +263,13 @@ typedef enum WASMOpcode { WASM_OP_REF_IS_NULL = 0xd1, /* ref.is_null */ WASM_OP_REF_FUNC = 0xd2, /* ref.func */ - EXT_OP_BLOCK = 0xd3, /* block with blocktype */ - EXT_OP_LOOP = 0xd4, /* loop with blocktype */ - EXT_OP_IF = 0xd5, /* if with blocktype */ + EXT_OP_BLOCK = 0xd3, /* block with blocktype */ + EXT_OP_LOOP = 0xd4, /* loop with blocktype */ + EXT_OP_IF = 0xd5, /* if with blocktype */ + EXT_OP_BR_TABLE_CACHE = 0xd6, /* br_table from cache */ #if WASM_ENABLE_DEBUG_INTERP != 0 - DEBUG_OP_BREAK = 0xd6, /* debug break point */ + DEBUG_OP_BREAK = 0xd7, /* debug break point */ #endif /* Post-MVP extend op prefix */ @@ -675,7 +676,7 @@ typedef enum WASMAtomicEXTOpcode { #if WASM_ENABLE_DEBUG_INTERP != 0 #define DEF_DEBUG_BREAK_HANDLE(_name) \ - _name[DEBUG_OP_BREAK] = HANDLE_OPCODE(DEBUG_OP_BREAK); /* 0xd6 */ + _name[DEBUG_OP_BREAK] = HANDLE_OPCODE(DEBUG_OP_BREAK); /* 0xd7 */ #else #define 
DEF_DEBUG_BREAK_HANDLE(_name) #endif @@ -901,6 +902,7 @@ typedef enum WASMAtomicEXTOpcode { HANDLE_OPCODE(EXT_OP_BLOCK), /* 0xd3 */ \ HANDLE_OPCODE(EXT_OP_LOOP), /* 0xd4 */ \ HANDLE_OPCODE(EXT_OP_IF), /* 0xd5 */ \ + HANDLE_OPCODE(EXT_OP_BR_TABLE_CACHE), /* 0xd6 */ \ }; \ do { \ _name[WASM_OP_MISC_PREFIX] = \