From 9c2bdd1b831479479eb287b0d065ee46049691e7 Mon Sep 17 00:00:00 2001 From: Wenyong Huang Date: Wed, 6 Jul 2022 19:20:55 +0800 Subject: [PATCH 1/2] Refine fe translate op br_if and br_table (#1263) Refine fast jit frontend translation of opcodes br_if and br_table: for br_if, no need to clear the jit frame after handling the new basic block, so as to re-use the registers of the current basic block; for br_table, no need to create a new basic block to jump to if there are no parameters/results to copy to the new block, just jump to the existing basic block. --- core/iwasm/fast-jit/fe/jit_emit_control.c | 189 ++++++++++++++++++---- 1 file changed, 154 insertions(+), 35 deletions(-) diff --git a/core/iwasm/fast-jit/fe/jit_emit_control.c b/core/iwasm/fast-jit/fe/jit_emit_control.c index a0a3f96f4..a8a62486b 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_control.c +++ b/core/iwasm/fast-jit/fe/jit_emit_control.c @@ -449,9 +449,24 @@ handle_op_end(JitCompContext *cc, uint8 **p_frame_ip, bool is_block_polymorphic) incoming_insn = block->incoming_insns_for_end_bb; while (incoming_insn) { insn = incoming_insn->insn; - bh_assert(insn->opcode == JIT_OP_JMP || insn->opcode == JIT_OP_BNE); - *(jit_insn_opnd(insn, incoming_insn->opnd_idx)) = - jit_basic_block_label(block->basic_block_end); + bh_assert(insn->opcode == JIT_OP_JMP || insn->opcode == JIT_OP_BNE + || insn->opcode == JIT_OP_LOOKUPSWITCH); + if (insn->opcode == JIT_OP_JMP || insn->opcode == JIT_OP_BNE) { + *(jit_insn_opnd(insn, incoming_insn->opnd_idx)) = + jit_basic_block_label(block->basic_block_end); + } + else { + /* Patch LOOKUPSWITCH INSN */ + JitOpndLookupSwitch *opnd = jit_insn_opndls(insn); + if (incoming_insn->opnd_idx < opnd->match_pairs_num) { + opnd->match_pairs[incoming_insn->opnd_idx].target = + jit_basic_block_label(block->basic_block_end); + } + else { + opnd->default_target = + jit_basic_block_label(block->basic_block_end); + } + } incoming_insn = incoming_insn->next; } @@ -735,13 +750,41 @@ 
jit_compile_op_end(JitCompContext *cc, uint8 **p_frame_ip) return handle_op_end(cc, p_frame_ip, false); } +/* Check whether need to copy arities when jumping from current block + to the dest block */ +static bool +check_copy_arities(const JitBlock *block_dst, JitFrame *jit_frame) +{ + JitValueSlot *frame_sp_src = NULL; + + if (block_dst->label_type == LABEL_TYPE_LOOP) { + frame_sp_src = + jit_frame->sp + - wasm_get_cell_num(block_dst->param_types, block_dst->param_count); + /* There are parameters to copy and the src/dst addr are different */ + return (block_dst->param_count > 0 + && block_dst->frame_sp_begin != frame_sp_src) + ? true + : false; + } + else { + frame_sp_src = jit_frame->sp + - wasm_get_cell_num(block_dst->result_types, + block_dst->result_count); + /* There are results to copy and the src/dst addr are different */ + return (block_dst->result_count > 0 + && block_dst->frame_sp_begin != frame_sp_src) + ? true + : false; + } +} + static bool handle_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) { JitFrame *jit_frame; JitBlock *block_dst, *block; JitReg frame_sp_dst; - JitValueSlot *frame_sp_src = NULL; JitInsn *insn; bool copy_arities; uint32 offset; @@ -758,19 +801,9 @@ handle_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) jit_frame = cc->jit_frame; - if (block_dst->label_type == LABEL_TYPE_LOOP) { - frame_sp_src = - jit_frame->sp - - wasm_get_cell_num(block_dst->param_types, block_dst->param_count); - } - else { - frame_sp_src = jit_frame->sp - - wasm_get_cell_num(block_dst->result_types, - block_dst->result_count); - } - - /* Only copy parameters or results when the src/dst addr are different */ - copy_arities = (block_dst->frame_sp_begin != frame_sp_src) ? 
true : false; + /* Only copy parameters or results when their count > 0 and + the src/dst addr are different */ + copy_arities = check_copy_arities(block_dst, jit_frame); if (copy_arities) { frame_sp_dst = jit_cc_new_reg_ptr(cc); @@ -832,15 +865,51 @@ jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) && handle_next_reachable_block(cc, p_frame_ip); } +static JitFrame * +jit_frame_clone(const JitFrame *jit_frame) +{ + JitFrame *jit_frame_cloned; + uint32 max_locals = jit_frame->max_locals; + uint32 max_stacks = jit_frame->max_stacks; + uint32 total_size; + + total_size = (uint32)(offsetof(JitFrame, lp) + + sizeof(*jit_frame->lp) * (max_locals + max_stacks)); + + jit_frame_cloned = jit_calloc(total_size); + if (jit_frame_cloned) { + bh_memcpy_s(jit_frame_cloned, total_size, jit_frame, total_size); + jit_frame_cloned->sp = + jit_frame_cloned->lp + (jit_frame->sp - jit_frame->lp); + } + + return jit_frame_cloned; +} + +static void +jit_frame_copy(JitFrame *jit_frame_dst, const JitFrame *jit_frame_src) +{ + uint32 max_locals = jit_frame_src->max_locals; + uint32 max_stacks = jit_frame_src->max_stacks; + uint32 total_size; + + total_size = + (uint32)(offsetof(JitFrame, lp) + + sizeof(*jit_frame_src->lp) * (max_locals + max_stacks)); + bh_memcpy_s(jit_frame_dst, total_size, jit_frame_src, total_size); + jit_frame_dst->sp = + jit_frame_dst->lp + (jit_frame_src->sp - jit_frame_src->lp); +} + bool jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) { - JitFrame *jit_frame; + JitFrame *jit_frame, *jit_frame_cloned; JitBlock *block_dst; JitReg cond; JitBasicBlock *cur_basic_block, *if_basic_block = NULL; - JitValueSlot *frame_sp_src; JitInsn *insn; + bool copy_arities; if (!(block_dst = get_target_block(cc, br_depth))) { return false; @@ -853,18 +922,11 @@ jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) cur_basic_block = cc->cur_basic_block; gen_commit_values(jit_frame, jit_frame->lp, 
jit_frame->sp); - if (block_dst->label_type == LABEL_TYPE_LOOP) { - frame_sp_src = - jit_frame->sp - - wasm_get_cell_num(block_dst->param_types, block_dst->param_count); - } - else { - frame_sp_src = jit_frame->sp - - wasm_get_cell_num(block_dst->result_types, - block_dst->result_count); - } + /* Only opy parameters or results when their count > 0 and + the src/dst addr are different */ + copy_arities = check_copy_arities(block_dst, jit_frame); - if (block_dst->frame_sp_begin == frame_sp_src) { + if (!copy_arities) { if (block_dst->label_type == LABEL_TYPE_LOOP) { if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0)) || !GEN_INSN( @@ -899,11 +961,26 @@ jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) SET_BUILDER_POS(if_basic_block); SET_BB_BEGIN_BCIP(if_basic_block, *p_frame_ip - 1); - clear_values(cc->jit_frame); - if (!handle_op_br(cc, br_depth, p_frame_ip)) + /* Clone current jit frame to a new jit frame */ + if (!(jit_frame_cloned = jit_frame_clone(jit_frame))) { + jit_set_last_error(cc, "allocate memory failed"); goto fail; + } - /* continue processing opcodes after BR_IF */ + /* Clear current jit frame so that the registers + in the new basic block will be loaded again */ + clear_values(jit_frame); + if (!handle_op_br(cc, br_depth, p_frame_ip)) { + jit_free(jit_frame_cloned); + goto fail; + } + + /* Restore the jit frame so that the registers can + be used again in current basic block */ + jit_frame_copy(jit_frame, jit_frame_cloned); + jit_free(jit_frame_cloned); + + /* Continue processing opcodes after BR_IF */ SET_BUILDER_POS(cur_basic_block); return true; fail: @@ -939,9 +1016,51 @@ jit_compile_op_br_table(JitCompContext *cc, uint32 *br_depths, uint32 br_count, for (i = 0, opnd = jit_insn_opndls(insn); i < br_count + 1; i++) { JitBasicBlock *basic_block = NULL; + JitBlock *block_dst; + bool copy_arities; - /* TODO: refine the code */ + if (!(block_dst = get_target_block(cc, br_depths[i]))) { + goto fail; + } + /* Only copy 
parameters or results when their count > 0 and + the src/dst addr are different */ + copy_arities = check_copy_arities(block_dst, cc->jit_frame); + + if (!copy_arities) { + /* No need to create new basic block, directly jump to + the existing basic block when no need to copy arities */ + if (i == br_count) { + if (block_dst->label_type == LABEL_TYPE_LOOP) { + opnd->default_target = + jit_basic_block_label(block_dst->basic_block_entry); + } + else { + bh_assert(!block_dst->basic_block_end); + if (!jit_block_add_incoming_insn(block_dst, insn, i)) { + jit_set_last_error(cc, "add incoming insn failed"); + goto fail; + } + } + } + else { + opnd->match_pairs[i].value = i; + if (block_dst->label_type == LABEL_TYPE_LOOP) { + opnd->match_pairs[i].target = + jit_basic_block_label(block_dst->basic_block_entry); + } + else { + bh_assert(!block_dst->basic_block_end); + if (!jit_block_add_incoming_insn(block_dst, insn, i)) { + jit_set_last_error(cc, "add incoming insn failed"); + goto fail; + } + } + } + continue; + } + + /* Create new basic block when need to copy arities */ CREATE_BASIC_BLOCK(basic_block); SET_BB_BEGIN_BCIP(basic_block, *p_frame_ip - 1); @@ -959,7 +1078,7 @@ jit_compile_op_br_table(JitCompContext *cc, uint32 *br_depths, uint32 br_count, goto fail; } - /* go to next aviable block */ + /* Search next available block to handle */ return handle_next_reachable_block(cc, p_frame_ip); fail: return false; From de52901583753a25ef805dbc13f5fa47709c3adb Mon Sep 17 00:00:00 2001 From: Wenyong Huang Date: Thu, 7 Jul 2022 11:16:58 +0800 Subject: [PATCH 2/2] Refine fast jit frontend: merge cmp and if/br_if (#1267) Merge below INSNs translated by wasm opcodes cmp + if or cmp + br_if: CMP, SELECTcc, CMP, Bcc into CMP, Bcc So as to reduce the instructions --- .../fast-jit/cg/x86-64/jit_codegen_x86_64.cpp | 62 ++++---- core/iwasm/fast-jit/fe/jit_emit_control.c | 136 ++++++++++++++---- core/iwasm/fast-jit/fe/jit_emit_control.h | 5 +- core/iwasm/fast-jit/jit_frontend.c | 44 +++++- 
core/iwasm/fast-jit/jit_regalloc.c | 14 +- 5 files changed, 197 insertions(+), 64 deletions(-) diff --git a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp index b1b500600..3de6aef90 100644 --- a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp +++ b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp @@ -381,7 +381,7 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a, bool fp_cmp = cc->last_cmp_on_fp; - bh_assert(!fp_cmp || (fp_cmp && (op == GES))); + bh_assert(!fp_cmp || (fp_cmp && (op == GTS || op == GES))); switch (op) { case EQ: @@ -396,7 +396,10 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a, } case GTS: { - a.jg(imm); + if (fp_cmp) + a.ja(imm); + else + a.jg(imm); break; } case LES: @@ -5149,8 +5152,7 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, COND_OP op, int32 offset) { Imm target(INT32_MAX); - char *stream = (char *)a.code()->sectionById(0)->buffer().data() - + a.code()->sectionById(0)->buffer().size(); + char *stream; bool fp_cmp = cc->last_cmp_on_fp; bh_assert(!fp_cmp || (fp_cmp && (op == GTS || op == GES))); @@ -5223,8 +5225,14 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, COND_OP op, } } - /* The offset written by asmjit is always 0, we patch it again */ - *(int32 *)(stream + 2) = offset; + JitErrorHandler *err_handler = (JitErrorHandler *)a.code()->errorHandler(); + + if (!err_handler->err) { + /* The offset written by asmjit is always 0, we patch it again */ + stream = (char *)a.code()->sectionById(0)->buffer().data() + + a.code()->sectionById(0)->buffer().size() - 6; + *(int32 *)(stream + 2) = offset; + } return true; } @@ -5302,24 +5310,30 @@ fail: } /* jmp to dst label */ -#define JMP_TO_LABEL(label_dst, label_src) \ - do { \ - if (label_is_ahead(cc, label_dst, label_src)) { \ - char *stream = (char *)a.code()->sectionById(0)->buffer().data() \ - + a.code()->sectionById(0)->buffer().size(); \ - int32 _offset = 
label_offsets[label_dst] \ - - a.code()->sectionById(0)->buffer().size(); \ - Imm imm(INT32_MAX); \ - a.jmp(imm); \ - /* The offset written by asmjit is always 0, we patch it again, \ - 6 is the size of jmp instruciton */ \ - *(int32 *)(stream + 2) = _offset - 6; \ - } \ - else { \ - if (!jmp_from_label_to_label(a, jmp_info_list, label_dst, \ - label_src)) \ - GOTO_FAIL; \ - } \ +#define JMP_TO_LABEL(label_dst, label_src) \ + do { \ + if (label_is_ahead(cc, label_dst, label_src)) { \ + JitErrorHandler *err_handler = \ + (JitErrorHandler *)a.code()->errorHandler(); \ + int32 _offset; \ + char *stream; \ + Imm imm(INT32_MAX); \ + a.jmp(imm); \ + if (!err_handler->err) { \ + /* The offset written by asmjit is always 0, we patch it \ + again, 6 is the size of jmp instruction */ \ + stream = (char *)a.code()->sectionById(0)->buffer().data() \ + + a.code()->sectionById(0)->buffer().size() - 6; \ + _offset = label_offsets[label_dst] \ + - a.code()->sectionById(0)->buffer().size(); \ + *(int32 *)(stream + 2) = _offset; \ + } \ + } \ + else { \ + if (!jmp_from_label_to_label(a, jmp_info_list, label_dst, \ + label_src)) \ + GOTO_FAIL; \ + } \ } while (0) /** diff --git a/core/iwasm/fast-jit/fe/jit_emit_control.c b/core/iwasm/fast-jit/fe/jit_emit_control.c index a8a62486b..2ac8fc2e9 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_control.c +++ b/core/iwasm/fast-jit/fe/jit_emit_control.c @@ -164,9 +164,40 @@ fail: return false; } +static bool +jit_reg_is_i32_const(JitCompContext *cc, JitReg reg, int32 val) +{ + return (jit_reg_kind(reg) == JIT_REG_KIND_I32 && jit_reg_is_const(reg) + && jit_cc_get_const_I32(cc, reg) == val) + ? 
true + : false; +} + +/** + * get the last two insns: + * CMP cmp_reg, r0, r1 + * SELECTcc r2, cmp_reg, 1, 0 + */ +static void +get_last_cmp_and_selectcc(JitCompContext *cc, JitReg cond, JitInsn **p_insn_cmp, + JitInsn **p_insn_select) +{ + JitInsn *insn = jit_basic_block_last_insn(cc->cur_basic_block); + + if (insn && insn->prev && insn->prev->opcode == JIT_OP_CMP + && insn->opcode >= JIT_OP_SELECTEQ && insn->opcode <= JIT_OP_SELECTLEU + && *jit_insn_opnd(insn, 0) == cond + && jit_reg_is_i32_const(cc, *jit_insn_opnd(insn, 2), 1) + && jit_reg_is_i32_const(cc, *jit_insn_opnd(insn, 3), 0)) { + *p_insn_cmp = insn->prev; + *p_insn_select = insn; + } +} + static bool push_jit_block_to_stack_and_pass_params(JitCompContext *cc, JitBlock *block, - JitBasicBlock *basic_block, JitReg cond) + JitBasicBlock *basic_block, JitReg cond, + bool merge_cmp_and_if) { JitFrame *jit_frame = cc->jit_frame; JitValue *value_list_head = NULL, *value_list_end = NULL, *jit_value; @@ -205,6 +236,12 @@ push_jit_block_to_stack_and_pass_params(JitCompContext *cc, JitBlock *block, /* Continue to translate current block */ } else { + JitInsn *insn_select = NULL, *insn_cmp = NULL; + + if (merge_cmp_and_if) { + get_last_cmp_and_selectcc(cc, cond, &insn_cmp, &insn_select); + } + /* Commit register values to locals and stacks */ gen_commit_values(jit_frame, jit_frame->lp, jit_frame->sp); @@ -227,11 +264,26 @@ push_jit_block_to_stack_and_pass_params(JitCompContext *cc, JitBlock *block, } else { /* IF block with condition br insn */ - if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0)) - || !(insn = GEN_INSN(BNE, cc->cmp_reg, - jit_basic_block_label(basic_block), 0))) { - jit_set_last_error(cc, "generate cond br failed"); - goto fail; + if (insn_select && insn_cmp) { + /* Change `CMP + SELECTcc` into `CMP + Bcc` */ + if (!(insn = GEN_INSN(BEQ, cc->cmp_reg, + jit_basic_block_label(basic_block), 0))) { + jit_set_last_error(cc, "generate cond br failed"); + goto fail; + } + insn->opcode = + JIT_OP_BEQ 
+ (insn_select->opcode - JIT_OP_SELECTEQ); + jit_insn_unlink(insn_select); + jit_insn_delete(insn_select); + } + else { + if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0)) + || !(insn = + GEN_INSN(BNE, cc->cmp_reg, + jit_basic_block_label(basic_block), 0))) { + jit_set_last_error(cc, "generate cond br failed"); + goto fail; + } } /* Don't create else basic block or end basic block now, just @@ -449,9 +501,15 @@ handle_op_end(JitCompContext *cc, uint8 **p_frame_ip, bool is_block_polymorphic) incoming_insn = block->incoming_insns_for_end_bb; while (incoming_insn) { insn = incoming_insn->insn; - bh_assert(insn->opcode == JIT_OP_JMP || insn->opcode == JIT_OP_BNE - || insn->opcode == JIT_OP_LOOKUPSWITCH); - if (insn->opcode == JIT_OP_JMP || insn->opcode == JIT_OP_BNE) { + + bh_assert( + insn->opcode == JIT_OP_JMP + || (insn->opcode >= JIT_OP_BEQ && insn->opcode <= JIT_OP_BLEU) + || insn->opcode == JIT_OP_LOOKUPSWITCH); + + if (insn->opcode == JIT_OP_JMP + || (insn->opcode >= JIT_OP_BEQ + && insn->opcode <= JIT_OP_BLEU)) { *(jit_insn_opnd(insn, incoming_insn->opnd_idx)) = jit_basic_block_label(block->basic_block_end); } @@ -467,6 +525,7 @@ handle_op_end(JitCompContext *cc, uint8 **p_frame_ip, bool is_block_polymorphic) jit_basic_block_label(block->basic_block_end); } } + incoming_insn = incoming_insn->next; } @@ -616,7 +675,7 @@ bool jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, uint8 *frame_ip_end, uint32 label_type, uint32 param_count, uint8 *param_types, uint32 result_count, - uint8 *result_types) + uint8 *result_types, bool merge_cmp_and_if) { BlockAddr block_addr_cache[BLOCK_ADDR_CACHE_SIZE][BLOCK_ADDR_CONFLICT_SIZE]; JitBlock *block; @@ -671,8 +730,8 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, if (label_type == LABEL_TYPE_BLOCK) { /* Push the new jit block to block stack and continue to translate current basic block */ - if (!push_jit_block_to_stack_and_pass_params(cc, block, - cc->cur_basic_block, 0)) + if 
(!push_jit_block_to_stack_and_pass_params( + cc, block, cc->cur_basic_block, 0, false)) goto fail; } else if (label_type == LABEL_TYPE_LOOP) { @@ -682,7 +741,7 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, /* Push the new jit block to block stack and continue to translate the new basic block */ if (!push_jit_block_to_stack_and_pass_params( - cc, block, block->basic_block_entry, 0)) + cc, block, block->basic_block_entry, 0, false)) goto fail; } else if (label_type == LABEL_TYPE_IF) { @@ -697,7 +756,8 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, SET_BB_BEGIN_BCIP(block->basic_block_entry, *p_frame_ip); if (!push_jit_block_to_stack_and_pass_params( - cc, block, block->basic_block_entry, value)) + cc, block, block->basic_block_entry, value, + merge_cmp_and_if)) goto fail; } else { @@ -706,7 +766,7 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, BASIC_BLOCK if cannot be reached, we treat it same as LABEL_TYPE_BLOCK and start to translate if branch */ if (!push_jit_block_to_stack_and_pass_params( - cc, block, cc->cur_basic_block, 0)) + cc, block, cc->cur_basic_block, 0, false)) goto fail; } else { @@ -715,7 +775,7 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, BASIC_BLOCK if cannot be reached, we treat it same as LABEL_TYPE_BLOCK and start to translate else branch */ if (!push_jit_block_to_stack_and_pass_params( - cc, block, cc->cur_basic_block, 0)) + cc, block, cc->cur_basic_block, 0, false)) goto fail; *p_frame_ip = else_addr + 1; } @@ -902,13 +962,14 @@ jit_frame_copy(JitFrame *jit_frame_dst, const JitFrame *jit_frame_src) } bool -jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) +jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, + bool merge_cmp_and_br_if, uint8 **p_frame_ip) { JitFrame *jit_frame, *jit_frame_cloned; JitBlock *block_dst; JitReg cond; JitBasicBlock *cur_basic_block, *if_basic_block = NULL; - JitInsn *insn; + JitInsn *insn, *insn_select = NULL, 
*insn_cmp = NULL; bool copy_arities; if (!(block_dst = get_target_block(cc, br_depth))) { @@ -918,27 +979,37 @@ jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) /* append IF to current basic block */ POP_I32(cond); + if (merge_cmp_and_br_if) { + get_last_cmp_and_selectcc(cc, cond, &insn_cmp, &insn_select); + } + jit_frame = cc->jit_frame; cur_basic_block = cc->cur_basic_block; gen_commit_values(jit_frame, jit_frame->lp, jit_frame->sp); + if (!(insn_select && insn_cmp)) { + if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0))) { + jit_set_last_error(cc, "generate cmp insn failed"); + goto fail; + } + } + /* Only opy parameters or results when their count > 0 and the src/dst addr are different */ copy_arities = check_copy_arities(block_dst, jit_frame); if (!copy_arities) { if (block_dst->label_type == LABEL_TYPE_LOOP) { - if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0)) - || !GEN_INSN( - BNE, cc->cmp_reg, - jit_basic_block_label(block_dst->basic_block_entry), 0)) { + if (!(insn = GEN_INSN( + BNE, cc->cmp_reg, + jit_basic_block_label(block_dst->basic_block_entry), + 0))) { jit_set_last_error(cc, "generate bne insn failed"); goto fail; } } else { - if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0)) - || !(insn = GEN_INSN(BNE, cc->cmp_reg, 0, 0))) { + if (!(insn = GEN_INSN(BNE, cc->cmp_reg, 0, 0))) { jit_set_last_error(cc, "generate bne insn failed"); goto fail; } @@ -947,16 +1018,27 @@ jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) goto fail; } } + if (insn_select && insn_cmp) { + /* Change `CMP + SELECTcc` into `CMP + Bcc` */ + insn->opcode = JIT_OP_BEQ + (insn_select->opcode - JIT_OP_SELECTEQ); + jit_insn_unlink(insn_select); + jit_insn_delete(insn_select); + } return true; } CREATE_BASIC_BLOCK(if_basic_block); - if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0)) - || !GEN_INSN(BNE, cc->cmp_reg, jit_basic_block_label(if_basic_block), - 0)) { + if (!(insn = GEN_INSN(BNE, cc->cmp_reg, + 
jit_basic_block_label(if_basic_block), 0))) { jit_set_last_error(cc, "generate bne insn failed"); goto fail; } + if (insn_select && insn_cmp) { + /* Change `CMP + SELECTcc` into `CMP + Bcc` */ + insn->opcode = JIT_OP_BEQ + (insn_select->opcode - JIT_OP_SELECTEQ); + jit_insn_unlink(insn_select); + jit_insn_delete(insn_select); + } SET_BUILDER_POS(if_basic_block); SET_BB_BEGIN_BCIP(if_basic_block, *p_frame_ip - 1); diff --git a/core/iwasm/fast-jit/fe/jit_emit_control.h b/core/iwasm/fast-jit/fe/jit_emit_control.h index f72a2e924..e1bc09a0a 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_control.h +++ b/core/iwasm/fast-jit/fe/jit_emit_control.h @@ -16,7 +16,7 @@ bool jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, uint8 *frame_ip_end, uint32 label_type, uint32 param_count, uint8 *param_types, uint32 result_count, - uint8 *result_types); + uint8 *result_types, bool merge_cmp_and_if); bool jit_compile_op_else(JitCompContext *cc, uint8 **p_frame_ip); @@ -28,7 +28,8 @@ bool jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip); bool -jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip); +jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, + bool merge_cmp_and_br_if, uint8 **p_frame_ip); bool jit_compile_op_br_table(JitCompContext *cc, uint32 *br_depths, uint32 br_count, diff --git a/core/iwasm/fast-jit/jit_frontend.c b/core/iwasm/fast-jit/jit_frontend.c index 8a887aad2..a84a48986 100644 --- a/core/iwasm/fast-jit/jit_frontend.c +++ b/core/iwasm/fast-jit/jit_frontend.c @@ -1010,6 +1010,7 @@ jit_compile_func(JitCompContext *cc) uint32 br_depth, *br_depths, br_count; uint32 func_idx, type_idx, mem_idx, local_idx, global_idx, i; uint32 bytes = 4, align, offset; + bool merge_cmp_and_if = false, merge_cmp_and_br_if = false; bool sign = true; int32 i32_const; int64 i64_const; @@ -1069,8 +1070,11 @@ jit_compile_func(JitCompContext *cc) if (!jit_compile_op_block( cc, &frame_ip, frame_ip_end, (uint32)(LABEL_TYPE_BLOCK + 
opcode - WASM_OP_BLOCK), - param_count, param_types, result_count, result_types)) + param_count, param_types, result_count, result_types, + merge_cmp_and_if)) return false; + /* Clear flag */ + merge_cmp_and_if = false; break; } case EXT_OP_BLOCK: @@ -1086,8 +1090,11 @@ jit_compile_func(JitCompContext *cc) if (!jit_compile_op_block( cc, &frame_ip, frame_ip_end, (uint32)(LABEL_TYPE_BLOCK + opcode - EXT_OP_BLOCK), - param_count, param_types, result_count, result_types)) + param_count, param_types, result_count, result_types, + merge_cmp_and_if)) return false; + /* Clear flag */ + merge_cmp_and_if = false; break; } @@ -1109,8 +1116,11 @@ jit_compile_func(JitCompContext *cc) case WASM_OP_BR_IF: read_leb_uint32(frame_ip, frame_ip_end, br_depth); - if (!jit_compile_op_br_if(cc, br_depth, &frame_ip)) + if (!jit_compile_op_br_if(cc, br_depth, merge_cmp_and_br_if, + &frame_ip)) return false; + /* Clear flag */ + merge_cmp_and_br_if = false; break; case WASM_OP_BR_TABLE: @@ -1506,6 +1516,13 @@ jit_compile_func(JitCompContext *cc) if (!jit_compile_op_i32_compare(cc, INT_EQZ + opcode - WASM_OP_I32_EQZ)) return false; + if (frame_ip < frame_ip_end) { + /* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */ + if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF) + merge_cmp_and_if = true; + if (*frame_ip == WASM_OP_BR_IF) + merge_cmp_and_br_if = true; + } break; case WASM_OP_I64_EQZ: @@ -1522,6 +1539,13 @@ jit_compile_func(JitCompContext *cc) if (!jit_compile_op_i64_compare(cc, INT_EQZ + opcode - WASM_OP_I64_EQZ)) return false; + if (frame_ip < frame_ip_end) { + /* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */ + if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF) + merge_cmp_and_if = true; + if (*frame_ip == WASM_OP_BR_IF) + merge_cmp_and_br_if = true; + } break; case WASM_OP_F32_EQ: @@ -1533,6 +1557,13 @@ jit_compile_func(JitCompContext *cc) if (!jit_compile_op_f32_compare(cc, FLOAT_EQ + opcode - WASM_OP_F32_EQ)) return false; + if (frame_ip < frame_ip_end) 
{ + /* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */ + if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF) + merge_cmp_and_if = true; + if (*frame_ip == WASM_OP_BR_IF) + merge_cmp_and_br_if = true; + } break; case WASM_OP_F64_EQ: @@ -1544,6 +1575,13 @@ jit_compile_func(JitCompContext *cc) if (!jit_compile_op_f64_compare(cc, FLOAT_EQ + opcode - WASM_OP_F64_EQ)) return false; + if (frame_ip < frame_ip_end) { + /* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */ + if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF) + merge_cmp_and_if = true; + if (*frame_ip == WASM_OP_BR_IF) + merge_cmp_and_br_if = true; + } break; case WASM_OP_I32_CLZ: diff --git a/core/iwasm/fast-jit/jit_regalloc.c b/core/iwasm/fast-jit/jit_regalloc.c index 4b4b8fed3..e8c629658 100644 --- a/core/iwasm/fast-jit/jit_regalloc.c +++ b/core/iwasm/fast-jit/jit_regalloc.c @@ -339,7 +339,7 @@ fail: } /** - * Check whether the gien register is an allocation candidate, which + * Check whether the given register is an allocation candidate, which * must be a variable register that is not fixed hard register. 
* * @param cc the compilation context @@ -359,10 +359,8 @@ static void check_vreg_definition(RegallocContext *rc, JitInsn *insn) { JitRegVec regvec = jit_insn_opnd_regs(insn); - unsigned i; - JitReg *regp; - unsigned first_use = jit_insn_opnd_first_use(insn); - JitReg reg_defined; + JitReg *regp, reg_defined = 0; + unsigned i, first_use = jit_insn_opnd_first_use(insn); /* check if there is the definition of an vr before its references */ JIT_REG_VEC_FOREACH(regvec, i, regp) @@ -372,7 +370,7 @@ check_vreg_definition(RegallocContext *rc, JitInsn *insn) if (!is_alloc_candidate(rc->cc, *regp)) continue; - /*a strong assumption that there is only on defined reg*/ + /* a strong assumption that there is only one defined reg */ if (i < first_use) { reg_defined = *regp; continue; @@ -380,8 +378,8 @@ check_vreg_definition(RegallocContext *rc, JitInsn *insn) /** * both definition and references are in one instruction, - * like MOV i3,i3 - **/ + * like MOV i3, i3 + */ if (reg_defined == *regp) continue;