diff --git a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp index b1b500600..3de6aef90 100644 --- a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp +++ b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp @@ -381,7 +381,7 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a, bool fp_cmp = cc->last_cmp_on_fp; - bh_assert(!fp_cmp || (fp_cmp && (op == GES))); + bh_assert(!fp_cmp || (fp_cmp && (op == GTS || op == GES))); switch (op) { case EQ: @@ -396,7 +396,10 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a, } case GTS: { - a.jg(imm); + if (fp_cmp) + a.ja(imm); + else + a.jg(imm); break; } case LES: @@ -5149,8 +5152,7 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, COND_OP op, int32 offset) { Imm target(INT32_MAX); - char *stream = (char *)a.code()->sectionById(0)->buffer().data() - + a.code()->sectionById(0)->buffer().size(); + char *stream; bool fp_cmp = cc->last_cmp_on_fp; bh_assert(!fp_cmp || (fp_cmp && (op == GTS || op == GES))); @@ -5223,8 +5225,14 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, COND_OP op, } } - /* The offset written by asmjit is always 0, we patch it again */ - *(int32 *)(stream + 2) = offset; + JitErrorHandler *err_handler = (JitErrorHandler *)a.code()->errorHandler(); + + if (!err_handler->err) { + /* The offset written by asmjit is always 0, we patch it again */ + stream = (char *)a.code()->sectionById(0)->buffer().data() + + a.code()->sectionById(0)->buffer().size() - 6; + *(int32 *)(stream + 2) = offset; + } return true; } @@ -5302,24 +5310,30 @@ fail: } /* jmp to dst label */ -#define JMP_TO_LABEL(label_dst, label_src) \ - do { \ - if (label_is_ahead(cc, label_dst, label_src)) { \ - char *stream = (char *)a.code()->sectionById(0)->buffer().data() \ - + a.code()->sectionById(0)->buffer().size(); \ - int32 _offset = label_offsets[label_dst] \ - - a.code()->sectionById(0)->buffer().size(); \ - Imm imm(INT32_MAX); \ - a.jmp(imm); \ - /* The offset written by asmjit is always 0, we patch it again, \ - 6 is the size of jmp instruciton */ \ - *(int32 *)(stream + 2) = _offset - 6; \ - } \ - else { \ - if (!jmp_from_label_to_label(a, jmp_info_list, label_dst, \ - label_src)) \ - GOTO_FAIL; \ - } \ +#define JMP_TO_LABEL(label_dst, label_src) \ + do { \ + if (label_is_ahead(cc, label_dst, label_src)) { \ + JitErrorHandler *err_handler = \ + (JitErrorHandler *)a.code()->errorHandler(); \ + int32 _offset; \ + char *stream; \ + Imm imm(INT32_MAX); \ + a.jmp(imm); \ + if (!err_handler->err) { \ + /* The offset written by asmjit is always 0, we patch it \ + again, 6 is the size of jmp instruciton */ \ + stream = (char *)a.code()->sectionById(0)->buffer().data() \ + + a.code()->sectionById(0)->buffer().size() - 6; \ + _offset = label_offsets[label_dst] \ + - a.code()->sectionById(0)->buffer().size(); \ + *(int32 *)(stream + 2) = _offset; \ + } \ + } \ + else { \ + if (!jmp_from_label_to_label(a, jmp_info_list, label_dst, \ + label_src)) \ + GOTO_FAIL; \ + } \ } while (0) /** diff --git a/core/iwasm/fast-jit/fe/jit_emit_control.c b/core/iwasm/fast-jit/fe/jit_emit_control.c index a0a3f96f4..e95ef4d5f 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_control.c +++ b/core/iwasm/fast-jit/fe/jit_emit_control.c @@ -164,9 +164,40 @@ fail: return false; } +static bool +jit_reg_is_i32_const(JitCompContext *cc, JitReg reg, int32 val) +{ + return (jit_reg_kind(reg) == JIT_REG_KIND_I32 && jit_reg_is_const(reg) + && jit_cc_get_const_I32(cc, reg) == val) + ? true + : false; +} + +/** + * get the last two insns: + * CMP cmp_reg, r0, r1 + * SELECTcc r2, cmp_reg, 1, 0 + */ +static void +get_last_cmp_and_selectcc(JitCompContext *cc, JitReg cond, JitInsn **p_insn_cmp, + JitInsn **p_insn_select) +{ + JitInsn *insn = jit_basic_block_last_insn(cc->cur_basic_block); + + if (insn && insn->prev && insn->prev->opcode == JIT_OP_CMP + && insn->opcode >= JIT_OP_SELECTEQ && insn->opcode <= JIT_OP_SELECTLEU + && *jit_insn_opnd(insn, 0) == cond + && jit_reg_is_i32_const(cc, *jit_insn_opnd(insn, 2), 1) + && jit_reg_is_i32_const(cc, *jit_insn_opnd(insn, 3), 0)) { + *p_insn_cmp = insn->prev; + *p_insn_select = insn; + } +} + static bool push_jit_block_to_stack_and_pass_params(JitCompContext *cc, JitBlock *block, - JitBasicBlock *basic_block, JitReg cond) + JitBasicBlock *basic_block, JitReg cond, + bool merge_cmp_and_if) { JitFrame *jit_frame = cc->jit_frame; JitValue *value_list_head = NULL, *value_list_end = NULL, *jit_value; @@ -205,6 +236,12 @@ push_jit_block_to_stack_and_pass_params(JitCompContext *cc, JitBlock *block, /* Continue to translate current block */ } else { + JitInsn *insn_select = NULL, *insn_cmp = NULL; + + if (merge_cmp_and_if) { + get_last_cmp_and_selectcc(cc, cond, &insn_cmp, &insn_select); + } + /* Commit register values to locals and stacks */ gen_commit_values(jit_frame, jit_frame->lp, jit_frame->sp); @@ -227,11 +264,26 @@ push_jit_block_to_stack_and_pass_params(JitCompContext *cc, JitBlock *block, } else { /* IF block with condition br insn */ - if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0)) - || !(insn = GEN_INSN(BNE, cc->cmp_reg, - jit_basic_block_label(basic_block), 0))) { - jit_set_last_error(cc, "generate cond br failed"); - goto fail; + if (insn_select && insn_cmp) { + /* Change `CMP + SELECTcc` into `CMP + Bcc` */ + if (!(insn = GEN_INSN(BEQ, cc->cmp_reg, + jit_basic_block_label(basic_block), 0))) { + jit_set_last_error(cc, "generate cond br failed"); + goto fail; + } + insn->opcode = + JIT_OP_BEQ + (insn_select->opcode - JIT_OP_SELECTEQ); + jit_insn_unlink(insn_select); + jit_insn_delete(insn_select); + } + else { + if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0)) + || !(insn = + GEN_INSN(BNE, cc->cmp_reg, + jit_basic_block_label(basic_block), 0))) { + jit_set_last_error(cc, "generate cond br failed"); + goto fail; + } } /* Don't create else basic block or end basic block now, just @@ -449,7 +501,9 @@ handle_op_end(JitCompContext *cc, uint8 **p_frame_ip, bool is_block_polymorphic) incoming_insn = block->incoming_insns_for_end_bb; while (incoming_insn) { insn = incoming_insn->insn; - bh_assert(insn->opcode == JIT_OP_JMP || insn->opcode == JIT_OP_BNE); + bh_assert( + insn->opcode == JIT_OP_JMP + || (insn->opcode >= JIT_OP_BEQ && insn->opcode <= JIT_OP_BLEU)); *(jit_insn_opnd(insn, incoming_insn->opnd_idx)) = jit_basic_block_label(block->basic_block_end); incoming_insn = incoming_insn->next; @@ -601,7 +655,7 @@ bool jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, uint8 *frame_ip_end, uint32 label_type, uint32 param_count, uint8 *param_types, uint32 result_count, - uint8 *result_types) + uint8 *result_types, bool merge_cmp_and_if) { BlockAddr block_addr_cache[BLOCK_ADDR_CACHE_SIZE][BLOCK_ADDR_CONFLICT_SIZE]; JitBlock *block; @@ -656,8 +710,8 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, if (label_type == LABEL_TYPE_BLOCK) { /* Push the new jit block to block stack and continue to translate current basic block */ - if (!push_jit_block_to_stack_and_pass_params(cc, block, - cc->cur_basic_block, 0)) + if (!push_jit_block_to_stack_and_pass_params( + cc, block, cc->cur_basic_block, 0, false)) goto fail; } else if (label_type == LABEL_TYPE_LOOP) { @@ -667,7 +721,7 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, /* Push the new jit block to block stack and continue to translate the new basic block */ if (!push_jit_block_to_stack_and_pass_params( - cc, block, block->basic_block_entry, 0)) + cc, block, block->basic_block_entry, 0, false)) goto fail; } else if (label_type == LABEL_TYPE_IF) { @@ -682,7 +736,8 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, SET_BB_BEGIN_BCIP(block->basic_block_entry, *p_frame_ip); if (!push_jit_block_to_stack_and_pass_params( - cc, block, block->basic_block_entry, value)) + cc, block, block->basic_block_entry, value, + merge_cmp_and_if)) goto fail; } else { @@ -691,7 +746,7 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, BASIC_BLOCK if cannot be reached, we treat it same as LABEL_TYPE_BLOCK and start to translate if branch */ if (!push_jit_block_to_stack_and_pass_params( - cc, block, cc->cur_basic_block, 0)) + cc, block, cc->cur_basic_block, 0, false)) goto fail; } else { @@ -700,7 +755,7 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, BASIC_BLOCK if cannot be reached, we treat it same as LABEL_TYPE_BLOCK and start to translate else branch */ if (!push_jit_block_to_stack_and_pass_params( - cc, block, cc->cur_basic_block, 0)) + cc, block, cc->cur_basic_block, 0, false)) goto fail; *p_frame_ip = else_addr + 1; } @@ -833,14 +888,15 @@ jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) } bool -jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) +jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, + bool merge_cmp_and_br_if, uint8 **p_frame_ip) { JitFrame *jit_frame; JitBlock *block_dst; JitReg cond; JitBasicBlock *cur_basic_block, *if_basic_block = NULL; JitValueSlot *frame_sp_src; - JitInsn *insn; + JitInsn *insn, *insn_select = NULL, *insn_cmp = NULL; if (!(block_dst = get_target_block(cc, br_depth))) { return false; @@ -849,6 +905,10 @@ jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) /* append IF to current basic block */ POP_I32(cond); + if (merge_cmp_and_br_if) { + get_last_cmp_and_selectcc(cc, cond, &insn_cmp, &insn_select); + } + jit_frame = cc->jit_frame; cur_basic_block = cc->cur_basic_block; gen_commit_values(jit_frame, jit_frame->lp, jit_frame->sp); @@ -864,19 +924,25 @@ jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) block_dst->result_count); } + if (!(insn_select && insn_cmp)) { + if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0))) { + jit_set_last_error(cc, "generate cmp insn failed"); + goto fail; + } + } + if (block_dst->frame_sp_begin == frame_sp_src) { if (block_dst->label_type == LABEL_TYPE_LOOP) { - if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0)) - || !GEN_INSN( - BNE, cc->cmp_reg, - jit_basic_block_label(block_dst->basic_block_entry), 0)) { + if (!(insn = GEN_INSN( + BNE, cc->cmp_reg, + jit_basic_block_label(block_dst->basic_block_entry), + 0))) { jit_set_last_error(cc, "generate bne insn failed"); goto fail; } } else { - if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0)) - || !(insn = GEN_INSN(BNE, cc->cmp_reg, 0, 0))) { + if (!(insn = GEN_INSN(BNE, cc->cmp_reg, 0, 0))) { jit_set_last_error(cc, "generate bne insn failed"); goto fail; } @@ -885,16 +951,27 @@ jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) goto fail; } } + if (insn_select && insn_cmp) { + /* Change `CMP + SELECTcc` into `CMP + Bcc` */ + insn->opcode = JIT_OP_BEQ + (insn_select->opcode - JIT_OP_SELECTEQ); + jit_insn_unlink(insn_select); + jit_insn_delete(insn_select); + } return true; } CREATE_BASIC_BLOCK(if_basic_block); - if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0)) - || !GEN_INSN(BNE, cc->cmp_reg, jit_basic_block_label(if_basic_block), - 0)) { + if (!(insn = GEN_INSN(BNE, cc->cmp_reg, + jit_basic_block_label(if_basic_block), 0))) { jit_set_last_error(cc, "generate bne insn failed"); goto fail; } + if (insn_select && insn_cmp) { + /* Change `CMP + SELECTcc` into `CMP + Bcc` */ + insn->opcode = JIT_OP_BEQ + (insn_select->opcode - JIT_OP_SELECTEQ); + jit_insn_unlink(insn_select); + jit_insn_delete(insn_select); + } SET_BUILDER_POS(if_basic_block); SET_BB_BEGIN_BCIP(if_basic_block, *p_frame_ip - 1); diff --git a/core/iwasm/fast-jit/fe/jit_emit_control.h b/core/iwasm/fast-jit/fe/jit_emit_control.h index f72a2e924..e1bc09a0a 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_control.h +++ b/core/iwasm/fast-jit/fe/jit_emit_control.h @@ -16,7 +16,7 @@ bool jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip, uint8 *frame_ip_end, uint32 label_type, uint32 param_count, uint8 *param_types, uint32 result_count, - uint8 *result_types); + uint8 *result_types, bool merge_cmp_and_if); bool jit_compile_op_else(JitCompContext *cc, uint8 **p_frame_ip); @@ -28,7 +28,8 @@ bool jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip); bool -jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip); +jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, + bool merge_cmp_and_br_if, uint8 **p_frame_ip); bool jit_compile_op_br_table(JitCompContext *cc, uint32 *br_depths, uint32 br_count, diff --git a/core/iwasm/fast-jit/jit_frontend.c b/core/iwasm/fast-jit/jit_frontend.c index 8a887aad2..a84a48986 100644 --- a/core/iwasm/fast-jit/jit_frontend.c +++ b/core/iwasm/fast-jit/jit_frontend.c @@ -1010,6 +1010,7 @@ jit_compile_func(JitCompContext *cc) uint32 br_depth, *br_depths, br_count; uint32 func_idx, type_idx, mem_idx, local_idx, global_idx, i; uint32 bytes = 4, align, offset; + bool merge_cmp_and_if = false, merge_cmp_and_br_if = false; bool sign = true; int32 i32_const; int64 i64_const; @@ -1069,8 +1070,11 @@ jit_compile_func(JitCompContext *cc) if (!jit_compile_op_block( cc, &frame_ip, frame_ip_end, (uint32)(LABEL_TYPE_BLOCK + opcode - WASM_OP_BLOCK), - param_count, param_types, result_count, result_types)) + param_count, param_types, result_count, result_types, + merge_cmp_and_if)) return false; + /* Clear flag */ + merge_cmp_and_if = false; break; } case EXT_OP_BLOCK: @@ -1086,8 +1090,11 @@ jit_compile_func(JitCompContext *cc) if (!jit_compile_op_block( cc, &frame_ip, frame_ip_end, (uint32)(LABEL_TYPE_BLOCK + opcode - EXT_OP_BLOCK), - param_count, param_types, result_count, result_types)) + param_count, param_types, result_count, result_types, + merge_cmp_and_if)) return false; + /* Clear flag */ + merge_cmp_and_if = false; break; } @@ -1109,8 +1116,11 @@ jit_compile_func(JitCompContext *cc) case WASM_OP_BR_IF: read_leb_uint32(frame_ip, frame_ip_end, br_depth); - if (!jit_compile_op_br_if(cc, br_depth, &frame_ip)) + if (!jit_compile_op_br_if(cc, br_depth, merge_cmp_and_br_if, + &frame_ip)) return false; + /* Clear flag */ + merge_cmp_and_br_if = false; break; case WASM_OP_BR_TABLE: @@ -1506,6 +1516,13 @@ jit_compile_func(JitCompContext *cc) if (!jit_compile_op_i32_compare(cc, INT_EQZ + opcode - WASM_OP_I32_EQZ)) return false; + if (frame_ip < frame_ip_end) { + /* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */ + if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF) + merge_cmp_and_if = true; + if (*frame_ip == WASM_OP_BR_IF) + merge_cmp_and_br_if = true; + } break; case WASM_OP_I64_EQZ: @@ -1522,6 +1539,13 @@ jit_compile_func(JitCompContext *cc) if (!jit_compile_op_i64_compare(cc, INT_EQZ + opcode - WASM_OP_I64_EQZ)) return false; + if (frame_ip < frame_ip_end) { + /* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */ + if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF) + merge_cmp_and_if = true; + if (*frame_ip == WASM_OP_BR_IF) + merge_cmp_and_br_if = true; + } break; case WASM_OP_F32_EQ: @@ -1533,6 +1557,13 @@ jit_compile_func(JitCompContext *cc) if (!jit_compile_op_f32_compare(cc, FLOAT_EQ + opcode - WASM_OP_F32_EQ)) return false; + if (frame_ip < frame_ip_end) { + /* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */ + if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF) + merge_cmp_and_if = true; + if (*frame_ip == WASM_OP_BR_IF) + merge_cmp_and_br_if = true; + } break; case WASM_OP_F64_EQ: @@ -1544,6 +1575,13 @@ jit_compile_func(JitCompContext *cc) if (!jit_compile_op_f64_compare(cc, FLOAT_EQ + opcode - WASM_OP_F64_EQ)) return false; + if (frame_ip < frame_ip_end) { + /* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */ + if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF) + merge_cmp_and_if = true; + if (*frame_ip == WASM_OP_BR_IF) + merge_cmp_and_br_if = true; + } break; case WASM_OP_I32_CLZ: diff --git a/core/iwasm/fast-jit/jit_regalloc.c b/core/iwasm/fast-jit/jit_regalloc.c index 4b4b8fed3..e8c629658 100644 --- a/core/iwasm/fast-jit/jit_regalloc.c +++ b/core/iwasm/fast-jit/jit_regalloc.c @@ -339,7 +339,7 @@ fail: } /** - * Check whether the gien register is an allocation candidate, which + * Check whether the given register is an allocation candidate, which * must be a variable register that is not fixed hard register. * * @param cc the compilation context @@ -359,10 +359,8 @@ static void check_vreg_definition(RegallocContext *rc, JitInsn *insn) { JitRegVec regvec = jit_insn_opnd_regs(insn); - unsigned i; - JitReg *regp; - unsigned first_use = jit_insn_opnd_first_use(insn); - JitReg reg_defined; + JitReg *regp, reg_defined = 0; + unsigned i, first_use = jit_insn_opnd_first_use(insn); /* check if there is the definition of an vr before its references */ JIT_REG_VEC_FOREACH(regvec, i, regp) @@ -372,7 +370,7 @@ check_vreg_definition(RegallocContext *rc, JitInsn *insn) if (!is_alloc_candidate(rc->cc, *regp)) continue; - /*a strong assumption that there is only on defined reg*/ + /* a strong assumption that there is only one defined reg */ if (i < first_use) { reg_defined = *regp; continue; @@ -380,8 +378,8 @@ check_vreg_definition(RegallocContext *rc, JitInsn *insn) /** * both definition and references are in one instruction, - * like MOV i3,i3 - **/ + * like MOV i3, i3 + */ if (reg_defined == *regp) continue;