diff --git a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp
index 447cec0e8..0abdcef00 100644
--- a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp
+++ b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp
@@ -6172,30 +6172,32 @@ static const uint8 hreg_info_I64[3][16] = {
        r8, r9, r10, r11, r12, r13, r14, r15 */
     { 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 1 }, /* fixed, rsi is freely used */
-    { 0, 0, 0, 0, 0, 0, 0, 0,
+    { 0, 1, 0, 1, 1, 1, 0, 0,
       1, 1, 1, 1, 0, 0, 0, 0 }, /* caller_saved_native */
-    { 0, 0, 0, 0, 0, 0, 0, 0,
+    { 0, 1, 1, 1, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
 };

+/* System V AMD64 ABI Calling Convention. [XYZ]MM0-7 */
 static uint8 hreg_info_F32[3][16] = {
     /* xmm0 ~ xmm15 */
     { 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1 },
     { 1, 1, 1, 1, 1, 1, 1, 1,
-      1, 1, 1, 1, 1, 1, 1, 1 }, /* TODO:caller_saved_native */
+      0, 0, 0, 0, 0, 0, 0, 0 }, /* caller_saved_native */
     { 1, 1, 1, 1, 1, 1, 1, 1,
-      1, 1, 1, 1, 1, 1, 1, 1 }, /* TODO:caller_saved_jitted */
+      1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */
 };

+/* System V AMD64 ABI Calling Convention. [XYZ]MM0-7 */
 static uint8 hreg_info_F64[3][16] = {
     /* xmm0 ~ xmm15 */
     { 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0 },
     { 1, 1, 1, 1, 1, 1, 1, 1,
-      1, 1, 1, 1, 1, 1, 1, 1 }, /* TODO:caller_saved_native */
+      0, 0, 0, 0, 0, 0, 0, 0 }, /* caller_saved_native */
     { 1, 1, 1, 1, 1, 1, 1, 1,
-      1, 1, 1, 1, 1, 1, 1, 1 }, /* TODO:caller_saved_jitted */
+      1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */
 };

 static const JitHardRegInfo hreg_info = {
diff --git a/core/iwasm/fast-jit/fe/jit_emit_conversion.c b/core/iwasm/fast-jit/fe/jit_emit_conversion.c
index f0ba56324..ad4eb81ab 100644
--- a/core/iwasm/fast-jit/fe/jit_emit_conversion.c
+++ b/core/iwasm/fast-jit/fe/jit_emit_conversion.c
@@ -64,6 +64,10 @@ jit_compile_op_i32_trunc_f32(JitCompContext *cc, bool sign, bool saturating)
     }
     *(jit_insn_opndv(insn, 2)) = value;

+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    jit_lock_reg_in_insn(cc, insn, nan_ret);
+#endif
+
     GEN_INSN(CMP, cc->cmp_reg, nan_ret, NEW_CONST(I32, 1));
     if (!jit_emit_exception(cc, EXCE_INVALID_CONVERSION_TO_INTEGER, JIT_OP_BEQ,
                             cc->cmp_reg, NULL)) {
@@ -133,6 +137,10 @@ jit_compile_op_i32_trunc_f64(JitCompContext *cc, bool sign, bool saturating)
     }
     *(jit_insn_opndv(insn, 2)) = value;

+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    jit_lock_reg_in_insn(cc, insn, nan_ret);
+#endif
+
     GEN_INSN(CMP, cc->cmp_reg, nan_ret, NEW_CONST(I32, 1));
     if (!jit_emit_exception(cc, EXCE_INVALID_CONVERSION_TO_INTEGER, JIT_OP_BEQ,
                             cc->cmp_reg, NULL)) {
@@ -270,6 +278,10 @@ jit_compile_op_f32_convert_i64(JitCompContext *cc, bool sign)
             goto fail;
         }
         *(jit_insn_opndv(insn, 2)) = value;
+
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+        jit_lock_reg_in_insn(cc, insn, res);
+#endif
     }

     PUSH_F32(res);
@@ -330,7 +342,10 @@ jit_compile_op_f64_convert_i64(JitCompContext *cc, bool sign)
     else {
         JitInsn *insn;
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+        JitReg xmm0;
+
         res = jit_codegen_get_hreg_by_name("xmm0_f64");
+        xmm0 = jit_codegen_get_hreg_by_name("xmm0");
 #else
         res = jit_cc_new_reg_F64(cc);
 #endif
@@ -340,6 +355,9 @@ jit_compile_op_f64_convert_i64(JitCompContext *cc, bool sign)
             goto fail;
         }
         *(jit_insn_opndv(insn, 2)) = value;
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+        jit_lock_reg_in_insn(cc, insn, xmm0);
+#endif
     }

     PUSH_F64(res);
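Note: the four jit_emit_conversion.c hunks above share one pattern, condensed
below (the helper and its name are illustrative, not part of this patch). The
CALLNATIVE-style instruction leaves its result in a hard register (nan_ret for
the truncations, xmm0 for the f64 conversion), and that register has to be
locked around the instruction so the register allocator can neither reuse nor
spill it before the consumer reads it:

    /* Illustrative sketch only: emit the check of a hard-register return
       value, locking the register on targets where it is a real hreg. */
    static bool
    emit_nan_ret_check(JitCompContext *cc, JitInsn *insn, JitReg nan_ret)
    {
    #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
        if (!jit_lock_reg_in_insn(cc, insn, nan_ret))
            return false;
    #endif
        GEN_INSN(CMP, cc->cmp_reg, nan_ret, NEW_CONST(I32, 1));
        return jit_emit_exception(cc, EXCE_INVALID_CONVERSION_TO_INTEGER,
                                  JIT_OP_BEQ, cc->cmp_reg, NULL);
    }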
diff --git a/core/iwasm/fast-jit/fe/jit_emit_function.c b/core/iwasm/fast-jit/fe/jit_emit_function.c
index c25265d1d..39a532a54 100644
--- a/core/iwasm/fast-jit/fe/jit_emit_function.c
+++ b/core/iwasm/fast-jit/fe/jit_emit_function.c
@@ -175,6 +175,10 @@ jit_compile_op_call(JitCompContext *cc, uint32 func_idx, bool tail_call)
         *(jit_insn_opndv(insn, 4)) = cc->fp_reg;
     }

+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    jit_lock_reg_in_insn(cc, insn, native_ret);
+#endif
+
     /* Check whether there is exception thrown */
     GEN_INSN(CMP, cc->cmp_reg, native_ret, NEW_CONST(I32, 0));
     if (!jit_emit_exception(cc, EXCE_ALREADY_THROWN, JIT_OP_BEQ,
@@ -353,6 +357,8 @@ jit_compile_op_call_indirect(JitCompContext *cc, uint32 type_idx,
     *(jit_insn_opndv(insn, 5)) = NEW_CONST(I32, func_type->param_count);
     *(jit_insn_opndv(insn, 6)) = argv;

+    jit_lock_reg_in_insn(cc, insn, native_ret);
+
     /* Check whether there is exception thrown */
     GEN_INSN(CMP, cc->cmp_reg, native_ret, NEW_CONST(I32, 0));
     if (!jit_emit_exception(cc, EXCE_ALREADY_THROWN, JIT_OP_BEQ, cc->cmp_reg,
diff --git a/core/iwasm/fast-jit/fe/jit_emit_memory.c b/core/iwasm/fast-jit/fe/jit_emit_memory.c
index d118013cb..0a878b095 100644
--- a/core/iwasm/fast-jit/fe/jit_emit_memory.c
+++ b/core/iwasm/fast-jit/fe/jit_emit_memory.c
@@ -500,6 +500,10 @@ jit_compile_op_memory_grow(JitCompContext *cc, uint32 mem_idx)
         *(jit_insn_opndv(insn, 3)) = delta;
     }

+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    jit_lock_reg_in_insn(cc, insn, grow_result);
+#endif
+
     /* check if enlarge memory success */
     res = jit_cc_new_reg_I32(cc);
     GEN_INSN(CMP, cc->cmp_reg, grow_result, NEW_CONST(I32, 0));
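Note: a concrete picture of the failure mode the native_ret/grow_result locks
address (illustrative instruction stream, not an actual dump; the return
register is shown as eax). Nothing else keeps the call's result register alive
between the CALLNATIVE and the CMP that consumes it, so the allocator was free
to hand that register to an unrelated virtual register in between:

    without the lock (broken):           with jit_lock_reg_in_insn:
      CALLNATIVE eax, fn, ...              MOV  eax, eax   ; prevent spill
      MOV  eax, <other vreg>   <-- bug     CALLNATIVE eax, fn, ...
      CMP  cmp_reg, eax, 0                 MOV  eax, eax   ; mark eax used
                                           CMP  cmp_reg, eax, 0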
diff --git a/core/iwasm/fast-jit/fe/jit_emit_numberic.c b/core/iwasm/fast-jit/fe/jit_emit_numberic.c
index 7d713fee6..ee2cdbdc9 100644
--- a/core/iwasm/fast-jit/fe/jit_emit_numberic.c
+++ b/core/iwasm/fast-jit/fe/jit_emit_numberic.c
@@ -510,57 +510,60 @@ compile_int_div_no_check(JitCompContext *cc, IntArithmetic arith_op,
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
         case INT_DIV_S:
         case INT_DIV_U:
+        {
+            JitInsn *insn = NULL;
+
             if (is_i32) {
                 GEN_INSN(MOV, eax_hreg, left);
                 if (arith_op == INT_DIV_S)
-                    GEN_INSN(DIV_S, eax_hreg, eax_hreg, right);
+                    insn = GEN_INSN(DIV_S, eax_hreg, eax_hreg, right);
                 else
-                    GEN_INSN(DIV_U, eax_hreg, eax_hreg, right);
-                /* Just to indicate that edx is used,
-                   register allocator cannot spill it out */
-                GEN_INSN(MOV, edx_hreg, edx_hreg);
+                    insn = GEN_INSN(DIV_U, eax_hreg, eax_hreg, right);
+
                 res = eax_hreg;
             }
             else {
                 GEN_INSN(MOV, rax_hreg, left);
-                /* Just to indicate that eax is used,
-                   register allocator cannot spill it out */
-                GEN_INSN(MOV, eax_hreg, eax_hreg);
                 if (arith_op == INT_DIV_S)
-                    GEN_INSN(DIV_S, rax_hreg, rax_hreg, right);
+                    insn = GEN_INSN(DIV_S, rax_hreg, rax_hreg, right);
                 else
-                    GEN_INSN(DIV_U, rax_hreg, rax_hreg, right);
-                /* Just to indicate that edx is used,
-                   register allocator cannot spill it out */
-                GEN_INSN(MOV, edx_hreg, edx_hreg);
+                    insn = GEN_INSN(DIV_U, rax_hreg, rax_hreg, right);
+
                 res = rax_hreg;
             }
+
+            jit_lock_reg_in_insn(cc, insn, eax_hreg);
+            jit_lock_reg_in_insn(cc, insn, edx_hreg);
             break;
+        }
         case INT_REM_S:
         case INT_REM_U:
+        {
+            JitInsn *insn = NULL;
+
             if (is_i32) {
                 GEN_INSN(MOV, eax_hreg, left);
                 if (arith_op == INT_REM_S)
-                    GEN_INSN(REM_S, edx_hreg, eax_hreg, right);
+                    insn = GEN_INSN(REM_S, edx_hreg, eax_hreg, right);
                 else
-                    GEN_INSN(REM_U, edx_hreg, eax_hreg, right);
+                    insn = GEN_INSN(REM_U, edx_hreg, eax_hreg, right);
+
                 res = edx_hreg;
             }
             else {
                 GEN_INSN(MOV, rax_hreg, left);
-                /* Just to indicate that eax is used,
-                   register allocator cannot spill it out */
-                GEN_INSN(MOV, eax_hreg, eax_hreg);
                 if (arith_op == INT_REM_S)
-                    GEN_INSN(REM_S, rdx_hreg, rax_hreg, right);
+                    insn = GEN_INSN(REM_S, rdx_hreg, rax_hreg, right);
                 else
-                    GEN_INSN(REM_U, rdx_hreg, rax_hreg, right);
-                /* Just to indicate that edx is used,
-                   register allocator cannot spill it out */
-                GEN_INSN(MOV, edx_hreg, edx_hreg);
+                    insn = GEN_INSN(REM_U, rdx_hreg, rax_hreg, right);
+
                 res = rdx_hreg;
             }
+
+            jit_lock_reg_in_insn(cc, insn, eax_hreg);
+            jit_lock_reg_in_insn(cc, insn, edx_hreg);
             break;
+        }
 #else
         case INT_DIV_S:
             GEN_INSN(DIV_S, res, left, right);
@@ -1050,6 +1053,7 @@ compile_int_shl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
     JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
 #endif

     right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1063,8 +1067,8 @@ compile_int_shl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
     res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
-    GEN_INSN(SHL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
-    GEN_INSN(MOV, ecx_hreg, ecx_hreg);
+    insn = GEN_INSN(SHL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
 #else
     GEN_INSN(SHL, res, left, right);
 #endif
@@ -1080,6 +1084,7 @@ compile_int_shrs(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
     JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
 #endif

     right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1093,8 +1098,8 @@ compile_int_shrs(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
     res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
-    GEN_INSN(SHRS, res, left, is_i32 ? ecx_hreg : rcx_hreg);
-    GEN_INSN(MOV, ecx_hreg, ecx_hreg);
+    insn = GEN_INSN(SHRS, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
 #else
     GEN_INSN(SHRS, res, left, right);
 #endif
@@ -1110,6 +1115,7 @@ compile_int_shru(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
     JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
 #endif

     right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1123,8 +1129,8 @@ compile_int_shru(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
     res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
-    GEN_INSN(SHRU, res, left, is_i32 ? ecx_hreg : rcx_hreg);
-    GEN_INSN(MOV, ecx_hreg, ecx_hreg);
+    insn = GEN_INSN(SHRU, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
 #else
     GEN_INSN(SHRU, res, left, right);
 #endif
@@ -1171,6 +1177,7 @@ compile_int_rotl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
     JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
 #endif

     right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1184,8 +1191,8 @@ compile_int_rotl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
     res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
-    GEN_INSN(ROTL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
-    GEN_INSN(MOV, ecx_hreg, ecx_hreg);
+    insn = GEN_INSN(ROTL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
 #else
     GEN_INSN(ROTL, res, left, right);
 #endif
@@ -1232,6 +1239,7 @@ compile_int_rotr(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
     JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
 #endif

     right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1245,8 +1253,8 @@ compile_int_rotr(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
     res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
-    GEN_INSN(ROTR, res, left, is_i32 ? ecx_hreg : rcx_hreg);
-    GEN_INSN(MOV, ecx_hreg, ecx_hreg);
+    insn = GEN_INSN(ROTR, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
 #else
     GEN_INSN(ROTR, res, left, right);
 #endif
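Note: the architectural background for the jit_emit_numberic.c hunks (x86-64
facts, not something this patch introduces): div/idiv implicitly take the
dividend in edx:eax (rdx:rax) and write the quotient to eax/rax and the
remainder to edx/rdx, and variable shifts and rotates take their count in cl.
For a 32-bit shift the emitted sequence therefore becomes roughly:

    MOV ecx, right     ; route the count through ecx
    MOV ecx, ecx       ; inserted before the insn by jit_lock_reg_in_insn
    SHL res, left, ecx
    MOV ecx, ecx       ; inserted after the insn by jit_lock_reg_in_insn

(see the jit_ir.c hunk below for the two inserted self-moves). The old code
emitted only a single trailing self-move, which apparently was not enough to
keep ecx pinned across the instruction itself.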
diff --git a/core/iwasm/fast-jit/jit_ir.c b/core/iwasm/fast-jit/jit_ir.c
index 476b1a3bb..be831c242 100644
--- a/core/iwasm/fast-jit/jit_ir.c
+++ b/core/iwasm/fast-jit/jit_ir.c
@@ -1558,3 +1558,46 @@ _jit_insn_check_opnd_access_LookupSwitch(const JitInsn *insn)
     unsigned opcode = insn->opcode;
     return (insn_opnd_kind[opcode] == JIT_OPND_KIND_LookupSwitch);
 }
+
+bool
+jit_lock_reg_in_insn(JitCompContext *cc, JitInsn *the_insn, JitReg reg_to_lock)
+{
+    bool ret = false;
+    JitInsn *prevent_spill = NULL;
+    JitInsn *indicate_using = NULL;
+
+    if (!the_insn)
+        goto just_return;
+
+    if (jit_cc_is_hreg_fixed(cc, reg_to_lock)) {
+        ret = true;
+        goto just_return;
+    }
+
+    /**
+     * give the virtual register of the locked hard register a minimum,
+     * non-zero distance, so as to prevent it from being spilled out
+     */
+    prevent_spill = jit_insn_new_MOV(reg_to_lock, reg_to_lock);
+    if (!prevent_spill)
+        goto just_return;
+
+    jit_insn_insert_before(the_insn, prevent_spill);
+
+    /**
+     * announce that the locked hard register is being used, and do any
+     * necessary spill ASAP
+     */
+    indicate_using = jit_insn_new_MOV(reg_to_lock, reg_to_lock);
+    if (!indicate_using)
+        goto just_return;
+
+    jit_insn_insert_after(the_insn, indicate_using);
+
+    ret = true;
+
+just_return:
+    if (!ret)
+        jit_set_last_error(cc, "generate insn failed");
+    return ret;
+}
diff --git a/core/iwasm/fast-jit/jit_ir.h b/core/iwasm/fast-jit/jit_ir.h
index 39f61e748..43c735fbc 100644
--- a/core/iwasm/fast-jit/jit_ir.h
+++ b/core/iwasm/fast-jit/jit_ir.h
@@ -1882,6 +1882,9 @@ jit_cc_push_value(JitCompContext *cc, uint8 type, JitReg value);
 bool
 jit_cc_pop_value(JitCompContext *cc, uint8 type, JitReg *p_value);

+bool
+jit_lock_reg_in_insn(JitCompContext *cc, JitInsn *the_insn, JitReg reg_to_lock);
+
 /**
  * Update the control flow graph after successors of blocks are
  * changed so that the predecessor vector of each block represents the
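Note: a minimal usage sketch for the new helper (hypothetical caller code;
most call sites in this patch ignore the return value and rely on the
jit_set_last_error() call inside the helper):

    JitInsn *insn = GEN_INSN(DIV_S, eax_hreg, eax_hreg, right);
    /* pin both implicitly-used hard registers across the division */
    if (!jit_lock_reg_in_insn(cc, insn, eax_hreg)
        || !jit_lock_reg_in_insn(cc, insn, edx_hreg))
        goto fail;

Passing a fixed hard register (one that jit_cc_is_hreg_fixed() reports as
never handed out by the allocator) is a no-op that returns true, so callers
need not special-case such registers.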
diff --git a/core/iwasm/fast-jit/jit_regalloc.c b/core/iwasm/fast-jit/jit_regalloc.c
index 51745e527..098d2bc99 100644
--- a/core/iwasm/fast-jit/jit_regalloc.c
+++ b/core/iwasm/fast-jit/jit_regalloc.c
@@ -71,6 +71,11 @@ uint_stack_pop(UintStack **stack)
 {
     bh_assert((*stack)->top > 0);

+    /**
+     * TODO: an empty distances stack means that no remaining instruction
+     * uses the current JitReg, so shall we release the HardReg and clear
+     * the VirtualReg information?
+     */
     if (--(*stack)->top == 0)
         uint_stack_delete(stack);
 }
@@ -576,6 +581,7 @@ allocate_hreg(RegallocContext *rc, JitReg vreg, JitInsn *insn, int distance)
             continue;

         vr = rc_get_vr(rc, hregs[i].vreg);
+        /* TODO: since hregs[i] is in use, its distances stack should be valid */
         vr_distance = vr->distances ? uint_stack_top(vr->distances) : 0;

         if (vr_distance < min_distance) {
@@ -727,6 +733,8 @@ allocate_for_basic_block(RegallocContext *rc, JitBasicBlock *basic_block,
                 VirtualReg *vr = rc_get_vr(rc, *regp);
                 bh_assert(uint_stack_top(vr->distances) == distance);
                 uint_stack_pop(&vr->distances);
+                /* make sure the hreg exists and hasn't been spilled out */
+                bh_assert(vr->hreg != 0);
                 *regp = vr->hreg;
             }
         }
@@ -737,7 +745,7 @@ allocate_for_basic_block(RegallocContext *rc, JitBasicBlock *basic_block,
 bool
 jit_pass_regalloc(JitCompContext *cc)
 {
-    RegallocContext rc;
+    RegallocContext rc = { 0 };
     unsigned label_index, end_label_index;
     JitBasicBlock *basic_block;
     VirtualReg *self_vr;
@@ -761,6 +769,10 @@ jit_pass_regalloc(JitCompContext *cc)
         self_vr->hreg = self_vr->global_hreg;
         (rc_get_hr(&rc, cc->exec_env_reg))->vreg = cc->exec_env_reg;

+        /**
+         * TODO: the allocation for a basic block keeps using the vregs[]
+         * and hregs[] state left over from the previous basic block
+         */
         if ((distance = collect_distances(&rc, basic_block)) < 0)
             goto cleanup_and_return;
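Note: for readers of the regalloc TODOs above, the selection loop they
annotate, condensed from allocate_hreg() (simplified; the trailing variable
names are hypothetical): when no hard register is free, the candidate whose
owning virtual register has the smallest distance value on top of its
distances stack is picked. The allocate_hreg() TODO observes that an in-use
hreg should always have a valid, non-empty stack, which would make the ": 0"
fallback unreachable:

    for (i = 0; i < count; i++) {
        VirtualReg *vr = rc_get_vr(rc, hregs[i].vreg);
        int vr_distance = vr->distances ? uint_stack_top(vr->distances) : 0;
        if (vr_distance < min_distance) {
            min_distance = vr_distance;
            candidate = hregs[i].vreg; /* hypothetical local */
        }
    }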