Lock registers to avoid the register allocator spilling them out (#1188)

When an instruction needs specific hardware registers at the IR phase,
such as EAX and EDX for DIV or ECX for SHIFT, two known failure cases
can occur.

Case 1: a VOID register is allocated

`SHRU i250,i249,i3`: if hr_3 (ECX, which SHRU needs) was allocated to
vr_249 first, the subsequent allocation for vr_3 spills vr_249 out and
clears vr_249's `vr->hreg`. When the allocation result is then applied
in the FOREACH loop at L732, a NULL (VOID) register is assigned to the
operand.
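
For reference, the assignment happens in the operand-rewrite step of
`allocate_for_basic_block`; a simplified sketch (identifiers follow the
register-allocator hunk at the end of this commit; the surrounding
operand loop is paraphrased):

```c
/* for each operand *regp of each instruction in the block: */
VirtualReg *vr = rc_get_vr(rc, *regp);
bh_assert(uint_stack_top(vr->distances) == distance);
uint_stack_pop(&vr->distances);
/* added by this commit: catch case 1, where a spill cleared vr->hreg */
bh_assert(vr->hreg != 0);
*regp = vr->hreg; /* without the lock, vr->hreg can be 0 (VOID) here */
```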

Case 2: an unexpected spill

`DIV_U i1,i1,i44`: if the allocation for vr_44 needs to spill out a
hardware register, there is a chance that `hr_4` (EDX) will be selected.
If that happens, codegen will write to EDX and overwrite the value of
vr_44.

The reason `hr_4` can be picked for spilling is a hidden bug: the
information in both `rc->hreg[]` and `rc->vreg` can be carried over from
one basic block to the next. Even when no vr is bound to an hr in the
current block, the hr may still be considered busy because of leftover
information from previous blocks.
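
A minimal self-contained sketch of the hazard (`hreg_is_busy` is a
hypothetical helper; the fields mimic the allocator context in the diff
below):

```c
#include <stdbool.h>

typedef unsigned int JitReg;

typedef struct HardReg {
    JitReg vreg; /* vreg currently bound to this hreg; 0 means free */
} HardReg;

typedef struct RegallocContext {
    HardReg hregs[16];
} RegallocContext;

/* An hreg is treated as busy whenever it has a bound vreg. If the
   context is reused across basic blocks without being cleared, the
   binding may be stale, so an hreg can look busy (or be chosen as a
   spill victim, as in case 2) with no user in the current block. */
static bool
hreg_is_busy(const RegallocContext *rc, unsigned int i)
{
    return rc->hregs[i].vreg != 0;
}
```

This commit works around the stale state by zero-initializing the
context (`RegallocContext rc = { 0 };`) and leaves TODO notes for a
proper per-block cleanup.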

Workaround for both cases:

- Add `MOV LOCKED_hr, LOCKED_hr` just after the instruction; this
  prevents case 1.
- Add `MOV LOCKED_hr, LOCKED_hr` just before the instruction; this
  prevents case 2.
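
Both MOVs are emitted by the new helper `jit_lock_reg_in_insn()`
introduced in this commit. Its intended use, mirroring the i32 DIV
lowering in the diff below (`left`/`right` are the IR operands):

```c
JitInsn *insn = NULL;

GEN_INSN(MOV, eax_hreg, left);
insn = GEN_INSN(DIV_U, eax_hreg, eax_hreg, right);
/* wrap the DIV with `MOV hr, hr` on both sides so the allocator
   neither loses the EAX/EDX bindings (case 1) nor spills them
   across the instruction (case 2) */
jit_lock_reg_in_insn(cc, insn, eax_hreg);
jit_lock_reg_in_insn(cc, insn, edx_hreg);
res = eax_hreg;
```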
Author: liang.he, 2022-05-31 11:58:02 +08:00 (committed by GitHub)
parent 8350d9860b
commit c93508939a
8 changed files with 136 additions and 40 deletions


@@ -6172,30 +6172,32 @@ static const uint8 hreg_info_I64[3][16] = {
        r8, r9, r10, r11, r12, r13, r14, r15 */
     { 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 1 }, /* fixed, rsi is freely used */
-    { 0, 0, 0, 0, 0, 0, 0, 0,
+    { 0, 1, 0, 1, 1, 1, 0, 0,
       1, 1, 1, 1, 0, 0, 0, 0 }, /* caller_saved_native */
-    { 0, 0, 0, 0, 0, 0, 0, 0,
+    { 0, 1, 1, 1, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
 };
 
 /* System V AMD64 ABI Calling Conversion. [XYZ]MM0-7 */
 static uint8 hreg_info_F32[3][16] = {
     /* xmm0 ~ xmm15 */
     { 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1 },
     { 1, 1, 1, 1, 1, 1, 1, 1,
-      1, 1, 1, 1, 1, 1, 1, 1 }, /* TODO:caller_saved_native */
+      0, 0, 0, 0, 0, 0, 0, 0 }, /* caller_saved_native */
     { 1, 1, 1, 1, 1, 1, 1, 1,
-      1, 1, 1, 1, 1, 1, 1, 1 }, /* TODO:caller_saved_jitted */
+      1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */
 };
 
 /* System V AMD64 ABI Calling Conversion. [XYZ]MM0-7 */
 static uint8 hreg_info_F64[3][16] = {
     /* xmm0 ~ xmm15 */
     { 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0 },
     { 1, 1, 1, 1, 1, 1, 1, 1,
-      1, 1, 1, 1, 1, 1, 1, 1 }, /* TODO:caller_saved_native */
+      0, 0, 0, 0, 0, 0, 0, 0 }, /* caller_saved_native */
     { 1, 1, 1, 1, 1, 1, 1, 1,
-      1, 1, 1, 1, 1, 1, 1, 1 }, /* TODO:caller_saved_jitted */
+      1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */
 };
 
 static const JitHardRegInfo hreg_info = {


@@ -64,6 +64,10 @@ jit_compile_op_i32_trunc_f32(JitCompContext *cc, bool sign, bool saturating)
     }
     *(jit_insn_opndv(insn, 2)) = value;
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    jit_lock_reg_in_insn(cc, insn, nan_ret);
+#endif
     GEN_INSN(CMP, cc->cmp_reg, nan_ret, NEW_CONST(I32, 1));
     if (!jit_emit_exception(cc, EXCE_INVALID_CONVERSION_TO_INTEGER,
                             JIT_OP_BEQ, cc->cmp_reg, NULL)) {
@@ -133,6 +137,10 @@ jit_compile_op_i32_trunc_f64(JitCompContext *cc, bool sign, bool saturating)
     }
     *(jit_insn_opndv(insn, 2)) = value;
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    jit_lock_reg_in_insn(cc, insn, nan_ret);
+#endif
     GEN_INSN(CMP, cc->cmp_reg, nan_ret, NEW_CONST(I32, 1));
     if (!jit_emit_exception(cc, EXCE_INVALID_CONVERSION_TO_INTEGER,
                             JIT_OP_BEQ, cc->cmp_reg, NULL)) {
@@ -270,6 +278,10 @@ jit_compile_op_f32_convert_i64(JitCompContext *cc, bool sign)
             goto fail;
         }
         *(jit_insn_opndv(insn, 2)) = value;
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+        jit_lock_reg_in_insn(cc, insn, res);
+#endif
     }
     PUSH_F32(res);
@@ -330,7 +342,10 @@ jit_compile_op_f64_convert_i64(JitCompContext *cc, bool sign)
     else {
         JitInsn *insn;
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+        JitReg xmm0;
         res = jit_codegen_get_hreg_by_name("xmm0_f64");
+        xmm0 = jit_codegen_get_hreg_by_name("xmm0");
 #else
         res = jit_cc_new_reg_F64(cc);
 #endif
@@ -340,6 +355,9 @@ jit_compile_op_f64_convert_i64(JitCompContext *cc, bool sign)
             goto fail;
         }
         *(jit_insn_opndv(insn, 2)) = value;
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+        jit_lock_reg_in_insn(cc, insn, xmm0);
+#endif
     }
     PUSH_F64(res);


@@ -175,6 +175,10 @@ jit_compile_op_call(JitCompContext *cc, uint32 func_idx, bool tail_call)
         *(jit_insn_opndv(insn, 4)) = cc->fp_reg;
     }
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    jit_lock_reg_in_insn(cc, insn, native_ret);
+#endif
     /* Check whether there is exception thrown */
     GEN_INSN(CMP, cc->cmp_reg, native_ret, NEW_CONST(I32, 0));
     if (!jit_emit_exception(cc, EXCE_ALREADY_THROWN, JIT_OP_BEQ,
@@ -353,6 +357,8 @@ jit_compile_op_call_indirect(JitCompContext *cc, uint32 type_idx,
     *(jit_insn_opndv(insn, 5)) = NEW_CONST(I32, func_type->param_count);
     *(jit_insn_opndv(insn, 6)) = argv;
+    jit_lock_reg_in_insn(cc, insn, native_ret);
     /* Check whether there is exception thrown */
     GEN_INSN(CMP, cc->cmp_reg, native_ret, NEW_CONST(I32, 0));
     if (!jit_emit_exception(cc, EXCE_ALREADY_THROWN, JIT_OP_BEQ, cc->cmp_reg,


@@ -500,6 +500,10 @@ jit_compile_op_memory_grow(JitCompContext *cc, uint32 mem_idx)
         *(jit_insn_opndv(insn, 3)) = delta;
     }
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    jit_lock_reg_in_insn(cc, insn, grow_result);
+#endif
     /* check if enlarge memory success */
     res = jit_cc_new_reg_I32(cc);
     GEN_INSN(CMP, cc->cmp_reg, grow_result, NEW_CONST(I32, 0));


@@ -510,57 +510,60 @@ compile_int_div_no_check(JitCompContext *cc, IntArithmetic arith_op,
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
         case INT_DIV_S:
         case INT_DIV_U:
+        {
+            JitInsn *insn = NULL;
             if (is_i32) {
                 GEN_INSN(MOV, eax_hreg, left);
                 if (arith_op == INT_DIV_S)
-                    GEN_INSN(DIV_S, eax_hreg, eax_hreg, right);
+                    insn = GEN_INSN(DIV_S, eax_hreg, eax_hreg, right);
                 else
-                    GEN_INSN(DIV_U, eax_hreg, eax_hreg, right);
-                /* Just to indicate that edx is used,
-                   register allocator cannot spill it out */
-                GEN_INSN(MOV, edx_hreg, edx_hreg);
+                    insn = GEN_INSN(DIV_U, eax_hreg, eax_hreg, right);
                 res = eax_hreg;
             }
             else {
                 GEN_INSN(MOV, rax_hreg, left);
-                /* Just to indicate that eax is used,
-                   register allocator cannot spill it out */
-                GEN_INSN(MOV, eax_hreg, eax_hreg);
                 if (arith_op == INT_DIV_S)
-                    GEN_INSN(DIV_S, rax_hreg, rax_hreg, right);
+                    insn = GEN_INSN(DIV_S, rax_hreg, rax_hreg, right);
                 else
-                    GEN_INSN(DIV_U, rax_hreg, rax_hreg, right);
-                /* Just to indicate that edx is used,
-                   register allocator cannot spill it out */
-                GEN_INSN(MOV, edx_hreg, edx_hreg);
+                    insn = GEN_INSN(DIV_U, rax_hreg, rax_hreg, right);
                 res = rax_hreg;
             }
+            jit_lock_reg_in_insn(cc, insn, eax_hreg);
+            jit_lock_reg_in_insn(cc, insn, edx_hreg);
             break;
+        }
         case INT_REM_S:
         case INT_REM_U:
+        {
+            JitInsn *insn = NULL;
             if (is_i32) {
                 GEN_INSN(MOV, eax_hreg, left);
                 if (arith_op == INT_REM_S)
-                    GEN_INSN(REM_S, edx_hreg, eax_hreg, right);
+                    insn = GEN_INSN(REM_S, edx_hreg, eax_hreg, right);
                 else
-                    GEN_INSN(REM_U, edx_hreg, eax_hreg, right);
+                    insn = GEN_INSN(REM_U, edx_hreg, eax_hreg, right);
                 res = edx_hreg;
             }
             else {
                 GEN_INSN(MOV, rax_hreg, left);
-                /* Just to indicate that eax is used,
-                   register allocator cannot spill it out */
-                GEN_INSN(MOV, eax_hreg, eax_hreg);
                 if (arith_op == INT_REM_S)
-                    GEN_INSN(REM_S, rdx_hreg, rax_hreg, right);
+                    insn = GEN_INSN(REM_S, rdx_hreg, rax_hreg, right);
                 else
-                    GEN_INSN(REM_U, rdx_hreg, rax_hreg, right);
-                /* Just to indicate that edx is used,
-                   register allocator cannot spill it out */
-                GEN_INSN(MOV, edx_hreg, edx_hreg);
+                    insn = GEN_INSN(REM_U, rdx_hreg, rax_hreg, right);
                 res = rdx_hreg;
             }
+            jit_lock_reg_in_insn(cc, insn, eax_hreg);
+            jit_lock_reg_in_insn(cc, insn, edx_hreg);
             break;
+        }
 #else
         case INT_DIV_S:
             GEN_INSN(DIV_S, res, left, right);
@@ -1050,6 +1053,7 @@ compile_int_shl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
     JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
 #endif
 
     right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1063,8 +1067,8 @@ compile_int_shl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
     res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
-    GEN_INSN(SHL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
-    GEN_INSN(MOV, ecx_hreg, ecx_hreg);
+    insn = GEN_INSN(SHL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
 #else
     GEN_INSN(SHL, res, left, right);
 #endif
@@ -1080,6 +1084,7 @@ compile_int_shrs(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
     JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
 #endif
 
     right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1093,8 +1098,8 @@ compile_int_shrs(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
     res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
-    GEN_INSN(SHRS, res, left, is_i32 ? ecx_hreg : rcx_hreg);
-    GEN_INSN(MOV, ecx_hreg, ecx_hreg);
+    insn = GEN_INSN(SHRS, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
 #else
     GEN_INSN(SHRS, res, left, right);
 #endif
@@ -1110,6 +1115,7 @@ compile_int_shru(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
     JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
 #endif
 
     right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1123,8 +1129,8 @@ compile_int_shru(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
     res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
-    GEN_INSN(SHRU, res, left, is_i32 ? ecx_hreg : rcx_hreg);
-    GEN_INSN(MOV, ecx_hreg, ecx_hreg);
+    insn = GEN_INSN(SHRU, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
 #else
     GEN_INSN(SHRU, res, left, right);
 #endif
@@ -1171,6 +1177,7 @@ compile_int_rotl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
     JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
 #endif
 
     right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1184,8 +1191,8 @@ compile_int_rotl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
     res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
-    GEN_INSN(ROTL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
-    GEN_INSN(MOV, ecx_hreg, ecx_hreg);
+    insn = GEN_INSN(ROTL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
 #else
     GEN_INSN(ROTL, res, left, right);
 #endif
@@ -1232,6 +1239,7 @@ compile_int_rotr(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
     JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
 #endif
 
     right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1245,8 +1253,8 @@ compile_int_rotr(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
     res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
-    GEN_INSN(ROTR, res, left, is_i32 ? ecx_hreg : rcx_hreg);
-    GEN_INSN(MOV, ecx_hreg, ecx_hreg);
+    insn = GEN_INSN(ROTR, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
 #else
     GEN_INSN(ROTR, res, left, right);
 #endif


@@ -1558,3 +1558,46 @@ _jit_insn_check_opnd_access_LookupSwitch(const JitInsn *insn)
     unsigned opcode = insn->opcode;
     return (insn_opnd_kind[opcode] == JIT_OPND_KIND_LookupSwitch);
 }
+
+bool
+jit_lock_reg_in_insn(JitCompContext *cc, JitInsn *the_insn, JitReg reg_to_lock)
+{
+    bool ret = false;
+    JitInsn *prevent_spill = NULL;
+    JitInsn *indicate_using = NULL;
+
+    if (!the_insn)
+        goto just_return;
+
+    if (jit_cc_is_hreg_fixed(cc, reg_to_lock)) {
+        ret = true;
+        goto just_return;
+    }
+
+    /**
+     * give the virtual register of the locked hard register a minimum, non-zero
+     * distance, so as to prevent it from being spilled out
+     */
+    prevent_spill = jit_insn_new_MOV(reg_to_lock, reg_to_lock);
+    if (!prevent_spill)
+        goto just_return;
+
+    jit_insn_insert_before(the_insn, prevent_spill);
+
+    /**
+     * announce the locked hard register is being used, and do necessary spill
+     * ASAP
+     */
+    indicate_using = jit_insn_new_MOV(reg_to_lock, reg_to_lock);
+    if (!indicate_using)
+        goto just_return;
+
+    jit_insn_insert_after(the_insn, indicate_using);
+
+    ret = true;
+
+just_return:
+    if (!ret)
+        jit_set_last_error(cc, "generate insn failed");
+    return ret;
+}


@@ -1882,6 +1882,9 @@ jit_cc_push_value(JitCompContext *cc, uint8 type, JitReg value);
 bool
 jit_cc_pop_value(JitCompContext *cc, uint8 type, JitReg *p_value);
 
+bool
+jit_lock_reg_in_insn(JitCompContext *cc, JitInsn *the_insn, JitReg reg_to_lock);
+
 /**
  * Update the control flow graph after successors of blocks are
  * changed so that the predecessor vector of each block represents the


@@ -71,6 +71,11 @@ uint_stack_pop(UintStack **stack)
 {
     bh_assert((*stack)->top > 0);
 
+    /**
+     * TODO: the fact of empty distances stack means there is no instruction
+     * using current JitReg anymore. so shall we release the HardReg and clean
+     * VirtualReg information?
+     */
     if (--(*stack)->top == 0)
         uint_stack_delete(stack);
 }
@@ -576,6 +581,7 @@ allocate_hreg(RegallocContext *rc, JitReg vreg, JitInsn *insn, int distance)
             continue;
 
         vr = rc_get_vr(rc, hregs[i].vreg);
+        /* TODO: since the hregs[i] is in use, its distances should be valid */
         vr_distance = vr->distances ? uint_stack_top(vr->distances) : 0;
 
         if (vr_distance < min_distance) {
@@ -727,6 +733,8 @@ allocate_for_basic_block(RegallocContext *rc, JitBasicBlock *basic_block,
                 VirtualReg *vr = rc_get_vr(rc, *regp);
                 bh_assert(uint_stack_top(vr->distances) == distance);
                 uint_stack_pop(&vr->distances);
+                /* be sure that the hreg exists and hasn't been spilled out */
+                bh_assert(vr->hreg != 0);
                 *regp = vr->hreg;
             }
         }
@@ -737,7 +745,7 @@ allocate_for_basic_block(RegallocContext *rc, JitBasicBlock *basic_block,
 bool
 jit_pass_regalloc(JitCompContext *cc)
 {
-    RegallocContext rc;
+    RegallocContext rc = { 0 };
     unsigned label_index, end_label_index;
     JitBasicBlock *basic_block;
     VirtualReg *self_vr;
@@ -761,6 +769,10 @@ jit_pass_regalloc(JitCompContext *cc)
         self_vr->hreg = self_vr->global_hreg;
         (rc_get_hr(&rc, cc->exec_env_reg))->vreg = cc->exec_env_reg;
 
+        /**
+         * TODO: the allocation of a basic block keeps using vregs[]
+         * and hregs[] from previous basic block
+         */
         if ((distance = collect_distances(&rc, basic_block)) < 0)
             goto cleanup_and_return;