Refine fast jit func return in callbc/returnbc (#1276)

Use registers edx/rdx/xmm0 to pass the i32/i64/f32/f64 return value
on function call/return, instead of storing it to and loading it back
from the stack frame, so as to improve performance.
This commit is contained in:
Wenyong Huang 2022-07-11 09:48:45 +08:00 committed by GitHub
parent bb9c9a6395
commit b5d07b44f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 165 additions and 65 deletions

View File

@ -5717,7 +5717,13 @@ lower_callbc(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
{
JmpInfo *node;
Imm imm;
JitReg edx_hreg = jit_reg_new(JIT_REG_KIND_I32, REG_EDX_IDX);
JitReg rdx_hreg = jit_reg_new(JIT_REG_KIND_I64, REG_RDX_IDX);
JitReg xmm0_f32_hreg = jit_reg_new(JIT_REG_KIND_F32, 0);
JitReg xmm0_f64_hreg = jit_reg_new(JIT_REG_KIND_F64, 0);
JitReg ret_reg = *(jit_insn_opnd(insn, 0));
JitReg func_reg = *(jit_insn_opnd(insn, 2));
JitReg src_reg;
/* Load return_jitted_addr from stack */
x86::Mem m(x86::rbp, cc->jitted_return_address_offset);
@ -5739,6 +5745,29 @@ lower_callbc(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
a.mov(regs_i64[REG_I64_FREE_IDX], imm);
a.mov(m, regs_i64[REG_I64_FREE_IDX]);
a.jmp(regs_i64[jit_reg_no(func_reg)]);
if (ret_reg) {
switch (jit_reg_kind(ret_reg)) {
case JIT_REG_KIND_I32:
src_reg = edx_hreg;
break;
case JIT_REG_KIND_I64:
src_reg = rdx_hreg;
break;
case JIT_REG_KIND_F32:
src_reg = xmm0_f32_hreg;
break;
case JIT_REG_KIND_F64:
src_reg = xmm0_f64_hreg;
break;
default:
bh_assert(0);
return false;
}
if (!lower_mov(cc, a, ret_reg, src_reg))
return false;
}
return true;
fail:
return false;
@ -5747,10 +5776,13 @@ fail:
static bool
lower_returnbc(JitCompContext *cc, x86::Assembler &a, JitInsn *insn)
{
JitReg ecx_hreg = jit_reg_new(JIT_REG_KIND_I32, REG_ECX_IDX);
JitReg rcx_hreg = jit_reg_new(JIT_REG_KIND_I64, REG_RCX_IDX);
JitReg edx_hreg = jit_reg_new(JIT_REG_KIND_I32, REG_EDX_IDX);
JitReg rdx_hreg = jit_reg_new(JIT_REG_KIND_I64, REG_RDX_IDX);
JitReg xmm0_f32_hreg = jit_reg_new(JIT_REG_KIND_F32, 0);
JitReg xmm0_f64_hreg = jit_reg_new(JIT_REG_KIND_F64, 0);
JitReg act_reg = *(jit_insn_opnd(insn, 0));
JitReg ret_reg = *(jit_insn_opnd(insn, 1));
JitReg dst_reg;
int32 act;
CHECK_CONST(act_reg);
@ -5759,25 +5791,25 @@ lower_returnbc(JitCompContext *cc, x86::Assembler &a, JitInsn *insn)
act = jit_cc_get_const_I32(cc, act_reg);
if (ret_reg) {
if (jit_reg_is_kind(I32, ret_reg)) {
if (!lower_mov(cc, a, ecx_hreg, ret_reg))
switch (jit_reg_kind(ret_reg)) {
case JIT_REG_KIND_I32:
dst_reg = edx_hreg;
break;
case JIT_REG_KIND_I64:
dst_reg = rdx_hreg;
break;
case JIT_REG_KIND_F32:
dst_reg = xmm0_f32_hreg;
break;
case JIT_REG_KIND_F64:
dst_reg = xmm0_f64_hreg;
break;
default:
bh_assert(0);
return false;
}
else if (jit_reg_is_kind(I64, ret_reg)) {
if (!lower_mov(cc, a, rcx_hreg, ret_reg))
return false;
}
else if (jit_reg_is_kind(F32, ret_reg)) {
/* TODO */
if (!lower_mov(cc, a, dst_reg, ret_reg))
return false;
}
else if (jit_reg_is_kind(F64, ret_reg)) {
/* TODO */
return false;
}
else {
return false;
}
}
{
@ -6669,7 +6701,12 @@ jit_codegen_init()
/* info->out.ret.ival[0, 1] = rdx */
{
x86::Mem m(x86::rsi, 8);
a.mov(m, x86::rcx);
a.mov(m, x86::rdx);
}
/* info->out.ret.fval[0, 1] = xmm0 */
{
x86::Mem m(x86::rsi, 16);
a.movsd(m, x86::xmm0);
}
/* pop callee-save registers */

View File

@ -315,7 +315,7 @@ fail:
static void
copy_block_arities(JitCompContext *cc, JitReg dst_frame_sp, uint8 *dst_types,
uint32 dst_type_count)
uint32 dst_type_count, JitReg *p_first_res_reg)
{
JitFrame *jit_frame;
uint32 offset_src, offset_dst, i;
@ -335,29 +335,41 @@ copy_block_arities(JitCompContext *cc, JitReg dst_frame_sp, uint8 *dst_types,
case VALUE_TYPE_FUNCREF:
#endif
value = gen_load_i32(jit_frame, offset_src);
GEN_INSN(STI32, value, dst_frame_sp,
NEW_CONST(I32, offset_dst * 4));
if (i == 0 && p_first_res_reg)
*p_first_res_reg = value;
else
GEN_INSN(STI32, value, dst_frame_sp,
NEW_CONST(I32, offset_dst * 4));
offset_src++;
offset_dst++;
break;
case VALUE_TYPE_I64:
value = gen_load_i64(jit_frame, offset_src);
GEN_INSN(STI64, value, dst_frame_sp,
NEW_CONST(I32, offset_dst * 4));
if (i == 0 && p_first_res_reg)
*p_first_res_reg = value;
else
GEN_INSN(STI64, value, dst_frame_sp,
NEW_CONST(I32, offset_dst * 4));
offset_src += 2;
offset_dst += 2;
break;
case VALUE_TYPE_F32:
value = gen_load_f32(jit_frame, offset_src);
GEN_INSN(STF32, value, dst_frame_sp,
NEW_CONST(I32, offset_dst * 4));
if (i == 0 && p_first_res_reg)
*p_first_res_reg = value;
else
GEN_INSN(STF32, value, dst_frame_sp,
NEW_CONST(I32, offset_dst * 4));
offset_src++;
offset_dst++;
break;
case VALUE_TYPE_F64:
value = gen_load_f64(jit_frame, offset_src);
GEN_INSN(STF64, value, dst_frame_sp,
NEW_CONST(I32, offset_dst * 4));
if (i == 0 && p_first_res_reg)
*p_first_res_reg = value;
else
GEN_INSN(STF64, value, dst_frame_sp,
NEW_CONST(I32, offset_dst * 4));
offset_src += 2;
offset_dst += 2;
break;
@ -372,6 +384,7 @@ static void
handle_func_return(JitCompContext *cc, JitBlock *block)
{
JitReg prev_frame, prev_frame_sp;
JitReg ret_reg = 0;
prev_frame = jit_cc_new_reg_ptr(cc);
prev_frame_sp = jit_cc_new_reg_ptr(cc);
@ -387,7 +400,7 @@ handle_func_return(JitCompContext *cc, JitBlock *block)
wasm_get_cell_num(block->result_types, block->result_count);
copy_block_arities(cc, prev_frame_sp, block->result_types,
block->result_count);
block->result_count, &ret_reg);
/* prev_frame->sp += cell_num */
GEN_INSN(ADD, prev_frame_sp, prev_frame_sp,
NEW_CONST(PTR, cell_num * 4));
@ -406,7 +419,7 @@ handle_func_return(JitCompContext *cc, JitBlock *block)
/* fp_reg = prev_frame */
GEN_INSN(MOV, cc->fp_reg, prev_frame);
/* return 0 */
GEN_INSN(RETURNBC, NEW_CONST(I32, JIT_INTERP_ACTION_NORMAL), 0, 0);
GEN_INSN(RETURNBC, NEW_CONST(I32, JIT_INTERP_ACTION_NORMAL), ret_reg, 0);
}
/**
@ -883,7 +896,7 @@ handle_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip)
if (copy_arities) {
/* Dest block is Loop block, copy loop parameters */
copy_block_arities(cc, frame_sp_dst, block_dst->param_types,
block_dst->param_count);
block_dst->param_count, NULL);
}
clear_values(jit_frame);
@ -896,7 +909,7 @@ handle_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip)
if (copy_arities) {
/* Dest block is Block/If/Function block, copy block results */
copy_block_arities(cc, frame_sp_dst, block_dst->result_types,
block_dst->result_count);
block_dst->result_count, NULL);
}
clear_values(jit_frame);

View File

@ -66,7 +66,7 @@ fail:
/* Push results */
static bool
post_return(JitCompContext *cc, const WASMType *func_type)
post_return(JitCompContext *cc, const WASMType *func_type, JitReg first_res)
{
uint32 i, n;
JitReg value;
@ -79,30 +79,54 @@ post_return(JitCompContext *cc, const WASMType *func_type)
case VALUE_TYPE_EXTERNREF:
case VALUE_TYPE_FUNCREF:
#endif
value = jit_cc_new_reg_I32(cc);
GEN_INSN(LDI32, value, cc->fp_reg,
NEW_CONST(I32, offset_of_local(n)));
if (i == 0 && first_res) {
bh_assert(jit_reg_kind(first_res) == JIT_REG_KIND_I32);
value = first_res;
}
else {
value = jit_cc_new_reg_I32(cc);
GEN_INSN(LDI32, value, cc->fp_reg,
NEW_CONST(I32, offset_of_local(n)));
}
PUSH_I32(value);
n++;
break;
case VALUE_TYPE_I64:
value = jit_cc_new_reg_I64(cc);
GEN_INSN(LDI64, value, cc->fp_reg,
NEW_CONST(I32, offset_of_local(n)));
if (i == 0 && first_res) {
bh_assert(jit_reg_kind(first_res) == JIT_REG_KIND_I64);
value = first_res;
}
else {
value = jit_cc_new_reg_I64(cc);
GEN_INSN(LDI64, value, cc->fp_reg,
NEW_CONST(I32, offset_of_local(n)));
}
PUSH_I64(value);
n += 2;
break;
case VALUE_TYPE_F32:
value = jit_cc_new_reg_F32(cc);
GEN_INSN(LDF32, value, cc->fp_reg,
NEW_CONST(I32, offset_of_local(n)));
if (i == 0 && first_res) {
bh_assert(jit_reg_kind(first_res) == JIT_REG_KIND_F32);
value = first_res;
}
else {
value = jit_cc_new_reg_F32(cc);
GEN_INSN(LDF32, value, cc->fp_reg,
NEW_CONST(I32, offset_of_local(n)));
}
PUSH_F32(value);
n++;
break;
case VALUE_TYPE_F64:
value = jit_cc_new_reg_F64(cc);
GEN_INSN(LDF64, value, cc->fp_reg,
NEW_CONST(I32, offset_of_local(n)));
if (i == 0 && first_res) {
bh_assert(jit_reg_kind(first_res) == JIT_REG_KIND_F64);
value = first_res;
}
else {
value = jit_cc_new_reg_F64(cc);
GEN_INSN(LDF64, value, cc->fp_reg,
NEW_CONST(I32, offset_of_local(n)));
}
PUSH_F64(value);
n += 2;
break;
@ -176,6 +200,10 @@ jit_compile_op_call(JitCompContext *cc, uint32 func_idx, bool tail_call)
cc->cmp_reg, NULL)) {
return false;
}
if (!post_return(cc, func_type, 0)) {
goto fail;
}
}
else {
JitReg res = 0;
@ -187,44 +215,28 @@ jit_compile_op_call(JitCompContext *cc, uint32 func_idx, bool tail_call)
case VALUE_TYPE_EXTERNREF:
case VALUE_TYPE_FUNCREF:
#endif
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
res = jit_codegen_get_hreg_by_name("eax");
#else
res = jit_cc_new_reg_I32(cc);
#endif
break;
case VALUE_TYPE_I64:
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
res = jit_codegen_get_hreg_by_name("rax");
#else
res = jit_cc_new_reg_I64(cc);
#endif
break;
case VALUE_TYPE_F32:
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
res = jit_codegen_get_hreg_by_name("xmm0");
#else
res = jit_cc_new_reg_F32(cc);
#endif
break;
case VALUE_TYPE_F64:
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
res = jit_codegen_get_hreg_by_name("xmm0_f64");
#else
res = jit_cc_new_reg_F64(cc);
#endif
break;
default:
bh_assert(0);
break;
goto fail;
}
}
GEN_INSN(CALLBC, res, 0, jitted_code);
}
if (!post_return(cc, func_type)) {
goto fail;
if (!post_return(cc, func_type, res)) {
goto fail;
}
}
/* Clear part of memory regs and table regs as their values

View File

@ -3845,11 +3845,49 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
#else
JitGlobals *jit_globals = jit_compiler_get_jit_globals();
JitInterpSwitchInfo info;
WASMType *func_type = function->u.func->func_type;
uint8 type = func_type->result_count
? func_type->types[func_type->param_count]
: VALUE_TYPE_VOID;
#if WASM_ENABLE_REF_TYPES != 0
if (type == VALUE_TYPE_EXTERNREF || type == VALUE_TYPE_FUNCREF)
type = VALUE_TYPE_I32;
#endif
info.out.ret.last_return_type = type;
info.frame = frame;
frame->jitted_return_addr =
(uint8 *)jit_globals->return_to_interp_from_jitted;
jit_interp_switch_to_jitted(exec_env, &info,
function->u.func->fast_jit_jitted_code);
if (func_type->result_count) {
switch (type) {
case VALUE_TYPE_I32:
*(frame->sp - function->ret_cell_num) =
info.out.ret.ival[0];
break;
case VALUE_TYPE_I64:
*(frame->sp - function->ret_cell_num) =
info.out.ret.ival[0];
*(frame->sp - function->ret_cell_num + 1) =
info.out.ret.ival[1];
break;
case VALUE_TYPE_F32:
*(frame->sp - function->ret_cell_num) =
info.out.ret.fval[0];
break;
case VALUE_TYPE_F64:
*(frame->sp - function->ret_cell_num) =
info.out.ret.fval[0];
*(frame->sp - function->ret_cell_num + 1) =
info.out.ret.fval[1];
break;
default:
bh_assert(0);
break;
}
}
(void)wasm_interp_call_func_bytecode;
#endif
}