Fix fast jit issues (#1208)

Move jit spill cache to the end of interp frame to reduce footprint
Fix codegen float compare issue: the source registers should not be overwritten
Fix the integer overflow check in float to int conversion
Unify the float compare
Fix get_global issue
Wenyong Huang 2022-06-07 15:49:52 +08:00 committed by GitHub
parent 5e9f08fb68
commit ab2e959616
9 changed files with 114 additions and 120 deletions

View File

@ -102,12 +102,6 @@
#define WASM_ENABLE_FAST_JIT_DUMP 0
#endif
#ifndef FAST_JIT_SPILL_CACHE_SIZE
/* The size of fast jit spill cache in cell num, one cell num
occupies 4 bytes */
#define FAST_JIT_SPILL_CACHE_SIZE 32
#endif
#ifndef WASM_ENABLE_WAMR_COMPILER
#define WASM_ENABLE_WAMR_COMPILER 0
#endif

View File

@ -82,6 +82,9 @@ x86::Gp regs_i64[] = {
x86::r12, x86::r13, x86::r14, x86::r15,
};
#define REG_F32_FREE_IDX 15
#define REG_F64_FREE_IDX 15
x86::Xmm regs_float[] = {
x86::xmm0,
x86::xmm1,
@ -349,6 +352,8 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a,
bool fp_cmp = cc->last_cmp_on_fp;
bh_assert(!fp_cmp || (fp_cmp && (op == GES)));
switch (op) {
case EQ:
{
@ -362,60 +367,52 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a,
}
case GTS:
{
if (fp_cmp) {
a.ja(imm);
}
else {
a.jg(imm);
}
a.jg(imm);
break;
}
case LES:
{
if (fp_cmp) {
a.jnb(imm);
}
else {
a.jng(imm);
}
a.jng(imm);
break;
}
case GES:
{
if (fp_cmp) {
a.jnb(imm);
}
else {
if (fp_cmp)
a.jae(imm);
else
a.jnl(imm);
}
break;
}
case LTS:
{
if (fp_cmp) {
a.ja(imm);
}
else {
a.jl(imm);
}
a.jl(imm);
break;
}
case GTU:
{
a.ja(imm);
break;
}
case LEU:
{
a.jna(imm);
break;
}
case GEU:
{
a.jnb(imm);
break;
}
case LTU:
{
a.jb(imm);
break;
}
default:
{
bh_assert(0);
break;
}
}
if (r2) {
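
The compare-and-jump lowering above keeps only `ja`/`jae` for float conditions because `comiss`/`comisd` report their result through CF and ZF and flag NaN operands as unordered (CF = ZF = 1), so neither jump is taken and the comparison correctly evaluates to false. A minimal flag model of that reasoning (this sketch is editorial, not part of the patch):

#include <assert.h>
#include <math.h>
#include <stdbool.h>

typedef struct { bool cf, zf; } CmpFlags;

/* Model of how COMISS sets the flags consumed by ja/jae. */
static CmpFlags comiss_model(float lhs, float rhs)
{
    CmpFlags f;
    if (isnan(lhs) || isnan(rhs)) { f.cf = true;  f.zf = true;  } /* unordered */
    else if (lhs > rhs)           { f.cf = false; f.zf = false; }
    else if (lhs == rhs)          { f.cf = false; f.zf = true;  }
    else                          { f.cf = true;  f.zf = false; } /* lhs < rhs */
    return f;
}

static bool ja_taken(CmpFlags f)  { return !f.cf && !f.zf; } /* fp GTS */
static bool jae_taken(CmpFlags f) { return !f.cf; }          /* fp GES */

int main(void)
{
    CmpFlags gt = comiss_model(2.0f, 1.0f);
    CmpFlags un = comiss_model(nanf(""), 1.0f);
    assert(ja_taken(gt) && jae_taken(gt));
    assert(!ja_taken(un) && !jae_taken(un)); /* NaN: branch not taken */
    return 0;
}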
@ -761,10 +758,17 @@ static bool
mov_imm_to_m(x86::Assembler &a, x86::Mem &m_dst, Imm imm_src, uint32 bytes_dst)
{
if (bytes_dst == 8) {
/* As there is no instruction `MOV m64, imm64`, we use
two instructions to implement it */
a.mov(regs_i64[REG_I64_FREE_IDX], imm_src);
a.mov(m_dst, regs_i64[REG_I64_FREE_IDX]);
int64 value = imm_src.value();
if (value >= INT32_MIN && value <= INT32_MAX) {
imm_src.setValue((int32)value);
a.mov(m_dst, imm_src);
}
else {
/* There is no instruction `MOV m64, imm64`, so we use
two instructions to implement it */
a.mov(regs_i64[REG_I64_FREE_IDX], imm_src);
a.mov(m_dst, regs_i64[REG_I64_FREE_IDX]);
}
}
else
a.mov(m_dst, imm_src);
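
The rewritten 8-byte path above avoids the two-instruction sequence when it can: x86-64 has no `MOV m64, imm64`, but `MOV m64, imm32` sign-extends its immediate, so a single store suffices whenever the value survives a round trip through int32. A small sketch of that decision (editorial, names are illustrative):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* True when the 64-bit immediate can be stored with one
 * `MOV m64, imm32` (sign-extended by the CPU); otherwise it must be
 * moved into a scratch GPR first and stored from there. */
static bool fits_sign_extended_imm32(int64_t value)
{
    return value >= INT32_MIN && value <= INT32_MAX;
}

int main(void)
{
    assert(fits_sign_extended_imm32(-1));             /* one instruction */
    assert(!fits_sign_extended_imm32(0x100000000LL)); /* reg + store     */
    return 0;
}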
@ -4220,17 +4224,8 @@ static bool
cmp_imm_r_to_r_f32(x86::Assembler &a, int32 reg_no_dst, float data1_src,
int32 reg_no2_src)
{
const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
/* xmm -> m128 */
x86::Mem cache = x86::xmmword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
offsetof(WASMExecEnv, jit_cache));
a.movups(cache, regs_float[reg_no2_src]);
/* imm -> gp -> xmm */
mov_imm_to_r_f32(a, reg_no2_src, data1_src);
/* comiss xmm m32 */
a.comiss(regs_float[reg_no2_src], cache);
mov_imm_to_r_f32(a, REG_F32_FREE_IDX, data1_src);
a.comiss(regs_float[REG_F32_FREE_IDX], regs_float[reg_no2_src]);
return true;
}
@ -4249,15 +4244,8 @@ static bool
cmp_r_imm_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
float data2_src)
{
const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
/* imm -> m32 */
x86::Mem cache = x86::dword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
offsetof(WASMExecEnv, jit_cache));
Imm imm(*(uint32 *)&data2_src);
mov_imm_to_m(a, cache, imm, 4);
/* comiss xmm m32 */
a.comiss(regs_float[reg_no1_src], cache);
mov_imm_to_r_f32(a, REG_F32_FREE_IDX, data2_src);
a.comiss(regs_float[reg_no1_src], regs_float[REG_F32_FREE_IDX]);
return true;
}
@ -4315,17 +4303,8 @@ static bool
cmp_imm_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, double data1_src,
int32 reg_no2_src)
{
const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
/* xmm -> m128 */
x86::Mem cache = x86::qword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
offsetof(WASMExecEnv, jit_cache));
a.movupd(cache, regs_float[reg_no2_src]);
/* imm -> gp -> xmm */
mov_imm_to_r_f64(a, reg_no2_src, data1_src);
/* comiss xmm m64 */
a.comisd(regs_float[reg_no2_src], cache);
mov_imm_to_r_f64(a, REG_F64_FREE_IDX, data1_src);
a.comisd(regs_float[REG_F64_FREE_IDX], regs_float[reg_no2_src]);
return true;
}
@ -4344,15 +4323,8 @@ static bool
cmp_r_imm_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
double data2_src)
{
const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
/* imm -> m64 */
x86::Mem cache = x86::qword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
offsetof(WASMExecEnv, jit_cache));
Imm imm(*(uint64 *)&data2_src);
mov_imm_to_m(a, cache, imm, 8);
/* comisd xmm m64 */
a.comisd(regs_float[reg_no1_src], cache);
mov_imm_to_r_f64(a, REG_F64_FREE_IDX, data2_src);
a.comisd(regs_float[reg_no1_src], regs_float[REG_F64_FREE_IDX]);
return true;
}
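
All four float-compare helpers above share the same fix: the old sequence spilled the source xmm register into `exec_env->jit_cache` and then loaded the immediate into that same source register, destroying a value that later instructions might still need; the new sequence materializes the immediate in the reserved scratch register (REG_F32_FREE_IDX / REG_F64_FREE_IDX, xmm15) and compares against the untouched source. A toy model of the difference (editorial sketch in plain C, not the generated code):

#include <assert.h>

#define XMM_FREE 15 /* stands in for REG_F32_FREE_IDX */

/* Old sequence: spill src to memory, overwrite src with the immediate,
 * compare against the spilled copy -- src is clobbered. */
static int cmp_imm_r_old(float xmm[16], int src, float imm)
{
    float cache = xmm[src];
    xmm[src] = imm;
    return xmm[src] > cache;
}

/* New sequence: load the immediate into the reserved register and
 * compare directly -- src keeps its value. */
static int cmp_imm_r_new(float xmm[16], int src, float imm)
{
    xmm[XMM_FREE] = imm;
    return xmm[XMM_FREE] > xmm[src];
}

int main(void)
{
    float regs[16] = { 0 };
    regs[2] = 3.5f;
    (void)cmp_imm_r_old(regs, 2, 1.0f);
    assert(regs[2] == 1.0f);  /* source register destroyed */

    regs[2] = 3.5f;
    (void)cmp_imm_r_new(regs, 2, 1.0f);
    assert(regs[2] == 3.5f);  /* source register preserved */
    return 0;
}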
@ -5071,13 +5043,19 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no,
+ a.code()->sectionById(0)->buffer().size();
bool fp_cmp = cc->last_cmp_on_fp;
bh_assert(!fp_cmp || (fp_cmp && (op == GTS || op == GES)));
switch (op) {
case EQ:
{
a.je(target);
break;
}
case NE:
{
a.jne(target);
break;
}
case GTS:
{
if (fp_cmp) {
@ -5090,18 +5068,13 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no,
}
case LES:
{
if (fp_cmp) {
a.jnb(target);
}
else {
a.jng(target);
}
a.jng(target);
break;
}
case GES:
{
if (fp_cmp) {
a.jnb(target);
a.jae(target);
}
else {
a.jnl(target);
@ -5110,29 +5083,34 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no,
}
case LTS:
{
if (fp_cmp) {
a.ja(target);
}
else {
a.jl(target);
}
a.jl(target);
break;
}
case GTU:
{
a.ja(target);
break;
}
case LEU:
{
a.jna(target);
break;
}
case GEU:
{
a.jae(target);
break;
}
case LTU:
{
a.jb(target);
break;
}
default:
{
bh_assert(0);
break;
}
}
/* The offset written by asmjit is always 0, we patch it again */
@ -5174,10 +5152,13 @@ lower_select(JitCompContext *cc, x86::Assembler &a, COND_OP op, JitReg r0,
CHECK_NCONST(r1);
CHECK_KIND(r1, JIT_REG_KIND_I32);
if (r0 == r3 && r0 != r2) {
if (r0 == r3 && r0 != r2 && !cc->last_cmp_on_fp) {
JitReg r_tmp;
/* Exchange r2, r3*/
/* For i32/i64, exchange r2 and r3 to make r0 equal to r2,
which reduces the number of instructions emitted.
For f32/f64 comparison, do not exchange them, as the
result of a comparison involving NaN may differ. */
r_tmp = r2;
r2 = r3;
r3 = r_tmp;
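
The extra `!cc->last_cmp_on_fp` condition prevents the swap-and-invert rewrite from being applied to float selects. For integers the rewrite is an identity, but once NaN is possible the inverted ordered comparison is no longer the negation of the original one, so the select would pick the wrong arm. A small demonstration of the hazard (editorial sketch):

#include <assert.h>
#include <math.h>

/* r0 = (a < b) ? x : y, and the "swapped" form that exchanges the two
 * sources and inverts the condition. */
static int select_lt(float a, float b, int x, int y)      { return (a < b)  ? x : y; }
static int select_swapped(float a, float b, int x, int y) { return (a >= b) ? y : x; }

int main(void)
{
    float nan = nanf("");
    /* Identical for ordered operands... */
    assert(select_lt(1.0f, 2.0f, 10, 20) == select_swapped(1.0f, 2.0f, 10, 20));
    /* ...but with NaN both ordered comparisons are false, so the two
     * forms pick different arms. */
    assert(select_lt(nan, 2.0f, 10, 20) == 20);
    assert(select_swapped(nan, 2.0f, 10, 20) == 10);
    return 0;
}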
@ -5258,7 +5239,8 @@ lower_branch(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
label_dst = jit_reg_no(r1);
if (label_dst < (int32)jit_cc_label_num(cc) - 1 && is_last_insn
&& label_is_neighboring(cc, label_src, label_dst)) {
&& label_is_neighboring(cc, label_src, label_dst)
&& !cc->last_cmp_on_fp) {
JitReg r_tmp;
r_tmp = r1;
@ -6555,20 +6537,20 @@ static uint8 hreg_info_F32[3][16] = {
{ 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1 },
{ 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_native */
1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_native */
{ 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */
1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
};
/* System V AMD64 ABI Calling Convention. [XYZ]MM0-7 */
static uint8 hreg_info_F64[3][16] = {
/* xmm0 ~ xmm15 */
{ 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0 },
0, 0, 0, 0, 0, 0, 0, 1 },
{ 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_native */
1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_native */
{ 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */
1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
};
static const JitHardRegInfo hreg_info = {

View File

@ -187,7 +187,7 @@ jit_compile_op_compare_float_point(JitCompContext *cc, FloatCond cond,
case FLOAT_LT:
{
GEN_INSN(CMP, cc->cmp_reg, rhs, lhs);
GEN_INSN(SELECTLTS, res, cc->cmp_reg, const_one, const_zero);
GEN_INSN(SELECTGTS, res, cc->cmp_reg, const_one, const_zero);
break;
}
case FLOAT_GT:
@ -199,7 +199,7 @@ jit_compile_op_compare_float_point(JitCompContext *cc, FloatCond cond,
case FLOAT_LE:
{
GEN_INSN(CMP, cc->cmp_reg, rhs, lhs);
GEN_INSN(SELECTLES, res, cc->cmp_reg, const_one, const_zero);
GEN_INSN(SELECTGES, res, cc->cmp_reg, const_one, const_zero);
break;
}
case FLOAT_GE:
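
The two changes above are part of the unified float compare: by swapping the CMP operands, FLOAT_LT and FLOAT_LE are expressed with the "greater" selects (SELECTGTS / SELECTGES), so the backend only ever has to lower GTS/GES for float conditions, and NaN still yields 0 as wasm requires. A sketch of the rewritten lowering, assuming SELECTGTS/SELECTGES pick the first value when the comparison holds:

#include <assert.h>
#include <math.h>

/* CMP cmp_reg, rhs, lhs  +  SELECTGTS res, cmp_reg, 1, 0 */
static int f32_lt(float lhs, float rhs) { return (rhs > lhs)  ? 1 : 0; }
/* CMP cmp_reg, rhs, lhs  +  SELECTGES res, cmp_reg, 1, 0 */
static int f32_le(float lhs, float rhs) { return (rhs >= lhs) ? 1 : 0; }

int main(void)
{
    float nan = nanf("");
    assert(f32_lt(1.0f, 2.0f) == 1 && f32_lt(2.0f, 1.0f) == 0);
    assert(f32_le(2.0f, 2.0f) == 1);
    assert(f32_lt(nan, 1.0f) == 0 && f32_le(1.0f, nan) == 0); /* NaN -> 0 */
    return 0;
}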

View File

@ -63,8 +63,8 @@ jit_compile_op_i32_trunc_f32(JitCompContext *cc, bool sign, bool saturating)
}
/* If value is out of integer range, throw exception */
GEN_INSN(CMP, cc->cmp_reg, value, min_valid_float);
if (!jit_emit_exception(cc, EXCE_INTEGER_OVERFLOW, JIT_OP_BLES,
GEN_INSN(CMP, cc->cmp_reg, min_valid_float, value);
if (!jit_emit_exception(cc, EXCE_INTEGER_OVERFLOW, JIT_OP_BGES,
cc->cmp_reg, NULL)) {
goto fail;
}
@ -123,8 +123,8 @@ jit_compile_op_i32_trunc_f64(JitCompContext *cc, bool sign, bool saturating)
}
/* If value is out of integer range, throw exception */
GEN_INSN(CMP, cc->cmp_reg, value, min_valid_double);
if (!jit_emit_exception(cc, EXCE_INTEGER_OVERFLOW, JIT_OP_BLES,
GEN_INSN(CMP, cc->cmp_reg, min_valid_double, value);
if (!jit_emit_exception(cc, EXCE_INTEGER_OVERFLOW, JIT_OP_BGES,
cc->cmp_reg, NULL)) {
goto fail;
}
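
Both truncation checks keep the same trap predicate (`value <= min_valid_float`) but flip the CMP operands so the exception branch can be emitted as BGES, one of the two compare forms the unified float lowering supports. A sketch of the rewritten lower-bound check for signed i32 truncation of f32 (the bound constant here is an assumption, not taken from the patch):

#include <assert.h>
#include <stdbool.h>

/* Trap when value <= min_valid_float, written as
 * "min_valid_float >= value" so the branch becomes a BGES.
 * NaN falls through this check; the runtime is assumed to reject it
 * earlier with a separate invalid-conversion check. */
static bool i32_trunc_f32_traps_low(float value)
{
    const float min_valid_float = -2147483904.0f; /* assumed bound */
    return min_valid_float >= value;
}

int main(void)
{
    assert(!i32_trunc_f32_traps_low(-100.5f)); /* in range: no trap  */
    assert(i32_trunc_f32_traps_low(-3e9f));    /* out of range: trap */
    return 0;
}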

View File

@ -165,7 +165,16 @@ fail:
static uint8
get_global_type(const WASMModule *module, uint32 global_idx)
{
return module->globals[global_idx].type;
if (global_idx < module->import_global_count) {
const WASMGlobalImport *import_global =
&((module->import_globals + global_idx)->u.global);
return import_global->type;
}
else {
const WASMGlobal *global =
module->globals + (global_idx - module->import_global_count);
return global->type;
}
}
static uint32
@ -177,7 +186,8 @@ get_global_data_offset(const WASMModule *module, uint32 global_idx)
return import_global->data_offset;
}
else {
const WASMGlobal *global = module->globals + global_idx;
const WASMGlobal *global =
module->globals + (global_idx - module->import_global_count);
return global->data_offset;
}
}
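
Both lookups now respect the wasm global index space, where imported globals come first and module-defined globals follow, so an index must be rebased by `import_global_count` before it can index `module->globals`. A self-contained model of the fixed lookup (simplified, hypothetical structs, not WAMR's real ones):

#include <assert.h>
#include <stdint.h>

typedef struct { uint8_t type; } ImportGlobal;
typedef struct { uint8_t type; } DefinedGlobal;
typedef struct {
    uint32_t import_global_count;
    const ImportGlobal *import_globals;
    const DefinedGlobal *globals;
} Module;

static uint8_t get_global_type(const Module *m, uint32_t global_idx)
{
    if (global_idx < m->import_global_count)
        return m->import_globals[global_idx].type;
    /* defined globals are indexed after all imports */
    return m->globals[global_idx - m->import_global_count].type;
}

int main(void)
{
    ImportGlobal imports[1] = { { 0x7F /* i32 */ } };
    DefinedGlobal defined[1] = { { 0x7C /* f64 */ } };
    Module m = { 1, imports, defined };
    assert(get_global_type(&m, 0) == 0x7F); /* the import          */
    assert(get_global_type(&m, 1) == 0x7C); /* first defined global */
    return 0;
}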

View File

@ -332,6 +332,12 @@ jit_pass_dump(JitCompContext *cc)
const char *pass_name =
pass_no > 0 ? jit_compiler_get_pass_name(passes[pass_no - 1]) : "NULL";
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
if (!strcmp(pass_name, "lower_cg"))
/* Ignore lower codegen pass as it does nothing in x86-64 */
return true;
#endif
os_printf("JIT.COMPILER.DUMP: PASS_NO=%d PREV_PASS=%s\n\n", pass_no,
pass_name);
jit_dump_cc(cc);

View File

@ -755,9 +755,11 @@ init_func_translation(JitCompContext *cc)
cc->jit_frame = jit_frame;
cc->cur_basic_block = jit_cc_entry_basic_block(cc);
cc->total_frame_size = wasm_interp_interp_frame_size(total_cell_num);
cc->spill_cache_offset = (uint32)offsetof(WASMInterpFrame, spill_cache);
cc->spill_cache_size = (uint32)sizeof(uint32) * FAST_JIT_SPILL_CACHE_SIZE;
cc->spill_cache_offset = wasm_interp_interp_frame_size(total_cell_num);
/* Set spill cache size according to max local cell num, max stack cell
num and virtual fixed register num */
cc->spill_cache_size = (max_locals + max_stacks) * 4 + sizeof(void *) * 4;
cc->total_frame_size = cc->spill_cache_offset + cc->spill_cache_size;
cc->jitted_return_address_offset =
offsetof(WASMInterpFrame, jitted_return_addr);
cc->cur_basic_block = jit_cc_entry_basic_block(cc);
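
With this change the spill cache no longer occupies a fixed-size array inside WASMInterpFrame; it is appended after the variable-size interpreter frame, so its offset equals the frame size and the total allocation grows by a cache sized from the function's own limits. A sketch of the layout arithmetic (editorial, helper name is illustrative):

#include <stdint.h>
#include <stdio.h>

/* spill cache offset = interp frame size;
 * spill cache size   = one 4-byte cell per local/stack slot
 *                      + 4 pointer-sized slots for virtual fixed registers;
 * total frame size   = offset + size. */
static uint32_t frame_layout(uint32_t interp_frame_size,
                             uint32_t max_locals, uint32_t max_stacks,
                             uint32_t *spill_cache_offset,
                             uint32_t *spill_cache_size)
{
    *spill_cache_offset = interp_frame_size;
    *spill_cache_size = (max_locals + max_stacks) * 4
                        + (uint32_t)sizeof(void *) * 4;
    return *spill_cache_offset + *spill_cache_size;
}

int main(void)
{
    uint32_t off = 0, size = 0;
    uint32_t total = frame_layout(256, 8, 16, &off, &size);
    printf("offset=%u size=%u total=%u\n",
           (unsigned)off, (unsigned)size, (unsigned)total);
    return 0;
}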
@ -838,7 +840,7 @@ free_block_memory(JitBlock *block)
jit_free(block);
}
static JitBlock *
static JitBasicBlock *
create_func_block(JitCompContext *cc)
{
JitBlock *jit_block;
@ -883,7 +885,7 @@ create_func_block(JitCompContext *cc)
jit_block_stack_push(&cc->block_stack, jit_block);
cc->cur_basic_block = jit_block->basic_block_entry;
return jit_block;
return jit_block->basic_block_entry;
fail:
free_block_memory(jit_block);
@ -2086,13 +2088,13 @@ JitBasicBlock *
jit_frontend_translate_func(JitCompContext *cc)
{
JitFrame *jit_frame;
JitBlock *jit_block;
JitBasicBlock *basic_block_entry;
if (!(jit_frame = init_func_translation(cc))) {
return NULL;
}
if (!(jit_block = create_func_block(cc))) {
if (!(basic_block_entry = create_func_block(cc))) {
return NULL;
}
@ -2100,7 +2102,7 @@ jit_frontend_translate_func(JitCompContext *cc)
return NULL;
}
return jit_block->basic_block_entry;
return basic_block_entry;
}
#if 0

View File

@ -28,7 +28,6 @@ typedef struct WASMInterpFrame {
#if WASM_ENABLE_FAST_JIT != 0
uint8 *jitted_return_addr;
uint32 spill_cache[FAST_JIT_SPILL_CACHE_SIZE];
#endif
#if WASM_ENABLE_PERF_PROFILING != 0
@ -52,12 +51,13 @@ typedef struct WASMInterpFrame {
WASMBranchBlock *csp_boundary;
WASMBranchBlock *csp;
/* Frame data, the layout is:
lp: param_cell_count + local_cell_count
sp_bottom to sp_boundary: stack of data
csp_bottom to csp_boundary: stack of block
ref to frame end: data types of local variables and stack data
*/
/**
* Frame data, the layout is:
* lp: parameters and local variables
* sp_bottom to sp_boundary: wasm operand stack
* csp_bottom to csp_boundary: wasm label stack
* jit spill cache: only available for fast jit
*/
uint32 lp[1];
#endif
} WASMInterpFrame;

View File

@ -1509,8 +1509,8 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst, uint32 stack_size,
if (stack_size == 0)
stack_size = DEFAULT_WASM_STACK_SIZE;
#if WASM_ENABLE_SPEC_TEST != 0
if (stack_size < 100 * 1024)
stack_size = 100 * 1024;
if (stack_size < 64 * 1024)
stack_size = 64 * 1024;
#endif
module_inst->default_wasm_stack_size = stack_size;