diff --git a/core/config.h b/core/config.h index ac11affdb..98ff51cb2 100644 --- a/core/config.h +++ b/core/config.h @@ -102,12 +102,6 @@ #define WASM_ENABLE_FAST_JIT_DUMP 0 #endif -#ifndef FAST_JIT_SPILL_CACHE_SIZE -/* The size of fast jit spill cache in cell num, one cell num - occpuies 4 bytes */ -#define FAST_JIT_SPILL_CACHE_SIZE 32 -#endif - #ifndef WASM_ENABLE_WAMR_COMPILER #define WASM_ENABLE_WAMR_COMPILER 0 #endif diff --git a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp index 0e257d22f..f822fb099 100644 --- a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp +++ b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp @@ -82,6 +82,9 @@ x86::Gp regs_i64[] = { x86::r12, x86::r13, x86::r14, x86::r15, }; +#define REG_F32_FREE_IDX 15 +#define REG_F64_FREE_IDX 15 + x86::Xmm regs_float[] = { x86::xmm0, x86::xmm1, @@ -349,6 +352,8 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a, bool fp_cmp = cc->last_cmp_on_fp; + bh_assert(!fp_cmp || (fp_cmp && (op == GES))); + switch (op) { case EQ: { @@ -362,60 +367,52 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a, } case GTS: { - if (fp_cmp) { - a.ja(imm); - } - else { - a.jg(imm); - } + a.jg(imm); break; } case LES: { - if (fp_cmp) { - a.jnb(imm); - } - else { - a.jng(imm); - } + a.jng(imm); break; } case GES: { - if (fp_cmp) { - a.jnb(imm); - } - else { - + if (fp_cmp) + a.jae(imm); + else a.jnl(imm); - } break; } case LTS: { - if (fp_cmp) { - a.ja(imm); - } - else { - a.jl(imm); - } + a.jl(imm); break; } case GTU: + { a.ja(imm); break; + } case LEU: + { a.jna(imm); break; + } case GEU: + { a.jnb(imm); break; + } case LTU: + { a.jb(imm); break; + } default: + { bh_assert(0); break; + } } if (r2) { @@ -761,10 +758,17 @@ static bool mov_imm_to_m(x86::Assembler &a, x86::Mem &m_dst, Imm imm_src, uint32 bytes_dst) { if (bytes_dst == 8) { - /* As there is no instruction `MOV m64, imm64`, we use - two instructions to implement it */ - a.mov(regs_i64[REG_I64_FREE_IDX], imm_src); - a.mov(m_dst, regs_i64[REG_I64_FREE_IDX]); + int64 value = imm_src.value(); + if (value >= INT32_MIN && value <= INT32_MAX) { + imm_src.setValue((int32)value); + a.mov(m_dst, imm_src); + } + else { + /* There is no instruction `MOV m64, imm64`, we use + two instructions to implement it */ + a.mov(regs_i64[REG_I64_FREE_IDX], imm_src); + a.mov(m_dst, regs_i64[REG_I64_FREE_IDX]); + } } else a.mov(m_dst, imm_src); @@ -4220,17 +4224,8 @@ static bool cmp_imm_r_to_r_f32(x86::Assembler &a, int32 reg_no_dst, float data1_src, int32 reg_no2_src) { - const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info(); - /* xmm -> m128 */ - x86::Mem cache = x86::xmmword_ptr(regs_i64[hreg_info->exec_env_hreg_index], - offsetof(WASMExecEnv, jit_cache)); - a.movups(cache, regs_float[reg_no2_src]); - - /* imm -> gp -> xmm */ - mov_imm_to_r_f32(a, reg_no2_src, data1_src); - - /* comiss xmm m32 */ - a.comiss(regs_float[reg_no2_src], cache); + mov_imm_to_r_f32(a, REG_F32_FREE_IDX, data1_src); + a.comiss(regs_float[REG_F32_FREE_IDX], regs_float[reg_no2_src]); return true; } @@ -4249,15 +4244,8 @@ static bool cmp_r_imm_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, float data2_src) { - const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info(); - /* imm -> m32 */ - x86::Mem cache = x86::dword_ptr(regs_i64[hreg_info->exec_env_hreg_index], - offsetof(WASMExecEnv, jit_cache)); - Imm imm(*(uint32 *)&data2_src); - mov_imm_to_m(a, cache, imm, 4); - - /* comiss xmm m32 */ - a.comiss(regs_float[reg_no1_src], cache); + mov_imm_to_r_f32(a, REG_F32_FREE_IDX, data2_src); + a.comiss(regs_float[reg_no1_src], regs_float[REG_F32_FREE_IDX]); return true; } @@ -4315,17 +4303,8 @@ static bool cmp_imm_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, double data1_src, int32 reg_no2_src) { - const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info(); - /* xmm -> m128 */ - x86::Mem cache = x86::qword_ptr(regs_i64[hreg_info->exec_env_hreg_index], - offsetof(WASMExecEnv, jit_cache)); - a.movupd(cache, regs_float[reg_no2_src]); - - /* imm -> gp -> xmm */ - mov_imm_to_r_f64(a, reg_no2_src, data1_src); - - /* comiss xmm m64 */ - a.comisd(regs_float[reg_no2_src], cache); + mov_imm_to_r_f64(a, REG_F64_FREE_IDX, data1_src); + a.comisd(regs_float[REG_F64_FREE_IDX], regs_float[reg_no2_src]); return true; } @@ -4344,15 +4323,8 @@ static bool cmp_r_imm_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, double data2_src) { - const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info(); - /* imm -> m64 */ - x86::Mem cache = x86::qword_ptr(regs_i64[hreg_info->exec_env_hreg_index], - offsetof(WASMExecEnv, jit_cache)); - Imm imm(*(uint64 *)&data2_src); - mov_imm_to_m(a, cache, imm, 8); - - /* comisd xmm m64 */ - a.comisd(regs_float[reg_no1_src], cache); + mov_imm_to_r_f64(a, REG_F64_FREE_IDX, data2_src); + a.comisd(regs_float[reg_no1_src], regs_float[REG_F64_FREE_IDX]); return true; } @@ -5071,13 +5043,19 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no, + a.code()->sectionById(0)->buffer().size(); bool fp_cmp = cc->last_cmp_on_fp; + bh_assert(!fp_cmp || (fp_cmp && (op == GTS || op == GES))); + switch (op) { case EQ: + { a.je(target); break; + } case NE: + { a.jne(target); break; + } case GTS: { if (fp_cmp) { @@ -5090,18 +5068,13 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no, } case LES: { - if (fp_cmp) { - a.jnb(target); - } - else { - a.jng(target); - } + a.jng(target); break; } case GES: { if (fp_cmp) { - a.jnb(target); + a.jae(target); } else { a.jnl(target); @@ -5110,29 +5083,34 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no, } case LTS: { - if (fp_cmp) { - a.ja(target); - } - else { - a.jl(target); - } + a.jl(target); break; } case GTU: + { a.ja(target); break; + } case LEU: + { a.jna(target); break; + } case GEU: + { a.jae(target); break; + } case LTU: + { a.jb(target); break; + } default: + { bh_assert(0); break; + } } /* The offset written by asmjit is always 0, we patch it again */ @@ -5174,10 +5152,13 @@ lower_select(JitCompContext *cc, x86::Assembler &a, COND_OP op, JitReg r0, CHECK_NCONST(r1); CHECK_KIND(r1, JIT_REG_KIND_I32); - if (r0 == r3 && r0 != r2) { + if (r0 == r3 && r0 != r2 && !cc->last_cmp_on_fp) { JitReg r_tmp; - /* Exchange r2, r3*/ + /* For i32/i64, exchange r2 and r3 to make r0 equal to r2, + so as to decrease possible execution instructions. + For f32/f64 comparison, should not change the order as + the result of comparison with NaN may be different. */ r_tmp = r2; r2 = r3; r3 = r_tmp; @@ -5258,7 +5239,8 @@ lower_branch(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list, label_dst = jit_reg_no(r1); if (label_dst < (int32)jit_cc_label_num(cc) - 1 && is_last_insn - && label_is_neighboring(cc, label_src, label_dst)) { + && label_is_neighboring(cc, label_src, label_dst) + && !cc->last_cmp_on_fp) { JitReg r_tmp; r_tmp = r1; @@ -6555,20 +6537,20 @@ static uint8 hreg_info_F32[3][16] = { { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_native */ + 1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_native */ { 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */ + 1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */ }; /* System V AMD64 ABI Calling Conversion. [XYZ]MM0-7 */ static uint8 hreg_info_F64[3][16] = { /* xmm0 ~ xmm15 */ { 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0 }, + 0, 0, 0, 0, 0, 0, 0, 1 }, { 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_native */ + 1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_native */ { 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */ + 1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */ }; static const JitHardRegInfo hreg_info = { diff --git a/core/iwasm/fast-jit/fe/jit_emit_compare.c b/core/iwasm/fast-jit/fe/jit_emit_compare.c index d86bd0485..002146943 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_compare.c +++ b/core/iwasm/fast-jit/fe/jit_emit_compare.c @@ -187,7 +187,7 @@ jit_compile_op_compare_float_point(JitCompContext *cc, FloatCond cond, case FLOAT_LT: { GEN_INSN(CMP, cc->cmp_reg, rhs, lhs); - GEN_INSN(SELECTLTS, res, cc->cmp_reg, const_one, const_zero); + GEN_INSN(SELECTGTS, res, cc->cmp_reg, const_one, const_zero); break; } case FLOAT_GT: @@ -199,7 +199,7 @@ jit_compile_op_compare_float_point(JitCompContext *cc, FloatCond cond, case FLOAT_LE: { GEN_INSN(CMP, cc->cmp_reg, rhs, lhs); - GEN_INSN(SELECTLES, res, cc->cmp_reg, const_one, const_zero); + GEN_INSN(SELECTGES, res, cc->cmp_reg, const_one, const_zero); break; } case FLOAT_GE: diff --git a/core/iwasm/fast-jit/fe/jit_emit_conversion.c b/core/iwasm/fast-jit/fe/jit_emit_conversion.c index 5df76dc17..eada3c302 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_conversion.c +++ b/core/iwasm/fast-jit/fe/jit_emit_conversion.c @@ -63,8 +63,8 @@ jit_compile_op_i32_trunc_f32(JitCompContext *cc, bool sign, bool saturating) } /* If value is out of integer range, throw exception */ - GEN_INSN(CMP, cc->cmp_reg, value, min_valid_float); - if (!jit_emit_exception(cc, EXCE_INTEGER_OVERFLOW, JIT_OP_BLES, + GEN_INSN(CMP, cc->cmp_reg, min_valid_float, value); + if (!jit_emit_exception(cc, EXCE_INTEGER_OVERFLOW, JIT_OP_BGES, cc->cmp_reg, NULL)) { goto fail; } @@ -123,8 +123,8 @@ jit_compile_op_i32_trunc_f64(JitCompContext *cc, bool sign, bool saturating) } /* If value is out of integer range, throw exception */ - GEN_INSN(CMP, cc->cmp_reg, value, min_valid_double); - if (!jit_emit_exception(cc, EXCE_INTEGER_OVERFLOW, JIT_OP_BLES, + GEN_INSN(CMP, cc->cmp_reg, min_valid_double, value); + if (!jit_emit_exception(cc, EXCE_INTEGER_OVERFLOW, JIT_OP_BGES, cc->cmp_reg, NULL)) { goto fail; } diff --git a/core/iwasm/fast-jit/fe/jit_emit_variable.c b/core/iwasm/fast-jit/fe/jit_emit_variable.c index 32b8dfca2..ba039d387 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_variable.c +++ b/core/iwasm/fast-jit/fe/jit_emit_variable.c @@ -165,7 +165,16 @@ fail: static uint8 get_global_type(const WASMModule *module, uint32 global_idx) { - return module->globals[global_idx].type; + if (global_idx < module->import_global_count) { + const WASMGlobalImport *import_global = + &((module->import_globals + global_idx)->u.global); + return import_global->type; + } + else { + const WASMGlobal *global = + module->globals + (global_idx - module->import_global_count); + return global->type; + } } static uint32 @@ -177,7 +186,8 @@ get_global_data_offset(const WASMModule *module, uint32 global_idx) return import_global->data_offset; } else { - const WASMGlobal *global = module->globals + global_idx; + const WASMGlobal *global = + module->globals + (global_idx - module->import_global_count); return global->data_offset; } } diff --git a/core/iwasm/fast-jit/jit_dump.c b/core/iwasm/fast-jit/jit_dump.c index 8a477d144..4cbfcbb0c 100644 --- a/core/iwasm/fast-jit/jit_dump.c +++ b/core/iwasm/fast-jit/jit_dump.c @@ -332,6 +332,12 @@ jit_pass_dump(JitCompContext *cc) const char *pass_name = pass_no > 0 ? jit_compiler_get_pass_name(passes[pass_no - 1]) : "NULL"; +#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) + if (!strcmp(pass_name, "lower_cg")) + /* Ignore lower codegen pass as it does nothing in x86-64 */ + return true; +#endif + os_printf("JIT.COMPILER.DUMP: PASS_NO=%d PREV_PASS=%s\n\n", pass_no, pass_name); jit_dump_cc(cc); diff --git a/core/iwasm/fast-jit/jit_frontend.c b/core/iwasm/fast-jit/jit_frontend.c index b6ab2d706..1ae48c364 100644 --- a/core/iwasm/fast-jit/jit_frontend.c +++ b/core/iwasm/fast-jit/jit_frontend.c @@ -755,9 +755,11 @@ init_func_translation(JitCompContext *cc) cc->jit_frame = jit_frame; cc->cur_basic_block = jit_cc_entry_basic_block(cc); - cc->total_frame_size = wasm_interp_interp_frame_size(total_cell_num); - cc->spill_cache_offset = (uint32)offsetof(WASMInterpFrame, spill_cache); - cc->spill_cache_size = (uint32)sizeof(uint32) * FAST_JIT_SPILL_CACHE_SIZE; + cc->spill_cache_offset = wasm_interp_interp_frame_size(total_cell_num); + /* Set spill cache size according to max local cell num, max stack cell + num and virtual fixed register num */ + cc->spill_cache_size = (max_locals + max_stacks) * 4 + sizeof(void *) * 4; + cc->total_frame_size = cc->spill_cache_offset + cc->spill_cache_size; cc->jitted_return_address_offset = offsetof(WASMInterpFrame, jitted_return_addr); cc->cur_basic_block = jit_cc_entry_basic_block(cc); @@ -838,7 +840,7 @@ free_block_memory(JitBlock *block) jit_free(block); } -static JitBlock * +static JitBasicBlock * create_func_block(JitCompContext *cc) { JitBlock *jit_block; @@ -883,7 +885,7 @@ create_func_block(JitCompContext *cc) jit_block_stack_push(&cc->block_stack, jit_block); cc->cur_basic_block = jit_block->basic_block_entry; - return jit_block; + return jit_block->basic_block_entry; fail: free_block_memory(jit_block); @@ -2086,13 +2088,13 @@ JitBasicBlock * jit_frontend_translate_func(JitCompContext *cc) { JitFrame *jit_frame; - JitBlock *jit_block; + JitBasicBlock *basic_block_entry; if (!(jit_frame = init_func_translation(cc))) { return NULL; } - if (!(jit_block = create_func_block(cc))) { + if (!(basic_block_entry = create_func_block(cc))) { return NULL; } @@ -2100,7 +2102,7 @@ jit_frontend_translate_func(JitCompContext *cc) return NULL; } - return jit_block->basic_block_entry; + return basic_block_entry; } #if 0 diff --git a/core/iwasm/interpreter/wasm_interp.h b/core/iwasm/interpreter/wasm_interp.h index abfe56182..d3692ff21 100644 --- a/core/iwasm/interpreter/wasm_interp.h +++ b/core/iwasm/interpreter/wasm_interp.h @@ -28,7 +28,6 @@ typedef struct WASMInterpFrame { #if WASM_ENABLE_FAST_JIT != 0 uint8 *jitted_return_addr; - uint32 spill_cache[FAST_JIT_SPILL_CACHE_SIZE]; #endif #if WASM_ENABLE_PERF_PROFILING != 0 @@ -52,12 +51,13 @@ typedef struct WASMInterpFrame { WASMBranchBlock *csp_boundary; WASMBranchBlock *csp; - /* Frame data, the layout is: - lp: param_cell_count + local_cell_count - sp_bottom to sp_boundary: stack of data - csp_bottom to csp_boundary: stack of block - ref to frame end: data types of local vairables and stack data - */ + /** + * Frame data, the layout is: + * lp: parameters and local variables + * sp_bottom to sp_boundary: wasm operand stack + * csp_bottom to csp_boundary: wasm label stack + * jit spill cache: only available for fast jit + */ uint32 lp[1]; #endif } WASMInterpFrame; diff --git a/core/iwasm/interpreter/wasm_runtime.c b/core/iwasm/interpreter/wasm_runtime.c index 54dc9fc5f..e85217715 100644 --- a/core/iwasm/interpreter/wasm_runtime.c +++ b/core/iwasm/interpreter/wasm_runtime.c @@ -1509,8 +1509,8 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst, uint32 stack_size, if (stack_size == 0) stack_size = DEFAULT_WASM_STACK_SIZE; #if WASM_ENABLE_SPEC_TEST != 0 - if (stack_size < 100 * 1024) - stack_size = 100 * 1024; + if (stack_size < 64 * 1024) + stack_size = 64 * 1024; #endif module_inst->default_wasm_stack_size = stack_size;