diff --git a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp index d6b01d029..9b35578b7 100644 --- a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp +++ b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp @@ -15,6 +15,7 @@ #endif #define CODEGEN_CHECK_ARGS 1 +#define CODEGEN_DUMP 0 using namespace asmjit; @@ -52,14 +53,35 @@ typedef enum { REG_I64_FREE_IDX = REG_RSI_IDX } RegIndexI64; -x86::Gp regs_i32[] = { x86::ebp, x86::eax, x86::ebx, x86::ecx, - x86::edx, x86::edi, x86::esi }; +/* clang-format off */ +x86::Gp regs_i8[] = { + x86::bpl, x86::al, x86::bl, x86::cl, + x86::dl, x86::dil, x86::sil, x86::spl, + x86::r8b, x86::r9b, x86::r10b, x86::r11b, + x86::r12b, x86::r13b, x86::r14b, x86::r15b +}; + +x86::Gp regs_i16[] = { + x86::bp, x86::ax, x86::bx, x86::cx, + x86::dx, x86::di, x86::si, x86::sp, + x86::r8w, x86::r9w, x86::r10w, x86::r11w, + x86::r12w, x86::r13w, x86::r14w, x86::r15w +}; + +x86::Gp regs_i32[] = { + x86::ebp, x86::eax, x86::ebx, x86::ecx, + x86::edx, x86::edi, x86::esi, x86::esp, + x86::r8d, x86::r9d, x86::r10d, x86::r11d, + x86::r12d, x86::r13d, x86::r14d, x86::r15d +}; x86::Gp regs_i64[] = { - x86::rbp, x86::rax, x86::rbx, x86::rcx, x86::rdx, x86::rdi, - x86::rsi, x86::rsp, x86::r8, x86::r9, x86::r10, x86::r11, + x86::rbp, x86::rax, x86::rbx, x86::rcx, + x86::rdx, x86::rdi, x86::rsi, x86::rsp, + x86::r8, x86::r9, x86::r10, x86::r11, x86::r12, x86::r13, x86::r14, x86::r15, }; +/* clang-format on */ int jit_codegen_interp_jitted_glue(void *exec_env, JitInterpSwitchInfo *info, @@ -76,7 +98,16 @@ jit_codegen_interp_jitted_glue(void *exec_env, JitInterpSwitchInfo *info, } #define PRINT_LINE() LOG_VERBOSE("\n", __LINE__) + +#if CODEGEN_DUMP != 0 +#define GOTO_FAIL \ + do { \ + PRINT_LINE(); \ + goto fail; \ + } while (0) +#else #define GOTO_FAIL goto fail +#endif #if CODEGEN_CHECK_ARGS == 0 @@ -136,7 +167,7 @@ jit_codegen_interp_jitted_glue(void *exec_env, JitInterpSwitchInfo *info, 
#endif /* end of CODEGEN_CHECK_ARGS == 0 */ /* Load one operand from insn and check none */ -#define LOAD_1ARG r0 = *jit_insn_opnd(insn, 0); +#define LOAD_1ARG() r0 = *jit_insn_opnd(insn, 0) /* Load two operands from insn and check if r0 is non-const */ #define LOAD_2ARGS() \ @@ -165,6 +196,223 @@ jit_codegen_interp_jitted_glue(void *exec_env, JitInterpSwitchInfo *info, r3 = *jit_insn_opnd(insn, 3); \ CHECK_NCONST(r0) +class JitErrorHandler : public ErrorHandler +{ + public: + Error err; + + JitErrorHandler() + : err(kErrorOk) + {} + + void handleError(Error e, const char *msg, BaseEmitter *base) override + { + this->err = e; + } +}; + +/* Alu opcode */ +typedef enum { ADD, SUB, MUL, DIV, REM } ALU_OP; +/* Bit opcode */ +typedef enum { OR, XOR, AND } BIT_OP; +/* Shift opcode */ +typedef enum { SHL, SHRS, SHRU } SHIFT_OP; +/* Condition opcode */ +typedef enum { EQ, NE, GTS, GES, LTS, LES, GTU, GEU, LTU, LEU } COND_OP; + +/* Jmp type */ +typedef enum JmpType { + JMP_DST_LABEL, /* jmp to dst label */ + JMP_END_OF_CALLBC, /* jmp to end of CALLBC */ + JMP_TARGET_CODE /* jmp to an function address */ +} JmpType; + +/** + * Jmp info, save the info on first encoding pass, + * and replace the offset with exact offset when the code cache + * has been allocated actually. 
+ */ +typedef struct JmpInfo { + bh_list_link link; + JmpType type; + uint32 label_src; + uint32 offset; + union { + uint32 label_dst; + uint32 target_code_addr; + } dst_info; +} JmpInfo; + +static bool +label_is_neighboring(JitCompContext *cc, int32 label_prev, int32 label_succ) +{ + return (label_prev == 0 && label_succ == 2) + || (label_prev >= 2 && label_succ == label_prev + 1) + || (label_prev == (int32)jit_cc_label_num(cc) - 1 + && label_succ == 1); +} + +static bool +label_is_ahead(JitCompContext *cc, int32 label_dst, int32 label_src) +{ + return (label_dst == 0 && label_src != 0) + || (label_dst != 1 && label_src == 1) + || (2 <= label_dst && label_dst < label_src + && label_src <= (int32)jit_cc_label_num(cc) - 1); +} + +/** + * Encode jumping from one label to the other label + * + * @param a the assembler to emit the code + * @param jmp_info_list the jmp info list + * @param label_dst the index of dst label + * @param label_src the index of src label + * + * @return true if success, false if failed + */ +static bool +jmp_from_label_to_label(x86::Assembler &a, bh_list *jmp_info_list, + int32 label_dst, int32 label_src) +{ + Imm imm(INT32_MAX); + JmpInfo *node; + + node = (JmpInfo *)jit_calloc(sizeof(JmpInfo)); + if (!node) + return false; + + node->type = JMP_DST_LABEL; + node->label_src = label_src; + node->dst_info.label_dst = label_dst; + node->offset = a.code()->sectionById(0)->buffer().size() + 2; + bh_list_insert(jmp_info_list, node); + + a.jmp(imm); + return true; +} + +/** + * Encode detecting compare result register according to condition code + * and then jumping to suitable label when the condition is met + * + * @param cc the compiler context + * @param a the assembler to emit the code + * @param jmp_info_list the jmp info list + * @param label_src the index of src label + * @param op the opcode of condition operation + * @param reg_no the no of register which contains the compare results + * @param r1 the label info when condition is met + * 
@param r2 the label info when condition is unmet, do nothing if VOID + * @param is_last_insn if current insn is the last insn of current block + * + * @return true if success, false if failed + */ +static bool +cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a, + bh_list *jmp_info_list, int32 label_src, COND_OP op, + int32 reg_no, JitReg r1, JitReg r2, bool is_last_insn) +{ + Imm imm(INT32_MAX); + JmpInfo *node; + + node = (JmpInfo *)jit_malloc(sizeof(JmpInfo)); + if (!node) + return false; + + node->type = JMP_DST_LABEL; + node->label_src = label_src; + node->dst_info.label_dst = jit_reg_no(r1); + node->offset = a.code()->sectionById(0)->buffer().size() + 2; + bh_list_insert(jmp_info_list, node); + + switch (op) { + case EQ: + a.je(imm); + break; + case NE: + a.jne(imm); + break; + case GTS: + a.jg(imm); + break; + case LES: + a.jng(imm); + break; + case GES: + a.jnl(imm); + break; + case LTS: + a.jl(imm); + break; + case GTU: + a.ja(imm); + break; + case LEU: + a.jna(imm); + break; + case GEU: + a.jnb(imm); + break; + case LTU: + a.jb(imm); + break; + default: + bh_assert(0); + break; + } + + if (r2) { + int32 label_dst = jit_reg_no(r2); + if (!(is_last_insn && label_is_neighboring(cc, label_src, label_dst))) + if (!jmp_from_label_to_label(a, jmp_info_list, label_dst, + label_src)) + return false; + } + + return true; +} + +#if WASM_ENABLE_FAST_JIT_DUMP != 0 +static void +dump_native(char *data, uint32 length) +{ + /* Initialize decoder context */ + ZydisDecoder decoder; + ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, + ZYDIS_STACK_WIDTH_64); + + /* Initialize formatter */ + ZydisFormatter formatter; + ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL); + + /* Loop over the instructions in our buffer */ + ZyanU64 runtime_address = (ZyanU64)(uintptr_t)data; + ZyanUSize offset = 0; + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT_VISIBLE]; + + while (ZYAN_SUCCESS(ZydisDecoderDecodeFull( + 
&decoder, data + offset, length - offset, &instruction, operands, + ZYDIS_MAX_OPERAND_COUNT_VISIBLE, ZYDIS_DFLAG_VISIBLE_OPERANDS_ONLY))) { + /* Print current instruction pointer */ + os_printf("%012" PRIX64 " ", runtime_address); + + /* Format & print the binary instruction structure to + human readable format */ + char buffer[256]; + ZydisFormatterFormatInstruction(&formatter, &instruction, operands, + instruction.operand_count_visible, + buffer, sizeof(buffer), + runtime_address); + puts(buffer); + + offset += instruction.length; + runtime_address += instruction.length; + } +} +#endif + /** * Encode extending register of byte to register of dword * @param a the assembler to emit the code @@ -244,6 +492,181 @@ extend_r32_to_r64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src, return false; } +static void +mov_r_to_r(x86::Assembler &a, uint32 kind_dst, int32 reg_no_dst, + int32 reg_no_src) +{ + if (reg_no_dst != reg_no_src) { + if (kind_dst == JIT_REG_KIND_I32) + a.mov(regs_i32[reg_no_dst], regs_i32[reg_no_src]); + else if (kind_dst == JIT_REG_KIND_I64) + a.mov(regs_i64[reg_no_dst], regs_i64[reg_no_src]); + else if (kind_dst == JIT_REG_KIND_F32) { + /* TODO */ + bh_assert(0); + } + else if (kind_dst == JIT_REG_KIND_F64) { + /* TODO */ + bh_assert(0); + } + else { + bh_assert(0); + } + } +} + +/** + * Encode moving memory to a register + * + * @param a the assembler to emit the code + * @param bytes_dst the bytes number of the data, + * could be 1(byte), 2(short), 4(int32), 8(int64), + * skipped by float and double + * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64 + * @param is_signed whether the data is signed or unsigned + * @param reg_no_dst the index of dest register + * @param m_src the memory operand which contains the source data + * + * @return true if success, false otherwise + */ +static bool +mov_m_to_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, bool is_signed, + int32 reg_no_dst, x86::Mem &m_src) +{ + if 
(kind_dst == JIT_REG_KIND_I32) { + switch (bytes_dst) { + case 1: + case 2: + if (is_signed) + a.movsx(regs_i32[reg_no_dst], m_src); + else + a.movzx(regs_i32[reg_no_dst], m_src); + break; + case 4: + a.mov(regs_i32[reg_no_dst], m_src); + break; + default: + bh_assert(0); + return false; + } + } + else if (kind_dst == JIT_REG_KIND_I64) { + switch (bytes_dst) { + case 1: + case 2: + if (is_signed) + a.movsx(regs_i64[reg_no_dst], m_src); + else + a.movzx(regs_i64[reg_no_dst], m_src); + break; + case 4: + if (is_signed) + a.movsxd(regs_i64[reg_no_dst], m_src); + else { + a.xor_(regs_i64[reg_no_dst], regs_i64[reg_no_dst]); + a.mov(regs_i64[reg_no_dst], m_src); + } + break; + case 8: + a.mov(regs_i64[reg_no_dst], m_src); + break; + default: + bh_assert(0); + return false; + } + } + else if (kind_dst == JIT_REG_KIND_F32) { + /* TODO */ + return false; + } + else if (kind_dst == JIT_REG_KIND_F64) { + /* TODO */ + return false; + } + return true; +} + +/** + * Encode moving register to memory + * + * @param a the assembler to emit the code + * @param bytes_dst the bytes number of the data, + * could be 1(byte), 2(short), 4(int32), 8(int64), + * skipped by float and double + * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64 + * @param is_signed whether the data is signed or unsigned + * @param m_dst the dest memory operand + * @param reg_no_src the index of dest register + * + * @return true if success, false otherwise + */ +static bool +mov_r_to_m(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, + x86::Mem &m_dst, int32 reg_no_src) +{ + if (kind_dst == JIT_REG_KIND_I32) { + bh_assert(reg_no_src < 8); + switch (bytes_dst) { + case 1: + a.mov(m_dst, regs_i8[reg_no_src]); + break; + case 2: + a.mov(m_dst, regs_i16[reg_no_src]); + break; + case 4: + a.mov(m_dst, regs_i32[reg_no_src]); + break; + default: + bh_assert(0); + return false; + } + } + else if (kind_dst == JIT_REG_KIND_I64) { + bh_assert(reg_no_src < 16); + switch (bytes_dst) { + case 1: 
+ a.mov(m_dst, regs_i8[reg_no_src]); + break; + case 2: + a.mov(m_dst, regs_i16[reg_no_src]); + break; + case 4: + a.mov(m_dst, regs_i32[reg_no_src]); + break; + case 8: + a.mov(m_dst, regs_i64[reg_no_src]); + break; + default: + bh_assert(0); + return false; + } + } + else if (kind_dst == JIT_REG_KIND_F32) { + /* TODO */ + return false; + } + else if (kind_dst == JIT_REG_KIND_F64) { + /* TODO */ + return false; + } + return true; +} + +/** + * Encode moving immediate data to memory + * + * @param m dst memory + * @param imm src immediate data + * + * @return new stream + */ +static bool +mov_imm_to_m(x86::Assembler &a, x86::Mem &m_dst, Imm imm_src) +{ + a.mov(m_dst, imm_src); + return true; +} + /** * Encode loading register data from memory with imm base and imm offset * @@ -264,7 +687,8 @@ ld_r_from_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, bool is_signed, int32 reg_no_dst, int32 base, int32 offset) { - return false; + x86::Mem m((uintptr_t)(base + offset), bytes_dst); + return mov_m_to_r(a, bytes_dst, kind_dst, is_signed, reg_no_dst, m); } /** @@ -287,7 +711,8 @@ ld_r_from_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, bool is_signed, int32 reg_no_dst, int32 base, int32 reg_no_offset) { - return false; + x86::Mem m(regs_i64[reg_no_offset], base, bytes_dst); + return mov_m_to_r(a, bytes_dst, kind_dst, is_signed, reg_no_dst, m); } /** @@ -310,7 +735,8 @@ ld_r_from_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, bool is_signed, int32 reg_no_dst, int32 reg_no_base, int32 offset) { - return false; + x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst); + return mov_m_to_r(a, bytes_dst, kind_dst, is_signed, reg_no_dst, m); } /** @@ -334,7 +760,8 @@ ld_r_from_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, bool is_signed, int32 reg_no_dst, int32 reg_no_base, int32 reg_no_offset) { - return false; + x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, 
bytes_dst); + return mov_m_to_r(a, bytes_dst, kind_dst, is_signed, reg_no_dst, m); } /** @@ -356,7 +783,8 @@ st_r_to_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_src, int32 base, int32 offset) { - return false; + x86::Mem m((uintptr_t)(base + offset), bytes_dst); + return mov_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src); } /** @@ -378,7 +806,8 @@ static bool st_r_to_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_src, int32 base, int32 reg_no_offset) { - return false; + x86::Mem m(regs_i64[reg_no_offset], base, bytes_dst); + return mov_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src); } /** @@ -399,7 +828,8 @@ static bool st_r_to_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_src, int32 reg_no_base, int32 offset) { - return false; + x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst); + return mov_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src); } /** @@ -422,7 +852,29 @@ st_r_to_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_src, int32 reg_no_base, int32 reg_no_offset) { - return false; + x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst); + return mov_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src); +} + +static void +imm_set_value(Imm &imm, void *data, uint32 bytes) +{ + switch (bytes) { + case 1: + imm.setValue(*(uint8 *)data); + break; + case 2: + imm.setValue(*(uint16 *)data); + break; + case 4: + imm.setValue(*(uint32 *)data); + break; + case 8: + imm.setValue(*(uint64 *)data); + break; + default: + bh_assert(0); + } } /** @@ -441,7 +893,10 @@ static bool st_imm_to_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst, void *data_src, int32 base, int32 offset) { - return false; + x86::Mem m((uintptr_t)(base + offset), bytes_dst); + Imm imm; + imm_set_value(imm, data_src, bytes_dst); + return mov_imm_to_m(a, m, imm); } /** @@ -461,7 +916,10 @@ static bool 
st_imm_to_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst, void *data_src, int32 base, int32 reg_no_offset) { - return false; + x86::Mem m(regs_i64[reg_no_offset], base, bytes_dst); + Imm imm; + imm_set_value(imm, data_src, bytes_dst); + return mov_imm_to_m(a, m, imm); } /** @@ -481,7 +939,10 @@ static bool st_imm_to_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst, void *data_src, int32 reg_no_base, int32 offset) { - return false; + x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst); + Imm imm; + imm_set_value(imm, data_src, bytes_dst); + return mov_imm_to_m(a, m, imm); } /** @@ -502,7 +963,10 @@ static bool st_imm_to_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, void *data_src, int32 reg_no_base, int32 reg_no_offset) { - return false; + x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst); + Imm imm; + imm_set_value(imm, data_src, bytes_dst); + return mov_imm_to_m(a, m, imm); } /** @@ -517,7 +981,9 @@ st_imm_to_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, void *data_src, static bool mov_imm_to_r_i32(x86::Assembler &a, int32 reg_no, int32 data) { - return false; + Imm imm(data); + a.mov(regs_i32[reg_no], imm); + return true; } /** @@ -532,7 +998,9 @@ mov_imm_to_r_i32(x86::Assembler &a, int32 reg_no, int32 data) static bool mov_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) { - return false; + if (reg_no_dst != reg_no_src) + a.mov(regs_i32[reg_no_dst], regs_i32[reg_no_src]); + return true; } /** @@ -545,9 +1013,11 @@ mov_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) * @return true if success, false otherwise */ static bool -mov_imm_to_r_i64(x86::Assembler &a, int32 reg_no, int32 data) +mov_imm_to_r_i64(x86::Assembler &a, int32 reg_no, int64 data) { - return false; + Imm imm(data); + a.mov(regs_i64[reg_no], imm); + return true; } /** @@ -562,7 +1032,9 @@ mov_imm_to_r_i64(x86::Assembler &a, int32 reg_no, int32 data) static bool mov_r_to_r_i64(x86::Assembler &a, int32 reg_no_dst, 
int32 reg_no_src) { - return false; + if (reg_no_dst != reg_no_src) + a.mov(regs_i64[reg_no_dst], regs_i64[reg_no_src]); + return true; } /** @@ -635,7 +1107,7 @@ mov_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) * @return true if success, false otherwise */ static bool -convert_imm_i32_to_r_int8(x86::Assembler &a, int32 reg_no, int32 data) +convert_imm_i32_to_r_i8(x86::Assembler &a, int32 reg_no, int32 data) { return false; } @@ -650,7 +1122,7 @@ convert_imm_i32_to_r_int8(x86::Assembler &a, int32 reg_no, int32 data) * @return true if success, false otherwise */ static bool -convert_r_i32_to_r_int8(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) +convert_r_i32_to_r_i8(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) { return false; } @@ -665,7 +1137,7 @@ convert_r_i32_to_r_int8(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) * @return true if success, false otherwise */ static bool -convert_imm_i32_to_r_uint8(x86::Assembler &a, int32 reg_no, int32 data) +convert_imm_i32_to_r_u8(x86::Assembler &a, int32 reg_no, int32 data) { return false; } @@ -680,7 +1152,7 @@ convert_imm_i32_to_r_uint8(x86::Assembler &a, int32 reg_no, int32 data) * @return true if success, false otherwise */ static bool -convert_r_i32_to_r_uint8(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) +convert_r_i32_to_r_u8(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) { return false; } @@ -695,7 +1167,7 @@ convert_r_i32_to_r_uint8(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) * @return true if success, false otherwise */ static bool -convert_imm_i32_to_r_int16(x86::Assembler &a, int32 reg_no, int32 data) +convert_imm_i32_to_r_i16(x86::Assembler &a, int32 reg_no, int32 data) { return false; } @@ -710,7 +1182,7 @@ convert_imm_i32_to_r_int16(x86::Assembler &a, int32 reg_no, int32 data) * @return true if success, false otherwise */ static bool -convert_r_i32_to_r_int16(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) 
+convert_r_i32_to_r_i16(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) { return false; } @@ -725,7 +1197,7 @@ convert_r_i32_to_r_int16(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) * @return true if success, false otherwise */ static bool -convert_imm_i32_to_r_uint16(x86::Assembler &a, int32 reg_no, int32 data) +convert_imm_i32_to_r_u16(x86::Assembler &a, int32 reg_no, int32 data) { return false; } @@ -740,7 +1212,7 @@ convert_imm_i32_to_r_uint16(x86::Assembler &a, int32 reg_no, int32 data) * @return true if success, false otherwise */ static bool -convert_r_i32_to_r_uint16(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) +convert_r_i32_to_r_u16(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) { return false; } @@ -755,7 +1227,7 @@ convert_r_i32_to_r_uint16(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) * @return true if success, false otherwise */ static bool -convert_imm_i32_to_r_uint64(x86::Assembler &a, int32 reg_no, int32 data) +convert_imm_i32_to_r_u64(x86::Assembler &a, int32 reg_no, int32 data) { return false; } @@ -770,7 +1242,7 @@ convert_imm_i32_to_r_uint64(x86::Assembler &a, int32 reg_no, int32 data) * @return true if success, false otherwise */ static bool -convert_r_i32_to_r_uint64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) +convert_r_i32_to_r_u64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) { return false; } @@ -1075,15 +1547,6 @@ neg_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src) return false; } -/* Alu opcode */ -typedef enum { ADD, SUB, MUL, DIV, REM } ALU_OP; -/* Bit opcode */ -typedef enum { OR, XOR, AND } BIT_OP; -/* Shift opcode */ -typedef enum { SHL, SHRS, SHRU } SHIFT_OP; -/* Condition opcode */ -typedef enum { EQ, NE, GTS, GES, LTS, LES, GTU, GEU, LTU, LEU } COND_OP; - static COND_OP not_cond(COND_OP op) { @@ -1107,7 +1570,73 @@ static bool alu_r_r_imm_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no_src, int32 data) { - return false; + Imm 
imm(data); + + switch (op) { + case ADD: + mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src); + if (data == 1) + a.inc(regs_i32[reg_no_dst]); + else if (data == -1) + a.dec(regs_i32[reg_no_dst]); + else if (data != 0) + a.add(regs_i32[reg_no_dst], imm); + break; + case SUB: + mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src); + if (data == -1) + a.inc(regs_i32[reg_no_dst]); + else if (data == 1) + a.dec(regs_i32[reg_no_dst]); + else if (data != 0) + a.sub(regs_i32[reg_no_dst], imm); + break; + case MUL: + if (data == 0) + a.xor_(regs_i32[reg_no_dst], regs_i32[reg_no_dst]); + else if (data == -1) { + mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src); + a.neg(regs_i32[reg_no_dst]); + } + else if (data == 1) { + mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src); + } + else if (data == 2) { + mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src); + imm.setValue(1); + a.shl(regs_i32[reg_no_dst], imm); + } + else if (data == 4) { + mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src); + imm.setValue(2); + a.shl(regs_i32[reg_no_dst], imm); + } + else if (data == 8) { + mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src); + imm.setValue(3); + a.shl(regs_i32[reg_no_dst], imm); + } + else { + a.imul(regs_i32[reg_no_dst], regs_i32[reg_no_src], imm); + } + break; + case DIV: + case REM: +#if 0 + imm_from_sz_v_s (imm, SZ32, data, true); + mov_r_imm (reg_I4_free, imm); + stream = cdq (stream); + idiv_r (reg_I4_free); +#endif + /* TODO */ + bh_assert(0); + break; + default: + bh_assert(0); + break; + } + + return true; } /** @@ -1124,7 +1653,44 @@ static bool alu_r_r_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no1_src, int32 reg_no2_src) { - return false; + switch (op) { + case ADD: + if (reg_no_dst != reg_no2_src) { + mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no1_src); + a.add(regs_i32[reg_no_dst], regs_i32[reg_no2_src]); + } + else + a.add(regs_i32[reg_no2_src], regs_i32[reg_no1_src]); + break; + case SUB: + if 
(reg_no_dst != reg_no2_src) { + mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no1_src); + a.sub(regs_i32[reg_no_dst], regs_i32[reg_no2_src]); + } + else { + a.sub(regs_i32[reg_no2_src], regs_i32[reg_no1_src]); + a.neg(regs_i32[reg_no2_src]); + } + break; + case MUL: + if (reg_no_dst != reg_no2_src) { + mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no1_src); + a.imul(regs_i32[reg_no_dst], regs_i32[reg_no2_src]); + } + else + a.imul(regs_i32[reg_no2_src], regs_i32[reg_no1_src]); + break; + case DIV: + case REM: + /* TODO */ + bh_assert(0); + break; + default: + bh_assert(0); + break; + } + + return true; } /** @@ -1142,7 +1708,33 @@ static bool alu_imm_imm_to_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 data1_src, int32 data2_src) { - return false; + Imm imm; + int32 data = 0; + + switch (op) { + case ADD: + data = data1_src + data2_src; + break; + case SUB: + data = data1_src - data2_src; + break; + case MUL: + data = data1_src * data2_src; + break; + case DIV: + data = data1_src / data2_src; + break; + case REM: + data = data1_src % data2_src; + break; + default: + bh_assert(0); + break; + } + + imm.setValue(data); + a.mov(regs_i32[reg_no_dst], imm); + return true; } /** @@ -1160,7 +1752,31 @@ static bool alu_imm_r_to_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 data1_src, int32 reg_no2_src) { - return false; + if (op == ADD || op == MUL) + return alu_r_r_imm_i32(a, op, reg_no_dst, reg_no2_src, data1_src); + else if (op == SUB) { + if (!alu_r_r_imm_i32(a, op, reg_no_dst, reg_no2_src, data1_src)) + return false; + a.neg(regs_i32[reg_no_dst]); + return true; + } + else { + if (reg_no_dst != reg_no2_src) { + if (!mov_imm_to_r_i32(a, reg_no_dst, data1_src) + || !alu_r_r_r_i32(a, op, reg_no_dst, reg_no_dst, reg_no2_src)) + return false; + return true; + } + else { + if (!mov_imm_to_r_i32(a, REG_I32_FREE_IDX, data1_src) + || !alu_r_r_r_i32(a, op, reg_no_dst, REG_I32_FREE_IDX, + reg_no2_src)) + return false; + return true; + } + } + + 
return true; } /** @@ -1178,7 +1794,7 @@ static bool alu_r_imm_to_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no1_src, int32 data2_src) { - return false; + return alu_r_r_imm_i32(a, op, reg_no_dst, reg_no1_src, data2_src); } /** @@ -1196,7 +1812,7 @@ static bool alu_r_r_to_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no1_src, int32 reg_no2_src) { - return false; + return alu_r_r_r_i32(a, op, reg_no_dst, reg_no1_src, reg_no2_src); } /** @@ -1213,7 +1829,73 @@ static bool alu_r_r_imm_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no_src, int64 data) { - return false; + Imm imm(data); + + switch (op) { + case ADD: + mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src); + if (data == 1) + a.inc(regs_i64[reg_no_dst]); + else if (data == -1) + a.dec(regs_i64[reg_no_dst]); + else if (data != 0) + a.add(regs_i64[reg_no_dst], imm); + break; + case SUB: + mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src); + if (data == -1) + a.inc(regs_i64[reg_no_dst]); + else if (data == 1) + a.dec(regs_i64[reg_no_dst]); + else if (data != 0) + a.sub(regs_i64[reg_no_dst], imm); + break; + case MUL: + if (data == 0) + a.xor_(regs_i64[reg_no_dst], regs_i64[reg_no_dst]); + else if (data == -1) { + mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src); + a.neg(regs_i64[reg_no_dst]); + } + else if (data == 1) { + mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src); + } + else if (data == 2) { + mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src); + imm.setValue(1); + a.shl(regs_i64[reg_no_dst], imm); + } + else if (data == 4) { + mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src); + imm.setValue(2); + a.shl(regs_i64[reg_no_dst], imm); + } + else if (data == 8) { + mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src); + imm.setValue(3); + a.shl(regs_i64[reg_no_dst], imm); + } + else { + a.imul(regs_i64[reg_no_dst], regs_i64[reg_no_src], imm); + } + break; + case DIV: + case REM: +#if 0 + imm_from_sz_v_s (imm, SZ32, 
data, true); + mov_r_imm (reg_I4_free, imm); + stream = cdq (stream); + idiv_r (reg_I4_free); +#endif + /* TODO */ + bh_assert(0); + break; + default: + bh_assert(0); + break; + } + + return true; } /** @@ -1230,7 +1912,44 @@ static bool alu_r_r_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no1_src, int32 reg_no2_src) { - return false; + switch (op) { + case ADD: + if (reg_no_dst != reg_no2_src) { + mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no1_src); + a.add(regs_i64[reg_no_dst], regs_i64[reg_no2_src]); + } + else + a.add(regs_i64[reg_no2_src], regs_i64[reg_no1_src]); + break; + case SUB: + if (reg_no_dst != reg_no2_src) { + mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no1_src); + a.sub(regs_i64[reg_no_dst], regs_i64[reg_no2_src]); + } + else { + a.sub(regs_i64[reg_no2_src], regs_i64[reg_no1_src]); + a.neg(regs_i64[reg_no2_src]); + } + break; + case MUL: + if (reg_no_dst != reg_no2_src) { + mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no1_src); + a.imul(regs_i64[reg_no_dst], regs_i64[reg_no2_src]); + } + else + a.imul(regs_i64[reg_no2_src], regs_i64[reg_no1_src]); + break; + case DIV: + case REM: + /* TODO */ + bh_assert(0); + break; + default: + bh_assert(0); + break; + } + + return true; } /** @@ -1248,7 +1967,33 @@ static bool alu_imm_imm_to_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int64 data1_src, int64 data2_src) { - return false; + Imm imm; + int64 data = 0; + + switch (op) { + case ADD: + data = data1_src + data2_src; + break; + case SUB: + data = data1_src - data2_src; + break; + case MUL: + data = data1_src * data2_src; + break; + case DIV: + data = data1_src / data2_src; + break; + case REM: + data = data1_src % data2_src; + break; + default: + bh_assert(0); + break; + } + + imm.setValue(data); + a.mov(regs_i64[reg_no_dst], imm); + return true; } /** @@ -1266,7 +2011,31 @@ static bool alu_imm_r_to_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int64 data1_src, int32 reg_no2_src) { - return false; + if (op 
== ADD || op == MUL) + return alu_r_r_imm_i64(a, op, reg_no_dst, reg_no2_src, data1_src); + else if (op == SUB) { + if (!alu_r_r_imm_i64(a, op, reg_no_dst, reg_no2_src, data1_src)) + return false; + a.neg(regs_i64[reg_no_dst]); + return true; + } + else { + if (reg_no_dst != reg_no2_src) { + if (!mov_imm_to_r_i64(a, reg_no_dst, data1_src) + || !alu_r_r_r_i64(a, op, reg_no_dst, reg_no_dst, reg_no2_src)) + return false; + return true; + } + else { + if (!mov_imm_to_r_i64(a, REG_I64_FREE_IDX, data1_src) + || !alu_r_r_r_i64(a, op, reg_no_dst, REG_I64_FREE_IDX, + reg_no2_src)) + return false; + return true; + } + } + + return true; } /** @@ -1284,7 +2053,7 @@ static bool alu_r_imm_to_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no1_src, int64 data2_src) { - return false; + return alu_r_r_imm_i64(a, op, reg_no_dst, reg_no1_src, data2_src); } /** @@ -1302,7 +2071,7 @@ static bool alu_r_r_to_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no1_src, int32 reg_no2_src) { - return false; + return alu_r_r_r_i64(a, op, reg_no_dst, reg_no1_src, reg_no2_src); } /** @@ -1819,7 +2588,9 @@ shift_r_r_to_r_i64(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst, static bool cmp_r_imm_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src, int32 data) { - return false; + Imm imm(data); + a.cmp(regs_i32[reg_no_src], imm); + return true; } /** @@ -1837,7 +2608,8 @@ static bool cmp_r_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, int32 reg_no2_src) { - return false; + a.cmp(regs_i32[reg_no1_src], regs_i32[reg_no2_src]); + return true; } /** @@ -1855,7 +2627,11 @@ static bool cmp_imm_imm_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 data1_src, int32 data2_src) { - return false; + Imm imm(data1_src); + a.mov(regs_i32[REG_I32_FREE_IDX], imm); + imm.setValue(data2_src); + a.cmp(regs_i32[REG_I32_FREE_IDX], imm); + return true; } /** @@ -1873,7 +2649,10 @@ static bool cmp_imm_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 data1_src, 
int32 reg_no2_src) { - return false; + Imm imm(data1_src); + a.mov(regs_i32[REG_I32_FREE_IDX], imm); + a.cmp(regs_i32[REG_I32_FREE_IDX], regs_i32[reg_no2_src]); + return true; } /** @@ -1891,7 +2670,9 @@ static bool cmp_r_imm_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, int32 data2_src) { - return false; + Imm imm(data2_src); + a.cmp(regs_i32[reg_no1_src], imm); + return true; } /** @@ -1909,7 +2690,8 @@ static bool cmp_r_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, int32 reg_no2_src) { - return false; + a.cmp(regs_i32[reg_no1_src], regs_i32[reg_no2_src]); + return true; } /** @@ -1926,7 +2708,9 @@ cmp_r_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, static bool cmp_r_imm_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src, int64 data) { - return false; + Imm imm(data); + a.cmp(regs_i64[reg_no_src], imm); + return true; } /** @@ -1944,7 +2728,8 @@ static bool cmp_r_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, int32 reg_no2_src) { - return false; + a.cmp(regs_i64[reg_no1_src], regs_i64[reg_no2_src]); + return true; } /** @@ -1962,7 +2747,11 @@ static bool cmp_imm_imm_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 data1_src, int32 data2_src) { - return false; + Imm imm(data1_src); + a.mov(regs_i64[REG_I64_FREE_IDX], imm); + imm.setValue(data2_src); + a.cmp(regs_i64[REG_I64_FREE_IDX], imm); + return true; } /** @@ -1980,7 +2769,10 @@ static bool cmp_imm_r_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int64 data1_src, int32 reg_no2_src) { - return false; + Imm imm(data1_src); + a.mov(regs_i64[REG_I64_FREE_IDX], imm); + a.cmp(regs_i64[REG_I64_FREE_IDX], regs_i64[reg_no2_src]); + return true; } /** @@ -1998,7 +2790,9 @@ static bool cmp_r_imm_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, int64 data2_src) { - return false; + Imm imm(data2_src); + a.cmp(regs_i64[reg_no1_src], imm); + return true; } /** @@ -2016,7 +2810,8 @@ static bool cmp_r_r_to_r_i64(x86::Assembler 
&a, int32 reg_no_dst, int32 reg_no1_src, int32 reg_no2_src) { - return false; + a.cmp(regs_i64[reg_no1_src], regs_i64[reg_no2_src]); + return true; } /** @@ -2172,14 +2967,21 @@ cmp_r_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, #define LD_R_R_R(kind, bytes_dst, is_signed) \ do { \ int32 reg_no_dst; \ - int32 base, offset; \ + int32 base = 0, offset = 0; \ bool _ret = false; \ \ - CHECK_KIND(r1, JIT_REG_KIND_I64); \ - CHECK_KIND(r2, JIT_REG_KIND_I32); \ - base = 0; \ - offset = 0; \ - real_opnd_to_reg[1] = r2; \ + if (jit_reg_is_const(r1)) { \ + CHECK_KIND(r1, JIT_REG_KIND_I32); \ + } \ + else { \ + CHECK_KIND(r1, JIT_REG_KIND_I64); \ + } \ + if (jit_reg_is_const(r2)) { \ + CHECK_KIND(r2, JIT_REG_KIND_I32); \ + } \ + else { \ + CHECK_KIND(r2, JIT_REG_KIND_I64); \ + } \ \ reg_no_dst = jit_reg_no(r0); \ if (jit_reg_is_const(r1)) \ @@ -2205,7 +3007,7 @@ cmp_r_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, _ret = ld_r_from_base_r_offset_r( \ a, bytes_dst, JIT_REG_KIND_##kind, is_signed, reg_no_dst, \ jit_reg_no(r1), jit_reg_no(r2)); \ - if (_ret) \ + if (!_ret) \ GOTO_FAIL; \ } while (0) @@ -2218,15 +3020,21 @@ cmp_r_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, do { \ type data_src = 0; \ int32 reg_no_src = 0; \ - int32 base, offset; \ + int32 base = 0, offset = 0; \ bool _ret = false; \ \ - CHECK_KIND(r1, JIT_REG_KIND_I64); \ - CHECK_KIND(r2, JIT_REG_KIND_I32); \ - base = 0; \ - offset = 0; \ - real_opnd_to_reg[0] = r2; \ - real_opnd_to_reg[1] = r0; \ + if (jit_reg_is_const(r1)) { \ + CHECK_KIND(r1, JIT_REG_KIND_I32); \ + } \ + else { \ + CHECK_KIND(r1, JIT_REG_KIND_I64); \ + } \ + if (jit_reg_is_const(r2)) { \ + CHECK_KIND(r2, JIT_REG_KIND_I32); \ + } \ + else { \ + CHECK_KIND(r2, JIT_REG_KIND_I64); \ + } \ \ if (jit_reg_is_const(r0)) \ data_src = jit_cc_get_const_##kind(cc, r0); \ @@ -2389,19 +3197,19 @@ fail: } /** - * Encode insn convert: I32TOI1 r0, r1, or I32TOI2, I32TOF32, F32TOF64, etc. 
+ * Encode insn convert: I32TOI8 r0, r1, or I32TOI16, I32TOF32, F32TOF64, etc. * @param kind0 the dst data kind, such as I32, I64, F32 and F64 * @param kind1 the src data kind, such as I32, I64, F32 and F64 * @param type0 the dst data type, such as int32, float and double * @param type1 the src data type, such as int32, float and double */ -#define CONVERT_R_R(kind0, kind1, type0, type1) \ +#define CONVERT_R_R(kind0, kind1, type0, type1, Type1) \ do { \ bool _ret = false; \ CHECK_KIND(r0, JIT_REG_KIND_##kind0); \ CHECK_KIND(r1, JIT_REG_KIND_##kind1); \ if (jit_reg_is_const(r1)) { \ - type1 data = jit_cc_get_const_##kind1(cc, r1); \ + Type1 data = jit_cc_get_const_##kind1(cc, r1); \ _ret = \ convert_imm_##type1##_to_r_##type0(a, jit_reg_no(r0), data); \ } \ @@ -2722,6 +3530,67 @@ fail: return false; } +/** + * Encode detecting the cmp flags in reg, and jmp to the relative address + * according to the condition opcode + * + * @param a the assembler to emit the code + * @param reg_no the no of register which contains cmp flags of cmp result + * @param op the condition opcode to jmp + * @param offset the relative offset to jmp when the contidtion meeted + * + * @return return the next address of native code after encoded + */ +static bool +cmp_r_and_jmp_relative(x86::Assembler &a, int32 reg_no, COND_OP op, + int32 offset) +{ + Imm target; + + if (offset >= -127 && offset <= 127) + target.setValue((int8)offset); + else + target.setValue(offset); + + switch (op) { + case EQ: + a.je(target); + break; + case NE: + a.jne(target); + break; + case GTS: + a.jg(target); + break; + case LES: + a.jng(target); + break; + case GES: + a.jge(target); + break; + case LTS: + a.jl(target); + break; + case GTU: + a.ja(target); + break; + case LEU: + a.jna(target); + break; + case GEU: + a.jae(target); + break; + case LTU: + a.jb(target); + break; + default: + bh_assert(0); + break; + } + + return true; +} + /** * Encode select insn, SELECT r0, r1, r2, r3 * @@ -2737,11 +3606,19 @@ static 
bool lower_select(JitCompContext *cc, x86::Assembler &a, COND_OP op, JitReg r0, JitReg r1, JitReg r2, JitReg r3) { -#if 0 - char stream_mov1[128]; - char stream_mov2[128]; - char *stream1 = stream_mov1; - char *stream2 = stream_mov2; + JitErrorHandler err_handler; + Environment env(Arch::kX64); + CodeHolder code1, code2; + char *stream_mov1, *stream_mov2; + uint32 size_mov1, size_mov2; + + code1.init(env); + code1.setErrorHandler(&err_handler); + x86::Assembler a1(&code1); + + code2.init(env); + code2.setErrorHandler(&err_handler); + x86::Assembler a2(&code2); CHECK_NCONST(r0); CHECK_NCONST(r1); @@ -2757,35 +3634,54 @@ lower_select(JitCompContext *cc, x86::Assembler &a, COND_OP op, JitReg r0, op = not_cond(op); } - if (!lower_mov(cc, &stream1, r0, r2)) - GOTO_FAIL; - if (!lower_mov(cc, &stream2, r0, r3)) + if (!lower_mov(cc, a1, r0, r2)) GOTO_FAIL; + if (!lower_mov(cc, a2, r0, r3)) + GOTO_FAIL; + + stream_mov1 = (char *)a1.code()->sectionById(0)->buffer().data(); + size_mov1 = a1.code()->sectionById(0)->buffer().size(); + stream_mov2 = (char *)a2.code()->sectionById(0)->buffer().data(); + size_mov2 = a2.code()->sectionById(0)->buffer().size(); + if (r0 != r2) { - memcpy(stream, stream_mov1, (int32)(stream1 - stream_mov1)); - stream += (int32)(stream1 - stream_mov1); + a.embedDataArray(TypeId::kInt8, stream_mov1, size_mov1); } if (r3 && r0 != r3) { - stream = cmp_r_and_jmp_relative(stream, jit_reg_no(r1), op, - (int32)(stream2 - stream_mov2)); - memcpy(stream, stream_mov2, (int32)(stream2 - stream_mov2)); - stream += (int32)(stream2 - stream_mov2); + if (!cmp_r_and_jmp_relative(a, jit_reg_no(r1), op, (int32)size_mov2)) + return false; + a.embedDataArray(TypeId::kInt8, stream_mov2, size_mov2); } return true; fail: return false; -#endif - return false; } +/* jmp to dst label */ +#define JMP_TO_LABEL(stream_offset, label_dst, label_src) \ + do { \ + if (label_is_ahead(cc, label_dst, label_src)) { \ + int32 _offset = label_offsets[label_dst] \ + - 
a.code()->sectionById(0)->buffer().size(); \ + Imm imm(_offset); \ + a.jmp(imm); \ + } \ + else { \ + if (!jmp_from_label_to_label(a, jmp_info_list, label_dst, \ + label_src)) \ + GOTO_FAIL; \ + } \ + } while (0) + /** * Encode branch insn, BEQ/BNE/../BLTU r0, r1, r2 * * @param cc the compiler context * @param a the assembler to emit the code + * @param jmp_info_list the jmp info list * @param r0 dst jit register that contains the dst operand info * @param r1 src jit register that contains the first src operand info * @param r2 src jit register that contains the second src operand info @@ -2794,15 +3690,15 @@ fail: * @return true if success, false if failed */ static bool -lower_branch(JitCompContext *cc, x86::Assembler &a, int32 label_src, COND_OP op, - JitReg r0, JitReg r1, JitReg r2, bool is_last_insn) +lower_branch(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list, + int32 label_src, COND_OP op, JitReg r0, JitReg r1, JitReg r2, + bool is_last_insn) { -#if 0 int32 reg_no, label_dst; CHECK_NCONST(r0); CHECK_KIND(r0, JIT_REG_KIND_I32); - CHECK_KIND(r1, JIT_REG_KIND_L4); + CHECK_KIND(r1, JIT_REG_KIND_L32); label_dst = jit_reg_no(r1); if (label_dst < (int32)jit_cc_label_num(cc) - 1 && is_last_insn @@ -2816,15 +3712,13 @@ lower_branch(JitCompContext *cc, x86::Assembler &a, int32 label_src, COND_OP op, } reg_no = jit_reg_no(r0); - if (!cmp_r_and_jmp_label(cc, &stream, stream_offset, label_src, op, reg_no, - r1, r2, is_last_insn)) + if (!cmp_r_and_jmp_label(cc, a, jmp_info_list, label_src, op, reg_no, r1, + r2, is_last_insn)) GOTO_FAIL; return true; fail: return false; -#endif - return false; } /** @@ -2975,15 +3869,15 @@ fail: * @param a the assembler to emit the code * @param label_src the index of src label * @param insn current insn info - * @param global_offset_base the base for calculating global offset * * @return true if success, false if failed */ static bool lower_callnative(JitCompContext *cc, x86::Assembler &a, int32 label_src, - JitInsn *insn, 
unsigned global_offset_base) + JitInsn *insn) { - return false; + /* TODO: ignore it now */ + return true; } /** @@ -2993,13 +3887,12 @@ lower_callnative(JitCompContext *cc, x86::Assembler &a, int32 label_src, * @param a the assembler to emit the code * @param label_src the index of src label * @param insn current insn info - * @param global_offset_base the base for calculating global offset * * @return true if success, false if failed */ static bool lower_callbc(JitCompContext *cc, x86::Assembler &a, int32 label_src, - JitInsn *insn, unsigned global_offset_base) + JitInsn *insn) { return false; } @@ -3008,14 +3901,511 @@ static bool lower_returnbc(JitCompContext *cc, x86::Assembler &a, int32 label_src, JitInsn *insn) { + JitReg ecx_hreg = jit_reg_new(JIT_REG_KIND_I32, REG_ECX_IDX); + JitReg rcx_hreg = jit_reg_new(JIT_REG_KIND_I64, REG_RCX_IDX); + JitReg act_reg = *(jit_insn_opnd(insn, 0)); + JitReg ret_reg = *(jit_insn_opnd(insn, 1)); + int32 act; + + CHECK_CONST(act_reg); + CHECK_KIND(act_reg, JIT_REG_KIND_I32); + + act = jit_cc_get_const_I32(cc, act_reg); + + if (ret_reg) { + if (jit_reg_is_kind(I32, ret_reg)) { + if (!lower_mov(cc, a, ecx_hreg, ret_reg)) + return false; + } + else if (jit_reg_is_kind(I64, ret_reg)) { + if (!lower_mov(cc, a, rcx_hreg, ret_reg)) + return false; + } + else if (jit_reg_is_kind(F32, ret_reg)) { + /* TODO */ + return false; + } + else if (jit_reg_is_kind(F64, ret_reg)) { + /* TODO */ + return false; + } + else { + return false; + } + } + + { + /* eax = act */ + Imm imm(act); + a.mov(x86::eax, imm); + + x86::Mem m(x86::rbp, cc->jitted_return_address_offset); + a.jmp(m); + } + return true; +fail: return false; } +static bool +lower_return(JitCompContext *cc, x86::Assembler &a, JitInsn *insn) +{ + JitReg act_reg = *(jit_insn_opnd(insn, 0)); + int32 act; + + CHECK_CONST(act_reg); + CHECK_KIND(act_reg, JIT_REG_KIND_I32); + + act = jit_cc_get_const_I32(cc, act_reg); + { + /* eax = act */ + Imm imm(act); + a.mov(x86::eax, imm); + + 
imm.setValue((uintptr_t)code_block_return_to_interp_from_jitted); + a.mov(regs_i64[REG_I64_FREE_IDX], imm); + a.jmp(regs_i64[REG_I64_FREE_IDX]); + } + return true; +fail: + return false; +} + +/** + * Replace all the jmp address pre-saved when the code cache hasn't been + * allocated with actual address after code cache allocated + * + * @param cc compiler context containting the allocated code cacha info + * @param jmp_info_list the jmp info list + */ +static void +patch_jmp_info_list(JitCompContext *cc, bh_list *jmp_info_list) +{ + JmpInfo *jmp_info, *jmp_info_next; + JitReg reg_src, reg_dst; + char *stream; + + jmp_info = (JmpInfo *)bh_list_first_elem(jmp_info_list); + + while (jmp_info) { + jmp_info_next = (JmpInfo *)bh_list_elem_next(jmp_info); + + reg_src = jit_reg_new(JIT_REG_KIND_L32, jmp_info->label_src); + stream = (char *)cc->jitted_addr_begin + jmp_info->offset; + + if (jmp_info->type == JMP_DST_LABEL) { + reg_dst = + jit_reg_new(JIT_REG_KIND_L32, jmp_info->dst_info.label_dst); + *(int32 *)stream = + (int32)((uintptr_t)*jit_annl_jitted_addr(cc, reg_dst) + - (uintptr_t)stream) + - 4; + } + else if (jmp_info->type == JMP_END_OF_CALLBC) { + /* TODO */ + } + else if (jmp_info->type == JMP_TARGET_CODE) { + /* TODO */ + } + + jmp_info = jmp_info_next; + } +} + +/* Free the jmp info list */ +static void +free_jmp_info_list(bh_list *jmp_info_list) +{ + void *cur_node = bh_list_first_elem(jmp_info_list); + + while (cur_node) { + void *next_node = bh_list_elem_next(cur_node); + + bh_list_remove(jmp_info_list, cur_node); + jit_free(cur_node); + cur_node = next_node; + } +} + bool jit_codegen_gen_native(JitCompContext *cc) { - jit_set_last_error(cc, "jit_codegen_gen_native failed"); - return false; + JitBasicBlock *block; + JitInsn *insn; + JitReg r0, r1, r2, r3; + JmpInfo jmp_info_head; + bh_list *jmp_info_list = (bh_list *)&jmp_info_head; + uint32 label_index, label_num, i, j; + uint32 *label_offsets = NULL, code_size; +#if CODEGEN_DUMP != 0 + uint32 code_offset 
= 0; +#endif + bool return_value = false, is_last_insn; + void **jitted_addr; + char *code_buf, *stream; + + JitErrorHandler err_handler; + Environment env(Arch::kX64); + CodeHolder code; + code.init(env); + code.setErrorHandler(&err_handler); + x86::Assembler a(&code); + + if (BH_LIST_SUCCESS != bh_list_init(jmp_info_list)) { + jit_set_last_error(cc, "init jmp info list failed"); + return false; + } + + label_num = jit_cc_label_num(cc); + + if (!(label_offsets = + (uint32 *)jit_calloc(((uint32)sizeof(uint32)) * label_num))) { + jit_set_last_error(cc, "allocate memory failed"); + goto fail; + } + + for (i = 0; i < label_num; i++) { + if (i == 0) + label_index = 0; + else if (i == label_num - 1) + label_index = 1; + else + label_index = i + 1; + + label_offsets[label_index] = code.sectionById(0)->buffer().size(); + + block = *jit_annl_basic_block( + cc, jit_reg_new(JIT_REG_KIND_L32, label_index)); + +#if CODEGEN_DUMP != 0 + os_printf("\nL%d:\n\n", label_index); +#endif + + JIT_FOREACH_INSN(block, insn) + { + is_last_insn = (insn->next == block) ? 
true : false; + + switch (insn->opcode) { + case JIT_OP_MOV: + LOAD_2ARGS(); + if (!lower_mov(cc, a, r0, r1)) + GOTO_FAIL; + break; + + case JIT_OP_I32TOI8: + LOAD_2ARGS(); + CONVERT_R_R(I32, I32, i8, i32, int32); + break; + + case JIT_OP_I32TOU8: + LOAD_2ARGS(); + CONVERT_R_R(I32, I32, u8, i32, int32); + break; + + case JIT_OP_I32TOI16: + LOAD_2ARGS(); + CONVERT_R_R(I32, I32, i16, i32, int32); + break; + + case JIT_OP_I32TOU16: + LOAD_2ARGS(); + CONVERT_R_R(I32, I32, u16, i32, int32); + break; + + case JIT_OP_I32TOF32: + case JIT_OP_U32TOF32: + LOAD_2ARGS(); + CONVERT_R_R(F32, I32, f32, i32, int32); + break; + + case JIT_OP_I32TOF64: + case JIT_OP_U32TOF64: + LOAD_2ARGS(); + CONVERT_R_R(F64, I32, f64, i32, int32); + break; + + case JIT_OP_F32TOI32: + LOAD_2ARGS(); + CONVERT_R_R(I32, F32, i32, f32, int32); + break; + + case JIT_OP_F32TOF64: + LOAD_2ARGS(); + CONVERT_R_R(F64, F32, f64, f32, float32); + break; + + case JIT_OP_F64TOI32: + LOAD_2ARGS(); + CONVERT_R_R(I32, F64, i32, f64, float64); + break; + + case JIT_OP_F64TOF32: + LOAD_2ARGS(); + CONVERT_R_R(F32, F64, f32, f64, float64); + break; + + case JIT_OP_NEG: + LOAD_2ARGS(); + if (!lower_neg(cc, a, r0, r1)) + GOTO_FAIL; + break; + + case JIT_OP_ADD: + case JIT_OP_SUB: + case JIT_OP_MUL: + case JIT_OP_DIV: + case JIT_OP_REM: + LOAD_3ARGS(); + if (!lower_alu(cc, a, + (ALU_OP)(ADD + (insn->opcode - JIT_OP_ADD)), + r0, r1, r2)) + GOTO_FAIL; + break; + + case JIT_OP_SHL: + case JIT_OP_SHRS: + case JIT_OP_SHRU: + LOAD_3ARGS(); + if (!lower_shift( + cc, a, + (SHIFT_OP)(SHL + (insn->opcode - JIT_OP_SHL)), r0, + r1, r2)) + GOTO_FAIL; + break; + + case JIT_OP_OR: + case JIT_OP_XOR: + case JIT_OP_AND: + LOAD_3ARGS(); + if (!lower_bit(cc, a, + (BIT_OP)(OR + (insn->opcode - JIT_OP_OR)), + r0, r1, r2)) + GOTO_FAIL; + break; + + case JIT_OP_CMP: + LOAD_3ARGS(); + if (!lower_cmp(cc, a, r0, r1, r2)) + GOTO_FAIL; + break; + + case JIT_OP_SELECTEQ: + case JIT_OP_SELECTNE: + case JIT_OP_SELECTGTS: + case JIT_OP_SELECTGES: + case 
JIT_OP_SELECTLTS: + case JIT_OP_SELECTLES: + case JIT_OP_SELECTGTU: + case JIT_OP_SELECTGEU: + case JIT_OP_SELECTLTU: + case JIT_OP_SELECTLEU: + LOAD_4ARGS(); + if (!lower_select( + cc, a, + (COND_OP)(EQ + (insn->opcode - JIT_OP_SELECTEQ)), + r0, r1, r2, r3)) + GOTO_FAIL; + break; + + case JIT_OP_LDEXECENV: + LOAD_1ARG(); + CHECK_KIND(r0, JIT_REG_KIND_I32); + /* TODO */ + break; + + case JIT_OP_LDJITINFO: + LOAD_1ARG(); + CHECK_KIND(r0, JIT_REG_KIND_I32); + /* TODO */ + break; + + case JIT_OP_LDI8: + LOAD_3ARGS(); + LD_R_R_R(I32, 1, true); + break; + + case JIT_OP_LDU8: + LOAD_3ARGS(); + LD_R_R_R(I32, 1, false); + break; + + case JIT_OP_LDI16: + LOAD_3ARGS(); + LD_R_R_R(I32, 2, true); + break; + + case JIT_OP_LDU16: + LOAD_3ARGS(); + LD_R_R_R(I32, 2, false); + break; + + case JIT_OP_LDI32: + LOAD_3ARGS(); + LD_R_R_R(I32, 4, true); + break; + + case JIT_OP_LDU32: + LOAD_3ARGS(); + LD_R_R_R(I32, 4, false); + break; + + case JIT_OP_LDI64: + case JIT_OP_LDU64: + LOAD_3ARGS(); + LD_R_R_R(I64, 8, false); + break; + + case JIT_OP_LDF32: + LOAD_3ARGS(); + LD_R_R_R(F32, 4, false); + break; + + case JIT_OP_LDF64: + LOAD_3ARGS(); + LD_R_R_R(F64, 8, false); + break; + + case JIT_OP_STI8: + LOAD_3ARGS_NO_ASSIGN(); + ST_R_R_R(I32, int32, 1); + break; + + case JIT_OP_STI16: + LOAD_3ARGS_NO_ASSIGN(); + ST_R_R_R(I32, int32, 2); + break; + + case JIT_OP_STI32: + LOAD_3ARGS_NO_ASSIGN(); + ST_R_R_R(I32, int32, 4); + break; + + case JIT_OP_STI64: + LOAD_3ARGS_NO_ASSIGN(); + ST_R_R_R(I64, int64, 8); + break; + + case JIT_OP_STF32: + LOAD_3ARGS_NO_ASSIGN(); + ST_R_R_R(F32, float32, 4); + break; + + case JIT_OP_STF64: + LOAD_3ARGS_NO_ASSIGN(); + ST_R_R_R(F64, float64, 8); + break; + + case JIT_OP_JMP: + LOAD_1ARG(); + CHECK_KIND(r0, JIT_REG_KIND_L32); + if (!(is_last_insn + && label_is_neighboring(cc, label_index, + jit_reg_no(r0)))) + JMP_TO_LABEL(stream_offset, jit_reg_no(r0), + label_index); + break; + + case JIT_OP_BEQ: + case JIT_OP_BNE: + case JIT_OP_BGTS: + case JIT_OP_BGES: + case 
JIT_OP_BLTS: + case JIT_OP_BLES: + case JIT_OP_BGTU: + case JIT_OP_BGEU: + case JIT_OP_BLTU: + case JIT_OP_BLEU: + LOAD_3ARGS(); + if (!lower_branch( + cc, a, jmp_info_list, label_index, + (COND_OP)(EQ + (insn->opcode - JIT_OP_BEQ)), r0, r1, + r2, is_last_insn)) + GOTO_FAIL; + break; + + case JIT_OP_LOOKUPSWITCH: + { + JitOpndLookupSwitch *opnd = jit_insn_opndls(insn); + if (!lower_lookupswitch(cc, a, label_index, opnd, + is_last_insn)) + GOTO_FAIL; + break; + } + + case JIT_OP_CALLNATIVE: + if (!lower_callnative(cc, a, label_index, insn)) + GOTO_FAIL; + break; + + case JIT_OP_CALLBC: + if (!lower_callbc(cc, a, label_index, insn)) + GOTO_FAIL; + break; + + case JIT_OP_RETURNBC: + if (!lower_returnbc(cc, a, label_index, insn)) + GOTO_FAIL; + break; + + case JIT_OP_RETURN: + if (!lower_return(cc, a, insn)) + GOTO_FAIL; + break; + + default: + jit_set_last_error_v(cc, "unsupported JIT opcode 0x%2x", + insn->opcode); + GOTO_FAIL; + } + + if (err_handler.err) { + jit_set_last_error_v( + cc, "failed to generate native code for JIT opcode 0x%02x", + insn->opcode); + GOTO_FAIL; + } + +#if CODEGEN_DUMP != 0 + dump_native((char *)code.sectionById(0)->buffer().data() + + code_offset, + code.sectionById(0)->buffer().size() - code_offset); + code_offset = code.sectionById(0)->buffer().size(); +#endif + } + } + + code_buf = (char *)code.sectionById(0)->buffer().data(); + code_size = code.sectionById(0)->buffer().size(); + if (!(stream = (char *)jit_code_cache_alloc(code_size))) { + jit_set_last_error(cc, "allocate memory failed"); + goto fail; + } + + bh_memcpy_s(stream, code_size, code_buf, code_size); + cc->jitted_addr_begin = stream; + cc->jitted_addr_end = stream + code_size; + + for (i = 0; i < label_num; i++) { + if (i == 0) + label_index = 0; + else if (i == label_num - 1) + label_index = 1; + else + label_index = i + 1; + + jitted_addr = jit_annl_jitted_addr( + cc, jit_reg_new(JIT_REG_KIND_L32, label_index)); + *jitted_addr = stream + label_offsets[label_index]; + } + + 
patch_jmp_info_list(cc, jmp_info_list); + return_value = true; + +fail: + + jit_free(label_offsets); + free_jmp_info_list(jmp_info_list); + return return_value; } bool @@ -3028,51 +4418,13 @@ void jit_codegen_free_native(JitCompContext *cc) {} -#if WASM_ENABLE_FAST_JIT_DUMP != 0 -static void -dump_native(char *data, uint32 length) -{ - /* Initialize decoder context */ - ZydisDecoder decoder; - ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, - ZYDIS_STACK_WIDTH_64); - - /* Initialize formatter */ - ZydisFormatter formatter; - ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL); - - /* Loop over the instructions in our buffer */ - ZyanU64 runtime_address = (ZyanU64)(uintptr_t)data; - ZyanUSize offset = 0; - ZydisDecodedInstruction instruction; - ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT_VISIBLE]; - - while (ZYAN_SUCCESS(ZydisDecoderDecodeFull( - &decoder, data + offset, length - offset, &instruction, operands, - ZYDIS_MAX_OPERAND_COUNT_VISIBLE, ZYDIS_DFLAG_VISIBLE_OPERANDS_ONLY))) { - /* Print current instruction pointer */ - printf("%012" PRIX64 " ", runtime_address); - - /* Format & print the binary instruction structure to - human readable format */ - char buffer[256]; - ZydisFormatterFormatInstruction(&formatter, &instruction, operands, - instruction.operand_count_visible, - buffer, sizeof(buffer), - runtime_address); - puts(buffer); - - offset += instruction.length; - runtime_address += instruction.length; - } -} -#endif - void jit_codegen_dump_native(void *begin_addr, void *end_addr) { #if WASM_ENABLE_FAST_JIT_DUMP != 0 + os_printf("\n"); dump_native((char *)begin_addr, (char *)end_addr - (char *)begin_addr); + os_printf("\n"); #endif } @@ -3080,12 +4432,15 @@ bool jit_codegen_init() { const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info(); + JitGlobals *jit_globals = jit_compiler_get_jit_globals(); char *code_buf, *stream; uint32 code_size; + JitErrorHandler err_handler; Environment env(Arch::kX64); CodeHolder code; 
code.init(env); + code.setErrorHandler(&err_handler); x86::Assembler a(&code); /* push callee-save registers */ @@ -3101,11 +4456,14 @@ jit_codegen_init() a.push(x86::rsi); /* exec_env_reg = exec_env */ a.mov(regs_i64[hreg_info->exec_env_hreg_index], x86::rdi); - /* fp_reg = info.->frame */ - a.mov(x86::ebp, x86::ptr(x86::rsi, 0)); + /* fp_reg = info->frame */ + a.mov(x86::rbp, x86::ptr(x86::rsi, 0)); /* jmp target */ a.jmp(x86::rdx); + if (err_handler.err) + return false; + code_buf = (char *)code.sectionById(0)->buffer().data(); code_size = code.sectionById(0)->buffer().size(); stream = (char *)jit_code_cache_alloc(code_size); @@ -3120,8 +4478,20 @@ jit_codegen_init() #endif a.setOffset(0); + /* pop info */ a.pop(x86::rsi); + /* info->frame = fp_reg */ + { + x86::Mem m(x86::rsi, 0); + a.mov(m, x86::rbp); + } + /* info->out.ret.ival[0, 1] = rcx */ + { + x86::Mem m(x86::rsi, 8); + a.mov(m, x86::rcx); + } + /* pop exec_env */ a.pop(x86::rdi); /* pop callee-save registers */ @@ -3131,18 +4501,27 @@ jit_codegen_init() a.pop(x86::r12); a.pop(x86::rbx); a.pop(x86::rbp); + a.ret(); + + if (err_handler.err) + goto fail1; code_buf = (char *)code.sectionById(0)->buffer().data(); code_size = code.sectionById(0)->buffer().size(); stream = (char *)jit_code_cache_alloc(code_size); - if (!stream) { - jit_code_cache_free(code_block_switch_to_jitted_from_interp); - return false; - } + if (!stream) + goto fail1; bh_memcpy_s(stream, code_size, code_buf, code_size); code_block_return_to_interp_from_jitted = stream; + + jit_globals->return_to_interp_from_jitted = + code_block_return_to_interp_from_jitted; return true; + +fail1: + jit_code_cache_free(code_block_switch_to_jitted_from_interp); + return false; } void diff --git a/core/iwasm/fast-jit/fe/jit_emit_control.c b/core/iwasm/fast-jit/fe/jit_emit_control.c index 16b398534..d9f82034e 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_control.c +++ b/core/iwasm/fast-jit/fe/jit_emit_control.c @@ -118,6 +118,52 @@ fail: return false; } 
+static bool +load_block_results(JitCompContext *cc, JitBlock *block) +{ + JitFrame *jit_frame = cc->jit_frame; + uint32 offset, i; + JitReg value = 0; + + /* Restore jit frame's sp to block's sp begin */ + jit_frame->sp = block->frame_sp_begin; + + /* Load results to new block */ + offset = (uint32)(jit_frame->sp - jit_frame->lp); + for (i = 0; i < block->result_count; i++) { + switch (block->result_types[i]) { + case VALUE_TYPE_I32: +#if WASM_ENABLE_REF_TYPES != 0 + case VALUE_TYPE_EXTERNREF: + case VALUE_TYPE_FUNCREF: +#endif + value = gen_load_i32(jit_frame, offset); + offset++; + break; + case VALUE_TYPE_I64: + value = gen_load_i64(jit_frame, offset); + offset += 2; + break; + case VALUE_TYPE_F32: + value = gen_load_f32(jit_frame, offset); + offset++; + break; + case VALUE_TYPE_F64: + value = gen_load_f64(jit_frame, offset); + offset += 2; + break; + default: + bh_assert(0); + break; + } + PUSH(value, block->result_types[i]); + } + + return true; +fail: + return false; +} + static bool push_jit_block_to_stack_and_pass_params(JitCompContext *cc, JitBlock *block, JitBasicBlock *basic_block, JitReg cond) @@ -133,7 +179,6 @@ push_jit_block_to_stack_and_pass_params(JitCompContext *cc, JitBlock *block, we just move param values from current block's value stack to the new block's value stack */ for (i = 0; i < block->param_count; i++) { - param_index = block->param_count - 1 - i; jit_value = jit_value_stack_pop( &cc->block_stack.block_list_end->value_stack); if (!value_list_head) { @@ -296,8 +341,26 @@ handle_func_return(JitCompContext *cc, JitBlock *block) NEW_CONST(I32, offsetof(WASMInterpFrame, sp))); #endif - copy_block_arities(cc, prev_frame_sp, block->result_types, - block->result_count); + if (block->result_count) { + uint32 cell_num = + wasm_get_cell_num(block->result_types, block->result_count); + + copy_block_arities(cc, prev_frame_sp, block->result_types, + block->result_count); +#if UINTPTR_MAX == UINT64_MAX + /* prev_frame->sp += cell_num */ + 
GEN_INSN(ADD, prev_frame_sp, prev_frame_sp, + NEW_CONST(I64, cell_num * 4)); + GEN_INSN(STI64, prev_frame_sp, prev_frame, + NEW_CONST(I32, offsetof(WASMInterpFrame, sp))); +#else + /* prev_frame->sp += cell_num */ + GEN_INSN(ADD, prev_frame_sp, prev_frame_sp, + NEW_CONST(I32, cell_num * 4)); + GEN_INSN(STI32, prev_frame_sp, prev_frame, + NEW_CONST(I32, offsetof(WASMInterpFrame, sp))); +#endif + } /* Free stack space of the current frame: exec_env->wasm_stack.s.top = cur_frame */ @@ -320,14 +383,14 @@ handle_func_return(JitCompContext *cc, JitBlock *block) /* fp_reg = prev_frame */ GEN_INSN(MOV, cc->fp_reg, prev_frame); /* return 0 */ - GEN_INSN(RETURNBC, NEW_CONST(I32, 0)); + GEN_INSN(RETURNBC, NEW_CONST(I32, JIT_INTERP_ACTION_NORMAL), 0, 0); } static bool -handle_op_end(JitCompContext *cc, uint8 **p_frame_ip) +handle_op_end(JitCompContext *cc, uint8 **p_frame_ip, bool from_same_block) { JitFrame *jit_frame = cc->jit_frame; - JitBlock *block; + JitBlock *block, *block_prev; JitIncomingInsn *incoming_insn; JitInsn *insn; @@ -345,6 +408,42 @@ handle_op_end(JitCompContext *cc, uint8 **p_frame_ip) handle_func_return(cc, block); SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1); } + else if (block->result_count > 0) { + JitValue *value_list_head = NULL, *value_list_end = NULL; + JitValue *jit_value; + uint32 i; + + /* No need to change cc->jit_frame, just move result values + from current block's value stack to previous block's + value stack */ + block_prev = block->prev; + + for (i = 0; i < block->result_count; i++) { + jit_value = jit_value_stack_pop(&block->value_stack); + bh_assert(jit_value); + if (!value_list_head) { + value_list_head = value_list_end = jit_value; + jit_value->prev = jit_value->next = NULL; + } + else { + jit_value->prev = NULL; + jit_value->next = value_list_head; + value_list_head->prev = jit_value; + value_list_head = jit_value; + } + } + + if (!block_prev->value_stack.value_list_head) { + block_prev->value_stack.value_list_head = 
value_list_head; + block_prev->value_stack.value_list_end = value_list_end; + } + else { + /* Link to the end of previous block's value stack */ + block_prev->value_stack.value_list_end->next = value_list_head; + value_list_head->prev = block_prev->value_stack.value_list_end; + block_prev->value_stack.value_list_end = value_list_end; + } + } /* Pop block and destroy the block */ block = jit_block_stack_pop(&cc->block_stack); @@ -361,8 +460,9 @@ handle_op_end(JitCompContext *cc, uint8 **p_frame_ip) CREATE_BASIC_BLOCK(block->basic_block_end); SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1); SET_BB_BEGIN_BCIP(block->basic_block_end, *p_frame_ip); - /* Jump to the end basic block */ - BUILD_BR(block->basic_block_end); + if (from_same_block) + /* Jump to the end basic block */ + BUILD_BR(block->basic_block_end); /* Patch the INSNs which jump to this basic block */ incoming_insn = block->incoming_insns_for_end_bb; @@ -384,13 +484,20 @@ handle_op_end(JitCompContext *cc, uint8 **p_frame_ip) SET_BUILDER_POS(block->basic_block_end); + /* Pop block and load block results */ + block = jit_block_stack_pop(&cc->block_stack); + if (block->label_type == LABEL_TYPE_FUNCTION) { handle_func_return(cc, block); SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1); } + else { + if (!load_block_results(cc, block)) { + jit_block_destroy(block); + goto fail; + } + } - /* Pop block and destroy the block */ - block = jit_block_stack_pop(&cc->block_stack); jit_block_destroy(block); return true; } @@ -420,7 +527,7 @@ handle_op_else(JitCompContext *cc, uint8 **p_frame_ip) /* The if branch is handled like OP_BLOCK (cond is const and != 0), just skip the else branch and handle OP_END */ *p_frame_ip = block->wasm_code_end + 1; - return handle_op_end(cc, p_frame_ip); + return handle_op_end(cc, p_frame_ip, true); } else { /* Has else branch and need to translate else branch */ @@ -488,7 +595,7 @@ handle_next_reachable_block(JitCompContext *cc, uint8 **p_frame_ip) } else if 
(block->incoming_insns_for_end_bb) { *p_frame_ip = block->wasm_code_end + 1; - return handle_op_end(cc, p_frame_ip); + return handle_op_end(cc, p_frame_ip, false); } else { jit_block_stack_pop(&cc->block_stack); @@ -635,7 +742,7 @@ jit_compile_op_else(JitCompContext *cc, uint8 **p_frame_ip) bool jit_compile_op_end(JitCompContext *cc, uint8 **p_frame_ip) { - return handle_op_end(cc, p_frame_ip); + return handle_op_end(cc, p_frame_ip, true); } #if 0 @@ -716,7 +823,7 @@ bool jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) { JitFrame *jit_frame; - JitBlock *block_dst; + JitBlock *block_dst, *block; JitReg frame_sp_dst; JitValueSlot *frame_sp_src = NULL; JitInsn *insn; @@ -733,6 +840,12 @@ jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) #endif #endif + /* Check block stack */ + if (!(block = cc->block_stack.block_list_end)) { + jit_set_last_error(cc, "WASM block stack underflow"); + return false; + } + if (!(block_dst = get_target_block(cc, br_depth))) { return false; } @@ -761,15 +874,24 @@ jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) #endif offset = offsetof(WASMInterpFrame, lp) + (block_dst->frame_sp_begin - jit_frame->lp) * 4; +#if UINTPTR_MAX == UINT64_MAX + GEN_INSN(ADD, frame_sp_dst, cc->fp_reg, NEW_CONST(I64, offset)); +#else GEN_INSN(ADD, frame_sp_dst, cc->fp_reg, NEW_CONST(I32, offset)); +#endif } + gen_commit_values(jit_frame, jit_frame->lp, block->frame_sp_begin); + if (block_dst->label_type == LABEL_TYPE_LOOP) { if (copy_arities) { /* Dest block is Loop block, copy loop parameters */ copy_block_arities(cc, frame_sp_dst, block_dst->param_types, block_dst->param_count); } + + clear_values(jit_frame); + /* Jump to the begin basic block */ BUILD_BR(block_dst->basic_block_entry); SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1); @@ -780,6 +902,9 @@ jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) copy_block_arities(cc, frame_sp_dst, 
block_dst->result_types, block_dst->result_count); } + + clear_values(jit_frame); + /* Jump to the end basic block */ if (!(insn = GEN_INSN(JMP, 0))) { jit_set_last_error(cc, "generate jmp insn failed"); @@ -863,7 +988,11 @@ jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip) #endif offset = offsetof(WASMInterpFrame, lp) + (block_dst->frame_sp_begin - jit_frame->lp) * 4; +#if UINTPTR_MAX == UINT64_MAX + GEN_INSN(ADD, frame_sp_dst, cc->fp_reg, NEW_CONST(I64, offset)); +#else GEN_INSN(ADD, frame_sp_dst, cc->fp_reg, NEW_CONST(I32, offset)); +#endif } if (block_dst->label_type == LABEL_TYPE_LOOP) { diff --git a/core/iwasm/fast-jit/jit_codecache.c b/core/iwasm/fast-jit/jit_codecache.c index 7d30572df..3fea07b05 100644 --- a/core/iwasm/fast-jit/jit_codecache.c +++ b/core/iwasm/fast-jit/jit_codecache.c @@ -56,6 +56,8 @@ jit_code_cache_free(void *ptr) bool jit_pass_register_jitted_code(JitCompContext *cc) { - /* TODO */ - return false; + cc->cur_wasm_func->fast_jit_jitted_code = cc->jitted_addr_begin; + cc->cur_wasm_module->fast_jit_func_ptrs[cc->cur_wasm_func_idx] = + cc->jitted_addr_begin; + return true; } diff --git a/core/iwasm/fast-jit/jit_codegen.c b/core/iwasm/fast-jit/jit_codegen.c index 2f0f90b91..2bd60bb41 100644 --- a/core/iwasm/fast-jit/jit_codegen.c +++ b/core/iwasm/fast-jit/jit_codegen.c @@ -15,12 +15,8 @@ jit_pass_lower_cg(JitCompContext *cc) bool jit_pass_codegen(JitCompContext *cc) { -#if 0 - bh_assert(jit_annl_is_enabled_next_label(cc)); - if (!jit_annl_enable_jitted_addr(cc)) return false; -#endif return jit_codegen_gen_native(cc); } diff --git a/core/iwasm/fast-jit/jit_compiler.c b/core/iwasm/fast-jit/jit_compiler.c index 77e50ca92..927edc3c0 100644 --- a/core/iwasm/fast-jit/jit_compiler.c +++ b/core/iwasm/fast-jit/jit_compiler.c @@ -50,7 +50,8 @@ static JitGlobals jit_globals = { #else .passes = compiler_passes_with_dump, #endif - .code_cache_size = 10 * 1024 * 1024 + .code_cache_size = 10 * 1024 * 1024, + 
.return_to_interp_from_jitted = NULL }; /* clang-format on */ @@ -99,7 +100,7 @@ jit_compiler_destroy() jit_code_cache_destroy(); } -const JitGlobals * +JitGlobals * jit_compiler_get_jit_globals() { return &jit_globals; @@ -153,7 +154,7 @@ jit_compiler_compile_all(WASMModule *module) { JitCompContext *cc; char *last_error; - bool ret = false; + bool ret = true; uint32 i; /* Initialize compilation context. */ diff --git a/core/iwasm/fast-jit/jit_compiler.h b/core/iwasm/fast-jit/jit_compiler.h index 571f16ba7..ac1cd37e3 100644 --- a/core/iwasm/fast-jit/jit_compiler.h +++ b/core/iwasm/fast-jit/jit_compiler.h @@ -19,15 +19,50 @@ typedef struct JitGlobals { const uint8 *passes; /* Code cache size. */ uint32 code_cache_size; + char *return_to_interp_from_jitted; } JitGlobals; +/** + * Actions the interpreter should do when JITed code returns to + * interpreter. + */ +typedef enum JitInterpAction { + JIT_INTERP_ACTION_NORMAL, /* normal execution */ + JIT_INTERP_ACTION_THROWN, /* exception was thrown */ + JIT_INTERP_ACTION_CALL /* call wasm function */ +} JitInterpAction; + /** * Information exchanged between JITed code and interpreter. */ typedef struct JitInterpSwitchInfo { /* Points to the frame that is passed to JITed code and the frame - that is returned from JITed code. */ + that is returned from JITed code */ void *frame; + + /* Output values from JITed code of different actions */ + union { + /* IP and SP offsets for NORMAL */ + struct { + int32 ip; + int32 sp; + } normal; + + /* Function called from JITed code for CALL */ + struct { + void *function; + } call; + + /* Returned integer and/or floating point values for RETURN. This + is also used to pass return values from interpreter to JITed + code if the caller is in JITed code and the callee is in + interpreter. 
*/ + struct { + uint32 ival[2]; + uint32 fval[2]; + uint32 last_return_type; + } ret; + } out; } JitInterpSwitchInfo; bool @@ -36,7 +71,7 @@ jit_compiler_init(); void jit_compiler_destroy(); -const JitGlobals * +JitGlobals * jit_compiler_get_jit_globals(); const char * diff --git a/core/iwasm/fast-jit/jit_dump.c b/core/iwasm/fast-jit/jit_dump.c index c4b23a882..8a477d144 100644 --- a/core/iwasm/fast-jit/jit_dump.c +++ b/core/iwasm/fast-jit/jit_dump.c @@ -146,11 +146,13 @@ jit_dump_insn(JitCompContext *cc, JitInsn *insn) void jit_dump_basic_block(JitCompContext *cc, JitBasicBlock *block) { - unsigned i; + unsigned i, label_index; + void *begin_addr, *end_addr; + JitBasicBlock *block_next; JitInsn *insn; JitRegVec preds = jit_basic_block_preds(block); JitRegVec succs = jit_basic_block_succs(block); - JitReg label = jit_basic_block_label(block); + JitReg label = jit_basic_block_label(block), label_next; JitReg *reg; jit_dump_reg(cc, label); @@ -176,16 +178,33 @@ jit_dump_basic_block(JitCompContext *cc, JitBasicBlock *block) - (uint8 *)cc->cur_wasm_module->load_addr); os_printf("\n"); - if (jit_annl_is_enabled_jitted_addr(cc)) - /* Dump assembly. */ - jit_codegen_dump_native( - *(jit_annl_jitted_addr(cc, label)), - label != cc->exit_label - ? 
*(jit_annl_jitted_addr(cc, *(jit_annl_next_label(cc, label)))) - : cc->jitted_addr_end); - else + if (jit_annl_is_enabled_jitted_addr(cc)) { + begin_addr = *(jit_annl_jitted_addr(cc, label)); + + if (label == cc->entry_label) { + block_next = cc->_ann._label_basic_block[2]; + label_next = jit_basic_block_label(block_next); + end_addr = *(jit_annl_jitted_addr(cc, label_next)); + } + else if (label == cc->exit_label) { + end_addr = cc->jitted_addr_end; + } + else { + label_index = jit_reg_no(label); + if (label_index < jit_cc_label_num(cc) - 1) + block_next = cc->_ann._label_basic_block[label_index + 1]; + else + block_next = cc->_ann._label_basic_block[1]; + label_next = jit_basic_block_label(block_next); + end_addr = *(jit_annl_jitted_addr(cc, label_next)); + } + + jit_codegen_dump_native(begin_addr, end_addr); + } + else { /* Dump IR. */ JIT_FOREACH_INSN(block, insn) jit_dump_insn(cc, insn); + } os_printf(" ; SUCCS("); @@ -279,18 +298,17 @@ dump_cc_ir(JitCompContext *cc) os_printf("\n\n"); - if (jit_annl_is_enabled_next_label(cc)) + if (jit_annl_is_enabled_next_label(cc)) { /* Blocks have been reordered, use that order to dump. */ for (label = cc->entry_label; label; label = *(jit_annl_next_label(cc, label))) jit_dump_basic_block(cc, *(jit_annl_basic_block(cc, label))); - else - /* Otherwise, use the default order. */ - { + } + else { + /* Otherwise, use the default order. 
*/ jit_dump_basic_block(cc, jit_cc_entry_basic_block(cc)); - JIT_FOREACH_BLOCK(cc, i, end, block) - jit_dump_basic_block(cc, block); + JIT_FOREACH_BLOCK(cc, i, end, block) jit_dump_basic_block(cc, block); jit_dump_basic_block(cc, jit_cc_exit_basic_block(cc)); } diff --git a/core/iwasm/fast-jit/jit_frontend.c b/core/iwasm/fast-jit/jit_frontend.c index 3ad976d1f..fc4774a64 100644 --- a/core/iwasm/fast-jit/jit_frontend.c +++ b/core/iwasm/fast-jit/jit_frontend.c @@ -244,7 +244,7 @@ form_and_translate_func(JitCompContext *cc) if (insn) { *(jit_insn_opndv(insn, 2)) = NEW_CONST(I32, i); } - GEN_INSN(RETURNBC, NEW_CONST(I32, i)); + GEN_INSN(RETURN, NEW_CONST(I32, JIT_INTERP_ACTION_THROWN)); *(jit_annl_begin_bcip(cc, jit_basic_block_label(cc->cur_basic_block))) = @@ -351,7 +351,7 @@ init_func_translation(JitCompContext *cc) GEN_INSN(LDI64, top_boundary, cc->exec_env_reg, NEW_CONST(I32, offsetof(WASMExecEnv, wasm_stack.s.top_boundary))); /* frame_boundary = top + frame_size + outs_size */ - GEN_INSN(ADD, frame_boundary, top, NEW_CONST(I32, frame_size + outs_size)); + GEN_INSN(ADD, frame_boundary, top, NEW_CONST(I64, frame_size + outs_size)); /* if frame_boundary > top_boundary, throw stack overflow exception */ GEN_INSN(CMP, cc->cmp_reg, frame_boundary, top_boundary); if (!jit_emit_exception(cc, EXCE_OPERAND_STACK_OVERFLOW, JIT_OP_BGTU, @@ -361,13 +361,13 @@ init_func_translation(JitCompContext *cc) /* Add first and then sub to reduce one used register */ /* new_top = frame_boundary - outs_size = top + frame_size */ - GEN_INSN(SUB, new_top, frame_boundary, NEW_CONST(I32, outs_size)); + GEN_INSN(SUB, new_top, frame_boundary, NEW_CONST(I64, outs_size)); /* exec_env->wasm_stack.s.top = new_top */ GEN_INSN(STI64, new_top, cc->exec_env_reg, NEW_CONST(I32, offsetof(WASMExecEnv, wasm_stack.s.top))); /* frame_sp = frame->lp + local_size */ GEN_INSN(ADD, frame_sp, top, - NEW_CONST(I32, offsetof(WASMInterpFrame, lp) + local_size)); + NEW_CONST(I64, offsetof(WASMInterpFrame, lp) + 
local_size)); /* frame->sp = frame_sp */ GEN_INSN(STI64, frame_sp, top, NEW_CONST(I32, offsetof(WASMInterpFrame, sp))); diff --git a/core/iwasm/fast-jit/jit_ir.def b/core/iwasm/fast-jit/jit_ir.def index 21c3d984a..e2154239d 100644 --- a/core/iwasm/fast-jit/jit_ir.def +++ b/core/iwasm/fast-jit/jit_ir.def @@ -167,8 +167,9 @@ INSN(LOOKUPSWITCH, LookupSwitch, 1, 0) /* Call and return instructions */ INSN(CALLNATIVE, VReg, 2, 1) -INSN(CALLBC, Reg, 3, 0) -INSN(RETURNBC, Reg, 1, 0) +INSN(CALLBC, Reg, 3, 2) +INSN(RETURNBC, Reg, 3, 0) +INSN(RETURN, Reg, 1, 0) #if 0 /* Comparison instructions, can be translate to SELECTXXX */ diff --git a/core/iwasm/interpreter/wasm.h b/core/iwasm/interpreter/wasm.h index 2e6250778..41ae4dcb7 100644 --- a/core/iwasm/interpreter/wasm.h +++ b/core/iwasm/interpreter/wasm.h @@ -255,7 +255,7 @@ struct WASMFunction { uint32 const_cell_num; #endif #if WASM_ENABLE_FAST_JIT != 0 - void *jitted_code; + void *fast_jit_jitted_code; #endif }; @@ -447,7 +447,7 @@ struct WASMModule { #if WASM_ENABLE_FAST_JIT != 0 /* point to JITed functions */ - void **func_ptrs; + void **fast_jit_func_ptrs; #endif }; diff --git a/core/iwasm/interpreter/wasm_interp_classic.c b/core/iwasm/interpreter/wasm_interp_classic.c index 0d785acd9..0611c340a 100644 --- a/core/iwasm/interpreter/wasm_interp_classic.c +++ b/core/iwasm/interpreter/wasm_interp_classic.c @@ -3767,10 +3767,13 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env, #if WASM_ENABLE_FAST_JIT == 0 wasm_interp_call_func_bytecode(module_inst, exec_env, function, frame); #else + JitGlobals *jit_globals = jit_compiler_get_jit_globals(); JitInterpSwitchInfo info; info.frame = frame; + frame->jitted_return_addr = + (uint8 *)jit_globals->return_to_interp_from_jitted; jit_interp_switch_to_jitted(exec_env, &info, - function->u.func->jitted_code); + function->u.func->fast_jit_jitted_code); (void)wasm_interp_call_func_bytecode; #endif } diff --git a/core/iwasm/interpreter/wasm_loader.c 
b/core/iwasm/interpreter/wasm_loader.c index c8bd9e6c2..5e87cd22d 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -3230,6 +3230,11 @@ load_from_sections(WASMModule *module, WASMSection *sections, } #if WASM_ENABLE_FAST_JIT != 0 + if (!(module->fast_jit_func_ptrs = + loader_malloc(sizeof(void *) * module->function_count, error_buf, + error_buf_size))) { + return false; + } if (!jit_compiler_compile_all(module)) { set_error_buf(error_buf, error_buf_size, "fast jit compilation failed"); return false; @@ -3719,6 +3724,7 @@ wasm_loader_unload(WASMModule *module) } } #endif + #if WASM_ENABLE_DEBUG_INTERP != 0 WASMFastOPCodeNode *fast_opcode = bh_list_first_elem(&module->fast_opcode_list); @@ -3729,6 +3735,12 @@ wasm_loader_unload(WASMModule *module) } os_mutex_destroy(&module->ref_count_lock); #endif + +#if WASM_ENABLE_FAST_JIT != 0 + if (module->fast_jit_func_ptrs) + wasm_runtime_free(module->fast_jit_func_ptrs); +#endif + wasm_runtime_free(module); } diff --git a/product-mini/platforms/linux/CMakeLists.txt b/product-mini/platforms/linux/CMakeLists.txt index a4efc83a1..7cb6f25ab 100644 --- a/product-mini/platforms/linux/CMakeLists.txt +++ b/product-mini/platforms/linux/CMakeLists.txt @@ -120,6 +120,8 @@ set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections -pie -f set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wformat -Wformat-security -Wshadow") # set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wconversion -Wsign-conversion") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wformat -Wformat-security") + if (WAMR_BUILD_TARGET MATCHES "X86_.*" OR WAMR_BUILD_TARGET STREQUAL "AMD_64") if (NOT (CMAKE_C_COMPILER MATCHES ".*clang.*" OR CMAKE_C_COMPILER_ID MATCHES ".*Clang")) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mindirect-branch-register")