diff --git a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp index 194d83ffb..447cec0e8 100644 --- a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp +++ b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp @@ -238,6 +238,8 @@ typedef enum { ADD, SUB, MUL, DIV_S, REM_S, DIV_U, REM_U, MIN, MAX } ALU_OP; typedef enum { OR, XOR, AND } BIT_OP; /* Shift opcode */ typedef enum { SHL, SHRS, SHRU, ROTL, ROTR } SHIFT_OP; +/* Bitcount opcode */ +typedef enum { CLZ, CTZ, POPCNT } BITCOUNT_OP; /* Condition opcode */ typedef enum { EQ, NE, GTS, GES, LTS, LES, GTU, GEU, LTU, LEU } COND_OP; @@ -2339,8 +2341,16 @@ alu_r_r_imm_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, a.inc(regs_i64[reg_no_dst]); else if (data == -1) a.dec(regs_i64[reg_no_dst]); - else if (data != 0) - a.add(regs_i64[reg_no_dst], imm); + else if (data != 0) { + if (data >= INT32_MIN && data <= INT32_MAX) { + imm.setValue((int32)data); + a.add(regs_i64[reg_no_dst], imm); + } + else { + a.mov(regs_i64[REG_I64_FREE_IDX], imm); + a.add(regs_i64[reg_no_dst], regs_i64[REG_I64_FREE_IDX]); + } + } break; case SUB: mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src); @@ -2348,8 +2358,16 @@ alu_r_r_imm_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, a.inc(regs_i64[reg_no_dst]); else if (data == 1) a.dec(regs_i64[reg_no_dst]); - else if (data != 0) - a.sub(regs_i64[reg_no_dst], imm); + else if (data != 0) { + if (data >= INT32_MIN && data <= INT32_MAX) { + imm.setValue((int32)data); + a.sub(regs_i64[reg_no_dst], imm); + } + else { + a.mov(regs_i64[REG_I64_FREE_IDX], imm); + a.sub(regs_i64[reg_no_dst], regs_i64[REG_I64_FREE_IDX]); + } + } break; case MUL: if (data == 0) @@ -3696,44 +3714,6 @@ fail: return false; } -/** - * Encode int32 cmp operation of reg and data, and save result to reg - * - * @param a the assembler to emit the code - * @param op the opcode of cmp operation - * @param reg_no_dst the no of dst register - * @param reg_no_src 
the no of src register, as first operand - * @param data the immediate data, as the second operand - * - * @return true if success, false otherwise - */ -static bool -cmp_r_imm_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src, int32 data) -{ - Imm imm(data); - a.cmp(regs_i32[reg_no_src], imm); - return true; -} - -/** - * Encode int32 cmp operation of reg and reg, and save result to reg - * - * @param a the assembler to emit the code - * @param op the opcode of cmp operation - * @param reg_no_dst the no of dst register - * @param reg_no1_src the no of src register, as first operand - * @param reg_no2_src the no of src register, as second operand - * - * @return true if success, false otherwise - */ -static bool -cmp_r_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, - int32 reg_no2_src) -{ - a.cmp(regs_i32[reg_no1_src], regs_i32[reg_no2_src]); - return true; -} - /** * Encode int32 cmp operation of imm and imm, and save result to reg * @@ -3816,44 +3796,6 @@ cmp_r_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, return true; } -/** - * Encode int64 cmp operation of reg and data, and save result to reg - * - * @param a the assembler to emit the code - * @param op the opcode of cmp operation - * @param reg_no_dst the no of dst register - * @param reg_no_src the no of src register, as first operand - * @param data the immediate data, as the second operand - * - * @return true if success, false otherwise - */ -static bool -cmp_r_imm_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src, int64 data) -{ - Imm imm(data); - a.cmp(regs_i64[reg_no_src], imm); - return true; -} - -/** - * Encode int64 cmp operation of reg and reg, and save result to reg - * - * @param a the assembler to emit the code - * @param op the opcode of cmp operation - * @param reg_no_dst the no of dst register - * @param reg_no1_src the no of src register, as first operand - * @param reg_no2_src the no of src register, as second operand - * - * @return true 
if success, false otherwise - */ -static bool -cmp_r_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, - int32 reg_no2_src) -{ - a.cmp(regs_i64[reg_no1_src], regs_i64[reg_no2_src]); - return true; -} - /** * Encode int64 cmp operation of imm and imm, and save result to reg * @@ -3913,7 +3855,15 @@ cmp_r_imm_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src, int64 data2_src) { Imm imm(data2_src); - a.cmp(regs_i64[reg_no1_src], imm); + + if (data2_src >= INT32_MIN && data2_src <= INT32_MAX) { + imm.setValue((int32)data2_src); + a.cmp(regs_i64[reg_no1_src], imm); + } + else { + a.mov(regs_i64[REG_I64_FREE_IDX], imm); + a.cmp(regs_i64[reg_no1_src], regs_i64[REG_I64_FREE_IDX]); + } return true; } @@ -4621,6 +4571,120 @@ fail: return false; } +/** + * Encode int32 bitcount operation of reg, and save result to reg + * + * @param a the assembler to emit the code + * @param op the opcode of BITCOUNT operation + * @param reg_no_dst the no of register + * @param reg_no_src the reg no of first src register data + * + * @return true if success, false otherwise + */ +static bool +bitcount_r_to_r_i32(x86::Assembler &a, BITCOUNT_OP op, int32 reg_no_dst, + int32 reg_no_src) +{ + switch (op) { + case CLZ: + a.lzcnt(regs_i32[reg_no_dst], regs_i32[reg_no_src]); + break; + case CTZ: + a.tzcnt(regs_i32[reg_no_dst], regs_i32[reg_no_src]); + break; + case POPCNT: + a.popcnt(regs_i32[reg_no_dst], regs_i32[reg_no_src]); + break; + default: + bh_assert(0); + return false; + } + return true; +} + +/** + * Encode int64 bitcount operation of reg, and save result to reg + * + * @param a the assembler to emit the code + * @param op the opcode of BITCOUNT operation + * @param reg_no_dst the no of register + * @param reg_no_src the reg no of first src register data + * + * @return true if success, false otherwise + */ +static bool +bitcount_r_to_r_i64(x86::Assembler &a, BITCOUNT_OP op, int32 reg_no_dst, + int32 reg_no_src) +{ + switch (op) { + case CLZ: + 
a.lzcnt(regs_i64[reg_no_dst], regs_i64[reg_no_src]); + break; + case CTZ: + a.tzcnt(regs_i64[reg_no_dst], regs_i64[reg_no_src]); + break; + case POPCNT: + a.popcnt(regs_i64[reg_no_dst], regs_i64[reg_no_src]); + break; + default: + bh_assert(0); + return false; + } + return true; +} + +/** + * Encode insn bitcount: CLZ/CTZ/POPCNT r0, r1 + * @param kind the data kind, such as I32, I64 + * @param Type the data type, such as int32, int64 + * @param type the abbreviation of data type, such as i32, i64 + * @param op the opcode of bit operation + */ +#define BITCOUNT_R_R(kind, Type, type, op) \ + do { \ + int32 reg_no_dst; \ + bool _ret = false; \ + \ + CHECK_EQKIND(r0, r1); \ + CHECK_NCONST(r1); \ + \ + reg_no_dst = jit_reg_no(r0); \ + if (!bitcount_r_to_r_##type(a, op, reg_no_dst, jit_reg_no(r1))) \ + GOTO_FAIL; \ + } while (0) + +/** + * Encode bitcount insn, CLZ/CTZ/POPCNT r0, r1 + * + * @param cc the compiler context + * @param a the assembler to emit the code + * @param op the opcode of bitcount operations + * @param r0 dst jit register that contains the dst operand info + * @param r1 src jit register that contains the src operand info + * + * @return true if success, false if failed + */ +static bool +lower_bitcount(JitCompContext *cc, x86::Assembler &a, BITCOUNT_OP op, JitReg r0, + JitReg r1) +{ + switch (jit_reg_kind(r0)) { + case JIT_REG_KIND_I32: + BITCOUNT_R_R(I32, int32, i32, op); + break; + case JIT_REG_KIND_I64: + BITCOUNT_R_R(I64, int64, i64, op); + break; + default: + LOG_VERBOSE("Invalid reg type of bit: %d\n", jit_reg_kind(r0)); + GOTO_FAIL; + } + + return true; +fail: + return false; +} + /** * Encode insn cmp: CMP r0, r1, r2 * @param kind the data kind, such as I32, I64, F32 and F64 @@ -5151,6 +5215,8 @@ lower_callnative(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list, if (ret_reg) { bh_assert((jit_reg_kind(ret_reg) == JIT_REG_KIND_I32 && jit_reg_no(ret_reg) == REG_EAX_IDX) + || (jit_reg_kind(ret_reg) == JIT_REG_KIND_I64 + && 
jit_reg_no(ret_reg) == REG_RAX_IDX) || (jit_reg_kind(ret_reg) == JIT_REG_KIND_F32 || jit_reg_kind(ret_reg) == JIT_REG_KIND_F64 && jit_reg_no(ret_reg) == 0)); @@ -5705,6 +5771,17 @@ jit_codegen_gen_native(JitCompContext *cc) GOTO_FAIL; break; + case JIT_OP_CLZ: + case JIT_OP_CTZ: + case JIT_OP_POPCNT: + LOAD_2ARGS(); + if (!lower_bitcount( + cc, a, + (BITCOUNT_OP)(CLZ + (insn->opcode - JIT_OP_CLZ)), + r0, r1)) + GOTO_FAIL; + break; + case JIT_OP_CMP: LOAD_3ARGS(); if (!lower_cmp(cc, a, r0, r1, r2)) diff --git a/core/iwasm/fast-jit/fe/jit_emit_compare.c b/core/iwasm/fast-jit/fe/jit_emit_compare.c index 14231163d..bf4d3d507 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_compare.c +++ b/core/iwasm/fast-jit/fe/jit_emit_compare.c @@ -174,11 +174,61 @@ fail: bool jit_compile_op_f32_compare(JitCompContext *cc, FloatCond cond) { + JitReg res, const_zero, const_one; JitReg lhs, rhs; POP_F32(rhs); POP_F32(lhs); + if (jit_reg_is_const_val(lhs) && jit_reg_is_const_val(rhs)) { + float32 lvalue = jit_cc_get_const_F32(cc, lhs); + float32 rvalue = jit_cc_get_const_F32(cc, rhs); + + const_zero = NEW_CONST(I32, 0); + const_one = NEW_CONST(I32, 1); + + switch (cond) { + case FLOAT_EQ: + { + res = (lvalue == rvalue) ? const_one : const_zero; + break; + } + case FLOAT_NE: + { + res = (lvalue != rvalue) ? const_one : const_zero; + break; + } + case FLOAT_LT: + { + res = (lvalue < rvalue) ? const_one : const_zero; + break; + } + case FLOAT_GT: + { + res = (lvalue > rvalue) ? const_one : const_zero; + break; + } + case FLOAT_LE: + { + res = (lvalue <= rvalue) ? const_one : const_zero; + break; + } + case FLOAT_GE: + { + res = (lvalue >= rvalue) ? 
const_one : const_zero; + break; + } + default: + { + bh_assert(!"unknown FloatCond"); + goto fail; + } + } + + PUSH_I32(res); + return true; + } + return jit_compile_op_compare_float_point(cc, cond, lhs, rhs); fail: return false; @@ -187,11 +237,61 @@ fail: bool jit_compile_op_f64_compare(JitCompContext *cc, FloatCond cond) { + JitReg res, const_zero, const_one; JitReg lhs, rhs; POP_F64(rhs); POP_F64(lhs); + if (jit_reg_is_const_val(lhs) && jit_reg_is_const_val(rhs)) { + float64 lvalue = jit_cc_get_const_F64(cc, lhs); + float64 rvalue = jit_cc_get_const_F64(cc, rhs); + + const_zero = NEW_CONST(I32, 0); + const_one = NEW_CONST(I32, 1); + + switch (cond) { + case FLOAT_EQ: + { + res = (lvalue == rvalue) ? const_one : const_zero; + break; + } + case FLOAT_NE: + { + res = (lvalue != rvalue) ? const_one : const_zero; + break; + } + case FLOAT_LT: + { + res = (lvalue < rvalue) ? const_one : const_zero; + break; + } + case FLOAT_GT: + { + res = (lvalue > rvalue) ? const_one : const_zero; + break; + } + case FLOAT_LE: + { + res = (lvalue <= rvalue) ? const_one : const_zero; + break; + } + case FLOAT_GE: + { + res = (lvalue >= rvalue) ? 
const_one : const_zero; + break; + } + default: + { + bh_assert(!"unknown FloatCond"); + goto fail; + } + } + + PUSH_I32(res); + return true; + } + return jit_compile_op_compare_float_point(cc, cond, lhs, rhs); fail: return false; diff --git a/core/iwasm/fast-jit/fe/jit_emit_memory.c b/core/iwasm/fast-jit/fe/jit_emit_memory.c index 3f1a34536..d118013cb 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_memory.c +++ b/core/iwasm/fast-jit/fe/jit_emit_memory.c @@ -261,10 +261,10 @@ jit_compile_op_i64_load(JitCompContext *cc, uint32 align, uint32 offset, case 4: { if (sign) { - GEN_INSN(LDI16, value, maddr, NEW_CONST(I32, 0)); + GEN_INSN(LDI32, value, maddr, NEW_CONST(I32, 0)); } else { - GEN_INSN(LDU16, value, maddr, NEW_CONST(I32, 0)); + GEN_INSN(LDU32, value, maddr, NEW_CONST(I32, 0)); } break; } @@ -389,6 +389,10 @@ jit_compile_op_i64_store(JitCompContext *cc, uint32 align, uint32 offset, goto fail; } + if (jit_reg_is_const(value) && bytes < 8) { + value = NEW_CONST(I32, (int32)jit_cc_get_const_I64(cc, value)); + } + switch (bytes) { case 1: { diff --git a/core/iwasm/fast-jit/fe/jit_emit_numberic.c b/core/iwasm/fast-jit/fe/jit_emit_numberic.c index 83b660a0d..6027edc41 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_numberic.c +++ b/core/iwasm/fast-jit/fe/jit_emit_numberic.c @@ -91,39 +91,197 @@ PUSH_FLOAT(res); \ } while (0) +static uint32 +clz32(uint32 type) +{ + uint32 num = 0; + if (type == 0) + return 32; + while (!(type & 0x80000000)) { + num++; + type <<= 1; + } + return num; +} + +static uint64 +clz64(uint64 type) +{ + uint32 num = 0; + if (type == 0) + return 64; + while (!(type & 0x8000000000000000LL)) { + num++; + type <<= 1; + } + return num; +} + +static uint32 +ctz32(uint32 type) +{ + uint32 num = 0; + if (type == 0) + return 32; + while (!(type & 1)) { + num++; + type >>= 1; + } + return num; +} + +static uint64 +ctz64(uint64 type) +{ + uint32 num = 0; + if (type == 0) + return 64; + while (!(type & 1)) { + num++; + type >>= 1; + } + return num; +} + +static 
uint32 +popcnt32(uint32 u) +{ + uint32 ret = 0; + while (u) { + u = (u & (u - 1)); + ret++; + } + return ret; +} + +static uint64 +popcnt64(uint64 u) +{ + uint32 ret = 0; + while (u) { + u = (u & (u - 1)); + ret++; + } + return ret; +} + bool jit_compile_op_i32_clz(JitCompContext *cc) { + JitReg value, res; + + POP_I32(value); + if (jit_reg_is_const(value)) { + uint32 i32 = jit_cc_get_const_I32(cc, value); + PUSH_I32(NEW_CONST(I32, clz32(i32))); + return true; + } + + res = jit_cc_new_reg_I32(cc); + GEN_INSN(CLZ, res, value); + PUSH_I32(res); + return true; +fail: return false; } bool jit_compile_op_i32_ctz(JitCompContext *cc) { + JitReg value, res = jit_cc_new_reg_I32(cc); + + POP_I32(value); + if (jit_reg_is_const(value)) { + uint32 i32 = jit_cc_get_const_I32(cc, value); + PUSH_I32(NEW_CONST(I32, ctz32(i32))); + return true; + } + + res = jit_cc_new_reg_I32(cc); + GEN_INSN(CTZ, res, value); + PUSH_I32(res); + return true; +fail: return false; } bool jit_compile_op_i32_popcnt(JitCompContext *cc) { + JitReg value, res; + + POP_I32(value); + if (jit_reg_is_const(value)) { + uint32 i32 = jit_cc_get_const_I32(cc, value); + PUSH_I32(NEW_CONST(I32, popcnt32(i32))); + return true; + } + + res = jit_cc_new_reg_I32(cc); + GEN_INSN(POPCNT, res, value); + PUSH_I32(res); + return true; +fail: return false; } bool jit_compile_op_i64_clz(JitCompContext *cc) { + JitReg value, res; + + POP_I64(value); + if (jit_reg_is_const(value)) { + uint64 i64 = jit_cc_get_const_I64(cc, value); + PUSH_I64(NEW_CONST(I64, clz64(i64))); + return true; + } + + res = jit_cc_new_reg_I64(cc); + GEN_INSN(CLZ, res, value); + PUSH_I64(res); + return true; +fail: return false; } bool jit_compile_op_i64_ctz(JitCompContext *cc) { + JitReg value, res; + + POP_I64(value); + if (jit_reg_is_const(value)) { + uint64 i64 = jit_cc_get_const_I64(cc, value); + PUSH_I64(NEW_CONST(I64, ctz64(i64))); + return true; + } + + res = jit_cc_new_reg_I64(cc); + GEN_INSN(CTZ, res, value); + PUSH_I64(res); + return true; 
+fail: return false; } bool jit_compile_op_i64_popcnt(JitCompContext *cc) { + JitReg value, res; + + POP_I64(value); + if (jit_reg_is_const(value)) { + uint64 i64 = jit_cc_get_const_I64(cc, value); + PUSH_I64(NEW_CONST(I64, popcnt64(i64))); + return true; + } + + res = jit_cc_new_reg_I64(cc); + GEN_INSN(POPCNT, res, value); + PUSH_I64(res); + return true; +fail: return false; } diff --git a/core/iwasm/fast-jit/fe/jit_emit_parametric.c b/core/iwasm/fast-jit/fe/jit_emit_parametric.c index 9d04ada5d..24dd9d264 100644 --- a/core/iwasm/fast-jit/fe/jit_emit_parametric.c +++ b/core/iwasm/fast-jit/fe/jit_emit_parametric.c @@ -30,7 +30,7 @@ pop_value_from_wasm_stack(JitCompContext *cc, bool is_32bit, JitReg *p_value, *p_type = jit_value->type; } if (p_value != NULL) { - *p_value = jit_value->value; + *p_value = jit_value->value->reg; } wasm_runtime_free(jit_value); diff --git a/core/iwasm/fast-jit/jit_ir.c b/core/iwasm/fast-jit/jit_ir.c index 195180b1f..476b1a3bb 100644 --- a/core/iwasm/fast-jit/jit_ir.c +++ b/core/iwasm/fast-jit/jit_ir.c @@ -1482,7 +1482,8 @@ jit_cc_pop_value(JitCompContext *cc, uint8 type, JitReg *p_value) break; } - bh_assert(value = jit_value->value); + bh_assert(cc->jit_frame->sp == jit_value->value); + bh_assert(value == jit_value->value->reg); *p_value = value; jit_free(jit_value); return true; @@ -1506,7 +1507,7 @@ jit_cc_push_value(JitCompContext *cc, uint8 type, JitReg value) bh_assert(value); jit_value->type = to_stack_value_type(type); - jit_value->value = value; + jit_value->value = cc->jit_frame->sp; jit_value_stack_push(&jit_block_stack_top(&cc->block_stack)->value_stack, jit_value); diff --git a/core/iwasm/fast-jit/jit_ir.def b/core/iwasm/fast-jit/jit_ir.def index 8f7ee238e..4bde69fc2 100644 --- a/core/iwasm/fast-jit/jit_ir.def +++ b/core/iwasm/fast-jit/jit_ir.def @@ -100,7 +100,10 @@ INSN(F64TOI64, Reg, 2, 1) INSN(F64TOF32, Reg, 2, 1) INSN(F64TOU32, Reg, 2, 1) -/* re-interpreter binary presentation. 
like *(integer*)&floating_point, and *(floating_point*)&integer */ +/** + * Re-interpret binary representations: + * *(i32 *)&f32, *(i64 *)&f64, *(f32 *)&i32, *(f64 *)&i64 + */ INSN(I32CASTF32, Reg, 2, 1) INSN(I64CASTF64, Reg, 2, 1) INSN(F32CASTI32, Reg, 2, 1) @@ -127,6 +130,9 @@ INSN(AND, Reg, 3, 1) INSN(CMP, Reg, 3, 1) INSN(MAX, Reg, 3, 1) INSN(MIN, Reg, 3, 1) +INSN(CLZ, Reg, 2, 1) +INSN(CTZ, Reg, 2, 1) +INSN(POPCNT, Reg, 2, 1) /* Select instruction: */ INSN(SELECTEQ, Reg, 4, 1) diff --git a/core/iwasm/fast-jit/jit_ir.h b/core/iwasm/fast-jit/jit_ir.h index f76b8ce8a..39f61e748 100644 --- a/core/iwasm/fast-jit/jit_ir.h +++ b/core/iwasm/fast-jit/jit_ir.h @@ -855,6 +855,7 @@ typedef struct JitHardRegInfo { struct JitBlock; struct JitCompContext; +struct JitValueSlot; /** * Value in the WASM operation stack, each stack element @@ -863,7 +864,7 @@ struct JitCompContext; typedef struct JitValue { struct JitValue *next; struct JitValue *prev; - JitReg value; + struct JitValueSlot *value; /* VALUE_TYPE_I32/I64/F32/F64/VOID */ uint8 type; } JitValue;