From 94d6da28b7200727d59802cd18e17c03c1ca5aec Mon Sep 17 00:00:00 2001
From: "liang.he"
Date: Tue, 19 Apr 2022 16:24:37 +0800
Subject: [PATCH] Emit JIT IR for wasm opcode SHL/SHRU/SHRS (#1097)

---
 .../fast-jit/cg/x86-64/jit_codegen_x86_64.cpp | 224 +++++++++++++++++-
 core/iwasm/fast-jit/fe/jit_emit_numberic.c    | 125 +++++++++++-
 2 files changed, 346 insertions(+), 3 deletions(-)

diff --git a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp
index 21cf19e75..046f9977a 100644
--- a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp
+++ b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp
@@ -2823,6 +2823,38 @@ static bool
 shift_imm_imm_to_r_i32(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
                        int32 data1_src, int32 data2_src)
 {
+    int32 data;
+    /* Fold the count modulo 32, as wasm and the x86 shift instructions
+       do: shifting an int32 by 32 bits or more is undefined in C++. */
+    const uint32 count = (uint32)(data2_src & 0x1f);
+
+    switch (op) {
+        case SHL:
+        {
+            /* shift as unsigned, left-shifting a negative int32 is
+               undefined before C++20 */
+            data = (int32)((uint32)data1_src << count);
+            break;
+        }
+        case SHRS:
+        {
+            data = data1_src >> count;
+            break;
+        }
+        case SHRU:
+        {
+            data = (int32)((uint32)data1_src >> count);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return mov_imm_to_r_i32(a, reg_no_dst, data);
+fail:
     return false;
 }
 
@@ -2861,6 +2893,39 @@ static bool
 shift_r_imm_to_r_i32(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
                      int32 reg_no1_src, int32 data2_src)
 {
+    /* SHL/SAR/SHR r/m32, imm8 */
+    Imm imm((uint8)data2_src);
+
+    /* Copy the operand to the destination and shift it there, so that
+       the source register keeps its value for later uses. */
+    if (!mov_r_to_r_i32(a, reg_no_dst, reg_no1_src))
+        goto fail;
+
+    switch (op) {
+        case SHL:
+        {
+            a.shl(regs_i32[reg_no_dst], imm);
+            break;
+        }
+        case SHRS:
+        {
+            a.sar(regs_i32[reg_no_dst], imm);
+            break;
+        }
+        case SHRU:
+        {
+            a.shr(regs_i32[reg_no_dst], imm);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return true;
+fail:
     return false;
 }
 
@@ -2879,6 +2944,50 @@ static bool
 shift_r_r_to_r_i32(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
                    int32 reg_no1_src, int32 reg_no2_src)
 {
+    /* Register in which the value is shifted */
+    int32 reg_no_work = reg_no_dst;
+
+    /* The variable shift count must be in CL */
+    bh_assert(reg_no2_src == REG_ECX_IDX);
+    if (reg_no2_src != REG_ECX_IDX)
+        goto fail;
+
+    /* Shift a copy held in the destination so the source register keeps
+       its value. If the destination is the count register, the copy would
+       overwrite the count, so the source is shifted in place instead. */
+    if (reg_no_dst == reg_no2_src)
+        reg_no_work = reg_no1_src;
+    if (!mov_r_to_r_i32(a, reg_no_work, reg_no1_src))
+        goto fail;
+
+    switch (op) {
+        case SHL:
+        {
+            a.shl(regs_i32[reg_no_work], x86::cl);
+            break;
+        }
+        case SHRS:
+        {
+            a.sar(regs_i32[reg_no_work], x86::cl);
+            break;
+        }
+        case SHRU:
+        {
+            a.shr(regs_i32[reg_no_work], x86::cl);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    /* Only needed when the shift was done in the source register */
+    if (reg_no_work != reg_no_dst)
+        return mov_r_to_r_i32(a, reg_no_dst, reg_no_work);
+    return true;
+fail:
     return false;
 }
 
@@ -2897,6 +3006,38 @@ static bool
 shift_imm_imm_to_r_i64(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
                        int64 data1_src, int64 data2_src)
 {
+    int64 data;
+    /* Fold the count modulo 64, as wasm and the x86 shift instructions
+       do: shifting an int64 by 64 bits or more is undefined in C++. */
+    const uint32 count = (uint32)(data2_src & 0x3f);
+
+    switch (op) {
+        case SHL:
+        {
+            /* shift as unsigned, left-shifting a negative int64 is
+               undefined before C++20 */
+            data = (int64)((uint64)data1_src << count);
+            break;
+        }
+        case SHRS:
+        {
+            data = data1_src >> count;
+            break;
+        }
+        case SHRU:
+        {
+            data = (int64)((uint64)data1_src >> count);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return mov_imm_to_r_i64(a, reg_no_dst, data);
+fail:
     return false;
 }
 
@@ -2935,6 +3076,39 @@ static bool
 shift_r_imm_to_r_i64(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
                      int32 reg_no1_src, int64 data2_src)
 {
+    /* SHL/SAR/SHR r/m64, imm8 */
+    Imm imm((uint8)data2_src);
+
+    /* Copy the operand to the destination and shift it there, so that
+       the source register keeps its value for later uses. */
+    if (!mov_r_to_r_i64(a, reg_no_dst, reg_no1_src))
+        goto fail;
+
+    switch (op) {
+        case SHL:
+        {
+            a.shl(regs_i64[reg_no_dst], imm);
+            break;
+        }
+        case SHRS:
+        {
+            a.sar(regs_i64[reg_no_dst], imm);
+            break;
+        }
+        case SHRU:
+        {
+            a.shr(regs_i64[reg_no_dst], imm);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return true;
+fail:
     return false;
 }
 
@@ -2953,6 +3127,50 @@ static bool
 shift_r_r_to_r_i64(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
                    int32 reg_no1_src, int32 reg_no2_src)
 {
+    /* Register in which the value is shifted */
+    int32 reg_no_work = reg_no_dst;
+
+    /* The variable shift count must be in CL */
+    bh_assert(reg_no2_src == REG_ECX_IDX);
+    if (reg_no2_src != REG_ECX_IDX)
+        goto fail;
+
+    /* Shift a copy held in the destination so the source register keeps
+       its value. If the destination is the count register, the copy would
+       overwrite the count, so the source is shifted in place instead. */
+    if (reg_no_dst == reg_no2_src)
+        reg_no_work = reg_no1_src;
+    if (!mov_r_to_r_i64(a, reg_no_work, reg_no1_src))
+        goto fail;
+
+    switch (op) {
+        case SHL:
+        {
+            a.shl(regs_i64[reg_no_work], x86::cl);
+            break;
+        }
+        case SHRS:
+        {
+            a.sar(regs_i64[reg_no_work], x86::cl);
+            break;
+        }
+        case SHRU:
+        {
+            a.shr(regs_i64[reg_no_work], x86::cl);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    /* Only needed when the shift was done in the source register */
+    if (reg_no_work != reg_no_dst)
+        return mov_r_to_r_i64(a, reg_no_dst, reg_no_work);
+    return true;
+fail:
     return false;
 }
 
@@ -3774,7 +3992,7 @@ fail:
     bool _ret = false;                                     \
                                                            \
     CHECK_EQKIND(r0, r1);                                  \
-    CHECK_KIND(r2, JIT_REG_KIND_I32);                      \
+    CHECK_KIND(r2, JIT_REG_KIND_##kind);                   \
     memset(&data1, 0, sizeof(Type));                       \
     memset(&data2, 0, sizeof(Type));                       \
                                                            \
@@ -5127,10 +5345,14 @@ jit_codegen_get_hreg_by_name(const char *name)
 {
     if (strcmp(name, "eax") == 0)
         return jit_reg_new(JIT_REG_KIND_I32, REG_EAX_IDX);
+    else if (strcmp(name, "ecx") == 0)
+        return jit_reg_new(JIT_REG_KIND_I32, REG_ECX_IDX);
     else if (strcmp(name, "edx") == 0)
         return jit_reg_new(JIT_REG_KIND_I32, REG_EDX_IDX);
     else if (strcmp(name, "rax") == 0)
         return jit_reg_new(JIT_REG_KIND_I64, REG_RAX_IDX);
+    else if (strcmp(name, "rcx") == 0)
+        return jit_reg_new(JIT_REG_KIND_I64, REG_RCX_IDX);
     else if (strcmp(name, "rdx") == 0)
         return jit_reg_new(JIT_REG_KIND_I64, REG_RDX_IDX);
 
diff --git a/core/iwasm/fast-jit/fe/jit_emit_numberic.c b/core/iwasm/fast-jit/fe/jit_emit_numberic.c
index 1f5fba80b..ea12900ab 100644
--- a/core/iwasm/fast-jit/fe/jit_emit_numberic.c
+++ b/core/iwasm/fast-jit/fe/jit_emit_numberic.c
@@ -472,16 +472,137 @@ jit_compile_op_i64_bitwise(JitCompContext *cc, IntBitwise bitwise_op)
     return false;
 }
 
+/**
+ * Emit the IR of an i32 (is_i32 is true) or i64 shift: SHL, SHRS or SHRU
+ * depending on shift_op. The operands are taken with POP_INT and the
+ * result is handed back with PUSH_INT. The shift count is first reduced
+ * modulo 32 (i32) or 64 (i64); a constant count that reduces to 0 passes
+ * the left operand through without emitting a shift.
+ *
+ * Returns true on success and false on failure.
+ */
+static bool
+compile_int_shift(JitCompContext *cc, IntShift shift_op, bool is_i32)
+{
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
+    JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+#endif
+    JitReg left, right, mod_right, res;
+
+    POP_INT(right);
+    POP_INT(left);
+
+    /* right modulo N */
+    if (jit_reg_is_const(right)) {
+        if (is_i32) {
+            int32 right_value = jit_cc_get_const_I32(cc, right);
+            right_value = right_value & 0x1f;
+            if (0 == right_value) {
+                res = left;
+                goto shortcut;
+            }
+            else {
+                mod_right = NEW_CONST(I32, right_value);
+            }
+        }
+        else {
+            int64 right_value = jit_cc_get_const_I64(cc, right);
+            right_value = right_value & 0x3f;
+            if (0 == right_value) {
+                res = left;
+                goto shortcut;
+            }
+            else {
+                mod_right = NEW_CONST(I64, right_value);
+            }
+        }
+    }
+    else {
+        if (is_i32) {
+            mod_right = jit_cc_new_reg_I32(cc);
+            GEN_INSN(AND, mod_right, right, NEW_CONST(I32, 0x1f));
+        }
+        else {
+            mod_right = jit_cc_new_reg_I64(cc);
+            GEN_INSN(AND, mod_right, right, NEW_CONST(I64, 0x3f));
+        }
+    }
+
+    /* do shift */
+    if (is_i32) {
+        res = jit_cc_new_reg_I32(cc);
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+        GEN_INSN(MOV, ecx_hreg, mod_right);
+#endif
+    }
+    else {
+        res = jit_cc_new_reg_I64(cc);
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+        GEN_INSN(MOV, rcx_hreg, mod_right);
+#endif
+    }
+
+    switch (shift_op) {
+        case INT_SHL:
+        {
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+            GEN_INSN(SHL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+#else
+            GEN_INSN(SHL, res, left, mod_right);
+#endif
+            break;
+        }
+        case INT_SHR_S:
+        {
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+            GEN_INSN(SHRS, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+#else
+            GEN_INSN(SHRS, res, left, mod_right);
+#endif
+            break;
+        }
+        case INT_SHR_U:
+        {
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+            GEN_INSN(SHRU, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+#else
+            GEN_INSN(SHRU, res, left, mod_right);
+#endif
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    /**
+     * Just to indicate that ecx is used, register allocator cannot spill
+     * it out. Especially when rcx is used.
+     */
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    GEN_INSN(MOV, ecx_hreg, ecx_hreg);
+#endif
+
+shortcut:
+    PUSH_INT(res);
+    return true;
+fail:
+    return false;
+}
+
 bool
 jit_compile_op_i32_shift(JitCompContext *cc, IntShift shift_op)
 {
-    return false;
+    return compile_int_shift(cc, shift_op, true);
 }
 
 bool
 jit_compile_op_i64_shift(JitCompContext *cc, IntShift shift_op)
 {
-    return false;
+    return compile_int_shift(cc, shift_op, false);
 }
 
 bool