diff --git a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp index fce39d8cc..811ed9673 100644 --- a/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp +++ b/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp @@ -3014,7 +3014,6 @@ alu_r_imm_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, mov_imm_to_m(a, cache, imm, 4); mov_r_to_r_f32(a, reg_no_dst, reg_no1_src); - return alu_r_m_float(a, op, reg_no_dst, cache, true); } @@ -3033,40 +3032,52 @@ static bool alu_r_r_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no1_src, int32 reg_no2_src) { + bool store_result = false; + + /** + * - op r0,r0,r1. do nothing since instructions always store results in + * the first register + * + * - op r1,r0,r1. use FREE_REG to cache and replace r0, and then store + * results in r1 + * + * - op r0,r1,r2. use r0 to cache and replace r1, and accept the result + * naturally + **/ + if (reg_no_dst == reg_no2_src) { + store_result = true; + reg_no_dst = REG_F32_FREE_IDX; + } + mov_r_to_r_f32(a, reg_no_dst, reg_no1_src); + switch (op) { case ADD: { - mov_r_to_r_f32(a, reg_no_dst, reg_no1_src); a.addss(regs_float[reg_no_dst], regs_float[reg_no2_src]); break; } case SUB: { - mov_r_to_r_f32(a, reg_no_dst, reg_no1_src); a.subss(regs_float[reg_no_dst], regs_float[reg_no2_src]); break; } case MUL: { - mov_r_to_r_f32(a, reg_no_dst, reg_no1_src); a.mulss(regs_float[reg_no_dst], regs_float[reg_no2_src]); break; } case DIV_S: { - mov_r_to_r_f32(a, reg_no_dst, reg_no1_src); a.divss(regs_float[reg_no_dst], regs_float[reg_no2_src]); break; } case MAX: { - mov_r_to_r_f32(a, reg_no_dst, reg_no1_src); a.maxss(regs_float[reg_no_dst], regs_float[reg_no2_src]); break; } case MIN: { - mov_r_to_r_f32(a, reg_no_dst, reg_no1_src); a.minss(regs_float[reg_no_dst], regs_float[reg_no2_src]); break; } @@ -3076,6 +3087,10 @@ alu_r_r_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, return false; } } + + if (store_result) + 
mov_r_to_r_f32(a, reg_no2_src, REG_F32_FREE_IDX); + return true; } @@ -3188,7 +3203,6 @@ alu_r_imm_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, mov_imm_to_m(a, cache, imm, 8); mov_r_to_r_f64(a, reg_no_dst, reg_no1_src); - return alu_r_m_float(a, op, reg_no_dst, cache, false); } @@ -3207,40 +3221,52 @@ static bool alu_r_r_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no1_src, int32 reg_no2_src) { + bool store_result = false; + + /** + * - op r0,r0,r1. do nothing since instructions always store results in + * the first register + * + * - op r1,r0,r1. use FREE_REG to cache and replace r0, and then store + * results in r1 + * + * - op r0,r1,r2. use r0 to cache and replace r1, and accept the result + * naturally + **/ + if (reg_no_dst == reg_no2_src) { + store_result = true; + reg_no_dst = REG_F64_FREE_IDX; + } + mov_r_to_r_f64(a, reg_no_dst, reg_no1_src); + switch (op) { case ADD: { - mov_r_to_r_f64(a, reg_no_dst, reg_no1_src); a.addsd(regs_float[reg_no_dst], regs_float[reg_no2_src]); break; } case SUB: { - mov_r_to_r_f64(a, reg_no_dst, reg_no1_src); a.subsd(regs_float[reg_no_dst], regs_float[reg_no2_src]); break; } case MUL: { - mov_r_to_r_f64(a, reg_no_dst, reg_no1_src); a.mulsd(regs_float[reg_no_dst], regs_float[reg_no2_src]); break; } case DIV_S: { - mov_r_to_r_f64(a, reg_no_dst, reg_no1_src); a.divsd(regs_float[reg_no_dst], regs_float[reg_no2_src]); break; } case MAX: { - mov_r_to_r_f64(a, reg_no_dst, reg_no1_src); a.maxsd(regs_float[reg_no_dst], regs_float[reg_no2_src]); break; } case MIN: { - mov_r_to_r_f64(a, reg_no_dst, reg_no1_src); a.minsd(regs_float[reg_no_dst], regs_float[reg_no2_src]); break; } @@ -3250,6 +3276,10 @@ alu_r_r_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, return false; } } + + if (store_result) + mov_r_to_r_f64(a, reg_no2_src, REG_F64_FREE_IDX); + return true; } diff --git a/core/iwasm/fast-jit/fe/jit_emit_numberic.c b/core/iwasm/fast-jit/fe/jit_emit_numberic.c index d57c236ca..3290fa9c7 100644 
--- a/core/iwasm/fast-jit/fe/jit_emit_numberic.c +++ b/core/iwasm/fast-jit/fe/jit_emit_numberic.c @@ -1379,9 +1379,11 @@ compile_op_float_math(JitCompContext *cc, FloatMath math_op, bool is_f32) switch (math_op) { case FLOAT_ABS: + /* TODO: andps 0x7fffffff (f32) / andpd 0x7fffffffffffffff (f64) */ func = is_f32 ? (void *)fabsf : (void *)fabs; break; case FLOAT_NEG: + /* TODO: xorps 0x80000000 (f32) / xorpd 0x8000000000000000 (f64) */ func = is_f32 ? (void *)negf : (void *)neg; break; case FLOAT_CEIL: