Refine fast jit frontend: merge cmp and if/br_if

This commit is contained in:
Wenyong Huang 2022-07-05 18:09:25 +08:00
parent a9658c245f
commit 12fbaab1ef
5 changed files with 190 additions and 62 deletions

View File

@ -381,7 +381,7 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a,
bool fp_cmp = cc->last_cmp_on_fp;
bh_assert(!fp_cmp || (fp_cmp && (op == GES)));
bh_assert(!fp_cmp || (fp_cmp && (op == GTS || op == GES)));
switch (op) {
case EQ:
@ -396,7 +396,10 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a,
}
case GTS:
{
a.jg(imm);
if (fp_cmp)
a.ja(imm);
else
a.jg(imm);
break;
}
case LES:
@ -5149,8 +5152,7 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, COND_OP op,
int32 offset)
{
Imm target(INT32_MAX);
char *stream = (char *)a.code()->sectionById(0)->buffer().data()
+ a.code()->sectionById(0)->buffer().size();
char *stream;
bool fp_cmp = cc->last_cmp_on_fp;
bh_assert(!fp_cmp || (fp_cmp && (op == GTS || op == GES)));
@ -5223,8 +5225,14 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, COND_OP op,
}
}
/* The offset written by asmjit is always 0, we patch it again */
*(int32 *)(stream + 2) = offset;
JitErrorHandler *err_handler = (JitErrorHandler *)a.code()->errorHandler();
if (!err_handler->err) {
/* The offset written by asmjit is always 0, we patch it again */
stream = (char *)a.code()->sectionById(0)->buffer().data()
+ a.code()->sectionById(0)->buffer().size() - 6;
*(int32 *)(stream + 2) = offset;
}
return true;
}
@ -5302,24 +5310,30 @@ fail:
}
/* jmp to dst label */
#define JMP_TO_LABEL(label_dst, label_src) \
do { \
if (label_is_ahead(cc, label_dst, label_src)) { \
char *stream = (char *)a.code()->sectionById(0)->buffer().data() \
+ a.code()->sectionById(0)->buffer().size(); \
int32 _offset = label_offsets[label_dst] \
- a.code()->sectionById(0)->buffer().size(); \
Imm imm(INT32_MAX); \
a.jmp(imm); \
/* The offset written by asmjit is always 0, we patch it again, \
6 is the size of jmp instruciton */ \
*(int32 *)(stream + 2) = _offset - 6; \
} \
else { \
if (!jmp_from_label_to_label(a, jmp_info_list, label_dst, \
label_src)) \
GOTO_FAIL; \
} \
/*
 * Emit an unconditional jump from label_src to label_dst.
 *
 * When label_is_ahead(cc, label_dst, label_src) is true, a jmp with a
 * placeholder immediate (INT32_MAX) is emitted and, if the assembler's
 * error handler reports no error, the 32-bit value located 2 bytes into
 * the last 6 bytes of section 0's buffer is overwritten with
 * label_offsets[label_dst] minus the buffer size measured after the jmp
 * was emitted. Otherwise the jump is handed to jmp_from_label_to_label()
 * and GOTO_FAIL is taken when that call fails.
 *
 * The expansion site must have `a`, `cc`, `label_offsets` and
 * `jmp_info_list` in scope.
 */
#define JMP_TO_LABEL(label_dst, label_src) \
do { \
if (label_is_ahead(cc, label_dst, label_src)) { \
JitErrorHandler *err_handler = \
(JitErrorHandler *)a.code()->errorHandler(); \
int32 _offset; \
char *stream; \
Imm imm(INT32_MAX); \
a.jmp(imm); \
if (!err_handler->err) { \
/* The offset written by asmjit is always 0, we patch it \
again, 6 is the size of jmp instruction */ \
stream = (char *)a.code()->sectionById(0)->buffer().data() \
+ a.code()->sectionById(0)->buffer().size() - 6; \
_offset = label_offsets[label_dst] \
- a.code()->sectionById(0)->buffer().size(); \
*(int32 *)(stream + 2) = _offset; \
} \
} \
else { \
if (!jmp_from_label_to_label(a, jmp_info_list, label_dst, \
label_src)) \
GOTO_FAIL; \
} \
} while (0)
/**

View File

@ -164,9 +164,40 @@ fail:
return false;
}
/**
 * Check whether a register is an I32 constant with a specific value.
 *
 * @param cc the compilation context used to look up the constant
 * @param reg the register to inspect
 * @param val the value the constant is expected to hold
 *
 * @return true only if reg is of kind JIT_REG_KIND_I32, is a constant
 *         register, and its I32 constant value equals val
 */
static bool
jit_reg_is_i32_const(JitCompContext *cc, JitReg reg, int32 val)
{
    /* Not an I32 register at all */
    if (jit_reg_kind(reg) != JIT_REG_KIND_I32)
        return false;

    /* An I32 register, but not a constant one */
    if (!jit_reg_is_const(reg))
        return false;

    /* Only now is it valid to fetch the constant and compare it */
    return jit_cc_get_const_I32(cc, reg) == val;
}
/**
 * Look for the following pair at the tail of the current basic block:
 *   CMP cmp_reg, r0, r1
 *   SELECTcc r2, cmp_reg, 1, 0
 *
 * The pair is accepted only when the last insn is a SELECTcc (opcode in
 * [JIT_OP_SELECTEQ, JIT_OP_SELECTLEU]) whose operand 0 is `cond`, whose
 * operand 2 is the I32 constant 1 and whose operand 3 is the I32
 * constant 0, and when the insn right before it is a CMP.
 *
 * @param cc the compilation context
 * @param cond the register expected to be the result of the SELECTcc
 * @param p_insn_cmp set to the CMP insn when the pair is found
 * @param p_insn_select set to the SELECTcc insn when the pair is found
 *
 * Both output pointers are left untouched when the pair is not found,
 * so the caller must initialize them beforehand.
 */
static void
get_last_cmp_and_selectcc(JitCompContext *cc, JitReg cond, JitInsn **p_insn_cmp,
                          JitInsn **p_insn_select)
{
    JitInsn *select_insn = jit_basic_block_last_insn(cc->cur_basic_block);
    JitInsn *cmp_insn;

    if (!select_insn)
        return;

    /* The insn right before the candidate SELECTcc must be a CMP */
    cmp_insn = select_insn->prev;
    if (!cmp_insn || cmp_insn->opcode != JIT_OP_CMP)
        return;

    /* The last insn must be one of the SELECTcc opcodes */
    if (select_insn->opcode < JIT_OP_SELECTEQ
        || select_insn->opcode > JIT_OP_SELECTLEU)
        return;

    /* The SELECTcc must produce the condition being tested */
    if (*jit_insn_opnd(select_insn, 0) != cond)
        return;

    /* And it must select between the constants 1 and 0, in that order */
    if (!jit_reg_is_i32_const(cc, *jit_insn_opnd(select_insn, 2), 1))
        return;
    if (!jit_reg_is_i32_const(cc, *jit_insn_opnd(select_insn, 3), 0))
        return;

    *p_insn_cmp = cmp_insn;
    *p_insn_select = select_insn;
}
static bool
push_jit_block_to_stack_and_pass_params(JitCompContext *cc, JitBlock *block,
JitBasicBlock *basic_block, JitReg cond)
JitBasicBlock *basic_block, JitReg cond,
bool merge_cmp_and_if)
{
JitFrame *jit_frame = cc->jit_frame;
JitValue *value_list_head = NULL, *value_list_end = NULL, *jit_value;
@ -205,6 +236,12 @@ push_jit_block_to_stack_and_pass_params(JitCompContext *cc, JitBlock *block,
/* Continue to translate current block */
}
else {
JitInsn *insn_select = NULL, *insn_cmp = NULL;
if (merge_cmp_and_if) {
get_last_cmp_and_selectcc(cc, cond, &insn_cmp, &insn_select);
}
/* Commit register values to locals and stacks */
gen_commit_values(jit_frame, jit_frame->lp, jit_frame->sp);
@ -227,11 +264,26 @@ push_jit_block_to_stack_and_pass_params(JitCompContext *cc, JitBlock *block,
}
else {
/* IF block with condition br insn */
if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0))
|| !(insn = GEN_INSN(BNE, cc->cmp_reg,
jit_basic_block_label(basic_block), 0))) {
jit_set_last_error(cc, "generate cond br failed");
goto fail;
if (insn_select && insn_cmp) {
/* Change `CMP + SELECTcc` into `CMP + Bcc` */
if (!(insn = GEN_INSN(BEQ, cc->cmp_reg,
jit_basic_block_label(basic_block), 0))) {
jit_set_last_error(cc, "generate cond br failed");
goto fail;
}
insn->opcode =
JIT_OP_BEQ + (insn_select->opcode - JIT_OP_SELECTEQ);
jit_insn_unlink(insn_select);
jit_insn_delete(insn_select);
}
else {
if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0))
|| !(insn =
GEN_INSN(BNE, cc->cmp_reg,
jit_basic_block_label(basic_block), 0))) {
jit_set_last_error(cc, "generate cond br failed");
goto fail;
}
}
/* Don't create else basic block or end basic block now, just
@ -449,7 +501,9 @@ handle_op_end(JitCompContext *cc, uint8 **p_frame_ip, bool is_block_polymorphic)
incoming_insn = block->incoming_insns_for_end_bb;
while (incoming_insn) {
insn = incoming_insn->insn;
bh_assert(insn->opcode == JIT_OP_JMP || insn->opcode == JIT_OP_BNE);
bh_assert(
insn->opcode == JIT_OP_JMP
|| (insn->opcode >= JIT_OP_BEQ && insn->opcode <= JIT_OP_BLEU));
*(jit_insn_opnd(insn, incoming_insn->opnd_idx)) =
jit_basic_block_label(block->basic_block_end);
incoming_insn = incoming_insn->next;
@ -601,7 +655,7 @@ bool
jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip,
uint8 *frame_ip_end, uint32 label_type, uint32 param_count,
uint8 *param_types, uint32 result_count,
uint8 *result_types)
uint8 *result_types, bool merge_cmp_and_if)
{
BlockAddr block_addr_cache[BLOCK_ADDR_CACHE_SIZE][BLOCK_ADDR_CONFLICT_SIZE];
JitBlock *block;
@ -656,8 +710,8 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip,
if (label_type == LABEL_TYPE_BLOCK) {
/* Push the new jit block to block stack and continue to
translate current basic block */
if (!push_jit_block_to_stack_and_pass_params(cc, block,
cc->cur_basic_block, 0))
if (!push_jit_block_to_stack_and_pass_params(
cc, block, cc->cur_basic_block, 0, false))
goto fail;
}
else if (label_type == LABEL_TYPE_LOOP) {
@ -667,7 +721,7 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip,
/* Push the new jit block to block stack and continue to
translate the new basic block */
if (!push_jit_block_to_stack_and_pass_params(
cc, block, block->basic_block_entry, 0))
cc, block, block->basic_block_entry, 0, false))
goto fail;
}
else if (label_type == LABEL_TYPE_IF) {
@ -682,7 +736,8 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip,
SET_BB_BEGIN_BCIP(block->basic_block_entry, *p_frame_ip);
if (!push_jit_block_to_stack_and_pass_params(
cc, block, block->basic_block_entry, value))
cc, block, block->basic_block_entry, value,
merge_cmp_and_if))
goto fail;
}
else {
@ -691,7 +746,7 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip,
BASIC_BLOCK if cannot be reached, we treat it same as
LABEL_TYPE_BLOCK and start to translate if branch */
if (!push_jit_block_to_stack_and_pass_params(
cc, block, cc->cur_basic_block, 0))
cc, block, cc->cur_basic_block, 0, false))
goto fail;
}
else {
@ -700,7 +755,7 @@ jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip,
BASIC_BLOCK if cannot be reached, we treat it same as
LABEL_TYPE_BLOCK and start to translate else branch */
if (!push_jit_block_to_stack_and_pass_params(
cc, block, cc->cur_basic_block, 0))
cc, block, cc->cur_basic_block, 0, false))
goto fail;
*p_frame_ip = else_addr + 1;
}
@ -833,14 +888,15 @@ jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip)
}
bool
jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip)
jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth,
bool merge_cmp_and_br_if, uint8 **p_frame_ip)
{
JitFrame *jit_frame;
JitBlock *block_dst;
JitReg cond;
JitBasicBlock *cur_basic_block, *if_basic_block = NULL;
JitValueSlot *frame_sp_src;
JitInsn *insn;
JitInsn *insn, *insn_select = NULL, *insn_cmp = NULL;
if (!(block_dst = get_target_block(cc, br_depth))) {
return false;
@ -849,6 +905,10 @@ jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip)
/* append IF to current basic block */
POP_I32(cond);
if (merge_cmp_and_br_if) {
get_last_cmp_and_selectcc(cc, cond, &insn_cmp, &insn_select);
}
jit_frame = cc->jit_frame;
cur_basic_block = cc->cur_basic_block;
gen_commit_values(jit_frame, jit_frame->lp, jit_frame->sp);
@ -864,19 +924,25 @@ jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip)
block_dst->result_count);
}
if (!(insn_select && insn_cmp)) {
if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0))) {
jit_set_last_error(cc, "generate cmp insn failed");
goto fail;
}
}
if (block_dst->frame_sp_begin == frame_sp_src) {
if (block_dst->label_type == LABEL_TYPE_LOOP) {
if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0))
|| !GEN_INSN(
BNE, cc->cmp_reg,
jit_basic_block_label(block_dst->basic_block_entry), 0)) {
if (!(insn = GEN_INSN(
BNE, cc->cmp_reg,
jit_basic_block_label(block_dst->basic_block_entry),
0))) {
jit_set_last_error(cc, "generate bne insn failed");
goto fail;
}
}
else {
if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0))
|| !(insn = GEN_INSN(BNE, cc->cmp_reg, 0, 0))) {
if (!(insn = GEN_INSN(BNE, cc->cmp_reg, 0, 0))) {
jit_set_last_error(cc, "generate bne insn failed");
goto fail;
}
@ -885,16 +951,27 @@ jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip)
goto fail;
}
}
if (insn_select && insn_cmp) {
/* Change `CMP + SELECTcc` into `CMP + Bcc` */
insn->opcode = JIT_OP_BEQ + (insn_select->opcode - JIT_OP_SELECTEQ);
jit_insn_unlink(insn_select);
jit_insn_delete(insn_select);
}
return true;
}
CREATE_BASIC_BLOCK(if_basic_block);
if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0))
|| !GEN_INSN(BNE, cc->cmp_reg, jit_basic_block_label(if_basic_block),
0)) {
if (!(insn = GEN_INSN(BNE, cc->cmp_reg,
jit_basic_block_label(if_basic_block), 0))) {
jit_set_last_error(cc, "generate bne insn failed");
goto fail;
}
if (insn_select && insn_cmp) {
/* Change `CMP + SELECTcc` into `CMP + Bcc` */
insn->opcode = JIT_OP_BEQ + (insn_select->opcode - JIT_OP_SELECTEQ);
jit_insn_unlink(insn_select);
jit_insn_delete(insn_select);
}
SET_BUILDER_POS(if_basic_block);
SET_BB_BEGIN_BCIP(if_basic_block, *p_frame_ip - 1);

View File

@ -16,7 +16,7 @@ bool
jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip,
uint8 *frame_ip_end, uint32 label_type, uint32 param_count,
uint8 *param_types, uint32 result_count,
uint8 *result_types);
uint8 *result_types, bool merge_cmp_and_if);
bool
jit_compile_op_else(JitCompContext *cc, uint8 **p_frame_ip);
@ -28,7 +28,8 @@ bool
jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip);
bool
jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip);
jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth,
bool merge_cmp_and_br_if, uint8 **p_frame_ip);
bool
jit_compile_op_br_table(JitCompContext *cc, uint32 *br_depths, uint32 br_count,

View File

@ -1010,6 +1010,7 @@ jit_compile_func(JitCompContext *cc)
uint32 br_depth, *br_depths, br_count;
uint32 func_idx, type_idx, mem_idx, local_idx, global_idx, i;
uint32 bytes = 4, align, offset;
bool merge_cmp_and_if = false, merge_cmp_and_br_if = false;
bool sign = true;
int32 i32_const;
int64 i64_const;
@ -1069,8 +1070,11 @@ jit_compile_func(JitCompContext *cc)
if (!jit_compile_op_block(
cc, &frame_ip, frame_ip_end,
(uint32)(LABEL_TYPE_BLOCK + opcode - WASM_OP_BLOCK),
param_count, param_types, result_count, result_types))
param_count, param_types, result_count, result_types,
merge_cmp_and_if))
return false;
/* Clear flag */
merge_cmp_and_if = false;
break;
}
case EXT_OP_BLOCK:
@ -1086,8 +1090,11 @@ jit_compile_func(JitCompContext *cc)
if (!jit_compile_op_block(
cc, &frame_ip, frame_ip_end,
(uint32)(LABEL_TYPE_BLOCK + opcode - EXT_OP_BLOCK),
param_count, param_types, result_count, result_types))
param_count, param_types, result_count, result_types,
merge_cmp_and_if))
return false;
/* Clear flag */
merge_cmp_and_if = false;
break;
}
@ -1109,8 +1116,11 @@ jit_compile_func(JitCompContext *cc)
case WASM_OP_BR_IF:
read_leb_uint32(frame_ip, frame_ip_end, br_depth);
if (!jit_compile_op_br_if(cc, br_depth, &frame_ip))
if (!jit_compile_op_br_if(cc, br_depth, merge_cmp_and_br_if,
&frame_ip))
return false;
/* Clear flag */
merge_cmp_and_br_if = false;
break;
case WASM_OP_BR_TABLE:
@ -1506,6 +1516,13 @@ jit_compile_func(JitCompContext *cc)
if (!jit_compile_op_i32_compare(cc, INT_EQZ + opcode
- WASM_OP_I32_EQZ))
return false;
if (frame_ip < frame_ip_end) {
/* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */
if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF)
merge_cmp_and_if = true;
if (*frame_ip == WASM_OP_BR_IF)
merge_cmp_and_br_if = true;
}
break;
case WASM_OP_I64_EQZ:
@ -1522,6 +1539,13 @@ jit_compile_func(JitCompContext *cc)
if (!jit_compile_op_i64_compare(cc, INT_EQZ + opcode
- WASM_OP_I64_EQZ))
return false;
if (frame_ip < frame_ip_end) {
/* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */
if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF)
merge_cmp_and_if = true;
if (*frame_ip == WASM_OP_BR_IF)
merge_cmp_and_br_if = true;
}
break;
case WASM_OP_F32_EQ:
@ -1533,6 +1557,13 @@ jit_compile_func(JitCompContext *cc)
if (!jit_compile_op_f32_compare(cc, FLOAT_EQ + opcode
- WASM_OP_F32_EQ))
return false;
if (frame_ip < frame_ip_end) {
/* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */
if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF)
merge_cmp_and_if = true;
if (*frame_ip == WASM_OP_BR_IF)
merge_cmp_and_br_if = true;
}
break;
case WASM_OP_F64_EQ:
@ -1544,6 +1575,13 @@ jit_compile_func(JitCompContext *cc)
if (!jit_compile_op_f64_compare(cc, FLOAT_EQ + opcode
- WASM_OP_F64_EQ))
return false;
if (frame_ip < frame_ip_end) {
/* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */
if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF)
merge_cmp_and_if = true;
if (*frame_ip == WASM_OP_BR_IF)
merge_cmp_and_br_if = true;
}
break;
case WASM_OP_I32_CLZ:

View File

@ -339,7 +339,7 @@ fail:
}
/**
* Check whether the gien register is an allocation candidate, which
* Check whether the given register is an allocation candidate, which
* must be a variable register that is not fixed hard register.
*
* @param cc the compilation context
@ -359,10 +359,8 @@ static void
check_vreg_definition(RegallocContext *rc, JitInsn *insn)
{
JitRegVec regvec = jit_insn_opnd_regs(insn);
unsigned i;
JitReg *regp;
unsigned first_use = jit_insn_opnd_first_use(insn);
JitReg reg_defined;
JitReg *regp, reg_defined = 0;
unsigned i, first_use = jit_insn_opnd_first_use(insn);
/* check if there is the definition of an vr before its references */
JIT_REG_VEC_FOREACH(regvec, i, regp)
@ -372,7 +370,7 @@ check_vreg_definition(RegallocContext *rc, JitInsn *insn)
if (!is_alloc_candidate(rc->cc, *regp))
continue;
/*a strong assumption that there is only on defined reg*/
/* a strong assumption that there is only one defined reg */
if (i < first_use) {
reg_defined = *regp;
continue;
@ -380,8 +378,8 @@ check_vreg_definition(RegallocContext *rc, JitInsn *insn)
/**
* both definition and references are in one instruction,
* like MOV i3,i3
**/
* like MOV i3, i3
*/
if (reg_defined == *regp)
continue;