Fix splat opcodes, add V128 handling in preserve_referenced_local and reserve_block_ret

This commit is contained in:
James Marsh 2025-01-21 18:23:58 +00:00 committed by Marcin Kolny
parent 474acd72e3
commit b2804c004f
2 changed files with 136 additions and 36 deletions

View File

@ -47,7 +47,7 @@ typedef float64 CellType_F64;
&& (app_addr) <= shared_heap_end_off - bytes + 1) && (app_addr) <= shared_heap_end_off - bytes + 1)
#define shared_heap_addr_app_to_native(app_addr, native_addr) \ #define shared_heap_addr_app_to_native(app_addr, native_addr) \
native_addr = shared_heap_base_addr + ((app_addr)-shared_heap_start_off) native_addr = shared_heap_base_addr + ((app_addr) - shared_heap_start_off)
#define CHECK_SHARED_HEAP_OVERFLOW(app_addr, bytes, native_addr) \ #define CHECK_SHARED_HEAP_OVERFLOW(app_addr, bytes, native_addr) \
if (app_addr_in_shared_heap(app_addr, bytes)) \ if (app_addr_in_shared_heap(app_addr, bytes)) \
@ -1793,7 +1793,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
else else
cur_func_type = cur_func->u.func->func_type; cur_func_type = cur_func->u.func->func_type;
/* clang-format off */ /* clang-format off */
#if WASM_ENABLE_GC == 0 #if WASM_ENABLE_GC == 0
if (cur_type != cur_func_type) { if (cur_type != cur_func_type) {
wasm_set_exception(module, "indirect call type mismatch"); wasm_set_exception(module, "indirect call type mismatch");
@ -5923,12 +5923,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
uint32 offset, addr; uint32 offset, addr;
offset = read_uint32(frame_ip); offset = read_uint32(frame_ip);
V128 data = POP_V128(); V128 data = POP_V128();
addr = POP_I32(); int32 base = POP_I32();
offset += base;
addr = GET_OPERAND(uint32, I32, 0);
V128 data; CHECK_MEMORY_OVERFLOW(32);
data = POP_V128();
CHECK_MEMORY_OVERFLOW(16);
STORE_V128(maddr, data); STORE_V128(maddr, data);
break; break;
} }
@ -5948,14 +5947,14 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
case SIMD_v8x16_shuffle: case SIMD_v8x16_shuffle:
{ {
V128 indices; V128 indices;
V128 v2 = POP_V128();
V128 v1 = POP_V128();
addr_ret = GET_OFFSET();
bh_memcpy_s(&indices, sizeof(V128), frame_ip, bh_memcpy_s(&indices, sizeof(V128), frame_ip,
sizeof(V128)); sizeof(V128));
frame_ip += sizeof(V128); frame_ip += sizeof(V128);
V128 v2 = POP_V128();
V128 v1 = POP_V128();
addr_ret = GET_OFFSET();
V128 result; V128 result;
for (int i = 0; i < 16; i++) { for (int i = 0; i < 16; i++) {
uint8_t index = indices.i8x16[i]; uint8_t index = indices.i8x16[i];
@ -5983,6 +5982,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
SIMDE_V128_TO_SIMD_V128(simde_result, result); SIMDE_V128_TO_SIMD_V128(simde_result, result);
PUT_V128_TO_ADDR(frame_lp + addr_ret, result); PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
break;
} }
/* Splat */ /* Splat */
@ -6008,7 +6008,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
case SIMD_i8x16_splat: case SIMD_i8x16_splat:
{ {
SIMD_SPLAT_OP_I32(simde_wasm_i8x16_splat); uint32 val = POP_I32();
addr_ret = GET_OFFSET();
simde_v128_t simde_result = simde_wasm_i8x16_splat(val);
V128 result;
SIMDE_V128_TO_SIMD_V128(simde_result, result);
PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
break; break;
} }
case SIMD_i16x8_splat: case SIMD_i16x8_splat:
@ -6140,7 +6148,18 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
/* i8x16 comparison operations */ /* i8x16 comparison operations */
case SIMD_i8x16_eq: case SIMD_i8x16_eq:
{ {
SIMD_DOUBLE_OP(simde_wasm_i8x16_eq); V128 v2 = POP_V128();
V128 v1 = POP_V128();
addr_ret = GET_OFFSET();
simde_v128_t simde_result =
simde_wasm_i8x16_eq(SIMD_V128_TO_SIMDE_V128(v1),
SIMD_V128_TO_SIMDE_V128(v2));
V128 result;
SIMDE_V128_TO_SIMD_V128(simde_result, result);
PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
break; break;
} }
case SIMD_i8x16_ne: case SIMD_i8x16_ne:

View File

@ -9125,6 +9125,9 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode,
if (is_32bit_type(cur_type)) if (is_32bit_type(cur_type))
i++; i++;
else if (cur_type == VALUE_TYPE_V128) {
i += 4;
}
else else
i += 2; i += 2;
} }
@ -9155,7 +9158,10 @@ preserve_local_for_block(WASMLoaderContext *loader_ctx, uint8 opcode,
return false; return false;
} }
if (is_32bit_type(cur_type)) { if (cur_type == VALUE_TYPE_V128) {
i += 4;
}
else if (is_32bit_type(cur_type)) {
i++; i++;
} }
else { else {
@ -9498,6 +9504,8 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value,
|| (type == VALUE_TYPE_EXTERNREF || (type == VALUE_TYPE_EXTERNREF
&& *(int32 *)value == c->value.i32) && *(int32 *)value == c->value.i32)
#endif #endif
|| (type == VALUE_TYPE_V128
&& (0 == memcmp(value, &(c->value.v128), sizeof(V128))))
|| (type == VALUE_TYPE_F64 || (type == VALUE_TYPE_F64
&& (0 == memcmp(value, &(c->value.f64), sizeof(float64)))) && (0 == memcmp(value, &(c->value.f64), sizeof(float64))))
|| (type == VALUE_TYPE_F32 || (type == VALUE_TYPE_F32
@ -9508,6 +9516,9 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value,
} }
if (is_32bit_type(c->value_type)) if (is_32bit_type(c->value_type))
operand_offset += 1; operand_offset += 1;
else if (c->value_type == VALUE_TYPE_V128) {
operand_offset += 4;
}
else else
operand_offset += 2; operand_offset += 2;
} }
@ -9559,6 +9570,10 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value,
c->value.i32 = *(int32 *)value; c->value.i32 = *(int32 *)value;
ctx->const_cell_num++; ctx->const_cell_num++;
break; break;
case VALUE_TYPE_V128:
bh_memcpy_s(&(c->value.v128), sizeof(WASMValue), value,
sizeof(V128));
ctx->const_cell_num++;
#if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0 #if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0
case VALUE_TYPE_EXTERNREF: case VALUE_TYPE_EXTERNREF:
case VALUE_TYPE_FUNCREF: case VALUE_TYPE_FUNCREF:
@ -9760,17 +9775,22 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode,
block_type, &return_types, &reftype_maps, &reftype_map_count); block_type, &return_types, &reftype_maps, &reftype_map_count);
#endif #endif
/* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64 instead /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64/V128
* of EXT_OP_COPY_STACK_VALUES for interpreter performance. */ * instead of EXT_OP_COPY_STACK_VALUES for interpreter performance. */
if (return_count == 1) { if (return_count == 1) {
uint8 cell = (uint8)wasm_value_type_cell_num(return_types[0]); uint8 cell = (uint8)wasm_value_type_cell_num(return_types[0]);
if (cell <= 2 /* V128 isn't supported whose cell num is 4 */ if (block->dynamic_offset != *(loader_ctx->frame_offset - cell)) {
&& block->dynamic_offset != *(loader_ctx->frame_offset - cell)) {
/* insert op_copy before else opcode */ /* insert op_copy before else opcode */
if (opcode == WASM_OP_ELSE) if (opcode == WASM_OP_ELSE)
skip_label(); skip_label();
emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP
: EXT_OP_COPY_STACK_TOP_I64); if (cell == 4) {
emit_label(EXT_OP_COPY_STACK_TOP_V128);
}
else {
emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP
: EXT_OP_COPY_STACK_TOP_I64);
}
emit_operand(loader_ctx, *(loader_ctx->frame_offset - cell)); emit_operand(loader_ctx, *(loader_ctx->frame_offset - cell));
emit_operand(loader_ctx, block->dynamic_offset); emit_operand(loader_ctx, block->dynamic_offset);
@ -9805,11 +9825,37 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode,
for (i = (int32)return_count - 1; i >= 0; i--) { for (i = (int32)return_count - 1; i >= 0; i--) {
uint8 cells = (uint8)wasm_value_type_cell_num(return_types[i]); uint8 cells = (uint8)wasm_value_type_cell_num(return_types[i]);
frame_offset -= cells; if (frame_offset - cells < loader_ctx->frame_offset_bottom) {
dynamic_offset -= cells; set_error_buf(error_buf, error_buf_size, "frame offset underflow");
if (dynamic_offset != *frame_offset) { goto fail;
value_count++; }
total_cel_num += cells;
if (cells == 4) {
bool needs_copy = false;
int16 v128_dynamic = dynamic_offset - cells;
for (int j = 0; j < 4; j++) {
if (*(frame_offset - j - 1) != (v128_dynamic + j)) {
needs_copy = true;
break;
}
}
if (needs_copy) {
value_count++;
total_cel_num += cells;
}
frame_offset -= cells;
dynamic_offset = v128_dynamic;
}
else {
frame_offset -= cells;
dynamic_offset -= cells;
if (dynamic_offset != *frame_offset) {
value_count++;
total_cel_num += cells;
}
} }
} }
@ -9845,19 +9891,50 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode,
dynamic_offset = dynamic_offset_org; dynamic_offset = dynamic_offset_org;
for (i = (int32)return_count - 1, j = 0; i >= 0; i--) { for (i = (int32)return_count - 1, j = 0; i >= 0; i--) {
uint8 cell = (uint8)wasm_value_type_cell_num(return_types[i]); uint8 cell = (uint8)wasm_value_type_cell_num(return_types[i]);
frame_offset -= cell;
dynamic_offset -= cell; if (cell == 4) {
if (dynamic_offset != *frame_offset) { bool needs_copy = false;
/* cell num */ int16 v128_dynamic = dynamic_offset - cell;
cells[j] = cell;
/* src offset */ for (int k = 0; k < 4; k++) {
src_offsets[j] = *frame_offset; if (*(frame_offset - k - 1) != (v128_dynamic + k)) {
/* dst offset */ needs_copy = true;
dst_offsets[j] = dynamic_offset; break;
j++; }
}
if (needs_copy) {
cells[j] = cell;
src_offsets[j] = *(frame_offset - cell);
dst_offsets[j] = v128_dynamic;
j++;
}
frame_offset -= cell;
dynamic_offset = v128_dynamic;
} }
else {
frame_offset -= cell;
dynamic_offset -= cell;
if (dynamic_offset != *frame_offset) {
cells[j] = cell;
/* src offset */
src_offsets[j] = *frame_offset;
/* dst offset */
dst_offsets[j] = dynamic_offset;
j++;
}
}
if (opcode == WASM_OP_ELSE) { if (opcode == WASM_OP_ELSE) {
*frame_offset = dynamic_offset; if (cell == 4) {
for (int k = 0; k < cell; k++) {
*(frame_offset + k) = dynamic_offset + k;
}
}
else {
*frame_offset = dynamic_offset;
}
} }
else { else {
loader_ctx->frame_offset = frame_offset; loader_ctx->frame_offset = frame_offset;
@ -13031,6 +13108,10 @@ re_scan:
emit_label(EXT_OP_TEE_LOCAL_FAST); emit_label(EXT_OP_TEE_LOCAL_FAST);
emit_byte(loader_ctx, (uint8)local_offset); emit_byte(loader_ctx, (uint8)local_offset);
} }
else if (local_type == VALUE_TYPE_V128) {
emit_label(EXT_OP_TEE_LOCAL_FAST_V128);
emit_byte(loader_ctx, (uint8)local_offset);
}
else { else {
emit_label(EXT_OP_TEE_LOCAL_FAST_I64); emit_label(EXT_OP_TEE_LOCAL_FAST_I64);
emit_byte(loader_ctx, (uint8)local_offset); emit_byte(loader_ctx, (uint8)local_offset);