diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 701563505..9a026428f 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -47,7 +47,7 @@ typedef float64 CellType_F64; && (app_addr) <= shared_heap_end_off - bytes + 1) #define shared_heap_addr_app_to_native(app_addr, native_addr) \ - native_addr = shared_heap_base_addr + ((app_addr)-shared_heap_start_off) + native_addr = shared_heap_base_addr + ((app_addr) - shared_heap_start_off) #define CHECK_SHARED_HEAP_OVERFLOW(app_addr, bytes, native_addr) \ if (app_addr_in_shared_heap(app_addr, bytes)) \ @@ -1793,7 +1793,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, else cur_func_type = cur_func->u.func->func_type; - /* clang-format off */ + /* clang-format off */ #if WASM_ENABLE_GC == 0 if (cur_type != cur_func_type) { wasm_set_exception(module, "indirect call type mismatch"); @@ -5923,12 +5923,9 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, uint32 offset, addr; offset = read_uint32(frame_ip); V128 data = POP_V128(); addr = POP_I32(); - - V128 data; - data = POP_V128(); CHECK_MEMORY_OVERFLOW(16); STORE_V128(maddr, data); break; } @@ -5948,14 +5945,14 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_v8x16_shuffle: { V128 indices; - V128 v2 = POP_V128(); - V128 v1 = POP_V128(); - addr_ret = GET_OFFSET(); - bh_memcpy_s(&indices, sizeof(V128), frame_ip, sizeof(V128)); frame_ip += sizeof(V128); + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + V128 result; for (int i = 0; i < 16; i++) { uint8_t index = indices.i8x16[i]; @@ -5983,6 +5980,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMDE_V128_TO_SIMD_V128(simde_result, result); PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; } /* Splat */ @@ -6008,7 
+6006,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_i8x16_splat: { - SIMD_SPLAT_OP_I32(simde_wasm_i8x16_splat); + uint32 val = POP_I32(); + addr_ret = GET_OFFSET(); + + simde_v128_t simde_result = simde_wasm_i8x16_splat(val); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); break; } case SIMD_i16x8_splat: @@ -6140,7 +6146,18 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* i8x16 comparison operations */ case SIMD_i8x16_eq: { - SIMD_DOUBLE_OP(simde_wasm_i8x16_eq); + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + + simde_v128_t simde_result = + simde_wasm_i8x16_eq(SIMD_V128_TO_SIMDE_V128(v1), + SIMD_V128_TO_SIMDE_V128(v2)); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); break; } case SIMD_i8x16_ne: diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index d7bd34fde..e19a648da 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -9125,6 +9125,9 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode, if (is_32bit_type(cur_type)) i++; + else if (cur_type == VALUE_TYPE_V128) { + i += 4; + } else i += 2; } @@ -9155,7 +9158,10 @@ preserve_local_for_block(WASMLoaderContext *loader_ctx, uint8 opcode, return false; } - if (is_32bit_type(cur_type)) { + if (cur_type == VALUE_TYPE_V128) { + i += 4; + } + else if (is_32bit_type(cur_type)) { i++; } else { @@ -9498,6 +9504,8 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, || (type == VALUE_TYPE_EXTERNREF && *(int32 *)value == c->value.i32) #endif + || (type == VALUE_TYPE_V128 + && (0 == memcmp(value, &(c->value.v128), sizeof(V128)))) || (type == VALUE_TYPE_F64 && (0 == memcmp(value, &(c->value.f64), sizeof(float64)))) || (type == VALUE_TYPE_F32 @@ -9508,6 +9516,9 @@ 
wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, } if (is_32bit_type(c->value_type)) operand_offset += 1; + else if (c->value_type == VALUE_TYPE_V128) { + operand_offset += 4; + } else operand_offset += 2; } @@ -9559,6 +9570,11 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value, c->value.i32 = *(int32 *)value; ctx->const_cell_num++; break; + case VALUE_TYPE_V128: + bh_memcpy_s(&(c->value.v128), sizeof(WASMValue), value, + sizeof(V128)); + ctx->const_cell_num++; + break; #if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0 case VALUE_TYPE_EXTERNREF: case VALUE_TYPE_FUNCREF: @@ -9760,17 +9776,22 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, block_type, &return_types, &reftype_maps, &reftype_map_count); #endif - /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64 instead - * of EXT_OP_COPY_STACK_VALUES for interpreter performance. */ + /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64/_V128 + * instead of EXT_OP_COPY_STACK_VALUES for interpreter performance. */ if (return_count == 1) { uint8 cell = (uint8)wasm_value_type_cell_num(return_types[0]); - if (cell <= 2 /* V128 isn't supported whose cell num is 4 */ - && block->dynamic_offset != *(loader_ctx->frame_offset - cell)) { + if (block->dynamic_offset != *(loader_ctx->frame_offset - cell)) { /* insert op_copy before else opcode */ if (opcode == WASM_OP_ELSE) skip_label(); - emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP - : EXT_OP_COPY_STACK_TOP_I64); + + if (cell == 4) { + emit_label(EXT_OP_COPY_STACK_TOP_V128); + } + else { + emit_label(cell == 1 ? 
EXT_OP_COPY_STACK_TOP + : EXT_OP_COPY_STACK_TOP_I64); + } emit_operand(loader_ctx, *(loader_ctx->frame_offset - cell)); emit_operand(loader_ctx, block->dynamic_offset); @@ -9805,11 +9826,37 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, for (i = (int32)return_count - 1; i >= 0; i--) { uint8 cells = (uint8)wasm_value_type_cell_num(return_types[i]); - frame_offset -= cells; - dynamic_offset -= cells; - if (dynamic_offset != *frame_offset) { - value_count++; - total_cel_num += cells; + if (frame_offset - cells < loader_ctx->frame_offset_bottom) { + set_error_buf(error_buf, error_buf_size, "frame offset underflow"); + goto fail; + } + + if (cells == 4) { + bool needs_copy = false; + int16 v128_dynamic = dynamic_offset - cells; + + for (int j = 0; j < 4; j++) { + if (*(frame_offset - j - 1) != (v128_dynamic + j)) { + needs_copy = true; + break; + } + } + + if (needs_copy) { + value_count++; + total_cel_num += cells; + } + + frame_offset -= cells; + dynamic_offset = v128_dynamic; + } + else { + frame_offset -= cells; + dynamic_offset -= cells; + if (dynamic_offset != *frame_offset) { + value_count++; + total_cel_num += cells; + } } } @@ -9845,19 +9892,50 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode, dynamic_offset = dynamic_offset_org; for (i = (int32)return_count - 1, j = 0; i >= 0; i--) { uint8 cell = (uint8)wasm_value_type_cell_num(return_types[i]); - frame_offset -= cell; - dynamic_offset -= cell; - if (dynamic_offset != *frame_offset) { - /* cell num */ - cells[j] = cell; - /* src offset */ - src_offsets[j] = *frame_offset; - /* dst offset */ - dst_offsets[j] = dynamic_offset; - j++; + + if (cell == 4) { + bool needs_copy = false; + int16 v128_dynamic = dynamic_offset - cell; + + for (int k = 0; k < 4; k++) { + if (*(frame_offset - k - 1) != (v128_dynamic + k)) { + needs_copy = true; + break; + } + } + + if (needs_copy) { + cells[j] = cell; + src_offsets[j] = *(frame_offset - cell); + dst_offsets[j] = v128_dynamic; + j++; + } + 
+ frame_offset -= cell; + dynamic_offset = v128_dynamic; } + else { + frame_offset -= cell; + dynamic_offset -= cell; + if (dynamic_offset != *frame_offset) { + cells[j] = cell; + /* src offset */ + src_offsets[j] = *frame_offset; + /* dst offset */ + dst_offsets[j] = dynamic_offset; + j++; + } + } + if (opcode == WASM_OP_ELSE) { - *frame_offset = dynamic_offset; + if (cell == 4) { + for (int k = 0; k < cell; k++) { + *(frame_offset + k) = dynamic_offset + k; + } + } + else { + *frame_offset = dynamic_offset; + } } else { loader_ctx->frame_offset = frame_offset; @@ -13031,6 +13109,10 @@ re_scan: emit_label(EXT_OP_TEE_LOCAL_FAST); emit_byte(loader_ctx, (uint8)local_offset); } + else if (local_type == VALUE_TYPE_V128) { + emit_label(EXT_OP_TEE_LOCAL_FAST_V128); + emit_byte(loader_ctx, (uint8)local_offset); + } else { emit_label(EXT_OP_TEE_LOCAL_FAST_I64); emit_byte(loader_ctx, (uint8)local_offset);