From a3074df21ba614845226a49a5ebe01b3819db4e9 Mon Sep 17 00:00:00 2001
From: Wenyong Huang
Date: Thu, 5 Nov 2020 18:15:15 +0800
Subject: [PATCH] Import SIMD feature and add some workload samples (#438)

---
 README.md | 1 +
 build-scripts/config_common.cmake | 4 +
 core/config.h | 5 +
 core/deps/download.sh | 2 +-
 core/iwasm/aot/aot_loader.c | 22 +-
 core/iwasm/aot/aot_runtime.c | 6 +
 core/iwasm/aot/aot_runtime.h | 1 +
 .../common/arch/invokeNative_em64_simd.s | 64 ++
 core/iwasm/common/iwasm_common.cmake | 14 +-
 core/iwasm/common/wasm_runtime_common.c | 161 +++-
 core/iwasm/common/wasm_runtime_common.h | 3 +
 core/iwasm/compilation/aot.c | 9 +
 core/iwasm/compilation/aot.h | 14 +
 core/iwasm/compilation/aot_compiler.c | 741 +++++++++++++++++-
 core/iwasm/compilation/aot_compiler.h | 55 ++
 core/iwasm/compilation/aot_emit_aot_file.c | 45 +-
 core/iwasm/compilation/aot_emit_control.c | 36 +-
 core/iwasm/compilation/aot_emit_exception.c | 26 +-
 core/iwasm/compilation/aot_emit_function.c | 20 +-
 core/iwasm/compilation/aot_emit_memory.c | 73 +-
 core/iwasm/compilation/aot_emit_memory.h | 4 +
 core/iwasm/compilation/aot_emit_numberic.c | 179 +----
 core/iwasm/compilation/aot_emit_parametric.c | 3 +-
 core/iwasm/compilation/aot_emit_variable.c | 12 +-
 core/iwasm/compilation/aot_llvm.c | 242 +++++-
 core/iwasm/compilation/aot_llvm.h | 44 ++
 .../compilation/simd/simd_access_lanes.c | 381 +++++++++
 .../compilation/simd/simd_access_lanes.h | 89 +++
 core/iwasm/compilation/simd/simd_bit_shifts.c | 164 ++++
 core/iwasm/compilation/simd/simd_bit_shifts.h | 39 +
 .../compilation/simd/simd_bitmask_extracts.c | 109 +++
 .../compilation/simd/simd_bitmask_extracts.h | 29 +
 .../iwasm/compilation/simd/simd_bitwise_ops.c | 146 ++++
 .../iwasm/compilation/simd/simd_bitwise_ops.h | 24 +
 .../compilation/simd/simd_bool_reductions.c | 183 +++++
 .../compilation/simd/simd_bool_reductions.h | 43 +
 core/iwasm/compilation/simd/simd_common.c | 47 ++
 core/iwasm/compilation/simd/simd_common.h | 23 +
 .../iwasm/compilation/simd/simd_comparisons.c | 231 ++++++
 .../iwasm/compilation/simd/simd_comparisons.h | 44 ++
 .../compilation/simd/simd_construct_values.c | 190 +++++
 .../compilation/simd/simd_construct_values.h | 29 +
 .../iwasm/compilation/simd/simd_conversions.c | 422 ++++++++++
 .../iwasm/compilation/simd/simd_conversions.h | 51 ++
 .../compilation/simd/simd_floating_point.c | 273 +++++++
 .../compilation/simd/simd_floating_point.h | 49 ++
 core/iwasm/compilation/simd/simd_int_arith.c | 207 +++++
 core/iwasm/compilation/simd/simd_int_arith.h | 51 ++
 core/iwasm/compilation/simd/simd_load_store.c | 301 +++++++
 core/iwasm/compilation/simd/simd_load_store.h | 45 ++
 .../compilation/simd/simd_sat_int_arith.c | 367 +++++++++
 .../compilation/simd/simd_sat_int_arith.h | 66 ++
 core/iwasm/include/aot_export.h | 1 +
 core/iwasm/interpreter/wasm.h | 21 +
 core/iwasm/interpreter/wasm_loader.c | 686 +++++++++++++++-
 core/iwasm/interpreter/wasm_opcode.h | 215 +++++
 .../libraries/libc-emcc/libc_emcc_wrapper.c | 192 +++++
 doc/build_wamr.md | 4 +
 product-mini/platforms/linux/CMakeLists.txt | 5 +
 samples/basic/build.sh | 5 +
 samples/gui/build.sh | 5 +
 samples/littlevgl/build.sh | 5 +
 samples/simple/build.sh | 5 +
 samples/workload/README.md | 34 +
 samples/workload/bwa/.gitignore | 4 +
 samples/workload/bwa/CMakeLists.bwa_wasm.txt | 134 ++++
 samples/workload/bwa/CMakeLists.txt | 91 +++
 samples/workload/bwa/README.md | 47 ++
 samples/workload/cmake/toolchain.cmake | 100 +++
 samples/workload/docker/.gitignore | 1 +
 samples/workload/docker/Dockerfile | 77 ++
samples/workload/docker/build.sh | 48 ++ samples/workload/docker/run.sh | 10 + samples/workload/meshoptimizer/.gitignore | 2 + samples/workload/meshoptimizer/CMakeLists.txt | 39 + samples/workload/meshoptimizer/README.md | 59 ++ .../workload/meshoptimizer/codecbench.patch | 47 ++ samples/workload/tensorflow/build.sh | 32 +- samples/workload/tensorflow/tf_lite.patch | 12 +- samples/workload/wasm-av1/README.md | 22 + samples/workload/wasm-av1/build.sh | 100 +++ samples/workload/wasm-av1/wasm-av1.patch | 696 ++++++++++++++++ wamr-compiler/CMakeLists.txt | 1 + wamr-compiler/main.c | 9 +- 84 files changed, 7780 insertions(+), 318 deletions(-) create mode 100644 core/iwasm/common/arch/invokeNative_em64_simd.s create mode 100644 core/iwasm/compilation/simd/simd_access_lanes.c create mode 100644 core/iwasm/compilation/simd/simd_access_lanes.h create mode 100644 core/iwasm/compilation/simd/simd_bit_shifts.c create mode 100644 core/iwasm/compilation/simd/simd_bit_shifts.h create mode 100644 core/iwasm/compilation/simd/simd_bitmask_extracts.c create mode 100644 core/iwasm/compilation/simd/simd_bitmask_extracts.h create mode 100644 core/iwasm/compilation/simd/simd_bitwise_ops.c create mode 100644 core/iwasm/compilation/simd/simd_bitwise_ops.h create mode 100644 core/iwasm/compilation/simd/simd_bool_reductions.c create mode 100644 core/iwasm/compilation/simd/simd_bool_reductions.h create mode 100644 core/iwasm/compilation/simd/simd_common.c create mode 100644 core/iwasm/compilation/simd/simd_common.h create mode 100644 core/iwasm/compilation/simd/simd_comparisons.c create mode 100644 core/iwasm/compilation/simd/simd_comparisons.h create mode 100644 core/iwasm/compilation/simd/simd_construct_values.c create mode 100644 core/iwasm/compilation/simd/simd_construct_values.h create mode 100644 core/iwasm/compilation/simd/simd_conversions.c create mode 100644 core/iwasm/compilation/simd/simd_conversions.h create mode 100644 core/iwasm/compilation/simd/simd_floating_point.c create mode 100644 core/iwasm/compilation/simd/simd_floating_point.h create mode 100644 core/iwasm/compilation/simd/simd_int_arith.c create mode 100644 core/iwasm/compilation/simd/simd_int_arith.h create mode 100644 core/iwasm/compilation/simd/simd_load_store.c create mode 100644 core/iwasm/compilation/simd/simd_load_store.h create mode 100644 core/iwasm/compilation/simd/simd_sat_int_arith.c create mode 100644 core/iwasm/compilation/simd/simd_sat_int_arith.h create mode 100644 samples/workload/README.md create mode 100644 samples/workload/bwa/.gitignore create mode 100644 samples/workload/bwa/CMakeLists.bwa_wasm.txt create mode 100644 samples/workload/bwa/CMakeLists.txt create mode 100644 samples/workload/bwa/README.md create mode 100644 samples/workload/cmake/toolchain.cmake create mode 100644 samples/workload/docker/.gitignore create mode 100644 samples/workload/docker/Dockerfile create mode 100755 samples/workload/docker/build.sh create mode 100755 samples/workload/docker/run.sh create mode 100644 samples/workload/meshoptimizer/.gitignore create mode 100644 samples/workload/meshoptimizer/CMakeLists.txt create mode 100644 samples/workload/meshoptimizer/README.md create mode 100644 samples/workload/meshoptimizer/codecbench.patch create mode 100644 samples/workload/wasm-av1/README.md create mode 100755 samples/workload/wasm-av1/build.sh create mode 100644 samples/workload/wasm-av1/wasm-av1.patch diff --git a/README.md b/README.md index aa892f874..b34f81540 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ iwasm VM core - 
[Multi-value](https://github.com/WebAssembly/multi-value) - [wasm-c-api](https://github.com/WebAssembly/wasm-c-api) - [Tail-call](https://github.com/WebAssembly/tail-call) +- [128-bit SIMD](https://github.com/WebAssembly/simd) ### Supported architectures and platforms diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index 9f3d5bb68..822be12af 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -165,6 +165,10 @@ if (WAMR_DISABLE_HW_BOUND_CHECK EQUAL 1) add_definitions (-DWASM_DISABLE_HW_BOUND_CHECK=1) message (" Hardware boundary check disabled") endif () +if (WAMR_BUILD_SIMD EQUAL 1) + add_definitions (-DWASM_ENABLE_SIMD=1) + message (" SIMD enabled") +endif () if (WAMR_BUILD_MEMORY_PROFILING EQUAL 1) add_definitions (-DWASM_ENABLE_MEMORY_PROFILING=1) message (" Memory profiling enabled") diff --git a/core/config.h b/core/config.h index 1b474b0bb..7ab1d2fec 100644 --- a/core/config.h +++ b/core/config.h @@ -165,6 +165,11 @@ #define WASM_DISABLE_HW_BOUND_CHECK 0 #endif +/* Disable SIMD unless it is manualy enabled somewhere */ +#ifndef WASM_ENABLE_SIMD +#define WASM_ENABLE_SIMD 0 +#endif + /* Memory profiling */ #ifndef WASM_ENABLE_MEMORY_PROFILING #define WASM_ENABLE_MEMORY_PROFILING 0 diff --git a/core/deps/download.sh b/core/deps/download.sh index 923a9e03d..c7e40de84 100755 --- a/core/deps/download.sh +++ b/core/deps/download.sh @@ -13,7 +13,7 @@ if [ ! -d "lvgl" ]; then fi if [ ! -d "lv_drivers" ]; then echo "git pull lv_drivers..." - git clone https://github.com/littlevgl/lv_drivers.git + git clone https://github.com/littlevgl/lv_drivers.git --branch v6.0.1 [ $? -eq 0 ] || exit $? fi diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index 5f8ed0ebd..478570764 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -66,6 +66,11 @@ exchange_uint32(uint8 *p_data) static void exchange_uint64(uint8 *pData) { + uint32 value; + + value = *(uint32 *)pData; + *(uint32 *)pData = *(uint32 *)(pData + 4); + *(uint32 *)(pData + 4) = value; exchange_uint32(pData); exchange_uint32(pData + 4); } @@ -801,14 +806,22 @@ load_globals(const uint8 **p_buf, const uint8 *buf_end, /* Create each global */ for (i = 0; i < module->global_count; i++) { uint16 init_expr_type; - uint64 init_expr_value; read_uint8(buf, buf_end, globals[i].type); read_uint8(buf, buf_end, globals[i].is_mutable); read_uint16(buf, buf_end, init_expr_type); - read_uint64(buf, buf_end, init_expr_value); + + if (init_expr_type != INIT_EXPR_TYPE_V128_CONST) { + read_uint64(buf, buf_end, globals[i].init_expr.u.i64); + } + else { + uint64 *i64x2 = (uint64 *)globals[i].init_expr.u.v128.i64x2; + CHECK_BUF(buf, buf_end, sizeof(uint64) * 2); + wasm_runtime_read_v128(buf, &i64x2[0], &i64x2[1]); + buf += sizeof(uint64) * 2; + } + globals[i].init_expr.init_expr_type = (uint8)init_expr_type; - globals[i].init_expr.u.i64 = (int64)init_expr_value; globals[i].size = wasm_value_type_size(globals[i].type); globals[i].data_offset = data_offset; @@ -2101,6 +2114,9 @@ aot_convert_wasm_module(WASMModule *wasm_module, #endif #if WASM_ENABLE_TAIL_CALL != 0 option.enable_tail_call = true; +#endif +#if WASM_ENABLE_SIMD != 0 + option.enable_simd = true; #endif comp_ctx = aot_create_comp_context(comp_data, &option); if (!comp_ctx) { diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c index 6507281c0..f31d36dfd 100644 --- a/core/iwasm/aot/aot_runtime.c +++ b/core/iwasm/aot/aot_runtime.c @@ -385,12 +385,14 @@ 
memory_instantiate(AOTModuleInstance *module_inst, AOTModule *module, memory_inst->mem_bound_check_2bytes.u64 = total_size - 2; memory_inst->mem_bound_check_4bytes.u64 = total_size - 4; memory_inst->mem_bound_check_8bytes.u64 = total_size - 8; + memory_inst->mem_bound_check_16bytes.u64 = total_size - 16; } else { memory_inst->mem_bound_check_1byte.u32[0] = (uint32)total_size - 1; memory_inst->mem_bound_check_2bytes.u32[0] = (uint32)total_size - 2; memory_inst->mem_bound_check_4bytes.u32[0] = (uint32)total_size - 4; memory_inst->mem_bound_check_8bytes.u32[0] = (uint32)total_size - 8; + memory_inst->mem_bound_check_16bytes.u32[0] = (uint32)total_size - 16; } } @@ -1545,12 +1547,14 @@ aot_enlarge_memory(AOTModuleInstance *module_inst, uint32 inc_page_count) memory_inst->mem_bound_check_2bytes.u64 = total_size - 2; memory_inst->mem_bound_check_4bytes.u64 = total_size - 4; memory_inst->mem_bound_check_8bytes.u64 = total_size - 8; + memory_inst->mem_bound_check_16bytes.u64 = total_size - 16; } else { memory_inst->mem_bound_check_1byte.u32[0] = (uint32)total_size - 1; memory_inst->mem_bound_check_2bytes.u32[0] = (uint32)total_size - 2; memory_inst->mem_bound_check_4bytes.u32[0] = (uint32)total_size - 4; memory_inst->mem_bound_check_8bytes.u32[0] = (uint32)total_size - 8; + memory_inst->mem_bound_check_16bytes.u32[0] = (uint32)total_size - 16; } return true; } @@ -1593,12 +1597,14 @@ aot_enlarge_memory(AOTModuleInstance *module_inst, uint32 inc_page_count) memory_inst->mem_bound_check_2bytes.u64 = total_size - 2; memory_inst->mem_bound_check_4bytes.u64 = total_size - 4; memory_inst->mem_bound_check_8bytes.u64 = total_size - 8; + memory_inst->mem_bound_check_16bytes.u64 = total_size - 16; } else { memory_inst->mem_bound_check_1byte.u32[0] = (uint32)total_size - 1; memory_inst->mem_bound_check_2bytes.u32[0] = (uint32)total_size - 2; memory_inst->mem_bound_check_4bytes.u32[0] = (uint32)total_size - 4; memory_inst->mem_bound_check_8bytes.u32[0] = (uint32)total_size - 8; + memory_inst->mem_bound_check_16bytes.u32[0] = (uint32)total_size - 16; } return true; } diff --git a/core/iwasm/aot/aot_runtime.h b/core/iwasm/aot/aot_runtime.h index ee1d36ba0..93a0cf5d8 100644 --- a/core/iwasm/aot/aot_runtime.h +++ b/core/iwasm/aot/aot_runtime.h @@ -234,6 +234,7 @@ typedef struct AOTMemoryInstance { MemBound mem_bound_check_2bytes; MemBound mem_bound_check_4bytes; MemBound mem_bound_check_8bytes; + MemBound mem_bound_check_16bytes; } AOTMemoryInstance; typedef struct AOTModuleInstance { diff --git a/core/iwasm/common/arch/invokeNative_em64_simd.s b/core/iwasm/common/arch/invokeNative_em64_simd.s new file mode 100644 index 000000000..eb9a58bc0 --- /dev/null +++ b/core/iwasm/common/arch/invokeNative_em64_simd.s @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + .text + .align 2 +#ifndef BH_PLATFORM_DARWIN +.globl invokeNative + .type invokeNative, @function +invokeNative: +#else +.globl _invokeNative +_invokeNative: +#endif /* end of BH_PLATFORM_DARWIN */ + /* rdi - function ptr */ + /* rsi - argv */ + /* rdx - n_stacks */ + + push %rbp + mov %rsp, %rbp + + mov %rdx, %r10 + mov %rsp, %r11 /* Check that stack is aligned on */ + and $8, %r11 /* 16 bytes. 
This code may be removed */ + je check_stack_succ /* when we are sure that compiler always */ + int3 /* calls us with aligned stack */ +check_stack_succ: + mov %r10, %r11 /* Align stack on 16 bytes before pushing */ + and $1, %r11 /* stack arguments in case we have an odd */ + shl $3, %r11 /* number of stack arguments */ + sub %r11, %rsp + /* store memory args */ + movq %rdi, %r11 /* func ptr */ + movq %r10, %rcx /* counter */ + lea 128+48-8(%rsi,%rcx,8), %r10 + sub %rsp, %r10 + cmpq $0, %rcx + je push_args_end +push_args: + push 0(%rsp,%r10) + loop push_args +push_args_end: + /* fill all fp args */ + movdqa 0x00(%rsi), %xmm0 + movdqa 0x10(%rsi), %xmm1 + movdqa 0x20(%rsi), %xmm2 + movdqa 0x30(%rsi), %xmm3 + movdqa 0x40(%rsi), %xmm4 + movdqa 0x50(%rsi), %xmm5 + movdqa 0x60(%rsi), %xmm6 + movdqa 0x70(%rsi), %xmm7 + + /* fill all int args */ + movq 0x80(%rsi), %rdi + movq 0x90(%rsi), %rdx + movq 0x98(%rsi), %rcx + movq 0xa0(%rsi), %r8 + movq 0xa8(%rsi), %r9 + movq 0x88(%rsi), %rsi + + call *%r11 + leave + ret + diff --git a/core/iwasm/common/iwasm_common.cmake b/core/iwasm/common/iwasm_common.cmake index 458b00a3f..ba0c0d0b4 100644 --- a/core/iwasm/common/iwasm_common.cmake +++ b/core/iwasm/common/iwasm_common.cmake @@ -11,10 +11,18 @@ add_definitions(-DBH_FREE=wasm_runtime_free) file (GLOB c_source_all ${IWASM_COMMON_DIR}/*.c) if (WAMR_BUILD_TARGET STREQUAL "X86_64" OR WAMR_BUILD_TARGET STREQUAL "AMD_64") - if (WAMR_BUILD_PLATFORM STREQUAL "windows") - set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_em64.asm) + if (NOT WAMR_BUILD_SIMD EQUAL 1) + if (WAMR_BUILD_PLATFORM STREQUAL "windows") + set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_em64.asm) + else () + set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_em64.s) + endif () else () - set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_em64.s) + if (WAMR_BUILD_PLATFORM STREQUAL "windows") + message(FATAL_ERROR "need an implementation of SIMD on windows") + else() + set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_em64_simd.s) + endif() endif () elseif (WAMR_BUILD_TARGET STREQUAL "X86_32") if (WAMR_BUILD_PLATFORM STREQUAL "windows") diff --git a/core/iwasm/common/wasm_runtime_common.c b/core/iwasm/common/wasm_runtime_common.c index 5e1983fde..bcc68a10b 100644 --- a/core/iwasm/common/wasm_runtime_common.c +++ b/core/iwasm/common/wasm_runtime_common.c @@ -2457,6 +2457,23 @@ wasm_application_execute_func(WASMModuleInstanceCommon *module_inst, argv1[p++] = u.parts[1]; break; } +#if WASM_ENABLE_SIMD != 0 + case VALUE_TYPE_V128: + { + /* it likes 0x123\0x234 or 123\234 */ + /* retrive first i64 */ + *(uint64*)(argv1 + p) = strtoull(argv[i], &endptr, 0); + /* skip \ */ + endptr++; + /* retrive second i64 */ + *(uint64*)(argv1 + p + 2) = strtoull(endptr, &endptr, 0); + p += 4; + break; + } +#endif /* WASM_ENABLE_SIMD != 0 */ + default: + bh_assert(0); + break; } if (endptr && *endptr != '\0' && *endptr != '_') { snprintf(buf, sizeof(buf), "invalid input argument %d: %s", @@ -2477,9 +2494,11 @@ wasm_application_execute_func(WASMModuleInstanceCommon *module_inst, for (j = 0; j < type->result_count; j++) { switch (type->types[type->param_count + j]) { case VALUE_TYPE_I32: + { os_printf("0x%x:i32", argv1[k]); k++; break; + } case VALUE_TYPE_I64: { union { uint64 val; uint32 parts[2]; } u; @@ -2511,6 +2530,27 @@ wasm_application_execute_func(WASMModuleInstanceCommon *module_inst, os_printf("%.7g:f64", u.val); break; } +#if WASM_ENABLE_SIMD != 0 + case 
VALUE_TYPE_V128: + { + uint64 *v = (uint64*)(argv1 + k); +#if defined(PRIx64) + os_printf("<0x%016"PRIx64" 0x%016"PRIx64">:v128", *v, *(v + 1)); +#else + if (4 == sizeof(long)) { + os_printf("<0x%016llx 0x%016llx>:v128", *v, *(v + 1)); + } + else { + os_printf("<0x%016lx 0x%016lx>:v128", *v, *(v + 1)); + } +#endif /* PRIx64 */ + k += 4; + break; + } +#endif /* WASM_ENABLE_SIMD != 0 */ + default: + bh_assert(0); + break; } if (j < (uint32)(type->result_count - 1)) os_printf(","); @@ -3067,12 +3107,31 @@ fail: #if defined(BUILD_TARGET_X86_64) \ || defined(BUILD_TARGET_AMD_64) \ || defined(BUILD_TARGET_AARCH64) + +#if WASM_ENABLE_SIMD != 0 +#ifdef v128 +#undef v128 +#endif + +#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) +#include +/* unaligned */ +#define v128 __m128i_u +#else +#warning "Include header files for v128 to support SIMD feature" +#endif + +#ifndef v128 +#error "v128 type isn't defined" +#endif +#endif /* end of WASM_ENABLE_SIMD != 0 */ + typedef void (*GenericFunctionPointer)(); int64 invokeNative(GenericFunctionPointer f, uint64 *args, uint64 n_stacks); typedef float64 (*Float64FuncPtr)(GenericFunctionPointer, uint64*, uint64); typedef float32 (*Float32FuncPtr)(GenericFunctionPointer, uint64*, uint64); -typedef int64 (*Int64FuncPtr)(GenericFunctionPointer, uint64*,uint64); +typedef int64 (*Int64FuncPtr)(GenericFunctionPointer, uint64*, uint64); typedef int32 (*Int32FuncPtr)(GenericFunctionPointer, uint64*, uint64); typedef void (*VoidFuncPtr)(GenericFunctionPointer, uint64*, uint64); @@ -3082,10 +3141,15 @@ static Int64FuncPtr invokeNative_Int64 = (Int64FuncPtr)(uintptr_t)invokeNative; static Int32FuncPtr invokeNative_Int32 = (Int32FuncPtr)(uintptr_t)invokeNative; static VoidFuncPtr invokeNative_Void = (VoidFuncPtr)(uintptr_t)invokeNative; +#if WASM_ENABLE_SIMD != 0 +typedef v128 (*V128FuncPtr)(GenericFunctionPointer, uint64*, uint64); +static V128FuncPtr invokeNative_V128 = (V128FuncPtr)(uintptr_t)invokeNative; +#endif + #if defined(_WIN32) || defined(_WIN32_) #define MAX_REG_FLOATS 4 #define MAX_REG_INTS 4 -#else +#else /* else of defined(_WIN32) || defined(_WIN32_) */ #define MAX_REG_FLOATS 8 #if defined(BUILD_TARGET_AARCH64) #define MAX_REG_INTS 8 @@ -3101,12 +3165,17 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr, uint32 *argv, uint32 argc, uint32 *argv_ret) { WASMModuleInstanceCommon *module = wasm_runtime_get_module_inst(exec_env); - uint64 argv_buf[32], *argv1 = argv_buf, *fps, *ints, *stacks, size, arg_i64; + uint64 argv_buf[32], *argv1 = argv_buf, *ints, *stacks, size, arg_i64; uint32 *argv_src = argv, i, argc1, n_ints = 0, n_stacks = 0; uint32 arg_i32, ptr_len; uint32 result_count = func_type->result_count; uint32 ext_ret_count = result_count > 1 ? 
result_count - 1 : 0; bool ret = false; +#if WASM_ENABLE_SIMD == 0 + uint64 *fps; +#else + v128 *fps; +#endif #if defined(_WIN32) || defined(_WIN32_) /* important difference in calling conventions */ @@ -3115,7 +3184,13 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr, int n_fps = 0; #endif - argc1 = 1 + MAX_REG_FLOATS + (uint32)func_type->param_count + ext_ret_count; +#if WASM_ENABLE_SIMD == 0 + argc1 = 1 + MAX_REG_FLOATS + (uint32)func_type->param_count + + ext_ret_count; +#else + argc1 = 1 + MAX_REG_FLOATS * 2 + (uint32)func_type->param_count * 2 + + ext_ret_count; +#endif if (argc1 > sizeof(argv_buf) / sizeof(uint64)) { size = sizeof(uint64) * (uint64)argc1; if (!(argv1 = runtime_malloc((uint32)size, exec_env->module_inst, @@ -3124,8 +3199,13 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr, } } +#if WASM_ENABLE_SIMD == 0 fps = argv1; ints = fps + MAX_REG_FLOATS; +#else + fps = (v128 *)argv1; + ints = (uint64 *)(fps + MAX_REG_FLOATS); +#endif stacks = ints + MAX_REG_INTS; ints[n_ints++] = (uint64)(uintptr_t)exec_env; @@ -3175,18 +3255,34 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr, argv_src += 2; break; case VALUE_TYPE_F32: - if (n_fps < MAX_REG_FLOATS) + if (n_fps < MAX_REG_FLOATS) { *(float32*)&fps[n_fps++] = *(float32*)argv_src++; - else + } + else { *(float32*)&stacks[n_stacks++] = *(float32*)argv_src++; + } break; case VALUE_TYPE_F64: - if (n_fps < MAX_REG_FLOATS) + if (n_fps < MAX_REG_FLOATS) { *(float64*)&fps[n_fps++] = *(float64*)argv_src; - else + } + else { *(float64*)&stacks[n_stacks++] = *(float64*)argv_src; + } argv_src += 2; break; +#if WASM_ENABLE_SIMD != 0 + case VALUE_TYPE_V128: + if (n_fps < MAX_REG_FLOATS) { + *(v128*)&fps[n_fps++] = *(v128*)argv_src; + } + else { + *(v128*)&stacks[n_stacks++] = *(v128*)argv_src; + n_stacks++; + } + argv_src += 4; + break; +#endif default: bh_assert(0); break; @@ -3221,6 +3317,11 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr, case VALUE_TYPE_F64: PUT_F64_TO_ADDR(argv_ret, invokeNative_Float64(func_ptr, argv1, n_stacks)); break; +#if WASM_ENABLE_SIMD != 0 + case VALUE_TYPE_V128: + *(v128*)argv_ret = invokeNative_V128(func_ptr, argv1, n_stacks); + break; +#endif default: bh_assert(0); break; @@ -3268,6 +3369,50 @@ wasm_runtime_call_indirect(WASMExecEnv *exec_env, return false; } +static void +exchange_uint32(uint8 *p_data) +{ + uint8 value = *p_data; + *p_data = *(p_data + 3); + *(p_data + 3) = value; + + value = *(p_data + 1); + *(p_data + 1) = *(p_data + 2); + *(p_data + 2) = value; +} + +static void +exchange_uint64(uint8 *p_data) +{ + uint32 value; + + value = *(uint32 *)p_data; + *(uint32 *)p_data = *(uint32 *)(p_data + 4); + *(uint32 *)(p_data + 4) = value; + exchange_uint32(p_data); + exchange_uint32(p_data + 4); +} + +void +wasm_runtime_read_v128(const uint8 *bytes, uint64 *ret1, uint64 *ret2) +{ + uint64 u1, u2; + + bh_memcpy_s(&u1, 8, bytes, 8); + bh_memcpy_s(&u2, 8, bytes + 8, 8); + + if (!is_little_endian()) { + exchange_uint64((uint8*)&u1); + exchange_uint64((uint8*)&u2); + *ret1 = u2; + *ret2 = u1; + } + else { + *ret1 = u1; + *ret2 = u2; + } +} + #if WASM_ENABLE_THREAD_MGR != 0 typedef struct WASMThreadArg { WASMExecEnv *new_exec_env; diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index 111bfec2c..86907d469 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -464,6 +464,9 @@ wasm_runtime_invoke_native_raw(WASMExecEnv *exec_env, void 
*func_ptr, void *attachment, uint32 *argv, uint32 argc, uint32 *ret); +void +wasm_runtime_read_v128(const uint8 *bytes, uint64 *ret1, uint64 *ret2); + void wasm_runtime_dump_module_mem_consumption(const WASMModuleCommon *module); diff --git a/core/iwasm/compilation/aot.c b/core/iwasm/compilation/aot.c index 04a97e2f6..5e85cc1fa 100644 --- a/core/iwasm/compilation/aot.c +++ b/core/iwasm/compilation/aot.c @@ -14,6 +14,15 @@ aot_get_last_error() return aot_error[0] == '\0' ? "" : aot_error; } +void +aot_set_last_error_v(const char *format, ...) +{ + va_list args; + va_start(args, format); + vsnprintf(aot_error, sizeof(aot_error), format, args); + va_end(args); +} + void aot_set_last_error(const char *error) { diff --git a/core/iwasm/compilation/aot.h b/core/iwasm/compilation/aot.h index ccceb75f9..a898ad04c 100644 --- a/core/iwasm/compilation/aot.h +++ b/core/iwasm/compilation/aot.h @@ -230,6 +230,20 @@ aot_get_last_error(); void aot_set_last_error(const char *error); +void +aot_set_last_error_v(const char *format, ...); + +#if BH_DEBUG == 1 +#define HANDLE_FAILURE(callee) do { \ + aot_set_last_error_v("call %s failed in %s:%d", (callee),\ + __FUNCTION__, __LINE__); \ + } while (0) +#else +#define HANDLE_FAILURE(callee) do { \ + aot_set_last_error_v("call %s failed", (callee)); \ + } while (0) +#endif + #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/compilation/aot_compiler.c b/core/iwasm/compilation/aot_compiler.c index d4aa718dc..403515995 100644 --- a/core/iwasm/compilation/aot_compiler.c +++ b/core/iwasm/compilation/aot_compiler.c @@ -14,6 +14,18 @@ #include "aot_emit_control.h" #include "aot_emit_function.h" #include "aot_emit_parametric.h" +#include "simd/simd_access_lanes.h" +#include "simd/simd_bitmask_extracts.h" +#include "simd/simd_bit_shifts.h" +#include "simd/simd_bitwise_ops.h" +#include "simd/simd_bool_reductions.h" +#include "simd/simd_comparisons.h" +#include "simd/simd_construct_values.h" +#include "simd/simd_conversions.h" +#include "simd/simd_floating_point.h" +#include "simd/simd_int_arith.h" +#include "simd/simd_load_store.h" +#include "simd/simd_sat_int_arith.h" #include "../aot/aot_runtime.h" #include "../interpreter/wasm_opcode.h" #include @@ -163,6 +175,7 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) || value_type == VALUE_TYPE_I64 || value_type == VALUE_TYPE_F32 || value_type == VALUE_TYPE_F64 + || value_type == VALUE_TYPE_V128 || value_type == VALUE_TYPE_VOID) { param_count = 0; param_types = NULL; @@ -280,12 +293,12 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) case WASM_OP_DROP: if (!aot_compile_op_drop(comp_ctx, func_ctx, true)) - return false; + return false; break; case WASM_OP_DROP_64: if (!aot_compile_op_drop(comp_ctx, func_ctx, false)) - return false; + return false; break; case WASM_OP_SELECT: @@ -761,22 +774,22 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) case WASM_OP_I32_REINTERPRET_F32: if (!aot_compile_op_i32_reinterpret_f32(comp_ctx, func_ctx)) - return false; + return false; break; case WASM_OP_I64_REINTERPRET_F64: if (!aot_compile_op_i64_reinterpret_f64(comp_ctx, func_ctx)) - return false; + return false; break; case WASM_OP_F32_REINTERPRET_I32: if (!aot_compile_op_f32_reinterpret_i32(comp_ctx, func_ctx)) - return false; + return false; break; case WASM_OP_F64_REINTERPRET_I64: if (!aot_compile_op_f64_reinterpret_i64(comp_ctx, func_ctx)) - return false; + return false; break; case WASM_OP_I32_EXTEND8_S: @@ -1019,6 +1032,722 @@ build_atomic_rmw: } #endif /* end of 
WASM_ENABLE_SHARED_MEMORY */ +#if WASM_ENABLE_SIMD != 0 + case WASM_OP_SIMD_PREFIX: + { + if (!comp_ctx->enable_simd) { + aot_set_last_error( + "current building does not support SIMD instructions"); + return false; + } + + opcode = *frame_ip++; + switch (opcode) { + case SIMD_v128_load: + { + read_leb_uint32(frame_ip, frame_ip_end, align); + read_leb_uint32(frame_ip, frame_ip_end, offset); + if (!aot_compile_simd_v128_load(comp_ctx, func_ctx, align, offset)) + return false; + break; + } + + case SIMD_i16x8_load8x8_s: + case SIMD_i16x8_load8x8_u: + case SIMD_i32x4_load16x4_s: + case SIMD_i32x4_load16x4_u: + case SIMD_i64x2_load32x2_s: + case SIMD_i64x2_load32x2_u: + { + read_leb_uint32(frame_ip, frame_ip_end, align); + read_leb_uint32(frame_ip, frame_ip_end, offset); + if (!aot_compile_simd_load_extend(comp_ctx, func_ctx, + opcode, align, offset)) + return false; + break; + } + + case SIMD_v8x16_load_splat: + case SIMD_v16x8_load_splat: + case SIMD_v32x4_load_splat: + case SIMD_v64x2_load_splat: + { + read_leb_uint32(frame_ip, frame_ip_end, align); + read_leb_uint32(frame_ip, frame_ip_end, offset); + if (!aot_compile_simd_load_splat(comp_ctx, func_ctx, + opcode, align, offset)) + return false; + break; + } + + case SIMD_v128_store: + { + read_leb_uint32(frame_ip, frame_ip_end, align); + read_leb_uint32(frame_ip, frame_ip_end, offset); + if (!aot_compile_simd_v128_store(comp_ctx, func_ctx, align, offset)) + return false; + break; + } + + case SIMD_v128_const: + { + if (!aot_compile_simd_v128_const(comp_ctx, func_ctx, frame_ip)) + return false; + frame_ip += 16; + break; + } + + case SIMD_v8x16_shuffle: + { + if (!aot_compile_simd_shuffle(comp_ctx, func_ctx, frame_ip)) + return false; + frame_ip += 16; + break; + } + + case SIMD_v8x16_swizzle: + { + if (!aot_compile_simd_swizzle(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i8x16_splat: + case SIMD_i16x8_splat: + case SIMD_i32x4_splat: + case SIMD_i64x2_splat: + case SIMD_f32x4_splat: + case SIMD_f64x2_splat: + { + if (!aot_compile_simd_splat(comp_ctx, func_ctx, opcode)) + return false; + break; + } + + case SIMD_i8x16_extract_lane_s: + { + if (!aot_compile_simd_extract_i8x16(comp_ctx, func_ctx, *frame_ip++, + true)) + return false; + break; + } + case SIMD_i8x16_extract_lane_u: + { + if (!aot_compile_simd_extract_i8x16(comp_ctx, func_ctx, *frame_ip++, + false)) + return false; + break; + } + case SIMD_i16x8_extract_lane_s: + { + if (!aot_compile_simd_extract_i16x8(comp_ctx, func_ctx, *frame_ip++, + true)) + return false; + break; + } + case SIMD_i16x8_extract_lane_u: + { + if (!aot_compile_simd_extract_i16x8(comp_ctx, func_ctx, *frame_ip++, + false)) + return false; + break; + } + case SIMD_i32x4_extract_lane: + { + if (!aot_compile_simd_extract_i32x4(comp_ctx, func_ctx, *frame_ip++)) + return false; + break; + } + case SIMD_i64x2_extract_lane: + { + if (!aot_compile_simd_extract_i64x2(comp_ctx, func_ctx, *frame_ip++)) + return false; + break; + } + case SIMD_f32x4_extract_lane: + { + if (!aot_compile_simd_extract_f32x4(comp_ctx, func_ctx, *frame_ip++)) + return false; + break; + } + case SIMD_f64x2_extract_lane: + { + if (!aot_compile_simd_extract_f64x2(comp_ctx, func_ctx, *frame_ip++)) + return false; + break; + } + + case SIMD_i8x16_replace_lane: + { + if (!aot_compile_simd_replace_i8x16(comp_ctx, func_ctx, *frame_ip++)) + return false; + break; + } + case SIMD_i16x8_replace_lane: + { + if (!aot_compile_simd_replace_i16x8(comp_ctx, func_ctx, *frame_ip++)) + return false; + break; + } + case SIMD_i32x4_replace_lane: + { 
+ if (!aot_compile_simd_replace_i32x4(comp_ctx, func_ctx, *frame_ip++)) + return false; + break; + } + case SIMD_i64x2_replace_lane: + { + if (!aot_compile_simd_replace_i64x2(comp_ctx, func_ctx, *frame_ip++)) + return false; + break; + } + case SIMD_f32x4_replace_lane: + { + if (!aot_compile_simd_replace_f32x4(comp_ctx, func_ctx, *frame_ip++)) + return false; + break; + } + case SIMD_f64x2_replace_lane: + { + if (!aot_compile_simd_replace_f64x2(comp_ctx, func_ctx, *frame_ip++)) + return false; + break; + } + + case SIMD_i8x16_eq: + case SIMD_i8x16_ne: + case SIMD_i8x16_lt_s: + case SIMD_i8x16_lt_u: + case SIMD_i8x16_gt_s: + case SIMD_i8x16_gt_u: + case SIMD_i8x16_le_s: + case SIMD_i8x16_le_u: + case SIMD_i8x16_ge_s: + case SIMD_i8x16_ge_u: + { + if (!aot_compile_simd_i8x16_compare(comp_ctx, func_ctx, + INT_EQ + opcode - SIMD_i8x16_eq)) + return false; + break; + } + + case SIMD_i16x8_eq: + case SIMD_i16x8_ne: + case SIMD_i16x8_lt_s: + case SIMD_i16x8_lt_u: + case SIMD_i16x8_gt_s: + case SIMD_i16x8_gt_u: + case SIMD_i16x8_le_s: + case SIMD_i16x8_le_u: + case SIMD_i16x8_ge_s: + case SIMD_i16x8_ge_u: + { + if (!aot_compile_simd_i16x8_compare(comp_ctx, func_ctx, + INT_EQ + opcode - SIMD_i16x8_eq)) + return false; + break; + } + + case SIMD_i32x4_eq: + case SIMD_i32x4_ne: + case SIMD_i32x4_lt_s: + case SIMD_i32x4_lt_u: + case SIMD_i32x4_gt_s: + case SIMD_i32x4_gt_u: + case SIMD_i32x4_le_s: + case SIMD_i32x4_le_u: + case SIMD_i32x4_ge_s: + case SIMD_i32x4_ge_u: + { + if (!aot_compile_simd_i32x4_compare(comp_ctx, func_ctx, + INT_EQ + opcode - SIMD_i32x4_eq)) + return false; + break; + } + + case SIMD_f32x4_eq: + case SIMD_f32x4_ne: + case SIMD_f32x4_lt: + case SIMD_f32x4_gt: + case SIMD_f32x4_le: + case SIMD_f32x4_ge: + { + if (!aot_compile_simd_f32x4_compare(comp_ctx, func_ctx, + FLOAT_EQ + opcode - SIMD_f32x4_eq)) + return false; + break; + } + + case SIMD_f64x2_eq: + case SIMD_f64x2_ne: + case SIMD_f64x2_lt: + case SIMD_f64x2_gt: + case SIMD_f64x2_le: + case SIMD_f64x2_ge: + { + if (!aot_compile_simd_f64x2_compare(comp_ctx, func_ctx, + FLOAT_EQ + opcode - SIMD_f64x2_eq)) + return false; + break; + } + + case SIMD_v128_not: + case SIMD_v128_and: + case SIMD_v128_andnot: + case SIMD_v128_or: + case SIMD_v128_xor: + case SIMD_v128_bitselect: + { + if (!aot_compile_simd_v128_bitwise(comp_ctx, func_ctx, + V128_NOT + opcode - SIMD_v128_not)) + return false; + break; + } + + case SIMD_i8x16_add: + case SIMD_i8x16_sub: + { + V128Arithmetic arith_op = (opcode == SIMD_i8x16_add) + ? 
V128_ADD : V128_SUB; + if (!aot_compile_simd_i8x16_arith(comp_ctx, func_ctx, arith_op)) + return false; + break; + } + + case SIMD_i16x8_add: + case SIMD_i16x8_sub: + case SIMD_i16x8_mul: + { + V128Arithmetic arith_op = V128_ADD; + if (opcode == SIMD_i16x8_sub) + arith_op = V128_SUB; + else if (opcode == SIMD_i16x8_mul) + arith_op = V128_MUL; + if (!aot_compile_simd_i16x8_arith(comp_ctx, func_ctx, arith_op)) + return false; + break; + } + + case SIMD_i32x4_add: + case SIMD_i32x4_sub: + case SIMD_i32x4_mul: + { + V128Arithmetic arith_op = V128_ADD; + if (opcode == SIMD_i32x4_sub) + arith_op = V128_SUB; + else if (opcode == SIMD_i32x4_mul) + arith_op = V128_MUL; + if (!aot_compile_simd_i32x4_arith(comp_ctx, func_ctx, arith_op)) + return false; + break; + } + + case SIMD_i64x2_add: + case SIMD_i64x2_sub: + case SIMD_i64x2_mul: + { + V128Arithmetic arith_op = V128_ADD; + if (opcode == SIMD_i64x2_sub) + arith_op = V128_SUB; + else if (opcode == SIMD_i64x2_mul) + arith_op = V128_MUL; + if (!aot_compile_simd_i64x2_arith(comp_ctx, func_ctx, arith_op)) + return false; + break; + } + + case SIMD_i8x16_neg: + { + if (!aot_compile_simd_i8x16_neg(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i16x8_neg: + { + if (!aot_compile_simd_i16x8_neg(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i32x4_neg: + { + if (!aot_compile_simd_i32x4_neg(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i64x2_neg: + { + if (!aot_compile_simd_i64x2_neg(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i8x16_add_saturate_s: + case SIMD_i8x16_add_saturate_u: + { + if (!aot_compile_simd_i8x16_saturate(comp_ctx, func_ctx, V128_ADD, + opcode == SIMD_i8x16_add_saturate_s + ? true : false)) + return false; + break; + } + case SIMD_i8x16_sub_saturate_s: + case SIMD_i8x16_sub_saturate_u: + { + if (!aot_compile_simd_i8x16_saturate(comp_ctx, func_ctx, V128_SUB, + opcode == SIMD_i8x16_sub_saturate_s + ? true : false)) + return false; + break; + } + case SIMD_i16x8_add_saturate_s: + case SIMD_i16x8_add_saturate_u: + { + if (!aot_compile_simd_i16x8_saturate(comp_ctx, func_ctx, V128_ADD, + opcode == SIMD_i16x8_add_saturate_s + ? true : false)) + return false; + break; + } + case SIMD_i16x8_sub_saturate_s: + case SIMD_i16x8_sub_saturate_u: + { + if (!aot_compile_simd_i16x8_saturate(comp_ctx, func_ctx, V128_SUB, + opcode == SIMD_i16x8_sub_saturate_s + ? true : false)) + return false; + break; + } + + case SIMD_i8x16_min_s: + case SIMD_i8x16_min_u: + { + if (!aot_compile_simd_i8x16_cmp(comp_ctx, func_ctx, V128_MIN, + opcode == SIMD_i8x16_min_s + ? true : false)) + return false; + break; + } + case SIMD_i8x16_max_s: + case SIMD_i8x16_max_u: + { + if (!aot_compile_simd_i8x16_cmp(comp_ctx, func_ctx, V128_MAX, + opcode == SIMD_i8x16_max_s + ? true : false)) + return false; + break; + } + case SIMD_i16x8_min_s: + case SIMD_i16x8_min_u: + { + if (!aot_compile_simd_i16x8_cmp(comp_ctx, func_ctx, V128_MIN, + opcode == SIMD_i16x8_min_s + ? true : false)) + return false; + break; + } + case SIMD_i16x8_max_s: + case SIMD_i16x8_max_u: + { + if (!aot_compile_simd_i16x8_cmp(comp_ctx, func_ctx, V128_MAX, + opcode == SIMD_i16x8_max_s + ? true : false)) + return false; + break; + } + case SIMD_i32x4_min_s: + case SIMD_i32x4_min_u: + { + if (!aot_compile_simd_i32x4_cmp(comp_ctx, func_ctx, V128_MIN, + opcode == SIMD_i32x4_min_s + ? 
true : false)) + return false; + break; + } + case SIMD_i32x4_max_s: + case SIMD_i32x4_max_u: + { + if (!aot_compile_simd_i32x4_cmp(comp_ctx, func_ctx, V128_MAX, + opcode == SIMD_i32x4_max_s + ? true : false)) + return false; + break; + } + + case SIMD_i8x16_abs: + { + if (!aot_compile_simd_i8x16_abs(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i16x8_abs: + { + if (!aot_compile_simd_i16x8_abs(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i32x4_abs: + { + if (!aot_compile_simd_i32x4_abs(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i8x16_avgr_u: + { + if (!aot_compile_simd_i8x16_avgr_u(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i16x8_avgr_u: + { + if (!aot_compile_simd_i16x8_avgr_u(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i8x16_any_true: + { + if (!aot_compile_simd_i8x16_any_true(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i16x8_any_true: + { + if (!aot_compile_simd_i16x8_any_true(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i32x4_any_true: + { + if (!aot_compile_simd_i32x4_any_true(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i8x16_all_true: + { + if (!aot_compile_simd_i8x16_all_true(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i16x8_all_true: + { + if (!aot_compile_simd_i16x8_all_true(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i32x4_all_true: + { + if (!aot_compile_simd_i32x4_all_true(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i8x16_bitmask: + { + if (!aot_compile_simd_i8x16_bitmask(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i16x8_bitmask: + { + if (!aot_compile_simd_i16x8_bitmask(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_i32x4_bitmask: + { + if (!aot_compile_simd_i32x4_bitmask(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i8x16_shl: + case SIMD_i8x16_shr_s: + case SIMD_i8x16_shr_u: + { + if (!aot_compile_simd_i8x16_shift(comp_ctx, func_ctx, + INT_SHL + opcode - SIMD_i8x16_shl)) + return false; + break; + } + case SIMD_i16x8_shl: + case SIMD_i16x8_shr_s: + case SIMD_i16x8_shr_u: + { + if (!aot_compile_simd_i16x8_shift(comp_ctx, func_ctx, + INT_SHL + opcode - SIMD_i16x8_shl)) + return false; + break; + } + case SIMD_i32x4_shl: + case SIMD_i32x4_shr_s: + case SIMD_i32x4_shr_u: + { + if (!aot_compile_simd_i32x4_shift(comp_ctx, func_ctx, + INT_SHL + opcode - SIMD_i32x4_shl)) + return false; + break; + } + case SIMD_i64x2_shl: + case SIMD_i64x2_shr_s: + case SIMD_i64x2_shr_u: + { + if (!aot_compile_simd_i64x2_shift(comp_ctx, func_ctx, + INT_SHL + opcode - SIMD_i64x2_shl)) + return false; + break; + } + + case SIMD_i8x16_narrow_i16x8_s: + case SIMD_i8x16_narrow_i16x8_u: + { + bool is_signed = (opcode == SIMD_i8x16_narrow_i16x8_s) + ? true : false; + if (!aot_compile_simd_i8x16_narrow_i16x8(comp_ctx, func_ctx, + is_signed)) + return false; + break; + } + case SIMD_i16x8_narrow_i32x4_s: + case SIMD_i16x8_narrow_i32x4_u: + { + bool is_signed = (opcode == SIMD_i16x8_narrow_i32x4_s) + ? true : false; + if (!aot_compile_simd_i16x8_narrow_i32x4(comp_ctx, func_ctx, + is_signed)) + return false; + break; + } + case SIMD_i16x8_widen_low_i8x16_s: + case SIMD_i16x8_widen_high_i8x16_s: + { + bool is_low = (opcode == SIMD_i16x8_widen_low_i8x16_s) + ? 
true : false; + if (!aot_compile_simd_i16x8_widen_i8x16(comp_ctx, func_ctx, + is_low, true)) + return false; + break; + } + case SIMD_i16x8_widen_low_i8x16_u: + case SIMD_i16x8_widen_high_i8x16_u: + { + bool is_low = (opcode == SIMD_i16x8_widen_low_i8x16_u) + ? true : false; + if (!aot_compile_simd_i16x8_widen_i8x16(comp_ctx, func_ctx, + is_low, false)) + return false; + break; + } + case SIMD_i32x4_widen_low_i16x8_s: + case SIMD_i32x4_widen_high_i16x8_s: + { + bool is_low = (opcode == SIMD_i32x4_widen_low_i16x8_s) + ? true : false; + if (!aot_compile_simd_i32x4_widen_i16x8(comp_ctx, func_ctx, + is_low, true)) + return false; + break; + } + case SIMD_i32x4_widen_low_i16x8_u: + case SIMD_i32x4_widen_high_i16x8_u: + { + bool is_low = (opcode == SIMD_i32x4_widen_low_i16x8_u) + ? true : false; + if (!aot_compile_simd_i32x4_widen_i16x8(comp_ctx, func_ctx, + is_low, false)) + return false; + break; + } + + case SIMD_i32x4_trunc_sat_f32x4_s: + case SIMD_i32x4_trunc_sat_f32x4_u: + { + bool is_signed = (opcode == SIMD_i32x4_trunc_sat_f32x4_s) + ? true : false; + if (!aot_compile_simd_i32x4_trunc_sat_f32x4(comp_ctx, func_ctx, + is_signed)) + return false; + break; + } + case SIMD_f32x4_convert_i32x4_s: + case SIMD_f32x4_convert_i32x4_u: + { + bool is_signed = (opcode == SIMD_f32x4_convert_i32x4_s) + ? true : false; + if (!aot_compile_simd_f32x4_convert_i32x4(comp_ctx, func_ctx, + is_signed)) + return false; + break; + } + + case SIMD_f32x4_add: + case SIMD_f32x4_sub: + case SIMD_f32x4_mul: + case SIMD_f32x4_div: + case SIMD_f32x4_min: + case SIMD_f32x4_max: + { + if (!aot_compile_simd_f32x4_arith(comp_ctx, func_ctx, + FLOAT_ADD + opcode - SIMD_f32x4_add)) + return false; + break; + } + case SIMD_f64x2_add: + case SIMD_f64x2_sub: + case SIMD_f64x2_mul: + case SIMD_f64x2_div: + case SIMD_f64x2_min: + case SIMD_f64x2_max: + { + if (!aot_compile_simd_f64x2_arith(comp_ctx, func_ctx, + FLOAT_ADD + opcode - SIMD_f64x2_add)) + return false; + break; + } + + case SIMD_f32x4_neg: + { + if (!aot_compile_simd_f32x4_neg(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_f64x2_neg: + { + if (!aot_compile_simd_f64x2_neg(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_f32x4_abs: + { + if (!aot_compile_simd_f32x4_abs(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_f64x2_abs: + { + if (!aot_compile_simd_f64x2_abs(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_f32x4_sqrt: + { + if (!aot_compile_simd_f32x4_sqrt(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_f64x2_sqrt: + { + if (!aot_compile_simd_f64x2_sqrt(comp_ctx, func_ctx)) + return false; + break; + } + + default: + break; + } + break; + } +#endif /* end of WASM_ENABLE_SIMD */ + default: aot_set_last_error("unsupported opcode"); break; diff --git a/core/iwasm/compilation/aot_compiler.h b/core/iwasm/compilation/aot_compiler.h index b44ab4ddf..88bccc552 100644 --- a/core/iwasm/compilation/aot_compiler.h +++ b/core/iwasm/compilation/aot_compiler.h @@ -46,12 +46,35 @@ typedef enum IntArithmetic { INT_REM_U } IntArithmetic; +typedef enum V128Arithmetic { + V128_ADD = 0, + V128_ADD_SATURATE_S, + V128_ADD_SATURATE_U, + V128_SUB, + V128_SUB_SATURATE_S, + V128_SUB_SATURATE_U, + V128_MUL, + V128_DIV, + V128_NEG, + V128_MIN, + V128_MAX, +} V128Arithmetic; + typedef enum IntBitwise { INT_AND = 0, INT_OR, INT_XOR, } IntBitwise; +typedef enum V128Bitwise { + V128_NOT, + V128_AND, + V128_ANDNOT, + V128_OR, + V128_XOR, + V128_BITSELECT +} V128Bitwise; + typedef enum IntShift { INT_SHL = 0, INT_SHR_S, @@ -123,6 
+146,7 @@ typedef enum FloatArithmetic { #define POP_I64(v) POP(v, VALUE_TYPE_I64) #define POP_F32(v) POP(v, VALUE_TYPE_F32) #define POP_F64(v) POP(v, VALUE_TYPE_F64) +#define POP_V128(v) POP(v, VALUE_TYPE_V128) #define POP_COND(llvm_value) do { \ AOTValue *aot_value; \ @@ -172,6 +196,7 @@ typedef enum FloatArithmetic { #define PUSH_I64(v) PUSH(v, VALUE_TYPE_I64) #define PUSH_F32(v) PUSH(v, VALUE_TYPE_F32) #define PUSH_F64(v) PUSH(v, VALUE_TYPE_F64) +#define PUSH_V128(v) PUSH(v, VALUE_TYPE_V128) #define PUSH_COND(v) PUSH(v, VALUE_TYPE_I1) #define TO_LLVM_TYPE(wasm_type) \ @@ -218,6 +243,36 @@ typedef enum FloatArithmetic { #define I64_63 (comp_ctx->llvm_consts.i64_63) #define I64_64 (comp_ctx->llvm_consts.i64_64) +#define V128_TYPE comp_ctx->basic_types.v128_type +#define V128_PTR_TYPE comp_ctx->basic_types.v128_ptr_type +#define V128_i8x16_TYPE comp_ctx->basic_types.i8x16_vec_type +#define V128_i16x8_TYPE comp_ctx->basic_types.i16x8_vec_type +#define V128_i32x4_TYPE comp_ctx->basic_types.i32x4_vec_type +#define V128_i64x2_TYPE comp_ctx->basic_types.i64x2_vec_type +#define V128_f32x4_TYPE comp_ctx->basic_types.f32x4_vec_type +#define V128_f64x2_TYPE comp_ctx->basic_types.f64x2_vec_type + +#define V128_ZERO (comp_ctx->llvm_consts.v128_zero) +#define V128_i8x16_ZERO (comp_ctx->llvm_consts.i8x16_vec_zero) +#define V128_i16x8_ZERO (comp_ctx->llvm_consts.i16x8_vec_zero) +#define V128_i32x4_ZERO (comp_ctx->llvm_consts.i32x4_vec_zero) +#define V128_i64x2_ZERO (comp_ctx->llvm_consts.i64x2_vec_zero) +#define V128_f32x4_ZERO (comp_ctx->llvm_consts.f32x4_vec_zero) +#define V128_f64x2_ZERO (comp_ctx->llvm_consts.f64x2_vec_zero) + +#define TO_V128_i8x16(v) LLVMBuildBitCast(comp_ctx->builder, v, \ + V128_i8x16_TYPE, "i8x16_val") +#define TO_V128_i16x8(v) LLVMBuildBitCast(comp_ctx->builder, v, \ + V128_i16x8_TYPE, "i16x8_val") +#define TO_V128_i32x4(v) LLVMBuildBitCast(comp_ctx->builder, v, \ + V128_i32x4_TYPE, "i32x4_val") +#define TO_V128_i64x2(v) LLVMBuildBitCast(comp_ctx->builder, v, \ + V128_i64x2_TYPE, "i64x2_val") +#define TO_V128_f32x4(v) LLVMBuildBitCast(comp_ctx->builder, v, \ + V128_f32x4_TYPE, "f32x4_val") +#define TO_V128_f64x2(v) LLVMBuildBitCast(comp_ctx->builder, v, \ + V128_f64x2_TYPE, "f64x2_val") + #define CHECK_LLVM_CONST(v) do { \ if (!v) { \ aot_set_last_error("create llvm const failed."); \ diff --git a/core/iwasm/compilation/aot_emit_aot_file.c b/core/iwasm/compilation/aot_emit_aot_file.c index 98311ec92..3a5641de6 100644 --- a/core/iwasm/compilation/aot_emit_aot_file.c +++ b/core/iwasm/compilation/aot_emit_aot_file.c @@ -299,9 +299,14 @@ get_import_global_info_size(AOTCompData *comp_data) static uint32 get_global_size(AOTGlobal *global) { - /* type (1 byte) + is_mutable (1 byte) - + init expr type (2 byes) + init expr value (8 byes) */ - return sizeof(uint8) * 2 + sizeof(uint16) + sizeof(uint64); + if (global->init_expr.init_expr_type != INIT_EXPR_TYPE_V128_CONST) + /* type (1 byte) + is_mutable (1 byte) + + init expr type (2 byes) + init expr value (8 byes) */ + return sizeof(uint8) * 2 + sizeof(uint16) + sizeof(uint64); + else + /* type (1 byte) + is_mutable (1 byte) + + init expr type (2 byes) + v128 value (16 byes) */ + return sizeof(uint8) * 2 + sizeof(uint16) + sizeof(uint64) * 2; } static uint32 @@ -800,10 +805,28 @@ exchange_uint32(uint8 *p_data) static void exchange_uint64(uint8 *pData) { + uint32 value; + + value = *(uint32 *)pData; + *(uint32 *)pData = *(uint32 *)(pData + 4); + *(uint32 *)(pData + 4) = value; exchange_uint32(pData); exchange_uint32(pData + 4); } 
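[Editor's note] The byte-order helpers in this hunk follow one pattern: exchange_uint64 gains the 32-bit half swap (making it a full 64-bit byte reversal), and the new exchange_uint128 swaps the two 64-bit halves and then byte-reverses each, mirroring wasm_runtime_read_v128 on the loading side. A minimal standalone sketch of the same 128-bit reversal, using plain stdint types and hypothetical helper names rather than the runtime's typedefs:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Reverse the byte order of a 4-byte value in place. */
static void swap_u32(uint8_t *p)
{
    uint8_t t = p[0]; p[0] = p[3]; p[3] = t;
    t = p[1]; p[1] = p[2]; p[2] = t;
}

/* Reverse an 8-byte value: swap the two 32-bit halves, then reverse each half. */
static void swap_u64(uint8_t *p)
{
    uint32_t v;
    memcpy(&v, p, 4);
    memcpy(p, p + 4, 4);
    memcpy(p + 4, &v, 4);
    swap_u32(p);
    swap_u32(p + 4);
}

/* Reverse a 16-byte (v128) value: swap the two 64-bit halves, then reverse each half. */
static void swap_u128(uint8_t *p)
{
    uint64_t v;
    memcpy(&v, p, 8);
    memcpy(p, p + 8, 8);
    memcpy(p + 8, &v, 8);
    swap_u64(p);
    swap_u64(p + 8);
}

int main(void)
{
    uint8_t v128[16];
    for (int i = 0; i < 16; i++)
        v128[i] = (uint8_t)i;       /* 00 01 02 ... 0f */
    swap_u128(v128);
    for (int i = 0; i < 16; i++)
        printf("%02x ", v128[i]);   /* prints 0f 0e 0d ... 00, a full 16-byte reversal */
    printf("\n");
    return 0;
}
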
+static void +exchange_uint128(uint8 *pData) +{ + /* swap high 64bit and low 64bit */ + uint64 value = *(uint64*)pData; + *(uint64*)pData = *(uint64*)(pData + 8); + *(uint64*)(pData + 8) = value; + /* exchange high 64bit */ + exchange_uint64(pData); + /* exchange low 64bit */ + exchange_uint64(pData + 8); +} + static union { int a; char b; @@ -851,6 +874,17 @@ static union { offset += (uint32)sizeof(uint64); \ } while (0) +#define EMIT_V128(v) do { \ + uint64 *t = (uint64*)v.i64x2; \ + CHECK_BUF(16); \ + if (!is_little_endian()) \ + exchange_uint128((uint8 *)&t); \ + PUT_U64_TO_ADDR(buf + offset, t[0]); \ + offset += (uint32)sizeof(uint64); \ + PUT_U64_TO_ADDR(buf + offset, t[1]); \ + offset += (uint32)sizeof(uint64); \ + } while (0) + #define EMIT_BUF(v, len) do { \ CHECK_BUF(len); \ memcpy(buf + offset, v, len); \ @@ -1093,7 +1127,10 @@ aot_emit_global_info(uint8 *buf, uint8 *buf_end, uint32 *p_offset, EMIT_U8(global->type); EMIT_U8(global->is_mutable); EMIT_U16(global->init_expr.init_expr_type); - EMIT_U64(global->init_expr.u.i64); + if (global->init_expr.init_expr_type != INIT_EXPR_TYPE_V128_CONST) + EMIT_U64(global->init_expr.u.i64); + else + EMIT_V128(global->init_expr.u.v128); } if (offset - *p_offset != get_global_info_size(comp_data)) { diff --git a/core/iwasm/compilation/aot_emit_control.c b/core/iwasm/compilation/aot_emit_control.c index a2799a7e5..783fb527d 100644 --- a/core/iwasm/compilation/aot_emit_control.c +++ b/core/iwasm/compilation/aot_emit_control.c @@ -96,11 +96,17 @@ format_block_name(char *name, uint32 name_size, } \ } while (0) -#define ADD_TO_RESULT_PHIS(block, value, idx) do { \ - LLVMBasicBlockRef block_curr = CURR_BLOCK(); \ - LLVMAddIncoming(block->result_phis[idx], \ - &value, &block_curr, 1); \ - } while (0) +#define ADD_TO_RESULT_PHIS(block, value, idx) do { \ + LLVMBasicBlockRef block_curr = CURR_BLOCK(); \ + LLVMTypeRef phi_ty = LLVMTypeOf(block->result_phis[idx]); \ + LLVMTypeRef value_ty = LLVMTypeOf(value); \ + bh_assert(LLVMGetTypeKind(phi_ty) == LLVMGetTypeKind(value_ty)); \ + bh_assert(LLVMGetTypeContext(phi_ty) \ + == LLVMGetTypeContext(value_ty)); \ + LLVMAddIncoming(block->result_phis[idx], &value, &block_curr, 1); \ + (void)phi_ty; \ + (void)value_ty; \ + } while (0) #define BUILD_ICMP(op, left, right, res, name) do { \ if (!(res = LLVMBuildICmp(comp_ctx->builder, op, \ @@ -686,24 +692,8 @@ check_suspend_flags(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) /* Move builder to terminate block */ SET_BUILDER_POS(terminate_block); - if (aot_func_type->result_count) { - switch (aot_func_type->types[aot_func_type->param_count]) { - case VALUE_TYPE_I32: - LLVMBuildRet(comp_ctx->builder, I32_ZERO); - break; - case VALUE_TYPE_I64: - LLVMBuildRet(comp_ctx->builder, I64_ZERO); - break; - case VALUE_TYPE_F32: - LLVMBuildRet(comp_ctx->builder, F32_ZERO); - break; - case VALUE_TYPE_F64: - LLVMBuildRet(comp_ctx->builder, F64_ZERO); - break; - } - } - else { - LLVMBuildRetVoid(comp_ctx->builder); + if (!aot_build_zero_function_ret(comp_ctx, aot_func_type)) { + goto fail; } /* Move builder to terminate block */ diff --git a/core/iwasm/compilation/aot_emit_exception.c b/core/iwasm/compilation/aot_emit_exception.c index 62911189b..3f5b6548c 100644 --- a/core/iwasm/compilation/aot_emit_exception.c +++ b/core/iwasm/compilation/aot_emit_exception.c @@ -53,10 +53,8 @@ aot_emit_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, func_ctx->got_exception_block); /* Create exection id phi */ - if (!(func_ctx->exception_id_phi = - 
LLVMBuildPhi(comp_ctx->builder, - comp_ctx->basic_types.int32_type, - "exception_id_phi"))) { + if (!(func_ctx->exception_id_phi = LLVMBuildPhi( + comp_ctx->builder, I32_TYPE, "exception_id_phi"))) { aot_set_last_error("llvm build phi failed."); return false; } @@ -110,24 +108,8 @@ aot_emit_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* Create return IR */ AOTFuncType *aot_func_type = func_ctx->aot_func->func_type; - if (aot_func_type->result_count) { - switch (aot_func_type->types[aot_func_type->param_count]) { - case VALUE_TYPE_I32: - LLVMBuildRet(comp_ctx->builder, I32_ZERO); - break; - case VALUE_TYPE_I64: - LLVMBuildRet(comp_ctx->builder, I64_ZERO); - break; - case VALUE_TYPE_F32: - LLVMBuildRet(comp_ctx->builder, F32_ZERO); - break; - case VALUE_TYPE_F64: - LLVMBuildRet(comp_ctx->builder, F64_ZERO); - break; - } - } - else { - LLVMBuildRetVoid(comp_ctx->builder); + if (!aot_build_zero_function_ret(comp_ctx, aot_func_type)) { + return false; } /* Resume the builder position */ diff --git a/core/iwasm/compilation/aot_emit_function.c b/core/iwasm/compilation/aot_emit_function.c index 054275115..dd701d89e 100644 --- a/core/iwasm/compilation/aot_emit_function.c +++ b/core/iwasm/compilation/aot_emit_function.c @@ -25,24 +25,8 @@ create_func_return_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) /* Create return IR */ LLVMPositionBuilderAtEnd(comp_ctx->builder, func_ctx->func_return_block); - if (aot_func_type->result_count) { - switch (aot_func_type->types[aot_func_type->param_count]) { - case VALUE_TYPE_I32: - LLVMBuildRet(comp_ctx->builder, I32_ZERO); - break; - case VALUE_TYPE_I64: - LLVMBuildRet(comp_ctx->builder, I64_ZERO); - break; - case VALUE_TYPE_F32: - LLVMBuildRet(comp_ctx->builder, F32_ZERO); - break; - case VALUE_TYPE_F64: - LLVMBuildRet(comp_ctx->builder, F64_ZERO); - break; - } - } - else { - LLVMBuildRetVoid(comp_ctx->builder); + if (!aot_build_zero_function_ret(comp_ctx, aot_func_type)) { + return false; } } diff --git a/core/iwasm/compilation/aot_emit_memory.c b/core/iwasm/compilation/aot_emit_memory.c index 68ed1ef7e..f717b02c6 100644 --- a/core/iwasm/compilation/aot_emit_memory.c +++ b/core/iwasm/compilation/aot_emit_memory.c @@ -53,6 +53,9 @@ get_memory_check_bound(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, case 8: mem_check_bound = func_ctx->mem_info[0].mem_bound_check_8bytes; break; + case 16: + mem_check_bound = func_ctx->mem_info[0].mem_bound_check_16bytes; + break; default: bh_assert(0); return NULL; @@ -73,9 +76,9 @@ get_memory_check_bound(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, static LLVMValueRef get_memory_size(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); -static LLVMValueRef -check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint32 offset, uint32 bytes) +LLVMValueRef +aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + uint32 offset, uint32 bytes) { LLVMValueRef offset_const = I32_CONST(offset); LLVMValueRef addr, maddr, offset1, cmp1, cmp2, cmp; @@ -348,7 +351,7 @@ aot_compile_op_i32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, { LLVMValueRef maddr, value = NULL; - if (!(maddr = check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) return false; switch (bytes) { @@ -400,7 +403,7 @@ aot_compile_op_i64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, { LLVMValueRef maddr, value = NULL; - if (!(maddr = check_memory_overflow(comp_ctx, func_ctx, 
offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) return false; switch (bytes) { @@ -454,7 +457,7 @@ aot_compile_op_f32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, { LLVMValueRef maddr, value; - if (!(maddr = check_memory_overflow(comp_ctx, func_ctx, offset, 4))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4))) return false; BUILD_PTR_CAST(F32_PTR_TYPE); @@ -471,7 +474,7 @@ aot_compile_op_f64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, { LLVMValueRef maddr, value; - if (!(maddr = check_memory_overflow(comp_ctx, func_ctx, offset, 8))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8))) return false; BUILD_PTR_CAST(F64_PTR_TYPE); @@ -490,7 +493,7 @@ aot_compile_op_i32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, POP_I32(value); - if (!(maddr = check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) return false; switch (bytes) { @@ -529,7 +532,7 @@ aot_compile_op_i64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, POP_I64(value); - if (!(maddr = check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) return false; switch (bytes) { @@ -572,7 +575,7 @@ aot_compile_op_f32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, POP_F32(value); - if (!(maddr = check_memory_overflow(comp_ctx, func_ctx, offset, 4))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4))) return false; BUILD_PTR_CAST(F32_PTR_TYPE); @@ -590,7 +593,7 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, POP_F64(value); - if (!(maddr = check_memory_overflow(comp_ctx, func_ctx, offset, 8))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8))) return false; BUILD_PTR_CAST(F64_PTR_TYPE); @@ -877,24 +880,8 @@ aot_compile_op_memory_init(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* If memory.init failed, return this function so the runtime can catch the exception */ LLVMPositionBuilderAtEnd(comp_ctx->builder, mem_init_fail); - if (aot_func_type->result_count) { - switch (aot_func_type->types[aot_func_type->param_count]) { - case VALUE_TYPE_I32: - LLVMBuildRet(comp_ctx->builder, I32_ZERO); - break; - case VALUE_TYPE_I64: - LLVMBuildRet(comp_ctx->builder, I64_ZERO); - break; - case VALUE_TYPE_F32: - LLVMBuildRet(comp_ctx->builder, F32_ZERO); - break; - case VALUE_TYPE_F64: - LLVMBuildRet(comp_ctx->builder, F64_ZERO); - break; - } - } - else { - LLVMBuildRetVoid(comp_ctx->builder); + if (!aot_build_zero_function_ret(comp_ctx, aot_func_type)) { + goto fail; } LLVMPositionBuilderAtEnd(comp_ctx->builder, init_success); @@ -1002,7 +989,7 @@ aot_compile_op_atomic_rmw(AOTCompContext *comp_ctx, else POP_I64(value); - if (!(maddr = check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) return false; if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align)) @@ -1076,7 +1063,7 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx, POP_I64(expect); } - if (!(maddr = check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) return false; if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align)) @@ -1175,7 +1162,7 @@ aot_compile_op_atomic_wait(AOTCompContext *comp_ctx, AOTFuncContext 
*func_ctx, CHECK_LLVM_CONST(is_wait64); - if (!(maddr = check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) return false; if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align)) @@ -1219,24 +1206,8 @@ aot_compile_op_atomic_wait(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* If atomic wait failed, return this function so the runtime can catch the exception */ LLVMPositionBuilderAtEnd(comp_ctx->builder, wait_fail); - if (aot_func_type->result_count) { - switch (aot_func_type->types[aot_func_type->param_count]) { - case VALUE_TYPE_I32: - LLVMBuildRet(comp_ctx->builder, I32_ZERO); - break; - case VALUE_TYPE_I64: - LLVMBuildRet(comp_ctx->builder, I64_ZERO); - break; - case VALUE_TYPE_F32: - LLVMBuildRet(comp_ctx->builder, F32_ZERO); - break; - case VALUE_TYPE_F64: - LLVMBuildRet(comp_ctx->builder, F64_ZERO); - break; - } - } - else { - LLVMBuildRetVoid(comp_ctx->builder); + if (!aot_build_zero_function_ret(comp_ctx, aot_func_type)) { + goto fail; } LLVMPositionBuilderAtEnd(comp_ctx->builder, wait_success); @@ -1259,7 +1230,7 @@ aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx, POP_I32(count); - if (!(maddr = check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) return false; if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align)) diff --git a/core/iwasm/compilation/aot_emit_memory.h b/core/iwasm/compilation/aot_emit_memory.h index 82465ae37..f505a23ea 100644 --- a/core/iwasm/compilation/aot_emit_memory.h +++ b/core/iwasm/compilation/aot_emit_memory.h @@ -49,6 +49,10 @@ bool aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset); +LLVMValueRef +aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + uint32 offset, uint32 bytes); + bool aot_compile_op_memory_size(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); diff --git a/core/iwasm/compilation/aot_emit_numberic.c b/core/iwasm/compilation/aot_emit_numberic.c index d686dead6..f54140da8 100644 --- a/core/iwasm/compilation/aot_emit_numberic.c +++ b/core/iwasm/compilation/aot_emit_numberic.c @@ -135,110 +135,6 @@ } while (0) -static LLVMValueRef -__call_llvm_intrinsic(AOTCompContext *comp_ctx, - const char *name, - LLVMTypeRef ret_type, - LLVMTypeRef *param_types, - int param_count, - LLVMValueRef *param_values) -{ - LLVMValueRef func, ret; - LLVMTypeRef func_type; - - /* Declare llvm intrinsic function if necessary */ - if (!(func = LLVMGetNamedFunction(comp_ctx->module, name))) { - if (!(func_type = - LLVMFunctionType(ret_type, param_types, (uint32)param_count, false))) { - aot_set_last_error("create LLVM function type failed."); - return NULL; - } - - if (!(func = LLVMAddFunction(comp_ctx->module, name, func_type))) { - aot_set_last_error("add LLVM function failed."); - return NULL; - } - } - - /* Call the LLVM intrinsic function */ - if (!(ret = LLVMBuildCall(comp_ctx->builder, func, param_values, - (uint32)param_count, "call"))) { - aot_set_last_error("llvm build call failed."); - return NULL; - } - - return ret; -} - -static LLVMValueRef -call_llvm_intrinsic(AOTCompContext *comp_ctx, - const char *name, - LLVMTypeRef ret_type, - LLVMTypeRef *param_types, - int param_count, - ...) 
-{ - LLVMValueRef *param_values, ret; - va_list argptr; - uint64 total_size; - int i = 0; - - /* Create param values */ - total_size = sizeof(LLVMValueRef) * (uint64)param_count; - if (total_size >= UINT32_MAX - || !(param_values = wasm_runtime_malloc((uint32)total_size))) { - aot_set_last_error("allocate memory for param values failed."); - return false; - } - - /* Load each param value */ - va_start(argptr, param_count); - while (i < param_count) - param_values[i++] = va_arg(argptr, LLVMValueRef); - va_end(argptr); - - ret = __call_llvm_intrinsic(comp_ctx, name, ret_type, - param_types, param_count, - param_values); - - wasm_runtime_free(param_values); - - return ret; -} - -static LLVMValueRef -call_llvm_intrinsic_v(AOTCompContext *comp_ctx, - const char *name, - LLVMTypeRef ret_type, - LLVMTypeRef *param_types, - int param_count, - va_list param_value_list) -{ - LLVMValueRef *param_values, ret; - uint64 total_size; - int i = 0; - - /* Create param values */ - total_size = sizeof(LLVMValueRef) * (uint64)param_count; - if (total_size >= UINT32_MAX - || !(param_values = wasm_runtime_malloc((uint32)total_size))) { - aot_set_last_error("allocate memory for param values failed."); - return false; - } - - /* Load each param value */ - while (i < param_count) - param_values[i++] = va_arg(param_value_list, LLVMValueRef); - - ret = __call_llvm_intrinsic(comp_ctx, name, ret_type, - param_types, param_count, - param_values); - - wasm_runtime_free(param_values); - - return ret; -} - /* Call llvm constrained floating-point intrinsic */ static LLVMValueRef call_llvm_float_experimental_constrained_intrinsic(AOTCompContext *comp_ctx, @@ -255,12 +151,8 @@ call_llvm_float_experimental_constrained_intrinsic(AOTCompContext *comp_ctx, va_start(param_value_list, intrinsic); - ret = call_llvm_intrinsic_v(comp_ctx, - intrinsic, - ret_type, - param_types, - 4, - param_value_list); + ret = aot_call_llvm_intrinsic_v(comp_ctx, intrinsic, ret_type, param_types, + 4, param_value_list); va_end(param_value_list); @@ -283,12 +175,8 @@ call_llvm_libm_experimental_constrained_intrinsic(AOTCompContext *comp_ctx, va_start(param_value_list, intrinsic); - ret = call_llvm_intrinsic_v(comp_ctx, - intrinsic, - ret_type, - param_types, - 3, - param_value_list); + ret = aot_call_llvm_intrinsic_v(comp_ctx, intrinsic, ret_type, param_types, + 3, param_value_list); va_end(param_value_list); @@ -342,13 +230,8 @@ compile_op_float_min_max(AOTCompContext *comp_ctx, return NULL; } - if (!(cmp = call_llvm_intrinsic(comp_ctx, - intrinsic, - ret_type, - param_types, - 2, - left, - right))) + if (!(cmp = aot_call_llvm_intrinsic(comp_ctx, intrinsic, ret_type, + param_types, 2, left, right))) return NULL; if (!(cmp = LLVMBuildSelect(comp_ctx->builder, @@ -406,21 +289,21 @@ aot_compile_int_bit_count(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* Call the LLVM intrinsic function */ if (type < POP_CNT32) - DEF_INT_UNARY_OP(call_llvm_intrinsic(comp_ctx, - bit_cnt_llvm_intrinsic[type], - ret_type, - param_types, - 2, - operand, - zero_undef), + DEF_INT_UNARY_OP(aot_call_llvm_intrinsic(comp_ctx, + bit_cnt_llvm_intrinsic[type], + ret_type, + param_types, + 2, + operand, + zero_undef), NULL); else - DEF_INT_UNARY_OP(call_llvm_intrinsic(comp_ctx, - bit_cnt_llvm_intrinsic[type], - ret_type, - param_types, - 1, - operand), + DEF_INT_UNARY_OP(aot_call_llvm_intrinsic(comp_ctx, + bit_cnt_llvm_intrinsic[type], + ret_type, + param_types, + 1, + operand), NULL); return true; @@ -1032,12 +915,8 @@ call_llvm_float_math_intrinsic(AOTCompContext *comp_ctx, 
va_start(param_value_list, intrinsic); - ret = call_llvm_intrinsic_v(comp_ctx, - intrinsic, - ret_type, - ¶m_type, - 1, - param_value_list); + ret = aot_call_llvm_intrinsic_v(comp_ctx, intrinsic, ret_type, ¶m_type, + 1, param_value_list); va_end(param_value_list); @@ -1133,14 +1012,14 @@ compile_float_copysign(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, param_types[0] = param_types[1] = ret_type = is_f32 ? F32_TYPE : F64_TYPE; - DEF_FP_BINARY_OP(call_llvm_intrinsic(comp_ctx, - is_f32 ? "llvm.copysign.f32" : - "llvm.copysign.f64", - ret_type, - param_types, - 2, - left, - right), + DEF_FP_BINARY_OP(aot_call_llvm_intrinsic(comp_ctx, + is_f32 ? "llvm.copysign.f32" : + "llvm.copysign.f64", + ret_type, + param_types, + 2, + left, + right), NULL); return true; diff --git a/core/iwasm/compilation/aot_emit_parametric.c b/core/iwasm/compilation/aot_emit_parametric.c index dc81ad53e..498e9e8a6 100644 --- a/core/iwasm/compilation/aot_emit_parametric.c +++ b/core/iwasm/compilation/aot_emit_parametric.c @@ -46,7 +46,8 @@ pop_value_from_wasm_stack(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, wasm_runtime_free(aot_value); if ((is_32 - && (type != VALUE_TYPE_I32 && type != VALUE_TYPE_F32)) + && (type != VALUE_TYPE_I32 && type != VALUE_TYPE_F32 + && type != VALUE_TYPE_V128)) || (!is_32 && (type != VALUE_TYPE_I64 && type != VALUE_TYPE_F64))) { aot_set_last_error("invalid WASM stack data type."); diff --git a/core/iwasm/compilation/aot_emit_variable.c b/core/iwasm/compilation/aot_emit_variable.c index f9e18a5fd..78d39f7d3 100644 --- a/core/iwasm/compilation/aot_emit_variable.c +++ b/core/iwasm/compilation/aot_emit_variable.c @@ -116,7 +116,7 @@ compile_global(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + sizeof(AOTMemoryInstance) * comp_ctx->comp_data->memory_count; uint32 global_offset; uint8 global_type; - LLVMValueRef offset, global_ptr, global; + LLVMValueRef offset, global_ptr, global, res; LLVMTypeRef ptr_type = NULL; bh_assert(global_idx < import_global_count + comp_data->global_count); @@ -153,6 +153,9 @@ compile_global(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, case VALUE_TYPE_F64: ptr_type = comp_ctx->basic_types.float64_ptr_type; break; + case VALUE_TYPE_V128: + ptr_type = comp_ctx->basic_types.v128_ptr_type; + break; default: bh_assert(0); break; @@ -170,14 +173,19 @@ compile_global(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, aot_set_last_error("llvm build load failed."); return false; } + /* All globals' data is 4-byte aligned */ + LLVMSetAlignment(global, 4); PUSH(global, global_type); } else { POP(global, global_type); - if (!LLVMBuildStore(comp_ctx->builder, global, global_ptr)) { + if (!(res = LLVMBuildStore(comp_ctx->builder, + global, global_ptr))) { aot_set_last_error("llvm build store failed."); return false; } + /* All globals' data is 4-byte aligned */ + LLVMSetAlignment(res, 4); } return true; diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index e81e4602c..371f69cdb 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -21,6 +21,10 @@ wasm_type_to_llvm_type(AOTLLVMTypes *llvm_types, uint8 wasm_type) return llvm_types->float32_type; case VALUE_TYPE_F64: return llvm_types->float64_type; +#if WASM_ENABLE_SIMD != 0 + case VALUE_TYPE_V128: + return llvm_types->i64x2_vec_type; +#endif case VALUE_TYPE_VOID: return llvm_types->void_type; } @@ -444,6 +448,31 @@ create_memory_info(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } } + offset = I32_CONST(offsetof(AOTMemoryInstance, 
mem_bound_check_16bytes) + - offsetof(AOTMemoryInstance, memory_data.ptr)); + if (!(func_ctx->mem_info[0].mem_bound_check_16bytes = + LLVMBuildInBoundsGEP(comp_ctx->builder, mem_info_base, + &offset, 1, "bound_check_16bytes_offset"))) { + aot_set_last_error("llvm build in bounds gep failed"); + return false; + } + if (!(func_ctx->mem_info[0].mem_bound_check_16bytes = + LLVMBuildBitCast(comp_ctx->builder, + func_ctx->mem_info[0].mem_bound_check_16bytes, + bound_check_type, "bound_check_16bytes_ptr"))) { + aot_set_last_error("llvm build bit cast failed"); + return false; + } + if (mem_space_unchanged) { + if (!(func_ctx->mem_info[0].mem_bound_check_16bytes = + LLVMBuildLoad(comp_ctx->builder, + func_ctx->mem_info[0].mem_bound_check_16bytes, + "bound_check_16bytes"))) { + aot_set_last_error("llvm build load failed"); + return false; + } + } + return true; } @@ -676,6 +705,11 @@ aot_create_func_context(AOTCompData *comp_data, AOTCompContext *comp_ctx, case VALUE_TYPE_F64: local_value = F64_ZERO; break; +#if WASM_ENABLE_SIMD != 0 + case VALUE_TYPE_V128: + local_value = V128_ZERO; + break; +#endif default: bh_assert(0); break; @@ -814,23 +848,55 @@ aot_set_llvm_basic_types(AOTLLVMTypes *basic_types, LLVMContextRef context) basic_types->float32_ptr_type = LLVMPointerType(basic_types->float32_type, 0); basic_types->float64_ptr_type = LLVMPointerType(basic_types->float64_type, 0); + basic_types->i8x16_vec_type = LLVMVectorType(basic_types->int8_type, 16); + basic_types->i16x8_vec_type = LLVMVectorType(basic_types->int16_type, 8); + basic_types->i32x4_vec_type = LLVMVectorType(basic_types->int32_type, 4); + basic_types->i64x2_vec_type = LLVMVectorType(basic_types->int64_type, 2); + basic_types->f32x4_vec_type = LLVMVectorType(basic_types->float32_type, 4); + basic_types->f64x2_vec_type = LLVMVectorType(basic_types->float64_type, 2); + + basic_types->v128_type = basic_types->i64x2_vec_type; + basic_types->v128_ptr_type = LLVMPointerType(basic_types->v128_type, 0); + return (basic_types->int8_ptr_type && basic_types->int16_ptr_type && basic_types->int32_ptr_type && basic_types->int64_ptr_type && basic_types->float32_ptr_type && basic_types->float64_ptr_type + && basic_types->i8x16_vec_type + && basic_types->i16x8_vec_type + && basic_types->i32x4_vec_type + && basic_types->i64x2_vec_type + && basic_types->f32x4_vec_type + && basic_types->f64x2_vec_type && basic_types->meta_data_type) ? 
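/*
 * Illustrative model, not WAMR code: a v128 value is a single 128-bit pattern
 * that is reinterpreted into whichever lane shape an opcode needs, mirroring
 * how the compiler stores v128 as <2 x i64> (v128_type aliases
 * i64x2_vec_type) and bitcasts it to <16 x i8>, <8 x i16>, <4 x i32>,
 * <4 x float> or <2 x double> on demand.
 */
#include <stdint.h>

typedef union {
    uint8_t  i8x16[16];
    uint16_t i16x8[8];
    uint32_t i32x4[4];
    uint64_t i64x2[2];
    float    f32x4[4];
    double   f64x2[2];
} v128_model;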
true : false; } static bool aot_create_llvm_consts(AOTLLVMConsts *consts, AOTCompContext *comp_ctx) { + LLVMValueRef i64_consts[2]; + consts->i8_zero = I8_CONST(0); consts->i32_zero = I32_CONST(0); consts->i64_zero = I64_CONST(0); consts->f32_zero = F32_CONST(0); consts->f64_zero = F64_CONST(0); + + if (consts->i64_zero) { + i64_consts[0] = i64_consts[1] = consts->i64_zero; + consts->v128_zero = consts->i64x2_vec_zero = + LLVMConstVector(i64_consts, 2); + if (consts->i64x2_vec_zero) { + consts->i8x16_vec_zero = TO_V128_i8x16(consts->i64x2_vec_zero); + consts->i16x8_vec_zero = TO_V128_i16x8(consts->i64x2_vec_zero); + consts->i32x4_vec_zero = TO_V128_i32x4(consts->i64x2_vec_zero); + consts->f32x4_vec_zero = TO_V128_f32x4(consts->i64x2_vec_zero); + consts->f64x2_vec_zero = TO_V128_f64x2(consts->i64x2_vec_zero); + } + } + consts->i32_one = I32_CONST(1); consts->i32_two = I32_CONST(2); consts->i32_three = I32_CONST(3); @@ -850,6 +916,12 @@ aot_create_llvm_consts(AOTLLVMConsts *consts, AOTCompContext *comp_ctx) && consts->i64_zero && consts->f32_zero && consts->f64_zero + && consts->i8x16_vec_zero + && consts->i16x8_vec_zero + && consts->i32x4_vec_zero + && consts->i64x2_vec_zero + && consts->f32x4_vec_zero + && consts->f64x2_vec_zero && consts->i32_one && consts->i32_two && consts->i32_three @@ -1014,7 +1086,7 @@ aot_create_comp_context(AOTCompData *comp_data, /*LLVMTypeRef elem_types[8];*/ struct LLVMMCJITCompilerOptions jit_options; LLVMTargetRef target; - char *triple = NULL, *triple_jit = NULL, *triple_norm, *arch, *abi; + char *triple = NULL, *triple_norm, *arch, *abi; char *cpu = NULL, *features, buf[128]; char *triple_norm_new = NULL, *cpu_new = NULL; char *err = NULL, *fp_round= "round.tonearest", *fp_exce = "fpexcept.strict"; @@ -1065,7 +1137,12 @@ aot_create_comp_context(AOTCompData *comp_data, if (option->enable_tail_call) comp_ctx->enable_tail_call = true; + if (option->enable_simd) + comp_ctx->enable_simd = true; + if (option->is_jit_mode) { + char *triple_jit = NULL; + /* Create LLVM execution engine */ LLVMInitializeMCJITCompilerOptions(&jit_options, sizeof(jit_options)); jit_options.OptLevel = LLVMCodeGenLevelAggressive; @@ -1186,7 +1263,8 @@ aot_create_comp_context(AOTCompData *comp_data, if (!cpu) cpu = ""; } - else { /* triple is NULL, cpu isn't NULL */ + else { + /* triple is NULL, cpu isn't NULL */ snprintf(buf, sizeof(buf), "target isn't specified for cpu %s.", cpu); aot_set_last_error(buf); @@ -1283,6 +1361,23 @@ aot_create_comp_context(AOTCompData *comp_data, } } + if (option->enable_simd) { + char *tmp; + bool ret; + + if (!(tmp = LLVMGetTargetMachineCPU(comp_ctx->target_machine))) { + aot_set_last_error("get CPU from Target Machine fail"); + goto fail; + } + + ret = aot_check_simd_compatibility(comp_ctx->target_arch, tmp); + LLVMDisposeMessage(tmp); + if (!ret) { + aot_set_last_error("SIMD compatibility check failed"); + goto fail; + } + } + if (!(target_data_ref = LLVMCreateTargetDataLayout(comp_ctx->target_machine))) { aot_set_last_error("create LLVM target data layout failed."); @@ -1349,11 +1444,13 @@ aot_create_comp_context(AOTCompData *comp_data, fail: if (triple_norm_new) LLVMDisposeMessage(triple_norm_new); + if (cpu_new) LLVMDisposeMessage(cpu_new); if (!ret) aot_destroy_comp_context(comp_ctx); + return ret; } @@ -1567,3 +1664,144 @@ aot_checked_addr_list_destroy(AOTFuncContext *func_ctx) func_ctx->checked_addr_list = NULL; } +bool +aot_build_zero_function_ret(AOTCompContext *comp_ctx, + AOTFuncType *func_type) +{ + LLVMValueRef ret = NULL; + + if 
(func_type->result_count) { + switch (func_type->types[func_type->param_count]) { + case VALUE_TYPE_I32: + ret = LLVMBuildRet(comp_ctx->builder, I32_ZERO); + break; + case VALUE_TYPE_I64: + ret = LLVMBuildRet(comp_ctx->builder, I64_ZERO); + break; + case VALUE_TYPE_F32: + ret = LLVMBuildRet(comp_ctx->builder, F32_ZERO); + break; + case VALUE_TYPE_F64: + ret = LLVMBuildRet(comp_ctx->builder, F64_ZERO); + break; +#if WASM_ENABLE_SIMD != 0 + case VALUE_TYPE_V128: + ret = LLVMBuildRet(comp_ctx->builder, V128_ZERO); + break; +#endif + default: + bh_assert(0); + } + } + else { + ret = LLVMBuildRetVoid(comp_ctx->builder); + } + + if (!ret) { + aot_set_last_error("llvm build ret failed."); + return false; + } + return true; +} + +static LLVMValueRef +__call_llvm_intrinsic(const AOTCompContext *comp_ctx, + const char *name, + LLVMTypeRef ret_type, + LLVMTypeRef *param_types, + int param_count, + LLVMValueRef *param_values) +{ + LLVMValueRef func, ret; + LLVMTypeRef func_type; + + /* Declare llvm intrinsic function if necessary */ + if (!(func = LLVMGetNamedFunction(comp_ctx->module, name))) { + if (!(func_type = LLVMFunctionType(ret_type, param_types, + (uint32)param_count, false))) { + aot_set_last_error("create LLVM function type failed."); + return NULL; + } + + if (!(func = LLVMAddFunction(comp_ctx->module, name, func_type))) { + aot_set_last_error("add LLVM function failed."); + return NULL; + } + } + + /* Call the LLVM intrinsic function */ + if (!(ret = LLVMBuildCall(comp_ctx->builder, func, param_values, + (uint32)param_count, "call"))) { + aot_set_last_error("llvm build call failed."); + return NULL; + } + + return ret; +} + +LLVMValueRef +aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx, + const char *name, + LLVMTypeRef ret_type, + LLVMTypeRef *param_types, + int param_count, + ...) 
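/*
 * Usage sketch, not part of the patch: emitting an intrinsic call through the
 * shared helper, as the copysign lowering in aot_emit_numberic.c now does.
 * It assumes a valid AOTCompContext and that the F64_TYPE macro from
 * aot_llvm.h is in scope; emit_copysign_f64 is a hypothetical wrapper name.
 */
static LLVMValueRef
emit_copysign_f64(AOTCompContext *comp_ctx, LLVMValueRef mag, LLVMValueRef sign)
{
    LLVMTypeRef param_types[2] = { F64_TYPE, F64_TYPE };

    /* declares llvm.copysign.f64 on first use, then builds the call */
    return aot_call_llvm_intrinsic(comp_ctx, "llvm.copysign.f64", F64_TYPE,
                                   param_types, 2, mag, sign);
}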
+{ + LLVMValueRef *param_values, ret; + va_list argptr; + uint64 total_size; + int i = 0; + + /* Create param values */ + total_size = sizeof(LLVMValueRef) * (uint64)param_count; + if (total_size >= UINT32_MAX + || !(param_values = wasm_runtime_malloc((uint32)total_size))) { + aot_set_last_error("allocate memory for param values failed."); + return false; + } + + /* Load each param value */ + va_start(argptr, param_count); + while (i < param_count) + param_values[i++] = va_arg(argptr, LLVMValueRef); + va_end(argptr); + + ret = __call_llvm_intrinsic(comp_ctx, name, ret_type, param_types, + param_count, param_values); + + wasm_runtime_free(param_values); + + return ret; +} + +LLVMValueRef +aot_call_llvm_intrinsic_v(const AOTCompContext *comp_ctx, + const char *name, + LLVMTypeRef ret_type, + LLVMTypeRef *param_types, + int param_count, + va_list param_value_list) +{ + LLVMValueRef *param_values, ret; + uint64 total_size; + int i = 0; + + /* Create param values */ + total_size = sizeof(LLVMValueRef) * (uint64)param_count; + if (total_size >= UINT32_MAX + || !(param_values = wasm_runtime_malloc((uint32)total_size))) { + aot_set_last_error("allocate memory for param values failed."); + return false; + } + + /* Load each param value */ + while (i < param_count) + param_values[i++] = va_arg(param_value_list, LLVMValueRef); + + ret = __call_llvm_intrinsic(comp_ctx, name, ret_type, param_types, + param_count, param_values); + + wasm_runtime_free(param_values); + + return ret; +} diff --git a/core/iwasm/compilation/aot_llvm.h b/core/iwasm/compilation/aot_llvm.h index c15db867c..3da5ea98e 100644 --- a/core/iwasm/compilation/aot_llvm.h +++ b/core/iwasm/compilation/aot_llvm.h @@ -106,6 +106,7 @@ typedef struct AOTMemInfo { LLVMValueRef mem_bound_check_2bytes; LLVMValueRef mem_bound_check_4bytes; LLVMValueRef mem_bound_check_8bytes; + LLVMValueRef mem_bound_check_16bytes; } AOTMemInfo; typedef struct AOTFuncContext { @@ -152,6 +153,15 @@ typedef struct AOTLLVMTypes { LLVMTypeRef float32_ptr_type; LLVMTypeRef float64_ptr_type; + LLVMTypeRef v128_type; + LLVMTypeRef v128_ptr_type; + LLVMTypeRef i8x16_vec_type; + LLVMTypeRef i16x8_vec_type; + LLVMTypeRef i32x4_vec_type; + LLVMTypeRef i64x2_vec_type; + LLVMTypeRef f32x4_vec_type; + LLVMTypeRef f64x2_vec_type; + LLVMTypeRef meta_data_type; } AOTLLVMTypes; @@ -161,6 +171,13 @@ typedef struct AOTLLVMConsts { LLVMValueRef i64_zero; LLVMValueRef f32_zero; LLVMValueRef f64_zero; + LLVMValueRef v128_zero; + LLVMValueRef i8x16_vec_zero; + LLVMValueRef i16x8_vec_zero; + LLVMValueRef i32x4_vec_zero; + LLVMValueRef i64x2_vec_zero; + LLVMValueRef f32x4_vec_zero; + LLVMValueRef f64x2_vec_zero; LLVMValueRef i32_one; LLVMValueRef i32_two; LLVMValueRef i32_three; @@ -201,6 +218,9 @@ typedef struct AOTCompContext { /* Bounday Check */ bool enable_bound_check; + /* 128-bit SIMD */ + bool enable_simd; + /* Thread Manager */ bool enable_thread_mgr; @@ -248,6 +268,7 @@ typedef struct AOTCompOption{ bool enable_bulk_memory; bool enable_thread_mgr; bool enable_tail_call; + bool enable_simd; bool is_sgx_platform; uint32 opt_level; uint32 size_level; @@ -309,6 +330,29 @@ aot_checked_addr_list_find(AOTFuncContext *func_ctx, void aot_checked_addr_list_destroy(AOTFuncContext *func_ctx); +bool +aot_build_zero_function_ret(AOTCompContext *comp_ctx, + AOTFuncType *func_type); + +LLVMValueRef +aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx, + const char *name, + LLVMTypeRef ret_type, + LLVMTypeRef *param_types, + int param_count, + ...); + +LLVMValueRef 
+aot_call_llvm_intrinsic_v(const AOTCompContext *comp_ctx, + const char *name, + LLVMTypeRef ret_type, + LLVMTypeRef *param_types, + int param_count, + va_list param_value_list); + +bool +aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str); + #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/compilation/simd/simd_access_lanes.c b/core/iwasm/compilation/simd/simd_access_lanes.c new file mode 100644 index 000000000..4778d6368 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_access_lanes.c @@ -0,0 +1,381 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "simd_access_lanes.h" +#include "simd_common.h" +#include "../aot_emit_exception.h" +#include "../../aot/aot_runtime.h" + +static LLVMValueRef +build_intx16_vector(const AOTCompContext *comp_ctx, + const LLVMTypeRef element_type, + const int *element_value) +{ + LLVMValueRef vector, elements[16]; + unsigned i; + + for (i = 0; i < 16; i++) { + if (!(elements[i] = + LLVMConstInt(element_type, element_value[i], true))) { + HANDLE_FAILURE("LLVMConstInst"); + goto fail; + } + } + + if (!(vector = LLVMConstVector(elements, 16))) { + HANDLE_FAILURE("LLVMConstVector"); + goto fail; + } + + return vector; +fail: + return NULL; +} + +bool +aot_compile_simd_shuffle(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + const uint8 *frame_ip) +{ + LLVMValueRef vec1, vec2, mask, result; + uint8 imm[16] = { 0 }; + int values[16]; + unsigned i; + + wasm_runtime_read_v128(frame_ip, (uint64 *)imm, (uint64 *)(imm + 8)); + for (i = 0; i < 16; i++) { + values[i] = imm[i]; + } + + if (!(vec2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE, + "vec2"))) { + goto fail; + } + + if (!(vec1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE, + "vec1"))) { + goto fail; + } + + /* build a vector <16 x i32> */ + if (!(mask = build_intx16_vector(comp_ctx, I32_TYPE, values))) { + goto fail; + } + + if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, vec1, vec2, mask, + "new_vector"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + goto fail; + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + PUSH_V128(result); + + return true; +fail: + return false; +} + +// TODO: instructions for other CPUs +/* shufflevector is not an option, since it requires *mask as a const */ +bool +aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + LLVMValueRef vector, mask, max_lanes, condition, mask_lanes, result; + LLVMTypeRef param_types[2]; + int max_lane_id[16] = { 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16 }, + mask_lane_id[16] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; + + if (!(mask = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE, + "mask"))) { + goto fail; + } + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_i8x16_TYPE, "vec"))) { + goto fail; + } + + /* icmp uge <16 x i8> mask, <16, 16, 16, 16, ...> */ + if (!(max_lanes = build_intx16_vector(comp_ctx, INT8_TYPE, max_lane_id))) { + goto fail; + } + + if (!(condition = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, mask, + max_lanes, "compare_with_16"))) { + HANDLE_FAILURE("LLVMBuldICmp"); + goto fail; + } + + /* if the highest bit of every i8 of mask is 1, means doesn't pick up from vector */ + /* select <16 
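/*
 * Reference model, not the LLVM lowering above: i8x16.swizzle selects byte
 * mask[i] from the source vector and yields 0 for any index >= 16. That is
 * why out-of-range lanes are rewritten to 0x80 before calling
 * llvm.x86.ssse3.pshuf.b.128: pshufb zeroes a lane whose control byte has the
 * high bit set.
 */
#include <stdint.h>

static void
i8x16_swizzle_model(const uint8_t vec[16], const uint8_t mask[16],
                    uint8_t out[16])
{
    for (int i = 0; i < 16; i++)
        out[i] = (mask[i] < 16) ? vec[mask[i]] : 0;
}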
x i1> %condition, <16 x i8> <0x80, 0x80, ...>, <16 x i8> %mask */ + if (!(mask_lanes = + build_intx16_vector(comp_ctx, INT8_TYPE, mask_lane_id))) { + goto fail; + } + + if (!(mask = LLVMBuildSelect(comp_ctx->builder, condition, mask_lanes, + mask, "mask"))) { + HANDLE_FAILURE("LLVMBuildSelect"); + goto fail; + } + + param_types[0] = V128_i8x16_TYPE; + param_types[1] = V128_i8x16_TYPE; + if (!(result = aot_call_llvm_intrinsic( + comp_ctx, "llvm.x86.ssse3.pshuf.b.128", V128_i8x16_TYPE, + param_types, 2, vector, mask))) { + HANDLE_FAILURE("LLVMBuildCall"); + goto fail; + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + PUSH_V128(result); + + return true; +fail: + return false; +} + +static bool +aot_compile_simd_extract(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id, + bool need_extend, + bool is_signed, + LLVMTypeRef vector_type, + LLVMTypeRef result_type, + unsigned aot_value_type) +{ + LLVMValueRef vector, idx, result; + + if (!(idx = I8_CONST(lane_id))) { + HANDLE_FAILURE("LLVMConstInt"); + goto fail; + } + + /* bitcast <2 x i64> %0 to */ + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec"))) { + goto fail; + } + + /* extractelement %vector, i8 lane_id*/ + if (!(result = LLVMBuildExtractElement(comp_ctx->builder, vector, idx, + "element"))) { + HANDLE_FAILURE("LLVMBuildExtractElement"); + goto fail; + } + + if (need_extend) { + if (is_signed) { + /* sext %element to */ + if (!(result = LLVMBuildSExt(comp_ctx->builder, result, + result_type, "ret"))) { + HANDLE_FAILURE("LLVMBuildSExt"); + goto fail; + } + } + else { + /* sext %element to */ + if (!(result = LLVMBuildZExt(comp_ctx->builder, result, + result_type, "ret"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + goto fail; + } + } + } + + PUSH(result, aot_value_type); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_extract_i8x16(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id, + bool is_signed) +{ + return aot_compile_simd_extract(comp_ctx, func_ctx, lane_id, true, + is_signed, V128_i8x16_TYPE, I32_TYPE, + VALUE_TYPE_I32); +} + +bool +aot_compile_simd_extract_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id, + bool is_signed) +{ + return aot_compile_simd_extract(comp_ctx, func_ctx, lane_id, true, + is_signed, V128_i16x8_TYPE, I32_TYPE, + VALUE_TYPE_I32); +} + +bool +aot_compile_simd_extract_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id) +{ + return aot_compile_simd_extract(comp_ctx, func_ctx, lane_id, false, false, + V128_i32x4_TYPE, I32_TYPE, VALUE_TYPE_I32); +} + +bool +aot_compile_simd_extract_i64x2(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id) +{ + return aot_compile_simd_extract(comp_ctx, func_ctx, lane_id, false, false, + V128_i64x2_TYPE, I64_TYPE, VALUE_TYPE_I64); +} + +bool +aot_compile_simd_extract_f32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id) +{ + return aot_compile_simd_extract(comp_ctx, func_ctx, lane_id, false, false, + V128_f32x4_TYPE, F32_TYPE, VALUE_TYPE_F32); +} + +bool +aot_compile_simd_extract_f64x2(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id) +{ + return aot_compile_simd_extract(comp_ctx, func_ctx, lane_id, false, false, + V128_f64x2_TYPE, F64_TYPE, VALUE_TYPE_F64); +} + +static bool +aot_compile_simd_replace(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id, + 
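/*
 * Reference model, not the LLVM lowering: extract_lane reads one lane and,
 * for i8/i16 lanes, widens it to i32 with a signed or unsigned extension,
 * which is the SExt/ZExt pair emitted above; i32x4/i64x2/f32x4/f64x2 extracts
 * return the lane unchanged.
 */
#include <stdint.h>

static int32_t
i8x16_extract_lane_s_model(const int8_t vec[16], unsigned lane_id)
{
    return (int32_t)vec[lane_id];           /* sign-extend i8 -> i32 */
}

static int32_t
i8x16_extract_lane_u_model(const uint8_t vec[16], unsigned lane_id)
{
    return (int32_t)(uint32_t)vec[lane_id]; /* zero-extend i8 -> i32 */
}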
unsigned new_value_type, + LLVMTypeRef vector_type, + bool need_reduce, + LLVMTypeRef element_type) +{ + LLVMValueRef vector, new_value, idx, result; + + POP(new_value, new_value_type); + + if (!(idx = I8_CONST(lane_id))) { + HANDLE_FAILURE("LLVMConstInt"); + goto fail; + } + + /* bitcast <2 x i64> %0 to */ + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec"))) { + goto fail; + } + + /* bitcast to */ + if (need_reduce) { + if (!(new_value = LLVMBuildTrunc(comp_ctx->builder, new_value, + element_type, "element"))) { + HANDLE_FAILURE("LLVMBuildTrunc"); + goto fail; + } + } + + /* insertelement %vector, %element, i8 idx */ + if (!(result = LLVMBuildInsertElement(comp_ctx->builder, vector, new_value, + idx, "new_vector"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + goto fail; + } + + /* bitcast %result to <2 x i64> */ + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + PUSH_V128(result); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_replace_i8x16(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id) +{ + return aot_compile_simd_replace(comp_ctx, func_ctx, lane_id, + VALUE_TYPE_I32, V128_i8x16_TYPE, true, + INT8_TYPE); +} + +bool +aot_compile_simd_replace_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id) +{ + return aot_compile_simd_replace(comp_ctx, func_ctx, lane_id, + VALUE_TYPE_I32, V128_i16x8_TYPE, true, + INT16_TYPE); +} + +bool +aot_compile_simd_replace_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id) +{ + return aot_compile_simd_replace(comp_ctx, func_ctx, lane_id, + VALUE_TYPE_I32, V128_i32x4_TYPE, false, + I32_TYPE); +} + +bool +aot_compile_simd_replace_i64x2(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id) +{ + return aot_compile_simd_replace(comp_ctx, func_ctx, lane_id, + VALUE_TYPE_I64, V128_i64x2_TYPE, false, + I64_TYPE); +} + +bool +aot_compile_simd_replace_f32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id) +{ + return aot_compile_simd_replace(comp_ctx, func_ctx, lane_id, + VALUE_TYPE_F32, V128_f32x4_TYPE, false, + F32_TYPE); +} + +bool +aot_compile_simd_replace_f64x2(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id) +{ + return aot_compile_simd_replace(comp_ctx, func_ctx, lane_id, + VALUE_TYPE_F64, V128_f64x2_TYPE, false, + F64_TYPE); +} diff --git a/core/iwasm/compilation/simd/simd_access_lanes.h b/core/iwasm/compilation/simd/simd_access_lanes.h new file mode 100644 index 000000000..ae90242df --- /dev/null +++ b/core/iwasm/compilation/simd/simd_access_lanes.h @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _SIMD_ACCESS_LANES_H_ +#define _SIMD_ACCESS_LANES_H_ + +#include "../aot_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +aot_compile_simd_shuffle(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + const uint8 *frame_ip); + +bool +aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_extract_i8x16(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id, + bool is_signed); + +bool +aot_compile_simd_extract_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id, + bool is_signed); + +bool +aot_compile_simd_extract_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +bool +aot_compile_simd_extract_i64x2(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +bool +aot_compile_simd_extract_f32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +bool +aot_compile_simd_extract_f64x2(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +bool +aot_compile_simd_replace_i8x16(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +bool +aot_compile_simd_replace_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +bool +aot_compile_simd_replace_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +bool +aot_compile_simd_replace_i64x2(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +bool +aot_compile_simd_replace_f32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +bool +aot_compile_simd_replace_f64x2(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* end of _SIMD_ACCESS_LANES_H_ */ diff --git a/core/iwasm/compilation/simd/simd_bit_shifts.c b/core/iwasm/compilation/simd/simd_bit_shifts.c new file mode 100644 index 000000000..5b1ee9024 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_bit_shifts.c @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "simd_bit_shifts.h" +#include "simd_common.h" +#include "../aot_emit_exception.h" +#include "../../aot/aot_runtime.h" + +static bool +simd_shift(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntShift shift_op, + LLVMTypeRef vector_type, + LLVMTypeRef element_type, + unsigned lane_width) +{ + LLVMValueRef vector, offset, width, undef, zeros, result; + LLVMTypeRef zeros_type; + + POP_I32(offset); + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec"))) { + goto fail; + } + + if (!(width = LLVMConstInt(I32_TYPE, lane_width, true))) { + HANDLE_FAILURE("LLVMConstInt"); + goto fail; + } + + if (!(offset = + LLVMBuildURem(comp_ctx->builder, offset, width, "remainder"))) { + HANDLE_FAILURE("LLVMBuildURem"); + goto fail; + } + + if (I64_TYPE == element_type) { + if (!(offset = LLVMBuildZExt(comp_ctx->builder, offset, element_type, + "offset_scalar"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + goto fail; + } + } + else { + if (!(offset = LLVMBuildTruncOrBitCast( + comp_ctx->builder, offset, element_type, "offset_scalar"))) { + HANDLE_FAILURE("LLVMBuildTrunc"); + goto fail; + } + } + + /* create a vector with offset */ + if (!(undef = LLVMGetUndef(vector_type))) { + HANDLE_FAILURE("LLVMGetUndef"); + goto fail; + } + + if (!(zeros_type = LLVMVectorType(I32_TYPE, 128 / lane_width))) { + HANDLE_FAILURE("LVMVectorType"); + goto fail; + } + + if (!(zeros = LLVMConstNull(zeros_type))) { + HANDLE_FAILURE("LLVMConstNull"); + goto fail; + } + + if (!(offset = LLVMBuildInsertElement(comp_ctx->builder, undef, offset, + I32_ZERO, "base_vector"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + goto fail; + } + + if (!(offset = LLVMBuildShuffleVector(comp_ctx->builder, offset, undef, + zeros, "offset_vector"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + goto fail; + } + + switch (shift_op) { + case INT_SHL: + { + if (!(result = + LLVMBuildShl(comp_ctx->builder, vector, offset, "shl"))) { + HANDLE_FAILURE("LLVMBuildShl"); + goto fail; + } + break; + } + case INT_SHR_S: + { + if (!(result = LLVMBuildAShr(comp_ctx->builder, vector, offset, + "ashr"))) { + HANDLE_FAILURE("LLVMBuildAShr"); + goto fail; + } + break; + } + case INT_SHR_U: + { + if (!(result = LLVMBuildLShr(comp_ctx->builder, vector, offset, + "lshr"))) { + HANDLE_FAILURE("LLVMBuildLShr"); + goto fail; + } + break; + } + default: + { + bh_assert(0); + goto fail; + } + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "result"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + PUSH_V128(result); + return true; +fail: + return false; +} + +bool +aot_compile_simd_i8x16_shift(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntShift shift_op) +{ + return simd_shift(comp_ctx, func_ctx, shift_op, V128_i8x16_TYPE, INT8_TYPE, + 8); +} + +bool +aot_compile_simd_i16x8_shift(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntShift shift_op) +{ + return simd_shift(comp_ctx, func_ctx, shift_op, V128_i16x8_TYPE, + INT16_TYPE, 16); +} + +bool +aot_compile_simd_i32x4_shift(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntShift shift_op) +{ + return simd_shift(comp_ctx, func_ctx, shift_op, V128_i32x4_TYPE, I32_TYPE, + 32); +} + +bool +aot_compile_simd_i64x2_shift(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntShift shift_op) +{ + return simd_shift(comp_ctx, func_ctx, shift_op, V128_i64x2_TYPE, I64_TYPE, + 64); +} diff --git 
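/*
 * Reference model, not the LLVM lowering: SIMD shifts use the i32 shift count
 * modulo the lane width, which is what the URem by lane_width above computes
 * before the count is splatted across the vector and fed to shl/ashr/lshr.
 */
#include <stdint.h>

static uint8_t
i8_shl_lane_model(uint8_t lane, uint32_t count)
{
    return (uint8_t)(lane << (count % 8));
}

static uint8_t
i8_shr_u_lane_model(uint8_t lane, uint32_t count)
{
    return (uint8_t)(lane >> (count % 8));
}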
a/core/iwasm/compilation/simd/simd_bit_shifts.h b/core/iwasm/compilation/simd/simd_bit_shifts.h new file mode 100644 index 000000000..503406079 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_bit_shifts.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _SIMD_BIT_SHIFTS_H_ +#define _SIMD_BIT_SHIFTS_H_ + +#include "../aot_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +aot_compile_simd_i8x16_shift(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntShift shift_op); + +bool +aot_compile_simd_i16x8_shift(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntShift shift_op); + +bool +aot_compile_simd_i32x4_shift(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntShift shift_op); + +bool +aot_compile_simd_i64x2_shift(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntShift shift_op); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* end of _SIMD_BIT_SHIFTS_H_ */ diff --git a/core/iwasm/compilation/simd/simd_bitmask_extracts.c b/core/iwasm/compilation/simd/simd_bitmask_extracts.c new file mode 100644 index 000000000..79565cfc2 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_bitmask_extracts.c @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "simd_bitmask_extracts.h" +#include "simd_common.h" +#include "../aot_emit_exception.h" +#include "../../aot/aot_runtime.h" + +static bool +simd_build_bitmask(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx, + uint8 length, + LLVMTypeRef vector_type, + LLVMTypeRef element_type, + const char *intrinsic) +{ + LLVMValueRef vector, zeros, mask, mask_elements[16], cond, result; + LLVMTypeRef param_types[1], vector_ext_type; + const uint32 numbers[16] = { 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, + 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, + 0x1000, 0x2000, 0x4000, 0x8000 }; + uint8 i; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec"))) { + goto fail; + } + + if (!(vector_ext_type = LLVMVectorType(I32_TYPE, length))) { + HANDLE_FAILURE("LLVMVectorType"); + goto fail; + } + + if (!(vector = LLVMBuildSExt(comp_ctx->builder, vector, vector_ext_type, + "vec_ext"))) { + HANDLE_FAILURE("LLVMBuildSExt"); + goto fail; + } + + if (!(zeros = LLVMConstNull(vector_ext_type))) { + HANDLE_FAILURE("LLVMConstNull"); + goto fail; + } + + for (i = 0; i < 16; i++) { + if (!(mask_elements[i] = LLVMConstInt(I32_TYPE, numbers[i], false))) { + HANDLE_FAILURE("LLVMConstInt"); + goto fail; + } + } + + if (!(mask = LLVMConstVector(mask_elements, length))) { + HANDLE_FAILURE("LLVMConstVector"); + goto fail; + } + + if (!(cond = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector, zeros, + "lt_zero"))) { + HANDLE_FAILURE("LLVMBuildICmp"); + goto fail; + } + + if (!(result = + LLVMBuildSelect(comp_ctx->builder, cond, mask, zeros, "select"))) { + HANDLE_FAILURE("LLVMBuildSelect"); + goto fail; + } + + param_types[0] = vector_ext_type; + if (!(result = aot_call_llvm_intrinsic(comp_ctx, intrinsic, I32_TYPE, + param_types, 1, result))) { + HANDLE_FAILURE("LLVMBuildCall"); + goto fail; + } + + PUSH_I32(result); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_i8x16_bitmask(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_build_bitmask(comp_ctx, func_ctx, 16, V128_i8x16_TYPE, + INT8_TYPE, + 
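/*
 * Reference model, not the LLVM lowering: i8x16.bitmask sets bit i of the
 * result when lane i is negative (its sign bit is set). simd_build_bitmask
 * above produces the same value by selecting 1 << i for negative lanes and 0
 * otherwise, then OR-reducing the vector with
 * llvm.experimental.vector.reduce.or.
 */
#include <stdint.h>

static uint32_t
i8x16_bitmask_model(const int8_t vec[16])
{
    uint32_t mask = 0;
    for (int i = 0; i < 16; i++)
        if (vec[i] < 0)
            mask |= (uint32_t)1 << i;
    return mask;
}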
"llvm.experimental.vector.reduce.or.v16i32"); +} + +bool +aot_compile_simd_i16x8_bitmask(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_build_bitmask(comp_ctx, func_ctx, 8, V128_i16x8_TYPE, + INT16_TYPE, + "llvm.experimental.vector.reduce.or.v8i32"); +} + +bool +aot_compile_simd_i32x4_bitmask(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_build_bitmask(comp_ctx, func_ctx, 4, V128_i32x4_TYPE, I32_TYPE, + "llvm.experimental.vector.reduce.or.v4i32"); +} diff --git a/core/iwasm/compilation/simd/simd_bitmask_extracts.h b/core/iwasm/compilation/simd/simd_bitmask_extracts.h new file mode 100644 index 000000000..b8cd5e86f --- /dev/null +++ b/core/iwasm/compilation/simd/simd_bitmask_extracts.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _SIMD_BITMASK_EXTRACTS_H_ +#define _SIMD_BITMASK_EXTRACTS_H_ + +#include "../aot_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +aot_compile_simd_i8x16_bitmask(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i16x8_bitmask(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i32x4_bitmask(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* end of _SIMD_BITMASK_EXTRACTS_H_ */ + diff --git a/core/iwasm/compilation/simd/simd_bitwise_ops.c b/core/iwasm/compilation/simd/simd_bitwise_ops.c new file mode 100644 index 000000000..69e82bb85 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_bitwise_ops.c @@ -0,0 +1,146 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "simd_bitwise_ops.h" +#include "../aot_emit_exception.h" +#include "../../aot/aot_runtime.h" + +static bool +v128_bitwise_two_component(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Bitwise bitwise_op) +{ + LLVMValueRef vector1, vector2, result; + + POP_V128(vector2); + POP_V128(vector1); + + switch (bitwise_op) { + case V128_AND: + if (!(result = LLVMBuildAnd(comp_ctx->builder, vector1, vector2, + "and"))) { + HANDLE_FAILURE("LLVMBuildAnd"); + goto fail; + } + break; + case V128_OR: + if (!(result = + LLVMBuildOr(comp_ctx->builder, vector1, vector2, "or"))) { + HANDLE_FAILURE("LLVMBuildAnd"); + goto fail; + } + break; + case V128_XOR: + if (!(result = LLVMBuildXor(comp_ctx->builder, vector1, vector2, + "xor"))) { + HANDLE_FAILURE("LLVMBuildAnd"); + goto fail; + } + break; + case V128_ANDNOT: + { + /* v128.and(a, v128.not(b)) */ + if (!(vector2 = LLVMBuildNot(comp_ctx->builder, vector2, "not"))) { + HANDLE_FAILURE("LLVMBuildNot"); + goto fail; + } + + if (!(result = LLVMBuildAnd(comp_ctx->builder, vector1, vector2, + "and"))) { + HANDLE_FAILURE("LLVMBuildAnd"); + goto fail; + } + + break; + } + default: + bh_assert(0); + goto fail; + } + + PUSH_V128(result); + return true; +fail: + return false; +} + +static bool +v128_bitwise_not(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + LLVMValueRef vector, result; + + POP_V128(vector); + + if (!(result = LLVMBuildNot(comp_ctx->builder, vector, "not"))) { + HANDLE_FAILURE("LLVMBuildNot"); + goto fail; + } + + PUSH_V128(result); + return true; +fail: + return false; +} + +/* v128.or(v128.and(v1, c), v128.and(v2, v128.not(c))) */ +static bool +v128_bitwise_bit_select(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + LLVMValueRef vector1, 
vector2, vector3, result; + + POP_V128(vector3); + POP_V128(vector2); + POP_V128(vector1); + + if (!(vector1 = + LLVMBuildAnd(comp_ctx->builder, vector1, vector3, "a_and_c"))) { + HANDLE_FAILURE("LLVMBuildAdd"); + goto fail; + } + + if (!(vector3 = LLVMBuildNot(comp_ctx->builder, vector3, "not_c"))) { + HANDLE_FAILURE("LLVMBuildNot"); + goto fail; + } + + if (!(vector2 = + LLVMBuildAnd(comp_ctx->builder, vector2, vector3, "b_and_c"))) { + HANDLE_FAILURE("LLVMBuildAdd"); + goto fail; + } + + if (!(result = + LLVMBuildOr(comp_ctx->builder, vector1, vector2, "a_or_b"))) { + HANDLE_FAILURE("LLVMBuildOr"); + goto fail; + } + + PUSH_V128(result); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_v128_bitwise(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Bitwise bitwise_op) +{ + switch (bitwise_op) { + case V128_AND: + case V128_OR: + case V128_XOR: + case V128_ANDNOT: + return v128_bitwise_two_component(comp_ctx, func_ctx, bitwise_op); + case V128_NOT: + return v128_bitwise_not(comp_ctx, func_ctx); + case V128_BITSELECT: + return v128_bitwise_bit_select(comp_ctx, func_ctx); + default: + bh_assert(0); + return false; + } +} diff --git a/core/iwasm/compilation/simd/simd_bitwise_ops.h b/core/iwasm/compilation/simd/simd_bitwise_ops.h new file mode 100644 index 000000000..4717d0158 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_bitwise_ops.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _SIMD_BITWISE_OPS_H_ +#define _SIMD_BITWISE_OPS_H_ + +#include "../aot_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +aot_compile_simd_v128_bitwise(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Bitwise bitwise_op); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* end of _SIMD_BITWISE_OPS_H_ */ diff --git a/core/iwasm/compilation/simd/simd_bool_reductions.c b/core/iwasm/compilation/simd/simd_bool_reductions.c new file mode 100644 index 000000000..c2abb3027 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_bool_reductions.c @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. 
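/*
 * Reference model, not the LLVM lowering: v128.bitselect takes each result
 * bit from v1 where the control mask c has a 1 and from v2 where it has a 0,
 * i.e. (v1 & c) | (v2 & ~c), the exact and/not/and/or sequence built in
 * v128_bitwise_bit_select above (applied here to one 64-bit half of a v128).
 */
#include <stdint.h>

static uint64_t
bitselect64_model(uint64_t v1, uint64_t v2, uint64_t c)
{
    return (v1 & c) | (v2 & ~c);
}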
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "simd_bool_reductions.h" +#include "simd_common.h" +#include "../aot_emit_exception.h" +#include "../../aot/aot_runtime.h" + +static bool +simd_any_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type, + LLVMTypeRef element_type, + const char *intrinsic) +{ + LLVMValueRef vector, zeros, non_zero, result; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec"))) { + goto fail; + } + + if (!(zeros = LLVMConstNull(vector_type))) { + HANDLE_FAILURE("LLVMConstNull"); + goto fail; + } + + /* icmp eq %vector, zeroinitialize */ + if (!(non_zero = LLVMBuildICmp(comp_ctx->builder, LLVMIntNE, vector, zeros, + "non_zero"))) { + HANDLE_FAILURE("LLVMBuildICmp"); + goto fail; + } + + /* zext to */ + if (!(non_zero = LLVMBuildZExt(comp_ctx->builder, non_zero, vector_type, + "non_zero_ex"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + goto fail; + } + + if (!(result = aot_call_llvm_intrinsic(comp_ctx, intrinsic, element_type, + &vector_type, 1, non_zero))) { + HANDLE_FAILURE("LLVMBuildCall"); + goto fail; + } + + if (!(zeros = LLVMConstNull(element_type))) { + HANDLE_FAILURE("LLVMConstNull"); + goto fail; + } + + if (!(result = LLVMBuildICmp(comp_ctx->builder, LLVMIntNE, result, zeros, + "gt_zero"))) { + HANDLE_FAILURE("LLVMBuildICmp"); + goto fail; + } + + if (!(result = + LLVMBuildZExt(comp_ctx->builder, result, I32_TYPE, "ret"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + goto fail; + } + + PUSH_I32(result); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_i8x16_any_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_any_true(comp_ctx, func_ctx, V128_i8x16_TYPE, INT8_TYPE, + "llvm.experimental.vector.reduce.add.v16i8"); +} + +bool +aot_compile_simd_i16x8_any_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_any_true(comp_ctx, func_ctx, V128_i16x8_TYPE, INT16_TYPE, + "llvm.experimental.vector.reduce.add.v8i16"); +} + +bool +aot_compile_simd_i32x4_any_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_any_true(comp_ctx, func_ctx, V128_i32x4_TYPE, I32_TYPE, + "llvm.experimental.vector.reduce.add.v4i32"); +} + +static bool +simd_all_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type, + LLVMTypeRef element_type, + const char *intrinsic) +{ + LLVMValueRef vector, zeros, is_zero, result; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec"))) { + goto fail; + } + + if (!(zeros = LLVMConstNull(vector_type))) { + HANDLE_FAILURE("LLVMConstNull"); + goto fail; + } + + /* icmp eq %vector, zeroinitialize */ + if (!(is_zero = LLVMBuildICmp(comp_ctx->builder, LLVMIntEQ, vector, zeros, + "is_zero"))) { + HANDLE_FAILURE("LLVMBuildICmp"); + goto fail; + } + + /* zext to */ + if (!(is_zero = LLVMBuildZExt(comp_ctx->builder, is_zero, vector_type, + "is_zero_ex"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + goto fail; + } + + if (!(result = aot_call_llvm_intrinsic(comp_ctx, intrinsic, element_type, + &vector_type, 1, is_zero))) { + HANDLE_FAILURE("LLVMBuildCall"); + goto fail; + } + + if (!(zeros = LLVMConstNull(element_type))) { + HANDLE_FAILURE("LLVMConstNull"); + goto fail; + } + + if (!(result = LLVMBuildICmp(comp_ctx->builder, LLVMIntEQ, result, zeros, + "none"))) { + HANDLE_FAILURE("LLVMBuildICmp"); + goto fail; + } + + if (!(result = + LLVMBuildZExt(comp_ctx->builder, result, I32_TYPE, "ret"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + 
goto fail; + } + + PUSH_I32(result); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_i8x16_all_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_all_true(comp_ctx, func_ctx, V128_i8x16_TYPE, INT8_TYPE, + "llvm.experimental.vector.reduce.add.v16i8"); +} + +bool +aot_compile_simd_i16x8_all_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_all_true(comp_ctx, func_ctx, V128_i16x8_TYPE, INT16_TYPE, + "llvm.experimental.vector.reduce.add.v8i16"); +} + +bool +aot_compile_simd_i32x4_all_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_all_true(comp_ctx, func_ctx, V128_i32x4_TYPE, I32_TYPE, + "llvm.experimental.vector.reduce.add.v4i32"); +} diff --git a/core/iwasm/compilation/simd/simd_bool_reductions.h b/core/iwasm/compilation/simd/simd_bool_reductions.h new file mode 100644 index 000000000..e67f00e7e --- /dev/null +++ b/core/iwasm/compilation/simd/simd_bool_reductions.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _SIMD_BOOL_REDUCTIONS_H_ +#define _SIMD_BOOL_REDUCTIONS_H_ + +#include "../aot_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +aot_compile_simd_i8x16_any_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i16x8_any_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i32x4_any_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i8x16_all_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i16x8_all_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i32x4_all_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* end of _SIMD_BOOL_REDUCTIONS_H_ */ diff --git a/core/iwasm/compilation/simd/simd_common.c b/core/iwasm/compilation/simd/simd_common.c new file mode 100644 index 000000000..81378fa00 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_common.c @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "simd_common.h" + +LLVMValueRef +simd_pop_v128_and_bitcast(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx, + LLVMTypeRef vec_type, + const char *name) +{ + LLVMValueRef number; + + POP_V128(number); + + if (!(number = + LLVMBuildBitCast(comp_ctx->builder, number, vec_type, name))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + return number; +fail: + return NULL; +} + +bool +simd_bitcast_and_push_v128(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx, + LLVMValueRef vector, + const char *name) +{ + if (!(vector = LLVMBuildBitCast(comp_ctx->builder, vector, V128_i64x2_TYPE, + name))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + /* push result into the stack */ + PUSH_V128(vector); + + return true; +fail: + return false; +} \ No newline at end of file diff --git a/core/iwasm/compilation/simd/simd_common.h b/core/iwasm/compilation/simd/simd_common.h new file mode 100644 index 000000000..5f029b01e --- /dev/null +++ b/core/iwasm/compilation/simd/simd_common.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. 
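/*
 * Reference model, not the LLVM lowering: any_true is 1 when at least one
 * lane is non-zero, all_true is 1 only when every lane is non-zero. The code
 * above expresses the same thing as an add-reduction over per-lane compare
 * results, then tests that sum against zero.
 */
#include <stdint.h>

static int
i8x16_any_true_model(const uint8_t vec[16])
{
    for (int i = 0; i < 16; i++)
        if (vec[i] != 0)
            return 1;
    return 0;
}

static int
i8x16_all_true_model(const uint8_t vec[16])
{
    for (int i = 0; i < 16; i++)
        if (vec[i] == 0)
            return 0;
    return 1;
}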
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _SIMD_COMMON_H_ +#define _SIMD_COMMON_H_ + +#include "../aot_compiler.h" + +LLVMValueRef +simd_pop_v128_and_bitcast(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx, + LLVMTypeRef vec_type, + const char *name); + +bool +simd_bitcast_and_push_v128(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx, + LLVMValueRef vector, + const char *name); + +#endif /* _SIMD_COMMON_H_ */ \ No newline at end of file diff --git a/core/iwasm/compilation/simd/simd_comparisons.c b/core/iwasm/compilation/simd/simd_comparisons.c new file mode 100644 index 000000000..9b95a85b8 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_comparisons.c @@ -0,0 +1,231 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "simd_comparisons.h" +#include "simd_common.h" +#include "../aot_emit_exception.h" +#include "../../aot/aot_runtime.h" + +static bool +float_cond_2_predicate(FloatCond cond, LLVMRealPredicate *out) +{ + switch (cond) { + case FLOAT_EQ: + *out = LLVMRealOEQ; + break; + case FLOAT_NE: + *out = LLVMRealUNE; + break; + case FLOAT_LT: + *out = LLVMRealOLT; + break; + case FLOAT_GT: + *out = LLVMRealOGT; + break; + case FLOAT_LE: + *out = LLVMRealOLE; + break; + case FLOAT_GE: + *out = LLVMRealOGE; + break; + default: + bh_assert(0); + goto fail; + } + + return true; +fail: + return false; +} + +static bool +int_cond_2_predicate(IntCond cond, LLVMIntPredicate *out) +{ + switch (cond) { + case INT_EQZ: + case INT_EQ: + *out = LLVMIntEQ; + break; + case INT_NE: + *out = LLVMIntNE; + break; + case INT_LT_S: + *out = LLVMIntSLT; + break; + case INT_LT_U: + *out = LLVMIntULT; + break; + case INT_GT_S: + *out = LLVMIntSGT; + break; + case INT_GT_U: + *out = LLVMIntUGT; + break; + case INT_LE_S: + *out = LLVMIntSLE; + break; + case INT_LE_U: + *out = LLVMIntULE; + break; + case INT_GE_S: + *out = LLVMIntSGE; + break; + case INT_GE_U: + *out = LLVMIntUGE; + break; + default: + bh_assert(0); + goto fail; + } + + return true; +fail: + return false; +} + +static bool +interger_vector_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntCond cond, + LLVMTypeRef vector_type) +{ + LLVMValueRef vec1, vec2, result; + LLVMIntPredicate int_pred; + + if (!(vec2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec2"))) { + goto fail; + } + + if (!(vec1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec1"))) { + goto fail; + } + + if (!int_cond_2_predicate(cond, &int_pred)) { + HANDLE_FAILURE("int_cond_2_predicate"); + goto fail; + } + /* icmp %vec1, %vec2 */ + if (!(result = + LLVMBuildICmp(comp_ctx->builder, int_pred, vec1, vec2, "cmp"))) { + HANDLE_FAILURE("LLVMBuildICmp"); + goto fail; + } + + /* sext %result to */ + if (!(result = + LLVMBuildSExt(comp_ctx->builder, result, vector_type, "ext"))) { + HANDLE_FAILURE("LLVMBuildSExt"); + goto fail; + } + + /* bitcast %result to <2 x i64> */ + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "result"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + PUSH_V128(result); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_i8x16_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntCond cond) +{ + return interger_vector_compare(comp_ctx, func_ctx, cond, V128_i8x16_TYPE); +} + +bool +aot_compile_simd_i16x8_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntCond 
cond) +{ + return interger_vector_compare(comp_ctx, func_ctx, cond, V128_i16x8_TYPE); +} + +bool +aot_compile_simd_i32x4_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntCond cond) +{ + return interger_vector_compare(comp_ctx, func_ctx, cond, V128_i32x4_TYPE); +} + +static bool +float_vector_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + FloatCond cond, + LLVMTypeRef vector_type, + LLVMTypeRef result_type) +{ + LLVMValueRef vec1, vec2, result; + LLVMRealPredicate real_pred; + + if (!(vec2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec2"))) { + goto fail; + } + + if (!(vec1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec1"))) { + goto fail; + } + + if (!float_cond_2_predicate(cond, &real_pred)) { + HANDLE_FAILURE("float_cond_2_predicate"); + goto fail; + } + /* fcmp %vec1, %vec2 */ + if (!(result = + LLVMBuildFCmp(comp_ctx->builder, real_pred, vec1, vec2, "cmp"))) { + HANDLE_FAILURE("LLVMBuildFCmp"); + goto fail; + } + + /* sext %result to */ + if (!(result = + LLVMBuildSExt(comp_ctx->builder, result, result_type, "ext"))) { + HANDLE_FAILURE("LLVMBuildSExt"); + goto fail; + } + + /* bitcast %result to <2 x i64> */ + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "result"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + PUSH_V128(result); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_f32x4_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + FloatCond cond) +{ + return float_vector_compare(comp_ctx, func_ctx, cond, V128_f32x4_TYPE, + V128_i32x4_TYPE); +} + +bool +aot_compile_simd_f64x2_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + FloatCond cond) +{ + return float_vector_compare(comp_ctx, func_ctx, cond, V128_f64x2_TYPE, + V128_i64x2_TYPE); +} diff --git a/core/iwasm/compilation/simd/simd_comparisons.h b/core/iwasm/compilation/simd/simd_comparisons.h new file mode 100644 index 000000000..46d816714 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_comparisons.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _SIMD_COMPARISONS_H_ +#define _SIMD_COMPARISONS_H_ + +#include "../aot_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +aot_compile_simd_i8x16_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntCond cond); + +bool +aot_compile_simd_i16x8_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntCond cond); + +bool +aot_compile_simd_i32x4_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntCond cond); + +bool +aot_compile_simd_f32x4_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + FloatCond cond); + +bool +aot_compile_simd_f64x2_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + FloatCond cond); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* end of _SIMD_COMPARISONS_H_ */ diff --git a/core/iwasm/compilation/simd/simd_construct_values.c b/core/iwasm/compilation/simd/simd_construct_values.c new file mode 100644 index 000000000..1438a1639 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_construct_values.c @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "simd_construct_values.h" +#include "../aot_emit_exception.h" +#include "../interpreter/wasm_opcode.h" +#include "../../aot/aot_runtime.h" + +bool +aot_compile_simd_v128_const(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + const uint8 *imm_bytes) +{ + uint64 imm1, imm2; + LLVMValueRef undef, first_long, agg1, second_long, agg2; + + wasm_runtime_read_v128(imm_bytes, &imm1, &imm2); + + if (!(undef = LLVMGetUndef(V128_i64x2_TYPE))) { + HANDLE_FAILURE("LLVMGetUndef"); + goto fail; + } + + /* %agg1 = insertelement <2 x i64> undef, i16 0, i64 ${*imm} */ + if (!(first_long = I64_CONST(imm1))) { + HANDLE_FAILURE("LLVMConstInt"); + goto fail; + } + + if (!(agg1 = LLVMBuildInsertElement(comp_ctx->builder, undef, first_long, + I32_ZERO, "agg1"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + goto fail; + } + + /* %agg2 = insertelement <2 x i64> %agg1, i16 1, i64 ${*(imm + 1)} */ + if (!(second_long = I64_CONST(imm2))) { + HANDLE_FAILURE("LLVMGetUndef"); + goto fail; + } + + if (!(agg2 = LLVMBuildInsertElement(comp_ctx->builder, agg1, second_long, + I32_ONE, "agg2"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + goto fail; + } + + PUSH_V128(agg2); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_splat(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 splat_opcode) +{ + LLVMValueRef value, undef, base, mask, new_vector, result; + LLVMTypeRef all_zero_ty; + + switch (splat_opcode) { + case SIMD_i8x16_splat: + { + LLVMValueRef input; + POP_I32(input); + + /* trunc i32 %input to i8 */ + if (!(value = LLVMBuildTrunc(comp_ctx->builder, input, INT8_TYPE, + "trunc"))) { + HANDLE_FAILURE("LLVMBuildTrunc"); + goto fail; + } + undef = LLVMGetUndef(V128_i8x16_TYPE); + if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 16))) { + HANDLE_FAILURE("LLVMVectorType"); + goto fail; + } + break; + } + case SIMD_i16x8_splat: + { + LLVMValueRef input; + POP_I32(input); + + /* trunc i32 %input to i16 */ + if (!(value = LLVMBuildTrunc(comp_ctx->builder, input, INT16_TYPE, + "trunc"))) { + HANDLE_FAILURE("LLVMBuildTrunc"); + goto fail; + } + undef = LLVMGetUndef(V128_i16x8_TYPE); + if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 8))) { + HANDLE_FAILURE("LLVMVectorType"); + goto fail; + } + break; + } + case SIMD_i32x4_splat: + { + POP_I32(value); + undef = LLVMGetUndef(V128_i32x4_TYPE); + + if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 4))) { + HANDLE_FAILURE("LLVMVectorType"); + goto fail; + } + break; + } + case SIMD_i64x2_splat: + { + POP(value, VALUE_TYPE_I64); + undef = LLVMGetUndef(V128_i64x2_TYPE); + + if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 2))) { + HANDLE_FAILURE("LLVMVectorType"); + goto fail; + } + break; + } + case SIMD_f32x4_splat: + { + POP(value, VALUE_TYPE_F32); + undef = LLVMGetUndef(V128_f32x4_TYPE); + + if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 4))) { + HANDLE_FAILURE("LLVMVectorType"); + goto fail; + } + break; + } + case SIMD_f64x2_splat: + { + POP(value, VALUE_TYPE_F64); + undef = LLVMGetUndef(V128_f64x2_TYPE); + + if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 2))) { + HANDLE_FAILURE("LLVMVectorType"); + goto fail; + } + break; + } + default: + { + bh_assert(0); + goto fail; + } + } + if (!undef) { + HANDLE_FAILURE("LVMGetUndef"); + goto fail; + } + + /* insertelement undef, ty %value, i32 0 */ + if (!(base = LLVMBuildInsertElement(comp_ctx->builder, undef, value, + I32_ZERO, "base"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + goto fail; + } + + /* zeroinitializer */ + if (!(mask = 
              LLVMConstNull(all_zero_ty))) {
+        HANDLE_FAILURE("LLVMConstNull");
+        goto fail;
+    }
+
+    /* shufflevector %base, undef, zeroinitializer */
+    if (!(new_vector = LLVMBuildShuffleVector(comp_ctx->builder, base, undef,
+                                              mask, "new_vector"))) {
+        HANDLE_FAILURE("LLVMBuildShuffleVector");
+        goto fail;
+    }
+
+    /* bitcast to <2 x i64> */
+    if (!(result = LLVMBuildBitCast(comp_ctx->builder, new_vector,
+                                    V128_i64x2_TYPE, "ret"))) {
+        HANDLE_FAILURE("LLVMBuildBitCast");
+        goto fail;
+    }
+
+    /* push result into the stack */
+    PUSH_V128(result);
+
+    return true;
+fail:
+    return false;
+}
diff --git a/core/iwasm/compilation/simd/simd_construct_values.h b/core/iwasm/compilation/simd/simd_construct_values.h
new file mode 100644
index 000000000..12bc3dedf
--- /dev/null
+++ b/core/iwasm/compilation/simd/simd_construct_values.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _SIMD_CONSTRUCT_VALUES_H_
+#define _SIMD_CONSTRUCT_VALUES_H_
+
+#include "../aot_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+aot_compile_simd_v128_const(AOTCompContext *comp_ctx,
+                            AOTFuncContext *func_ctx,
+                            const uint8 *imm_bytes);
+
+bool
+aot_compile_simd_splat(AOTCompContext *comp_ctx,
+                       AOTFuncContext *func_ctx,
+                       uint8 splat_opcode);
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* end of _SIMD_CONSTRUCT_VALUES_H_ */
diff --git a/core/iwasm/compilation/simd/simd_conversions.c b/core/iwasm/compilation/simd/simd_conversions.c
new file mode 100644
index 000000000..f2d32c090
--- /dev/null
+++ b/core/iwasm/compilation/simd/simd_conversions.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "simd_conversions.h"
+#include "simd_common.h"
+#include "../aot_emit_exception.h"
+#include "../aot_emit_numberic.h"
+#include "../../aot/aot_runtime.h"
+
+static bool
+simd_integer_narrow(AOTCompContext *comp_ctx,
+                    AOTFuncContext *func_ctx,
+                    bool is_signed,
+                    LLVMTypeRef in_vector_type,
+                    LLVMTypeRef out_vector_type,
+                    const char *intrinsic)
+{
+    LLVMValueRef vector1, vector2, result;
+    LLVMTypeRef param_types[2] = { in_vector_type, in_vector_type };
+
+    if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                              in_vector_type, "vec2"))) {
+        goto fail;
+    }
+
+    if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                              in_vector_type, "vec1"))) {
+        goto fail;
+    }
+
+    if (!(result =
+            aot_call_llvm_intrinsic(comp_ctx, intrinsic, out_vector_type,
+                                    param_types, 2, vector1, vector2))) {
+        HANDLE_FAILURE("LLVMBuildCall");
+        goto fail;
+    }
+
+    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
+                                    "ret"))) {
+        HANDLE_FAILURE("LLVMBuildBitCast");
+        goto fail;
+    }
+
+    PUSH_V128(result);
+    return true;
+fail:
+    return false;
+}
+
+bool
+aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool is_signed)
+{
+    return simd_integer_narrow(
+      comp_ctx, func_ctx, is_signed, V128_i16x8_TYPE, V128_i8x16_TYPE,
+      is_signed ? "llvm.x86.sse2.packsswb.128" : "llvm.x86.sse2.packuswb.128");
+}
+
+bool
+aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool is_signed)
+{
+    return simd_integer_narrow(
+      comp_ctx, func_ctx, is_signed, V128_i32x4_TYPE, V128_i16x8_TYPE,
+      is_signed ?
"llvm.x86.sse2.packssdw.128" : "llvm.x86.sse41.packusdw"); +} + +bool +aot_compile_simd_i16x8_widen_i8x16(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_low_half, + bool is_signed) +{ + LLVMValueRef vector, undef, mask_high[8], mask_low[8], mask, shuffled, + result; + uint8 mask_high_value[8] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }, + mask_low_value[8] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }, i; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_i8x16_TYPE, "vec"))) { + goto fail; + } + + if (!(undef = LLVMGetUndef(V128_i8x16_TYPE))) { + HANDLE_FAILURE("LLVMGetUndef"); + goto fail; + } + + /* create a mask */ + for (i = 0; i < 8; i++) { + mask_high[i] = LLVMConstInt(I32_TYPE, mask_high_value[i], true); + mask_low[i] = LLVMConstInt(I32_TYPE, mask_low_value[i], true); + } + + mask = is_low_half ? LLVMConstVector(mask_low, 8) + : LLVMConstVector(mask_high, 8); + if (!mask) { + HANDLE_FAILURE("LLVMConstVector"); + goto fail; + } + + /* retrive the low or high half */ + if (!(shuffled = LLVMBuildShuffleVector(comp_ctx->builder, vector, undef, + mask, "shuffled"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + goto fail; + } + + if (is_signed) { + if (!(result = LLVMBuildSExt(comp_ctx->builder, shuffled, + V128_i16x8_TYPE, "ext"))) { + HANDLE_FAILURE("LLVMBuildSExt"); + goto fail; + } + } + else { + if (!(result = LLVMBuildZExt(comp_ctx->builder, shuffled, + V128_i16x8_TYPE, "ext"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + goto fail; + } + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + PUSH_V128(result); + return true; +fail: + return false; +} + +bool +aot_compile_simd_i32x4_widen_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_low_half, + bool is_signed) +{ + LLVMValueRef vector, undef, mask_high[4], mask_low[4], mask, shuffled, + result; + uint8 mask_high_value[4] = { 0x4, 0x5, 0x6, 0x7 }, + mask_low_value[4] = { 0x0, 0x1, 0x2, 0x3 }, i; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_i16x8_TYPE, "vec"))) { + goto fail; + } + + if (!(undef = LLVMGetUndef(V128_i16x8_TYPE))) { + HANDLE_FAILURE("LLVMGetUndef"); + goto fail; + } + + /* create a mask */ + for (i = 0; i < 4; i++) { + mask_high[i] = LLVMConstInt(I32_TYPE, mask_high_value[i], true); + mask_low[i] = LLVMConstInt(I32_TYPE, mask_low_value[i], true); + } + + mask = is_low_half ? 
LLVMConstVector(mask_low, 4) + : LLVMConstVector(mask_high, 4); + if (!mask) { + HANDLE_FAILURE("LLVMConstVector"); + goto fail; + } + + /* retrive the low or high half */ + if (!(shuffled = LLVMBuildShuffleVector(comp_ctx->builder, vector, undef, + mask, "shuffled"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + goto fail; + } + + if (is_signed) { + if (!(result = LLVMBuildSExt(comp_ctx->builder, shuffled, + V128_i32x4_TYPE, "ext"))) { + HANDLE_FAILURE("LLVMBuildSExt"); + goto fail; + } + } + else { + if (!(result = LLVMBuildZExt(comp_ctx->builder, shuffled, + V128_i32x4_TYPE, "ext"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + goto fail; + } + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + PUSH_V128(result); + return true; +fail: + return false; +} + +static LLVMValueRef +simd_build_const_f32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + float f) +{ + LLVMValueRef elements[4], vector; + + if (!(elements[0] = LLVMConstReal(F32_TYPE, f))) { + HANDLE_FAILURE("LLVMConstInt"); + goto fail; + } + + elements[1] = elements[2] = elements[3] = elements[0]; + + if (!(vector = LLVMConstVector(elements, 4))) { + HANDLE_FAILURE("LLVMConstVector"); + goto fail; + } + + return vector; +fail: + return NULL; +} + +static LLVMValueRef +simd_build_const_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint64 integer, + bool is_signed) +{ + LLVMValueRef elements[4], vector; + + if (!(elements[0] = LLVMConstInt(I32_TYPE, integer, is_signed))) { + HANDLE_FAILURE("LLVMConstInt"); + goto fail; + } + + elements[1] = elements[2] = elements[3] = elements[0]; + + if (!(vector = LLVMConstVector(elements, 4))) { + HANDLE_FAILURE("LLVMConstVector"); + goto fail; + } + + return vector; +fail: + return NULL; +} + +bool +aot_compile_simd_i32x4_trunc_sat_f32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed) +{ + LLVMValueRef vector, zeros, is_nan, max_float_v, min_float_v, is_ge_max, + is_le_min, result, max_int_v, min_int_v; + uint32 max_ui = 0xFFffFFff, min_ui = 0x0; + int32 max_si = 0x7FFFffff, min_si = 0x80000000; + float max_f_ui = 4294967296.0f, min_f_ui = 0.0f, max_f_si = 2147483647.0f, + min_f_si = -2147483648.0f; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_f32x4_TYPE, "vec"))) { + goto fail; + } + + if (!(zeros = LLVMConstNull(V128_f32x4_TYPE))) { + HANDLE_FAILURE("LLVMConstNull"); + goto fail; + } + + if (is_signed) { + if (!(max_float_v = + simd_build_const_f32x4(comp_ctx, func_ctx, max_f_si))) { + goto fail; + } + + if (!(min_float_v = + simd_build_const_f32x4(comp_ctx, func_ctx, min_f_si))) { + goto fail; + } + + if (!(max_int_v = + simd_build_const_i32x4(comp_ctx, func_ctx, max_si, true))) { + goto fail; + } + + if (!(min_int_v = + simd_build_const_i32x4(comp_ctx, func_ctx, min_si, true))) { + goto fail; + } + } + else { + if (!(max_float_v = + simd_build_const_f32x4(comp_ctx, func_ctx, max_f_ui))) { + goto fail; + } + + if (!(min_float_v = + simd_build_const_f32x4(comp_ctx, func_ctx, min_f_ui))) { + goto fail; + } + + if (!(max_int_v = + simd_build_const_i32x4(comp_ctx, func_ctx, max_ui, false))) { + goto fail; + } + + if (!(min_int_v = + simd_build_const_i32x4(comp_ctx, func_ctx, min_ui, false))) { + goto fail; + } + } + + if (!(is_nan = LLVMBuildFCmp(comp_ctx->builder, LLVMRealORD, vector, zeros, + "is_nan"))) { + HANDLE_FAILURE("LLVMBuildFCmp"); + goto fail; + } + + if (!(is_le_min = LLVMBuildFCmp(comp_ctx->builder, 
                                    LLVMRealOLE, vector,
+                                    min_float_v, "le_min"))) {
+        HANDLE_FAILURE("LLVMBuildFCmp");
+        goto fail;
+    }
+
+    if (!(is_ge_max = LLVMBuildFCmp(comp_ctx->builder, LLVMRealOGE, vector,
+                                    max_float_v, "ge_max"))) {
+        HANDLE_FAILURE("LLVMBuildFCmp");
+        goto fail;
+    }
+
+    if (is_signed) {
+        if (!(result = LLVMBuildFPToSI(comp_ctx->builder, vector,
+                                       V128_i32x4_TYPE, "truncated"))) {
+            HANDLE_FAILURE("LLVMBuildFPToSI");
+            goto fail;
+        }
+    }
+    else {
+        if (!(result = LLVMBuildFPToUI(comp_ctx->builder, vector,
+                                       V128_i32x4_TYPE, "truncated"))) {
+            HANDLE_FAILURE("LLVMBuildFPToUI");
+            goto fail;
+        }
+    }
+
+    if (!(result = LLVMBuildSelect(comp_ctx->builder, is_ge_max, max_int_v,
+                                   result, "sat_w_max"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(result = LLVMBuildSelect(comp_ctx->builder, is_le_min, min_int_v,
+                                   result, "sat_w_min"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(result = LLVMBuildSelect(comp_ctx->builder, is_nan, result,
+                                   V128_i32x4_ZERO, "sat_w_nan"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
+                                    "ret"))) {
+        HANDLE_FAILURE("LLVMBuildBitCast");
+        goto fail;
+    }
+
+    PUSH_V128(result);
+    return true;
+fail:
+    return false;
+}
+
+bool
+aot_compile_simd_f32x4_convert_i32x4(AOTCompContext *comp_ctx,
+                                     AOTFuncContext *func_ctx,
+                                     bool is_signed)
+{
+    LLVMValueRef vector, result;
+
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             V128_i32x4_TYPE, "vec"))) {
+        goto fail;
+    }
+
+    if (is_signed) {
+        if (!(result = LLVMBuildSIToFP(comp_ctx->builder, vector,
+                                       V128_f32x4_TYPE, "converted"))) {
+            HANDLE_FAILURE("LLVMBuildSIToFP");
+            goto fail;
+        }
+    }
+    else {
+        if (!(result = LLVMBuildUIToFP(comp_ctx->builder, vector,
+                                       V128_f32x4_TYPE, "converted"))) {
+            HANDLE_FAILURE("LLVMBuildUIToFP");
+            goto fail;
+        }
+    }
+
+    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
+                                    "ret"))) {
+        HANDLE_FAILURE("LLVMBuildBitCast");
+        goto fail;
+    }
+
+    PUSH_V128(result);
+    return true;
+fail:
+    return false;
+}
diff --git a/core/iwasm/compilation/simd/simd_conversions.h b/core/iwasm/compilation/simd/simd_conversions.h
new file mode 100644
index 000000000..823b5dc3a
--- /dev/null
+++ b/core/iwasm/compilation/simd/simd_conversions.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _SIMD_CONVERSIONS_H_ +#define _SIMD_CONVERSIONS_H_ + +#include "../aot_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed); + +bool +aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed); + +bool +aot_compile_simd_i16x8_widen_i8x16(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_low, + bool is_signed); + +bool +aot_compile_simd_i32x4_widen_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_low, + bool is_signed); + +bool +aot_compile_simd_i32x4_trunc_sat_f32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed); + +bool +aot_compile_simd_f32x4_convert_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* end of _SIMD_CONVERSIONS_H_ */ diff --git a/core/iwasm/compilation/simd/simd_floating_point.c b/core/iwasm/compilation/simd/simd_floating_point.c new file mode 100644 index 000000000..24dc8fc51 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_floating_point.c @@ -0,0 +1,273 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "simd_floating_point.h" +#include "simd_common.h" +#include "../aot_emit_exception.h" +#include "../aot_emit_numberic.h" +#include "../../aot/aot_runtime.h" + +static LLVMValueRef +simd_v128_float_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + FloatArithmetic arith_op, + LLVMValueRef lhs, + LLVMValueRef rhs) +{ + LLVMValueRef result; + LLVMRealPredicate op; + + op = FLOAT_MIN == arith_op ? 
LLVMRealULT : LLVMRealUGT; + + if (!(result = LLVMBuildFCmp(comp_ctx->builder, op, lhs, rhs, "cmp"))) { + HANDLE_FAILURE("LLVMBuildFCmp"); + goto fail; + } + + if (!(result = + LLVMBuildSelect(comp_ctx->builder, result, lhs, rhs, "select"))) { + HANDLE_FAILURE("LLVMBuildSelect"); + goto fail; + } + + return result; +fail: + return NULL; +} + +static bool +simd_v128_float_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + FloatArithmetic arith_op, + LLVMTypeRef vector_type) +{ + LLVMValueRef lhs, rhs, result; + + if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "rhs"))) { + goto fail; + } + + if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "lhs"))) { + goto fail; + } + + switch (arith_op) { + case FLOAT_ADD: + if (!(result = + LLVMBuildFAdd(comp_ctx->builder, lhs, rhs, "sum"))) { + HANDLE_FAILURE("LLVMBuildFAdd"); + goto fail; + } + break; + case FLOAT_SUB: + if (!(result = LLVMBuildFSub(comp_ctx->builder, lhs, rhs, + "difference"))) { + HANDLE_FAILURE("LLVMBuildFSub"); + goto fail; + } + break; + case FLOAT_MUL: + if (!(result = + LLVMBuildFMul(comp_ctx->builder, lhs, rhs, "product"))) { + HANDLE_FAILURE("LLVMBuildFMul"); + goto fail; + } + break; + case FLOAT_DIV: + if (!(result = + LLVMBuildFDiv(comp_ctx->builder, lhs, rhs, "quotient"))) { + HANDLE_FAILURE("LLVMBuildFDiv"); + goto fail; + } + break; + case FLOAT_MIN: + if (!(result = simd_v128_float_cmp(comp_ctx, func_ctx, FLOAT_MIN, + lhs, rhs))) { + goto fail; + } + break; + case FLOAT_MAX: + if (!(result = simd_v128_float_cmp(comp_ctx, func_ctx, FLOAT_MAX, + lhs, rhs))) { + goto fail; + } + break; + default: + result = NULL; + bh_assert(0); + break; + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + /* push result into the stack */ + PUSH_V128(result); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_f32x4_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + FloatArithmetic arith_op) +{ + return simd_v128_float_arith(comp_ctx, func_ctx, arith_op, + V128_f32x4_TYPE); +} + +bool +aot_compile_simd_f64x2_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + FloatArithmetic arith_op) +{ + return simd_v128_float_arith(comp_ctx, func_ctx, arith_op, + V128_f64x2_TYPE); +} + +static bool +simd_v128_float_neg(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type) +{ + LLVMValueRef number, result; + + if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "number"))) { + goto fail; + } + + if (!(result = LLVMBuildFNeg(comp_ctx->builder, number, "neg"))) { + HANDLE_FAILURE("LLVMBuildFNeg"); + goto fail; + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + /* push result into the stack */ + PUSH_V128(result); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_f32x4_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_neg(comp_ctx, func_ctx, V128_f32x4_TYPE); +} + +bool +aot_compile_simd_f64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_neg(comp_ctx, func_ctx, V128_f64x2_TYPE); +} + +static bool +simd_v128_float_abs(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type, + const char *intrinsic) +{ + LLVMValueRef vector, result; + LLVMTypeRef param_types[1] = { vector_type }; + + if 
(!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec"))) { + goto fail; + } + + if (!(result = aot_call_llvm_intrinsic(comp_ctx, intrinsic, vector_type, + param_types, 1, vector))) { + HANDLE_FAILURE("LLVMBuildCall"); + goto fail; + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + /* push result into the stack */ + PUSH_V128(result); + return true; +fail: + return false; +} + +bool +aot_compile_simd_f32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_abs(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.fabs.v4f32"); +} + +bool +aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_abs(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.fabs.v2f64"); +} + +static bool +simd_v128_float_sqrt(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type, + const char *intrinsic) +{ + LLVMValueRef number, result; + LLVMTypeRef param_types[1] = { vector_type }; + + if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "number"))) { + goto fail; + } + + if (!(result = aot_call_llvm_intrinsic(comp_ctx, intrinsic, vector_type, + param_types, 1, number))) { + HANDLE_FAILURE("LLVMBuildCall"); + goto fail; + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + /* push result into the stack */ + PUSH_V128(result); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_sqrt(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.sqrt.v4f32"); +} + +bool +aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_sqrt(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.sqrt.v2f64"); +} diff --git a/core/iwasm/compilation/simd/simd_floating_point.h b/core/iwasm/compilation/simd/simd_floating_point.h new file mode 100644 index 000000000..cb254b614 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_floating_point.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _SIMD_FLOATING_POINT_H_ +#define _SIMD_FLOATING_POINT_H_ + +#include "../aot_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +aot_compile_simd_f32x4_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + FloatArithmetic arith_op); + +bool +aot_compile_simd_f64x2_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + FloatArithmetic arith_op); + +bool +aot_compile_simd_f32x4_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* end of _SIMD_FLOATING_POINT_H_ */ diff --git a/core/iwasm/compilation/simd/simd_int_arith.c b/core/iwasm/compilation/simd/simd_int_arith.c new file mode 100644 index 000000000..4a83e3be7 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_int_arith.c @@ -0,0 +1,207 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "simd_int_arith.h" +#include "simd_common.h" +#include "../aot_emit_exception.h" +#include "../../aot/aot_runtime.h" + +static bool +simd_v128_integer_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + LLVMValueRef lhs, + LLVMValueRef rhs) +{ + LLVMValueRef result; + + switch (arith_op) { + case V128_ADD: + if (!(result = LLVMBuildAdd(comp_ctx->builder, lhs, rhs, "sum"))) { + HANDLE_FAILURE("LLVMBuildAdd"); + goto fail; + } + break; + case V128_SUB: + if (!(result = + LLVMBuildSub(comp_ctx->builder, lhs, rhs, "difference"))) { + HANDLE_FAILURE("LLVMBuildSub"); + goto fail; + } + break; + case V128_MUL: + if (!(result = + LLVMBuildMul(comp_ctx->builder, lhs, rhs, "product"))) { + HANDLE_FAILURE("LLVMBuildMul"); + goto fail; + } + break; + case V128_NEG: + if (!(result = LLVMBuildNeg(comp_ctx->builder, lhs, "neg"))) { + HANDLE_FAILURE("LLVMBuildNeg"); + goto fail; + } + break; + default: + result = NULL; + bh_assert(0); + break; + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + /* push result into the stack */ + PUSH_V128(result); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_i8x16_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op) +{ + LLVMValueRef lhs, rhs; + + if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE, + "rhs"))) { + goto fail; + } + + if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE, + "lhs"))) { + goto fail; + } + + return simd_v128_integer_arith(comp_ctx, func_ctx, arith_op, lhs, rhs); +fail: + return NULL; +} + +bool +aot_compile_simd_i16x8_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op) +{ + LLVMValueRef lhs, rhs; + + if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i16x8_TYPE, + "rhs"))) { + goto fail; + } + + if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i16x8_TYPE, + "lhs"))) { + goto fail; + } + + return 
simd_v128_integer_arith(comp_ctx, func_ctx, arith_op, lhs, rhs); +fail: + return NULL; +} + +bool +aot_compile_simd_i32x4_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op) +{ + LLVMValueRef lhs, rhs; + + if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i32x4_TYPE, + "rhs"))) { + goto fail; + } + + if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i32x4_TYPE, + "lhs"))) { + goto fail; + } + + return simd_v128_integer_arith(comp_ctx, func_ctx, arith_op, lhs, rhs); +fail: + return NULL; +} + +bool +aot_compile_simd_i64x2_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op) +{ + LLVMValueRef lhs, rhs; + + POP_V128(rhs); + POP_V128(lhs); + + return simd_v128_integer_arith(comp_ctx, func_ctx, arith_op, lhs, rhs); +fail: + return false; +} + +bool +aot_compile_simd_i8x16_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + LLVMValueRef number; + + if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_i8x16_TYPE, "number"))) { + goto fail; + } + + return simd_v128_integer_arith(comp_ctx, func_ctx, V128_NEG, number, NULL); + +fail: + return false; +} + +bool +aot_compile_simd_i16x8_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + LLVMValueRef number; + + if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_i16x8_TYPE, "number"))) { + goto fail; + } + + return simd_v128_integer_arith(comp_ctx, func_ctx, V128_NEG, number, NULL); + +fail: + return false; +} + +bool +aot_compile_simd_i32x4_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + LLVMValueRef number; + + if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_i32x4_TYPE, "number"))) { + goto fail; + } + + return simd_v128_integer_arith(comp_ctx, func_ctx, V128_NEG, number, NULL); + +fail: + return false; +} + +bool +aot_compile_simd_i64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + LLVMValueRef number; + + POP_V128(number); + + return simd_v128_integer_arith(comp_ctx, func_ctx, V128_NEG, number, NULL); + +fail: + return false; +} diff --git a/core/iwasm/compilation/simd/simd_int_arith.h b/core/iwasm/compilation/simd/simd_int_arith.h new file mode 100644 index 000000000..5cd77899d --- /dev/null +++ b/core/iwasm/compilation/simd/simd_int_arith.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _SIMD_INT_ARITH_H_ +#define _SIMD_INT_ARITH_H_ + +#include "../aot_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +aot_compile_simd_i8x16_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic cond); + +bool +aot_compile_simd_i16x8_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic cond); + +bool +aot_compile_simd_i32x4_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic cond); + +bool +aot_compile_simd_i64x2_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic cond); + +bool +aot_compile_simd_i8x16_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i16x8_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i32x4_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* end of _SIMD_INT_ARITH_H_ */ diff --git a/core/iwasm/compilation/simd/simd_load_store.c b/core/iwasm/compilation/simd/simd_load_store.c new file mode 100644 index 000000000..6a15ff98b --- /dev/null +++ b/core/iwasm/compilation/simd/simd_load_store.c @@ -0,0 +1,301 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "simd_load_store.h" +#include "../aot_emit_exception.h" +#include "../aot_emit_memory.h" +#include "../../aot/aot_runtime.h" +#include "../../interpreter/wasm_opcode.h" + +/* data_length in bytes */ +static LLVMValueRef +simd_load(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint32 align, + uint32 offset, + uint32 data_length, + LLVMTypeRef ptr_type) +{ + LLVMValueRef maddr, data; + + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, + data_length))) { + HANDLE_FAILURE("aot_check_memory_overflow"); + goto fail; + } + + if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, ptr_type, + "data_ptr"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + if (!(data = LLVMBuildLoad(comp_ctx->builder, maddr, "data"))) { + HANDLE_FAILURE("LLVMBuildLoad"); + goto fail; + } + + LLVMSetAlignment(data, 1); + + return data; +fail: + return NULL; +} + +/* data_length in bytes */ +static LLVMValueRef +simd_splat(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMValueRef element, + LLVMTypeRef vectory_type, + unsigned lane_count) +{ + LLVMValueRef undef, zeros, vector; + LLVMTypeRef zeros_type; + + if (!(undef = LLVMGetUndef(vectory_type))) { + HANDLE_FAILURE("LLVMGetUndef"); + goto fail; + } + + if (!(zeros_type = LLVMVectorType(I32_TYPE, lane_count))) { + HANDLE_FAILURE("LVMVectorType"); + goto fail; + } + + if (!(zeros = LLVMConstNull(zeros_type))) { + HANDLE_FAILURE("LLVMConstNull"); + goto fail; + } + + if (!(vector = LLVMBuildInsertElement(comp_ctx->builder, undef, element, + I32_ZERO, "base"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + goto fail; + } + + if (!(vector = LLVMBuildShuffleVector(comp_ctx->builder, vector, undef, + zeros, "vector"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + goto fail; + } + + return vector; +fail: + return NULL; +} + +bool +aot_compile_simd_v128_load(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint32 align, + uint32 offset) +{ + LLVMValueRef result; + + if (!(result = + simd_load(comp_ctx, func_ctx, align, offset, 16, 
V128_PTR_TYPE))) { + goto fail; + } + + PUSH_V128(result); + return true; +fail: + return false; +} + +bool +aot_compile_simd_v128_store(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint32 align, + uint32 offset) +{ + LLVMValueRef maddr, value, result; + + POP_V128(value); + + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 16))) + return false; + + if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, V128_PTR_TYPE, + "data_ptr"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + if (!(result = LLVMBuildStore(comp_ctx->builder, value, maddr))) { + HANDLE_FAILURE("LLVMBuildStore"); + goto fail; + } + + LLVMSetAlignment(result, 1); + + return true; +fail: + return false; +} + +bool +aot_compile_simd_load_extend(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 load_opcode, + uint32 align, + uint32 offset) +{ + LLVMValueRef sub_vector, result; + LLVMTypeRef sub_vector_type, vector_type; + bool is_signed; + uint32 data_length; + + switch (load_opcode) { + case SIMD_i16x8_load8x8_s: + case SIMD_i16x8_load8x8_u: + { + data_length = 8; + vector_type = V128_i16x8_TYPE; + is_signed = (load_opcode == SIMD_i16x8_load8x8_s); + + if (!(sub_vector_type = LLVMVectorType(INT8_TYPE, 8))) { + HANDLE_FAILURE("LLVMVectorType"); + goto fail; + } + + break; + } + case SIMD_i32x4_load16x4_s: + case SIMD_i32x4_load16x4_u: + { + data_length = 8; + vector_type = V128_i32x4_TYPE; + is_signed = (load_opcode == SIMD_i32x4_load16x4_s); + + if (!(sub_vector_type = LLVMVectorType(INT16_TYPE, 4))) { + HANDLE_FAILURE("LLVMVectorType"); + goto fail; + } + + break; + } + case SIMD_i64x2_load32x2_s: + case SIMD_i64x2_load32x2_u: + { + data_length = 8; + vector_type = V128_i64x2_TYPE; + is_signed = (load_opcode == SIMD_i64x2_load32x2_s); + + if (!(sub_vector_type = LLVMVectorType(I32_TYPE, 2))) { + HANDLE_FAILURE("LLVMVectorType"); + goto fail; + } + + break; + } + default: + { + bh_assert(0); + goto fail; + } + } + + /* to vector ptr type */ + if (!(sub_vector_type = LLVMPointerType(sub_vector_type, 0))) { + HANDLE_FAILURE("LLVMPointerType"); + goto fail; + } + + if (!(sub_vector = simd_load(comp_ctx, func_ctx, align, offset, + data_length, sub_vector_type))) { + goto fail; + } + + if (is_signed) { + if (!(result = LLVMBuildSExt(comp_ctx->builder, sub_vector, + vector_type, "vector"))) { + HANDLE_FAILURE("LLVMBuildSExt"); + goto fail; + } + } + else { + if (!(result = LLVMBuildZExt(comp_ctx->builder, sub_vector, + vector_type, "vector"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + goto fail; + } + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "result"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + PUSH_V128(result); + return true; +fail: + return false; +} + +bool +aot_compile_simd_load_splat(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 load_opcode, + uint32 align, + uint32 offset) +{ + LLVMValueRef element, result; + LLVMTypeRef element_ptr_type, vector_type; + unsigned data_length, lane_count; + + switch (load_opcode) { + case SIMD_v8x16_load_splat: + data_length = 1; + lane_count = 16; + element_ptr_type = INT8_PTR_TYPE; + vector_type = V128_i8x16_TYPE; + break; + case SIMD_v16x8_load_splat: + data_length = 2; + lane_count = 8; + element_ptr_type = INT16_PTR_TYPE; + vector_type = V128_i16x8_TYPE; + break; + case SIMD_v32x4_load_splat: + data_length = 4; + lane_count = 4; + element_ptr_type = INT32_PTR_TYPE; + vector_type = V128_i32x4_TYPE; + break; + case SIMD_v64x2_load_splat: + data_length = 8; + 
lane_count = 2; + element_ptr_type = INT64_PTR_TYPE; + vector_type = V128_i64x2_TYPE; + break; + default: + bh_assert(0); + goto fail; + } + + if (!(element = simd_load(comp_ctx, func_ctx, align, offset, data_length, + element_ptr_type))) { + goto fail; + } + + if (!(result = simd_splat(comp_ctx, func_ctx, element, vector_type, + lane_count))) { + goto fail; + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "result"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + PUSH_V128(result); + return true; +fail: + return false; +} diff --git a/core/iwasm/compilation/simd/simd_load_store.h b/core/iwasm/compilation/simd/simd_load_store.h new file mode 100644 index 000000000..dbf662ad1 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_load_store.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _SIMD_LOAD_STORE_H_ +#define _SIMD_LOAD_STORE_H_ + +#include "../aot_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +aot_compile_simd_v128_load(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint32 align, + uint32 offset); + +bool +aot_compile_simd_v128_store(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint32 align, + uint32 offset); + +bool +aot_compile_simd_load_extend(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 load_opcode, + uint32 align, + uint32 offset); + +bool +aot_compile_simd_load_splat(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 load_opcode, + uint32 align, + uint32 offset); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* end of _SIMD_LOAD_STORE_H_ */ diff --git a/core/iwasm/compilation/simd/simd_sat_int_arith.c b/core/iwasm/compilation/simd/simd_sat_int_arith.c new file mode 100644 index 000000000..d8f85da76 --- /dev/null +++ b/core/iwasm/compilation/simd/simd_sat_int_arith.c @@ -0,0 +1,367 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "simd_sat_int_arith.h" +#include "simd_common.h" +#include "../aot_emit_exception.h" +#include "../../aot/aot_runtime.h" + +static bool +simd_v128_integer_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type, + char *intrinsics_s_u[2], + bool is_signed) +{ + LLVMValueRef lhs, rhs, result; + LLVMTypeRef param_types[2]; + + if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "rhs"))) { + goto fail; + } + + if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "lhs"))) { + goto fail; + } + + param_types[0] = vector_type; + param_types[1] = vector_type; + + if (!(result = aot_call_llvm_intrinsic( + comp_ctx, is_signed ? 
intrinsics_s_u[0] : intrinsics_s_u[1], + vector_type, param_types, 2, lhs, rhs))) { + HANDLE_FAILURE("LLVMBuildCall"); + goto fail; + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + /* push result into the stack */ + PUSH_V128(result); + return true; +fail: + return false; +} + +bool +aot_compile_simd_i8x16_saturate(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed) +{ + char *intrinsics[2] = { 0 }; + bool result = false; + switch (arith_op) { + case V128_ADD: + intrinsics[0] = "llvm.sadd.sat.v16i8"; + intrinsics[1] = "llvm.uadd.sat.v16i8"; + result = simd_v128_integer_arith( + comp_ctx, func_ctx, V128_i8x16_TYPE, intrinsics, is_signed); + break; + case V128_SUB: + intrinsics[0] = "llvm.ssub.sat.v16i8"; + intrinsics[1] = "llvm.usub.sat.v16i8"; + result = simd_v128_integer_arith( + comp_ctx, func_ctx, V128_i8x16_TYPE, intrinsics, is_signed); + break; + default: + bh_assert(0); + break; + } + + return result; +} + +bool +aot_compile_simd_i16x8_saturate(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed) +{ + char *intrinsics[2] = { 0 }; + bool result = false; + switch (arith_op) { + case V128_ADD: + intrinsics[0] = "llvm.sadd.sat.v8i16"; + intrinsics[1] = "llvm.uadd.sat.v8i16"; + result = simd_v128_integer_arith( + comp_ctx, func_ctx, V128_i16x8_TYPE, intrinsics, is_signed); + break; + case V128_SUB: + intrinsics[0] = "llvm.ssub.sat.v8i16"; + intrinsics[1] = "llvm.usub.sat.v8i16"; + result = simd_v128_integer_arith( + comp_ctx, func_ctx, V128_i16x8_TYPE, intrinsics, is_signed); + break; + default: + bh_assert(0); + break; + } + + return result; +} + +static bool +simd_v128_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type, + V128Arithmetic arith_op, + bool is_signed) +{ + LLVMValueRef lhs, rhs, result; + LLVMIntPredicate op; + + if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "rhs"))) { + goto fail; + } + + if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "lhs"))) { + goto fail; + } + + if (V128_MIN == arith_op) { + op = is_signed ? LLVMIntSLT : LLVMIntULT; + } + else { + op = is_signed ? 
LLVMIntSGT : LLVMIntUGT; + } + + if (!(result = LLVMBuildICmp(comp_ctx->builder, op, lhs, rhs, "cmp"))) { + HANDLE_FAILURE("LLVMBuildICmp"); + goto fail; + } + + if (!(result = + LLVMBuildSelect(comp_ctx->builder, result, lhs, rhs, "select"))) { + HANDLE_FAILURE("LLVMBuildSelect"); + goto fail; + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + /* push result into the stack */ + PUSH_V128(result); + return true; +fail: + return false; +} + +bool +aot_compile_simd_i8x16_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed) +{ + return simd_v128_cmp(comp_ctx, func_ctx, V128_i8x16_TYPE, arith_op, + is_signed); +} + +bool +aot_compile_simd_i16x8_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed) +{ + return simd_v128_cmp(comp_ctx, func_ctx, V128_i16x8_TYPE, arith_op, + is_signed); +} + +bool +aot_compile_simd_i32x4_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed) +{ + return simd_v128_cmp(comp_ctx, func_ctx, V128_i32x4_TYPE, arith_op, + is_signed); +} + +static bool +simd_v128_abs(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type) +{ + LLVMValueRef vector, negs, zeros, cond, result; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec"))) { + goto fail; + } + + if (!(negs = LLVMBuildNeg(comp_ctx->builder, vector, "neg"))) { + HANDLE_FAILURE("LLVMBuildNeg"); + goto fail; + } + + if (!(zeros = LLVMConstNull(vector_type))) { + HANDLE_FAILURE("LLVMConstNull"); + goto fail; + } + + if (!(cond = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGE, vector, zeros, + "ge_zero"))) { + HANDLE_FAILURE("LLVMBuildICmp"); + goto fail; + } + + if (!(result = LLVMBuildSelect(comp_ctx->builder, cond, vector, negs, + "select"))) { + HANDLE_FAILURE("LLVMBuildSelect"); + goto fail; + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + /* push result into the stack */ + PUSH_V128(result); + return true; +fail: + return false; +} + +bool +aot_compile_simd_i8x16_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_abs(comp_ctx, func_ctx, V128_i8x16_TYPE); +} + +bool +aot_compile_simd_i16x8_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_abs(comp_ctx, func_ctx, V128_i16x8_TYPE); +} + +bool +aot_compile_simd_i32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_abs(comp_ctx, func_ctx, V128_i32x4_TYPE); +} + +/* (v1 + v2 + 1) / 2 */ +static bool +simd_v128_avg(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type, + LLVMTypeRef element_type, + unsigned lane_width) +{ + LLVMValueRef lhs, rhs, undef, zeros, ones, result; + LLVMTypeRef ext_type; + + if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "rhs"))) { + goto fail; + } + + if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "lhs"))) { + goto fail; + } + + if (!(ext_type = LLVMVectorType(I32_TYPE, lane_width))) { + HANDLE_FAILURE("LLVMVectorType"); + goto fail; + } + + if (!(lhs = LLVMBuildZExt(comp_ctx->builder, lhs, ext_type, "left_ext"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + goto fail; + } + + if (!(rhs = + LLVMBuildZExt(comp_ctx->builder, rhs, ext_type, "right_ext"))) { + 
HANDLE_FAILURE("LLVMBuildZExt"); + goto fail; + } + + if (!(undef = LLVMGetUndef(ext_type))) { + HANDLE_FAILURE("LLVMGetUndef"); + goto fail; + } + + if (!(zeros = LLVMConstNull(ext_type))) { + HANDLE_FAILURE("LLVMConstNull"); + goto fail; + } + + if (!(ones = LLVMConstInt(I32_TYPE, 1, true))) { + HANDLE_FAILURE("LLVMConstInt"); + goto fail; + } + + if (!(ones = LLVMBuildInsertElement(comp_ctx->builder, undef, ones, + I32_ZERO, "base_ones"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + goto fail; + } + + if (!(ones = LLVMBuildShuffleVector(comp_ctx->builder, ones, undef, zeros, + "ones"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + goto fail; + } + + if (!(result = LLVMBuildAdd(comp_ctx->builder, lhs, rhs, "a_add_b"))) { + HANDLE_FAILURE("LLVMBuildAdd"); + goto fail; + } + + if (!(result = LLVMBuildAdd(comp_ctx->builder, result, ones, "plus_1"))) { + HANDLE_FAILURE("LLVMBuildAdd"); + goto fail; + } + + if (!(result = LLVMBuildLShr(comp_ctx->builder, result, ones, "avg"))) { + HANDLE_FAILURE("LLVMBuildLShr"); + goto fail; + } + + if (!(result = LLVMBuildTrunc(comp_ctx->builder, result, vector_type, + "avg_trunc"))) { + HANDLE_FAILURE("LLVMBuildTrunc"); + goto fail; + } + + if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, + "ret"))) { + HANDLE_FAILURE("LLVMBuildBitCast"); + goto fail; + } + + /* push result into the stack */ + PUSH_V128(result); + return true; +fail: + return false; +} +bool +aot_compile_simd_i8x16_avgr_u(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_v128_avg(comp_ctx, func_ctx, V128_i8x16_TYPE, INT8_TYPE, 16); +} + +bool +aot_compile_simd_i16x8_avgr_u(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_v128_avg(comp_ctx, func_ctx, V128_i16x8_TYPE, INT16_TYPE, 8); +} \ No newline at end of file diff --git a/core/iwasm/compilation/simd/simd_sat_int_arith.h b/core/iwasm/compilation/simd/simd_sat_int_arith.h new file mode 100644 index 000000000..57669878e --- /dev/null +++ b/core/iwasm/compilation/simd/simd_sat_int_arith.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _SIMD_SAT_INT_ARITH_H_ +#define _SIMD_SAT_INT_ARITH_H_ + +#include "../aot_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool +aot_compile_simd_i8x16_saturate(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed); + +bool +aot_compile_simd_i16x8_saturate(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed); + +bool +aot_compile_simd_i8x16_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed); + +bool +aot_compile_simd_i16x8_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed); + +bool +aot_compile_simd_i32x4_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed); + +bool +aot_compile_simd_i8x16_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i16x8_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i8x16_avgr_u(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i16x8_avgr_u(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* end of _SIMD_SAT_INT_ARITH_H_ */ diff --git a/core/iwasm/include/aot_export.h b/core/iwasm/include/aot_export.h index ca0ffb6a8..f2d339622 100644 --- a/core/iwasm/include/aot_export.h +++ b/core/iwasm/include/aot_export.h @@ -42,6 +42,7 @@ typedef struct AOTCompOption{ bool enable_bulk_memory; bool enable_thread_mgr; bool enable_tail_call; + bool enable_simd; bool is_sgx_platform; uint32_t opt_level; uint32_t size_level; diff --git a/core/iwasm/interpreter/wasm.h b/core/iwasm/interpreter/wasm.h index fea564260..040b33128 100644 --- a/core/iwasm/interpreter/wasm.h +++ b/core/iwasm/interpreter/wasm.h @@ -19,6 +19,7 @@ extern "C" { #define VALUE_TYPE_I64 0X7E #define VALUE_TYPE_F32 0x7D #define VALUE_TYPE_F64 0x7C +#define VALUE_TYPE_V128 0x7B #define VALUE_TYPE_VOID 0x40 /* Used by AOT */ #define VALUE_TYPE_I1 0x41 @@ -34,6 +35,7 @@ extern "C" { #define INIT_EXPR_TYPE_I64_CONST 0x42 #define INIT_EXPR_TYPE_F32_CONST 0x43 #define INIT_EXPR_TYPE_F64_CONST 0x44 +#define INIT_EXPR_TYPE_V128_CONST 0xFD #define INIT_EXPR_TYPE_GET_GLOBAL 0x23 #define INIT_EXPR_TYPE_ERROR 0xff @@ -79,6 +81,15 @@ typedef struct WASMModule WASMModule; typedef struct WASMFunction WASMFunction; typedef struct WASMGlobal WASMGlobal; +typedef union V128 { + int8 i8x16[16]; + int16 i16x8[8]; + int32 i32x8[4]; + int64 i64x2[2]; + float32 f32x4[4]; + float64 f64x2[2]; +} V128; + typedef union WASMValue { int32 i32; uint32 u32; @@ -87,6 +98,7 @@ typedef union WASMValue { float32 f32; float64 f64; uintptr_t addr; + V128 v128; } WASMValue; typedef struct InitializerExpression { @@ -98,6 +110,7 @@ typedef struct InitializerExpression { float32 f32; float64 f64; uint32 global_index; + V128 v128; } u; } InitializerExpression; @@ -448,6 +461,10 @@ wasm_value_type_size(uint8 value_type) case VALUE_TYPE_I64: case VALUE_TYPE_F64: return sizeof(int64); +#if WASM_ENABLE_SIMD != 0 + case VALUE_TYPE_V128: + return sizeof(int64) * 2; +#endif default: bh_assert(0); } @@ -465,6 +482,10 @@ wasm_value_type_cell_num(uint8 value_type) else if (value_type == VALUE_TYPE_I64 || value_type == VALUE_TYPE_F64) return 2; +#if WASM_ENABLE_SIMD != 0 + else if 
(value_type == VALUE_TYPE_V128) + return 4; +#endif else { bh_assert(0); } diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index f542f9deb..5d8b997e1 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -231,6 +231,23 @@ fail: res = (int32)res64; \ } while (0) +#if WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) +static V128 +read_i8x16(uint8 *p_buf, char* error_buf, uint32 error_buf_size) +{ + V128 result; + uint8 i; + + for (i = 0; i != 16; ++i) { + result.i8x16[i] = read_uint8(p_buf); + } + + return result; +} +#endif /* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) */ +#endif /* end of WASM_ENABLE_SIMD */ + static void * loader_malloc(uint64 size, char *error_buf, uint32 error_buf_size) { @@ -412,6 +429,29 @@ load_init_expr(const uint8 **p_buf, const uint8 *buf_end, for (i = 0; i < sizeof(float64); i++) *p_float++ = *p++; break; +#if WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) + case INIT_EXPR_TYPE_V128_CONST: + { + uint8 flag; + uint64 high, low; + + if (type != VALUE_TYPE_V128) + goto fail; + + flag = read_uint8(p); + (void)flag; + + CHECK_BUF(p, p_end, 16); + wasm_runtime_read_v128(p, &high, &low); + p += 16; + + init_expr->u.v128.i64x2[0] = high; + init_expr->u.v128.i64x2[1] = low; + break; + } +#endif /* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) */ +#endif /* end of WASM_ENABLE_SIMD */ /* get_global */ case INIT_EXPR_TYPE_GET_GLOBAL: read_leb_uint32(p, p_end, init_expr->u.global_index); @@ -1794,7 +1834,13 @@ load_function_section(const uint8 *buf, const uint8 *buf_end, CHECK_BUF(p_code, buf_code_end, 1); /* 0x7F/0x7E/0x7D/0x7C */ type = read_uint8(p_code); - if (type < VALUE_TYPE_F64 || type > VALUE_TYPE_I32) { + if ((type < VALUE_TYPE_F64 || type > VALUE_TYPE_I32) +#if WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) + && type != VALUE_TYPE_V128 +#endif +#endif + ) { set_error_buf(error_buf, error_buf_size, "invalid local type"); return false; @@ -2031,6 +2077,12 @@ load_export_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module, "unknown function"); return false; } +#if WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) + /* TODO: check func type, if it has v128 param or result, + report error */ +#endif +#endif break; /*table index*/ case EXPORT_KIND_TABLE: @@ -3529,6 +3581,81 @@ wasm_loader_find_block_addr(BlockAddr *block_addr_cache, } break; } + +#if WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) + case WASM_OP_SIMD_PREFIX: + { + opcode = read_uint8(p); + if (SIMD_i8x16_eq <= opcode + && opcode <= SIMD_f32x4_convert_i32x4_u) { + break; + } + + switch (opcode) { + case SIMD_v128_load: + case SIMD_i16x8_load8x8_s: + case SIMD_i16x8_load8x8_u: + case SIMD_i32x4_load16x4_s: + case SIMD_i32x4_load16x4_u: + case SIMD_i64x2_load32x2_s: + case SIMD_i64x2_load32x2_u: + case SIMD_v8x16_load_splat: + case SIMD_v16x8_load_splat: + case SIMD_v32x4_load_splat: + case SIMD_v64x2_load_splat: + case SIMD_v128_store: + skip_leb_uint32(p, p_end); /* align */ + skip_leb_uint32(p, p_end); /* offset */ + break; + + case SIMD_v128_const: + case SIMD_v8x16_shuffle: + CHECK_BUF1(p, p_end, 16); + p += 16; + break; + + case SIMD_v8x16_swizzle: + case SIMD_i8x16_splat: + case SIMD_i16x8_splat: + case SIMD_i32x4_splat: + case SIMD_i64x2_splat: + case SIMD_f32x4_splat: + case 
SIMD_f64x2_splat: + break; + + case SIMD_i8x16_extract_lane_s: + case SIMD_i8x16_extract_lane_u: + case SIMD_i8x16_replace_lane: + case SIMD_i16x8_extract_lane_s: + case SIMD_i16x8_extract_lane_u: + case SIMD_i16x8_replace_lane: + case SIMD_i32x4_extract_lane: + case SIMD_i32x4_replace_lane: + case SIMD_i64x2_extract_lane: + case SIMD_i64x2_replace_lane: + case SIMD_f32x4_extract_lane: + case SIMD_f32x4_replace_lane: + case SIMD_f64x2_extract_lane: + case SIMD_f64x2_replace_lane: + CHECK_BUF(p, p_end, 1); + p++; + break; + + default: + LOG_WARNING("WASM loader find block addr failed: " + "invalid opcode fd 0x%02x.", opcode); + if (error_buf) + snprintf(error_buf, error_buf_size, + "WASM loader find block addr failed: " + "invalid opcode fd %02x.", opcode); + return false; + } + break; + } +#endif /* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) */ +#endif /* end of WASM_ENABLE_SIMD */ + #if WASM_ENABLE_SHARED_MEMORY != 0 case WASM_OP_ATOMIC_PREFIX: { @@ -3545,6 +3672,7 @@ wasm_loader_find_block_addr(BlockAddr *block_addr_cache, break; } #endif + default: set_error_buf_v(error_buf, error_buf_size, "%s %02x", @@ -3565,6 +3693,10 @@ fail: #define REF_I64_2 VALUE_TYPE_I64 #define REF_F64_1 VALUE_TYPE_F64 #define REF_F64_2 VALUE_TYPE_F64 +#define REF_V128_1 VALUE_TYPE_V128 +#define REF_V128_2 VALUE_TYPE_V128 +#define REF_V128_3 VALUE_TYPE_V128 +#define REF_V128_4 VALUE_TYPE_V128 #define REF_ANY VALUE_TYPE_ANY #if WASM_ENABLE_FAST_INTERP != 0 @@ -3775,12 +3907,18 @@ static bool check_stack_top_values(uint8 *frame_ref, int32 stack_cell_num, uint8 type, char *error_buf, uint32 error_buf_size) { - char *type_str[] = { "f64", "f32", "i64", "i32" }; + char *type_str[] = { "v128", "f64", "f32", "i64", "i32" }; if (((type == VALUE_TYPE_I32 || type == VALUE_TYPE_F32) && stack_cell_num < 1) || ((type == VALUE_TYPE_I64 || type == VALUE_TYPE_F64) - && stack_cell_num < 2)) { + && stack_cell_num < 2) +#if WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) + || (type == VALUE_TYPE_V128 && stack_cell_num < 4) +#endif +#endif + ) { set_error_buf(error_buf, error_buf_size, "type mismatch: expect data but stack was empty"); return false; @@ -3793,10 +3931,20 @@ check_stack_top_values(uint8 *frame_ref, int32 stack_cell_num, uint8 type, || *(frame_ref - 1) != REF_I64_2)) || (type == VALUE_TYPE_F64 && (*(frame_ref - 2) != REF_F64_1 - || *(frame_ref - 1) != REF_F64_2))) { + || *(frame_ref - 1) != REF_F64_2)) +#if WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) + || (type == VALUE_TYPE_V128 + && (*(frame_ref - 4) != REF_V128_1 + || *(frame_ref - 3) != REF_V128_2 + || *(frame_ref - 2) != REF_V128_3 + || *(frame_ref - 1) != REF_V128_4)) +#endif +#endif + ) { set_error_buf_v(error_buf, error_buf_size, "%s%s%s", "type mismatch: expect ", - type_str[type - VALUE_TYPE_F64], + type_str[type - VALUE_TYPE_V128], " but got other"); return false; } @@ -3922,6 +4070,23 @@ wasm_loader_push_frame_ref(WASMLoaderContext *ctx, uint8 type, ctx->stack_cell_num++; if (ctx->stack_cell_num > ctx->max_stack_cell_num) ctx->max_stack_cell_num = ctx->stack_cell_num; + +#if WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) + if (type == VALUE_TYPE_V128) { + if (!check_stack_push(ctx, error_buf, error_buf_size)) + return false; + *ctx->frame_ref++ = type; + ctx->stack_cell_num++; + if (!check_stack_push(ctx, error_buf, error_buf_size)) + return false; + *ctx->frame_ref++ = type; + ctx->stack_cell_num++; + if 
(ctx->stack_cell_num > ctx->max_stack_cell_num) + ctx->max_stack_cell_num = ctx->stack_cell_num; + } +#endif +#endif return true; } @@ -3954,6 +4119,15 @@ wasm_loader_pop_frame_ref(WASMLoaderContext *ctx, uint8 type, ctx->frame_ref--; ctx->stack_cell_num--; + +#if WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) + if (type == VALUE_TYPE_V128) { + ctx->frame_ref -= 2; + ctx->stack_cell_num -= 2; + } +#endif +#endif return true; } @@ -4713,6 +4887,13 @@ fail: goto fail; \ } while (0) +#define PUSH_V128() do { \ + if (!(wasm_loader_push_frame_ref_offset(loader_ctx, VALUE_TYPE_V128,\ + disable_emit, operand_offset,\ + error_buf, error_buf_size)))\ + goto fail; \ + } while (0) + #define POP_I32() do { \ if (!wasm_loader_pop_frame_ref_offset(loader_ctx, VALUE_TYPE_I32, \ error_buf, error_buf_size)) \ @@ -4737,6 +4918,12 @@ fail: goto fail; \ } while (0) +#define POP_V128() do { \ + if (!wasm_loader_pop_frame_ref_offset(loader_ctx, VALUE_TYPE_V128, \ + error_buf, error_buf_size)) \ + goto fail; \ + } while (0) + #define PUSH_OFFSET_TYPE(type) do { \ if (!(wasm_loader_push_frame_offset(loader_ctx, type, \ disable_emit, operand_offset, \ @@ -4793,6 +4980,12 @@ fail: goto fail; \ } while (0) +#define PUSH_V128() do { \ + if (!(wasm_loader_push_frame_ref(loader_ctx, VALUE_TYPE_V128, \ + error_buf, error_buf_size))) \ + goto fail; \ + } while (0) + #define POP_I32() do { \ if (!(wasm_loader_pop_frame_ref(loader_ctx, VALUE_TYPE_I32, \ error_buf, error_buf_size))) \ @@ -4817,6 +5010,12 @@ fail: goto fail; \ } while (0) +#define POP_V128() do { \ + if (!(wasm_loader_pop_frame_ref(loader_ctx, VALUE_TYPE_V128, \ + error_buf, error_buf_size))) \ + goto fail; \ + } while (0) + #define POP_AND_PUSH(type_pop, type_push) do { \ if (!(wasm_loader_push_pop_frame_ref(loader_ctx, 1, \ type_push, type_pop, \ @@ -5054,8 +5253,8 @@ check_memory_access_align(uint8 opcode, uint32 align, char *error_buf, uint32 error_buf_size) { uint8 mem_access_aligns[] = { - 2, 3, 2, 3, 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, /* loads */ - 2, 3, 2, 3, 0, 1, 0, 1, 2 /* stores */ + 2, 3, 2, 3, 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, /* loads */ + 2, 3, 2, 3, 0, 1, 0, 1, 2 /* stores */ }; bh_assert(opcode >= WASM_OP_I32_LOAD && opcode <= WASM_OP_I64_STORE32); @@ -5067,6 +5266,92 @@ check_memory_access_align(uint8 opcode, uint32 align, return true; } +#if WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) +static bool +check_simd_memory_access_align(uint8 opcode, uint32 align, + char *error_buf, uint32 error_buf_size) +{ + uint8 mem_access_aligns[] = { + 4, /* load */ + 3, 3, 3, 3, 3, 3, /* load and extend */ + 0, 1, 2, 3, /* load and splat */ + 4, /* store */ + }; + + bh_assert(opcode <= SIMD_v128_store); + + if (align > mem_access_aligns[opcode - SIMD_v128_load]) { + set_error_buf(error_buf, error_buf_size, + "alignment must not be larger than natural"); + return false; + } + + return true; +} + +static bool +check_simd_access_lane(uint8 opcode, uint8 lane, + char *error_buf, uint32 error_buf_size) +{ + switch (opcode) { + case SIMD_i8x16_extract_lane_s: + case SIMD_i8x16_extract_lane_u: + case SIMD_i8x16_replace_lane: + if (lane >= 16) { + goto fail; + } + break; + case SIMD_i16x8_extract_lane_s: + case SIMD_i16x8_extract_lane_u: + case SIMD_i16x8_replace_lane: + if (lane >= 8) { + goto fail; + } + break; + case SIMD_i32x4_extract_lane: + case SIMD_i32x4_replace_lane: + case SIMD_f32x4_extract_lane: + case SIMD_f32x4_replace_lane: + if (lane >= 4) { + goto fail; + } + break; + case 
SIMD_i64x2_extract_lane: + case SIMD_i64x2_replace_lane: + case SIMD_f64x2_extract_lane: + case SIMD_f64x2_replace_lane: + if (lane >= 2) { + goto fail; + } + break; + default: + goto fail; + } + + return true; +fail: + set_error_buf(error_buf, error_buf_size, "invalid lane index"); + return false; +} + +static bool +check_simd_shuffle_mask(V128 mask, + char *error_buf, + uint32 error_buf_size) +{ + uint8 i; + for (i = 0; i != 16; ++i) { + if (mask.i8x16[i] < 0 || mask.i8x16[i] >= 32) { + set_error_buf(error_buf, error_buf_size, "invalid lane index"); + return false; + } + } + return true; +} +#endif /* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) */ +#endif /* end of WASM_ENABLE_SIMD */ + #if WASM_ENABLE_SHARED_MEMORY != 0 static bool check_memory_align_equal(uint8 opcode, uint32 align, @@ -5104,6 +5389,7 @@ is_value_type(uint8 type) type == VALUE_TYPE_I64 || type == VALUE_TYPE_F32 || type == VALUE_TYPE_F64 || + type == VALUE_TYPE_V128 || type == VALUE_TYPE_VOID; } @@ -5892,7 +6178,7 @@ handle_op_block_and_loop: #if WASM_ENABLE_TAIL_CALL != 0 } else { - char *type_str[] = { "f64", "f32", "i64", "i32" }; + char *type_str[] = { "v128", "f64", "f32", "i64", "i32" }; uint8 type; if (func_type->result_count != func->func_type->result_count) { set_error_buf_v(error_buf, error_buf_size, @@ -5906,7 +6192,7 @@ handle_op_block_and_loop: if (func_type->types[func_type->param_count + i] != type) { set_error_buf_v(error_buf, error_buf_size, "%s%s%s", "type mismatch: expect ", - type_str[type - VALUE_TYPE_F64], + type_str[type - VALUE_TYPE_V128], " but got other"); goto fail; } @@ -5982,7 +6268,7 @@ handle_op_block_and_loop: #if WASM_ENABLE_TAIL_CALL != 0 } else { - char *type_str[] = { "f64", "f32", "i64", "i32" }; + char *type_str[] = { "v128", "f64", "f32", "i64", "i32" }; uint8 type; if (func_type->result_count != func->func_type->result_count) { set_error_buf_v(error_buf, error_buf_size, @@ -5996,7 +6282,7 @@ handle_op_block_and_loop: if (func_type->types[func_type->param_count + i] != type) { set_error_buf_v(error_buf, error_buf_size, "%s%s%s", "type mismatch: expect ", - type_str[type - VALUE_TYPE_F64], + type_str[type - VALUE_TYPE_V128], " but got other"); goto fail; } @@ -6037,7 +6323,8 @@ handle_op_block_and_loop: loader_ctx->dynamic_offset --; #endif } - else { + else if (*(loader_ctx->frame_ref - 1) == REF_I64_1 + || *(loader_ctx->frame_ref - 1) == REF_F64_1) { loader_ctx->frame_ref -= 2; loader_ctx->stack_cell_num -= 2; #if (WASM_ENABLE_FAST_INTERP == 0) || (WASM_ENABLE_JIT != 0) @@ -6051,6 +6338,10 @@ handle_op_block_and_loop: loader_ctx->dynamic_offset -= 2; #endif } + else { /* V128 */ + loader_ctx->frame_ref -= 4; + loader_ctx->stack_cell_num -= 4; + } } else { #if WASM_ENABLE_FAST_INTERP != 0 @@ -6889,6 +7180,376 @@ fail_data_cnt_sec_require: } break; } + +#if WASM_ENABLE_SIMD != 0 +#if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) + case WASM_OP_SIMD_PREFIX: + { + uint8 lane; + + opcode = read_uint8(p); + switch (opcode) { + case SIMD_v128_load: + case SIMD_i16x8_load8x8_s: + case SIMD_i16x8_load8x8_u: + case SIMD_i32x4_load16x4_s: + case SIMD_i32x4_load16x4_u: + case SIMD_i64x2_load32x2_s: + case SIMD_i64x2_load32x2_u: + case SIMD_v8x16_load_splat: + case SIMD_v16x8_load_splat: + case SIMD_v32x4_load_splat: + case SIMD_v64x2_load_splat: + { + CHECK_MEMORY(); + + read_leb_uint32(p, p_end, align); /* align */ + if (!check_simd_memory_access_align( + opcode, align, error_buf, error_buf_size)) { + goto fail; + } + + read_leb_uint32(p, p_end, 
mem_offset); /* offset */ + + /* pop(i32 %i), push(v128 *result) */ + POP_AND_PUSH(VALUE_TYPE_I32, VALUE_TYPE_V128); + break; + } + + case SIMD_v128_store: + { + CHECK_MEMORY(); + + read_leb_uint32(p, p_end, align); /* align */ + if (!check_simd_memory_access_align( + opcode, align, error_buf, error_buf_size)) { + goto fail; + } + + read_leb_uint32(p, p_end, mem_offset); /* offset */ + + /* pop(v128 %value) */ + POP_V128(); + /* pop(i32 %i) */ + POP_I32(); + break; + } + + case SIMD_v128_const: + CHECK_BUF1(p, p_end, 16); + p += 16; + PUSH_V128(); + break; + + case SIMD_v8x16_shuffle: + { + V128 mask; + + CHECK_BUF1(p, p_end, 16); + mask = read_i8x16(p, error_buf, error_buf_size); + p += 16; + if (!check_simd_shuffle_mask(mask, error_buf, + error_buf_size)) { + goto fail; + } + + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_v8x16_swizzle: + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + + case SIMD_i8x16_splat: + case SIMD_i16x8_splat: + case SIMD_i32x4_splat: + POP_AND_PUSH(VALUE_TYPE_I32, VALUE_TYPE_V128); + break; + case SIMD_i64x2_splat: + POP_AND_PUSH(VALUE_TYPE_I64, VALUE_TYPE_V128); + break; + case SIMD_f32x4_splat: + POP_AND_PUSH(VALUE_TYPE_F32, VALUE_TYPE_V128); + break; + case SIMD_f64x2_splat: + POP_AND_PUSH(VALUE_TYPE_F64, VALUE_TYPE_V128); + break; + + case SIMD_i8x16_extract_lane_s: + case SIMD_i8x16_extract_lane_u: + case SIMD_i16x8_extract_lane_s: + case SIMD_i16x8_extract_lane_u: + case SIMD_i32x4_extract_lane: + CHECK_BUF(p, p_end, 1); + lane = read_uint8(p); + + if (!check_simd_access_lane(opcode, lane, error_buf, + error_buf_size)) { + goto fail; + } + + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32); + break; + case SIMD_i64x2_extract_lane: + CHECK_BUF(p, p_end, 1); + lane = read_uint8(p); + + if (!check_simd_access_lane(opcode, lane, error_buf, + error_buf_size)) { + goto fail; + } + + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I64); + break; + case SIMD_f32x4_extract_lane: + CHECK_BUF(p, p_end, 1); + lane = read_uint8(p); + + if (!check_simd_access_lane(opcode, lane, error_buf, + error_buf_size)) { + goto fail; + } + + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_F32); + break; + case SIMD_f64x2_extract_lane: + CHECK_BUF(p, p_end, 1); + lane = read_uint8(p); + + if (!check_simd_access_lane(opcode, lane, error_buf, + error_buf_size)) { + goto fail; + } + + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_F64); + break; + case SIMD_i8x16_replace_lane: + case SIMD_i16x8_replace_lane: + case SIMD_i32x4_replace_lane: + CHECK_BUF(p, p_end, 1); + lane = read_uint8(p); + + if (!check_simd_access_lane(opcode, lane, error_buf, + error_buf_size)) { + goto fail; + } + + POP_I32(); + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + case SIMD_i64x2_replace_lane: + CHECK_BUF(p, p_end, 1); + lane = read_uint8(p); + + if (!check_simd_access_lane(opcode, lane, error_buf, + error_buf_size)) { + goto fail; + } + + POP_I64(); + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + case SIMD_f32x4_replace_lane: + CHECK_BUF(p, p_end, 1); + lane = read_uint8(p); + + if (!check_simd_access_lane(opcode, lane, error_buf, + error_buf_size)) { + goto fail; + } + + POP_F32(); + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + case SIMD_f64x2_replace_lane: + CHECK_BUF(p, p_end, 1); + lane = read_uint8(p); + + if (!check_simd_access_lane(opcode, lane, error_buf, + error_buf_size)) { + goto fail; + } + + POP_F64(); + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + case SIMD_i8x16_eq: + case SIMD_i8x16_ne: + case SIMD_i8x16_lt_s: + case 
SIMD_i8x16_lt_u: + case SIMD_i8x16_gt_s: + case SIMD_i8x16_gt_u: + case SIMD_i8x16_le_s: + case SIMD_i8x16_le_u: + case SIMD_i8x16_ge_s: + case SIMD_i8x16_ge_u: + case SIMD_i16x8_eq: + case SIMD_i16x8_ne: + case SIMD_i16x8_lt_s: + case SIMD_i16x8_lt_u: + case SIMD_i16x8_gt_s: + case SIMD_i16x8_gt_u: + case SIMD_i16x8_le_s: + case SIMD_i16x8_le_u: + case SIMD_i16x8_ge_s: + case SIMD_i16x8_ge_u: + case SIMD_i32x4_eq: + case SIMD_i32x4_ne: + case SIMD_i32x4_lt_s: + case SIMD_i32x4_lt_u: + case SIMD_i32x4_gt_s: + case SIMD_i32x4_gt_u: + case SIMD_i32x4_le_s: + case SIMD_i32x4_le_u: + case SIMD_i32x4_ge_s: + case SIMD_i32x4_ge_u: + case SIMD_f32x4_eq: + case SIMD_f32x4_ne: + case SIMD_f32x4_lt: + case SIMD_f32x4_gt: + case SIMD_f32x4_le: + case SIMD_f32x4_ge: + case SIMD_f64x2_eq: + case SIMD_f64x2_ne: + case SIMD_f64x2_lt: + case SIMD_f64x2_gt: + case SIMD_f64x2_le: + case SIMD_f64x2_ge: + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + + case SIMD_v128_not: + case SIMD_i8x16_abs: + case SIMD_i8x16_neg: + case SIMD_i16x8_abs: + case SIMD_i16x8_neg: + case SIMD_i32x4_abs: + case SIMD_i32x4_neg: + case SIMD_i64x2_neg: + case SIMD_f32x4_abs: + case SIMD_f32x4_neg: + case SIMD_f32x4_sqrt: + case SIMD_f64x2_abs: + case SIMD_f64x2_neg: + case SIMD_f64x2_sqrt: + case SIMD_i16x8_widen_low_i8x16_s: + case SIMD_i16x8_widen_high_i8x16_s: + case SIMD_i16x8_widen_low_i8x16_u: + case SIMD_i16x8_widen_high_i8x16_u: + case SIMD_i32x4_widen_low_i16x8_s: + case SIMD_i32x4_widen_high_i16x8_s: + case SIMD_i32x4_widen_low_i16x8_u: + case SIMD_i32x4_widen_high_i16x8_u: + case SIMD_i32x4_trunc_sat_f32x4_s: + case SIMD_i32x4_trunc_sat_f32x4_u: + case SIMD_f32x4_convert_i32x4_s: + case SIMD_f32x4_convert_i32x4_u: + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + + case SIMD_v128_bitselect: + POP_V128(); + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + + case SIMD_i8x16_any_true: + case SIMD_i8x16_all_true: + case SIMD_i8x16_bitmask: + case SIMD_i16x8_any_true: + case SIMD_i16x8_all_true: + case SIMD_i16x8_bitmask: + case SIMD_i32x4_any_true: + case SIMD_i32x4_all_true: + case SIMD_i32x4_bitmask: + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32); + break; + + case SIMD_i8x16_shl: + case SIMD_i8x16_shr_s: + case SIMD_i8x16_shr_u: + case SIMD_i16x8_shl: + case SIMD_i16x8_shr_s: + case SIMD_i16x8_shr_u: + case SIMD_i32x4_shl: + case SIMD_i32x4_shr_s: + case SIMD_i32x4_shr_u: + case SIMD_i64x2_shl: + case SIMD_i64x2_shr_s: + case SIMD_i64x2_shr_u: + POP_I32(); + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + + case SIMD_i8x16_narrow_i16x8_s: + case SIMD_i8x16_narrow_i16x8_u: + case SIMD_i16x8_narrow_i32x4_s: + case SIMD_i16x8_narrow_i32x4_u: + case SIMD_v128_and: + case SIMD_v128_andnot: + case SIMD_v128_or: + case SIMD_v128_xor: + case SIMD_i8x16_add: + case SIMD_i8x16_add_saturate_s: + case SIMD_i8x16_add_saturate_u: + case SIMD_i8x16_sub: + case SIMD_i8x16_sub_saturate_s: + case SIMD_i8x16_sub_saturate_u: + case SIMD_i8x16_min_s: + case SIMD_i8x16_min_u: + case SIMD_i8x16_max_s: + case SIMD_i8x16_max_u: + case SIMD_i8x16_avgr_u: + case SIMD_i16x8_add: + case SIMD_i16x8_add_saturate_s: + case SIMD_i16x8_add_saturate_u: + case SIMD_i16x8_sub: + case SIMD_i16x8_sub_saturate_s: + case SIMD_i16x8_sub_saturate_u: + case SIMD_i16x8_mul: + case SIMD_i16x8_min_s: + case SIMD_i16x8_min_u: + case SIMD_i16x8_max_s: + case SIMD_i16x8_max_u: + case SIMD_i16x8_avgr_u: + case SIMD_i32x4_add: + case SIMD_i32x4_sub: + case SIMD_i32x4_mul: + case SIMD_i32x4_min_s: + case SIMD_i32x4_min_u: + 
case SIMD_i32x4_max_s: + case SIMD_i32x4_max_u: + case SIMD_i64x2_add: + case SIMD_i64x2_sub: + case SIMD_i64x2_mul: + case SIMD_f32x4_add: + case SIMD_f32x4_sub: + case SIMD_f32x4_mul: + case SIMD_f32x4_div: + case SIMD_f32x4_min: + case SIMD_f32x4_max: + case SIMD_f64x2_add: + case SIMD_f64x2_sub: + case SIMD_f64x2_mul: + case SIMD_f64x2_div: + case SIMD_f64x2_min: + case SIMD_f64x2_max: + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + + default: + if (error_buf != NULL) { + snprintf(error_buf, error_buf_size, + "WASM module load failed: " + "invalid opcode 0xfd %02x.", opcode); + } + goto fail; + } + break; + } +#endif /* end of (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) */ +#endif /* end of WASM_ENABLE_SIMD */ + #if WASM_ENABLE_SHARED_MEMORY != 0 case WASM_OP_ATOMIC_PREFIX: { @@ -7031,6 +7692,7 @@ fail_data_cnt_sec_require: break; } #endif /* end of WASM_ENABLE_SHARED_MEMORY */ + default: set_error_buf_v(error_buf, error_buf_size, "%s %02x", diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 65c1dbf7e..650e9f25a 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -263,6 +263,7 @@ typedef enum WASMOpcode { /* Post-MVP extend op prefix */ WASM_OP_MISC_PREFIX = 0xfc, + WASM_OP_SIMD_PREFIX = 0xfd, WASM_OP_ATOMIC_PREFIX = 0xfe, } WASMOpcode; @@ -286,6 +287,220 @@ typedef enum WASMMiscEXTOpcode { #endif } WASMMiscEXTOpcode; +typedef enum WASMSimdEXTOpcode { + /* memory instruction */ + SIMD_v128_load = 0x00, + SIMD_i16x8_load8x8_s = 0x01, + SIMD_i16x8_load8x8_u = 0x02, + SIMD_i32x4_load16x4_s = 0x03, + SIMD_i32x4_load16x4_u = 0x04, + SIMD_i64x2_load32x2_s = 0x05, + SIMD_i64x2_load32x2_u = 0x06, + SIMD_v8x16_load_splat = 0x07, + SIMD_v16x8_load_splat = 0x08, + SIMD_v32x4_load_splat = 0x09, + SIMD_v64x2_load_splat = 0x0a, + SIMD_v128_store = 0x0b, + + /* basic operation */ + SIMD_v128_const = 0x0c, + SIMD_v8x16_shuffle = 0x0d, + SIMD_v8x16_swizzle = 0x0e, + + /* splat operation */ + SIMD_i8x16_splat = 0x0f, + SIMD_i16x8_splat = 0x10, + SIMD_i32x4_splat = 0x11, + SIMD_i64x2_splat = 0x12, + SIMD_f32x4_splat = 0x13, + SIMD_f64x2_splat = 0x14, + + /* lane operation */ + SIMD_i8x16_extract_lane_s = 0x15, + SIMD_i8x16_extract_lane_u = 0x16, + SIMD_i8x16_replace_lane = 0x17, + SIMD_i16x8_extract_lane_s = 0x18, + SIMD_i16x8_extract_lane_u = 0x19, + SIMD_i16x8_replace_lane = 0x1a, + SIMD_i32x4_extract_lane = 0x1b, + SIMD_i32x4_replace_lane = 0x1c, + SIMD_i64x2_extract_lane = 0x1d, + SIMD_i64x2_replace_lane = 0x1e, + SIMD_f32x4_extract_lane = 0x1f, + SIMD_f32x4_replace_lane = 0x20, + SIMD_f64x2_extract_lane = 0x21, + SIMD_f64x2_replace_lane = 0x22, + + /* i8x16 compare operation */ + SIMD_i8x16_eq = 0x23, + SIMD_i8x16_ne = 0x24, + SIMD_i8x16_lt_s = 0x25, + SIMD_i8x16_lt_u = 0x26, + SIMD_i8x16_gt_s = 0x27, + SIMD_i8x16_gt_u = 0x28, + SIMD_i8x16_le_s = 0x29, + SIMD_i8x16_le_u = 0x2a, + SIMD_i8x16_ge_s = 0x2b, + SIMD_i8x16_ge_u = 0x2c, + + /* i16x8 compare operation */ + SIMD_i16x8_eq = 0x2d, + SIMD_i16x8_ne = 0x2e, + SIMD_i16x8_lt_s = 0x2f, + SIMD_i16x8_lt_u = 0x30, + SIMD_i16x8_gt_s = 0x31, + SIMD_i16x8_gt_u = 0x32, + SIMD_i16x8_le_s = 0x33, + SIMD_i16x8_le_u = 0x34, + SIMD_i16x8_ge_s = 0x35, + SIMD_i16x8_ge_u = 0x36, + + /* i32x4 compare operation */ + SIMD_i32x4_eq = 0x37, + SIMD_i32x4_ne = 0x38, + SIMD_i32x4_lt_s = 0x39, + SIMD_i32x4_lt_u = 0x3a, + SIMD_i32x4_gt_s = 0x3b, + SIMD_i32x4_gt_u = 0x3c, + SIMD_i32x4_le_s = 0x3d, + SIMD_i32x4_le_u = 0x3e, + SIMD_i32x4_ge_s = 0x3f, + 
SIMD_i32x4_ge_u = 0x40, + + /* f32x4 compare operation */ + SIMD_f32x4_eq = 0x41, + SIMD_f32x4_ne = 0x42, + SIMD_f32x4_lt = 0x43, + SIMD_f32x4_gt = 0x44, + SIMD_f32x4_le = 0x45, + SIMD_f32x4_ge = 0x46, + + /* f64x2 compare operation */ + SIMD_f64x2_eq = 0x47, + SIMD_f64x2_ne = 0x48, + SIMD_f64x2_lt = 0x49, + SIMD_f64x2_gt = 0x4a, + SIMD_f64x2_le = 0x4b, + SIMD_f64x2_ge = 0x4c, + + /* v128 operation */ + SIMD_v128_not = 0x4d, + SIMD_v128_and = 0x4e, + SIMD_v128_andnot = 0x4f, + SIMD_v128_or = 0x50, + SIMD_v128_xor = 0x51, + SIMD_v128_bitselect = 0x52, + + /* i8x16 Operation */ + SIMD_i8x16_abs = 0x60, + SIMD_i8x16_neg = 0x61, + SIMD_i8x16_any_true = 0x62, + SIMD_i8x16_all_true = 0x63, + SIMD_i8x16_bitmask = 0x64, + SIMD_i8x16_narrow_i16x8_s = 0x65, + SIMD_i8x16_narrow_i16x8_u = 0x66, + SIMD_i8x16_shl = 0x6b, + SIMD_i8x16_shr_s = 0x6c, + SIMD_i8x16_shr_u = 0x6d, + SIMD_i8x16_add = 0x6e, + SIMD_i8x16_add_saturate_s = 0x6f, + SIMD_i8x16_add_saturate_u = 0x70, + SIMD_i8x16_sub = 0x71, + SIMD_i8x16_sub_saturate_s = 0x72, + SIMD_i8x16_sub_saturate_u = 0x73, + SIMD_i8x16_min_s = 0x76, + SIMD_i8x16_min_u = 0x77, + SIMD_i8x16_max_s = 0x78, + SIMD_i8x16_max_u = 0x79, + SIMD_i8x16_avgr_u = 0x7b, + + /* i16x8 operation */ + SIMD_i16x8_abs = 0x80, + SIMD_i16x8_neg = 0x81, + SIMD_i16x8_any_true = 0x82, + SIMD_i16x8_all_true = 0x83, + SIMD_i16x8_bitmask = 0x84, + SIMD_i16x8_narrow_i32x4_s = 0x85, + SIMD_i16x8_narrow_i32x4_u = 0x86, + SIMD_i16x8_widen_low_i8x16_s = 0x87, + SIMD_i16x8_widen_high_i8x16_s = 0x88, + SIMD_i16x8_widen_low_i8x16_u = 0x89, + SIMD_i16x8_widen_high_i8x16_u = 0x8a, + SIMD_i16x8_shl = 0x8b, + SIMD_i16x8_shr_s = 0x8c, + SIMD_i16x8_shr_u = 0x8d, + SIMD_i16x8_add = 0x8e, + SIMD_i16x8_add_saturate_s = 0x8f, + SIMD_i16x8_add_saturate_u = 0x90, + SIMD_i16x8_sub = 0x91, + SIMD_i16x8_sub_saturate_s = 0x92, + SIMD_i16x8_sub_saturate_u = 0x93, + SIMD_i16x8_mul = 0x95, + SIMD_i16x8_min_s = 0x96, + SIMD_i16x8_min_u = 0x97, + SIMD_i16x8_max_s = 0x98, + SIMD_i16x8_max_u = 0x99, + SIMD_i16x8_avgr_u = 0x9b, + + /* i32x4 operation */ + SIMD_i32x4_abs = 0xa0, + SIMD_i32x4_neg = 0xa1, + SIMD_i32x4_any_true = 0xa2, + SIMD_i32x4_all_true = 0xa3, + SIMD_i32x4_bitmask = 0xa4, + SIMD_i32x4_widen_low_i16x8_s = 0xa7, + SIMD_i32x4_widen_high_i16x8_s = 0xa8, + SIMD_i32x4_widen_low_i16x8_u = 0xa9, + SIMD_i32x4_widen_high_i16x8_u = 0xaa, + SIMD_i32x4_shl = 0xab, + SIMD_i32x4_shr_s = 0xac, + SIMD_i32x4_shr_u = 0xad, + SIMD_i32x4_add = 0xae, + SIMD_i32x4_sub = 0xb1, + SIMD_i32x4_mul = 0xb5, + SIMD_i32x4_min_s = 0xb6, + SIMD_i32x4_min_u = 0xb7, + SIMD_i32x4_max_s = 0xb8, + SIMD_i32x4_max_u = 0xb9, + + /* i64x2 operation */ + SIMD_i64x2_neg = 0xc1, + SIMD_i64x2_shl = 0xcb, + SIMD_i64x2_shr_s = 0xcc, + SIMD_i64x2_shr_u = 0xcd, + SIMD_i64x2_add = 0xce, + SIMD_i64x2_sub = 0xd1, + SIMD_i64x2_mul = 0xd5, + + /* f32x4 operation */ + SIMD_f32x4_abs = 0xe0, + SIMD_f32x4_neg = 0xe1, + SIMD_f32x4_sqrt = 0xe3, + SIMD_f32x4_add = 0xe4, + SIMD_f32x4_sub = 0xe5, + SIMD_f32x4_mul = 0xe6, + SIMD_f32x4_div = 0xe7, + SIMD_f32x4_min = 0xe8, + SIMD_f32x4_max = 0xe9, + + /* f64x2 operation */ + SIMD_f64x2_abs = 0xec, + SIMD_f64x2_neg = 0xed, + SIMD_f64x2_sqrt = 0xef, + SIMD_f64x2_add = 0xf0, + SIMD_f64x2_sub = 0xf1, + SIMD_f64x2_mul = 0xf2, + SIMD_f64x2_div = 0xf3, + SIMD_f64x2_min = 0xf4, + SIMD_f64x2_max = 0xf5, + + /* conversion operation */ + SIMD_i32x4_trunc_sat_f32x4_s = 0xf8, + SIMD_i32x4_trunc_sat_f32x4_u = 0xf9, + SIMD_f32x4_convert_i32x4_s = 0xfa, + SIMD_f32x4_convert_i32x4_u = 0xfb, +} WASMSimdEXTOpcode; + typedef enum 
WASMAtomicEXTOpcode { /* atomic wait and notify */ WASM_OP_ATOMIC_NOTIFY = 0x00, diff --git a/core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c b/core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c index 0e3652b39..0930fbc1a 100644 --- a/core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c +++ b/core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c @@ -23,6 +23,90 @@ #define REG_NATIVE_FUNC(func_name, signature) \ { #func_name, func_name##_wrapper, signature, NULL } +extern bool +wasm_runtime_call_indirect(wasm_exec_env_t exec_env, + uint32 element_idx, + uint32 argc, uint32 argv[]); + +static void +invoke_viiii_wrapper(wasm_exec_env_t exec_env, uint32 elem_idx, + int arg0, int arg1, int arg2, int arg3) +{ + uint32 argv[4]; + bool ret; + + argv[0] = arg0; + argv[1] = arg1; + argv[2] = arg2; + argv[3] = arg3; + ret = wasm_runtime_call_indirect(exec_env, elem_idx, 4, argv); + (void)ret; +} + +static void +invoke_viii_wrapper(wasm_exec_env_t exec_env, uint32 elem_idx, + int arg0, int arg1, int arg2) +{ + uint32 argv[4]; + bool ret; + + argv[0] = arg0; + argv[1] = arg1; + argv[2] = arg2; + ret = wasm_runtime_call_indirect(exec_env, elem_idx, 3, argv); + (void)ret; +} + +static void +invoke_vii_wrapper(wasm_exec_env_t exec_env, + uint32 elem_idx, int arg0, int arg1) +{ + uint32 argv[4]; + bool ret; + + argv[0] = arg0; + argv[1] = arg1; + ret = wasm_runtime_call_indirect(exec_env, elem_idx, 2, argv); + (void)ret; +} + +static void +invoke_vi_wrapper(wasm_exec_env_t exec_env, + uint32 elem_idx, int arg0) +{ + uint32 argv[4]; + bool ret; + + argv[0] = arg0; + ret = wasm_runtime_call_indirect(exec_env, elem_idx, 1, argv); + (void)ret; +} + +static int +invoke_iii_wrapper(wasm_exec_env_t exec_env, + uint32 elem_idx, int arg0, int arg1) +{ + uint32 argv[4]; + bool ret; + + argv[0] = arg0; + argv[1] = arg1; + ret = wasm_runtime_call_indirect(exec_env, elem_idx, 2, argv); + return ret ? argv[0] : 0; +} + +static int +invoke_ii_wrapper(wasm_exec_env_t exec_env, + uint32 elem_idx, int arg0) +{ + uint32 argv[4]; + bool ret; + + argv[0] = arg0; + ret = wasm_runtime_call_indirect(exec_env, elem_idx, 1, argv); + return ret ? 
argv[0] : 0; +} + struct timespec_emcc { int tv_sec; int tv_nsec; @@ -174,10 +258,111 @@ getentropy_wrapper(wasm_exec_env_t exec_env, void *buffer, uint32 length) return getentropy(buffer, length); } +#if !defined(BH_PLATFORM_LINUX_SGX) +static FILE *file_list[32] = { 0 }; + +static int +get_free_file_slot() +{ + unsigned int i; + + for (i = 0; i < sizeof(file_list) / sizeof(FILE *); i++) { + if (file_list[i] == NULL) + return (int)i; + } + return -1; +} + +static int +fopen_wrapper(wasm_exec_env_t exec_env, + const char *pathname, + const char *mode) +{ + FILE *file; + int file_id; + + if (pathname == NULL || mode == NULL) + return -1; + + if ((file_id = get_free_file_slot()) == -1) + return -1; + + file = fopen(pathname, mode); + file_list[file_id] = file; + return file_id + 1; +} + +static uint32 +fread_wrapper(wasm_exec_env_t exec_env, + void *ptr, uint32 size, uint32 nmemb, int file_id) +{ + FILE *file; + + file_id = file_id - 1; + if ((unsigned)file_id >= sizeof(file_list) / sizeof(FILE *)) { + return 0; + } + if ((file = file_list[file_id]) == NULL) { + return 0; + } + return (uint32)fread(ptr, size, nmemb, file); +} + +static uint32 +emcc_fwrite_wrapper(wasm_exec_env_t exec_env, + const void *ptr, uint32 size, uint32 nmemb, + int file_id) +{ + FILE *file; + + file_id = file_id - 1; + if ((unsigned)file_id >= sizeof(file_list) / sizeof(FILE *)) { + return 0; + } + if ((file = file_list[file_id]) == NULL) { + return 0; + } + return (uint32)fwrite(ptr, size, nmemb, file); +} + +static int +feof_wrapper(wasm_exec_env_t exec_env, int file_id) +{ + FILE *file; + + file_id = file_id - 1; + if ((unsigned)file_id >= sizeof(file_list) / sizeof(FILE *)) + return 1; + if ((file = file_list[file_id]) == NULL) + return 1; + return feof(file); +} + +static int +fclose_wrapper(wasm_exec_env_t exec_env, int file_id) +{ + FILE *file; + + file_id = file_id - 1; + if ((unsigned)file_id >= sizeof(file_list) / sizeof(FILE *)) + return -1; + if ((file = file_list[file_id]) == NULL) + return -1; + file_list[file_id] = NULL; + return fclose(file); +} +#endif /* end of BH_PLATFORM_LINUX_SGX */ + #define REG_NATIVE_FUNC(func_name, signature) \ { #func_name, func_name##_wrapper, signature, NULL } static NativeSymbol native_symbols_libc_emcc[] = { + REG_NATIVE_FUNC(invoke_viiii, "(iiiii)"), + REG_NATIVE_FUNC(invoke_viii, "(iiii)"), + REG_NATIVE_FUNC(invoke_vii, "(iii)"), + REG_NATIVE_FUNC(invoke_vi, "(ii)"), + REG_NATIVE_FUNC(invoke_iii, "(iii)i"), + REG_NATIVE_FUNC(invoke_ii, "(ii)i"), REG_NATIVE_FUNC(open, "($ii)i"), REG_NATIVE_FUNC(__sys_read, "(i*~)i"), REG_NATIVE_FUNC(__sys_stat64, "($*)i"), @@ -186,6 +371,13 @@ static NativeSymbol native_symbols_libc_emcc[] = { REG_NATIVE_FUNC(munmap, "(ii)i"), REG_NATIVE_FUNC(__munmap, "(ii)i"), REG_NATIVE_FUNC(getentropy, "(*~)i"), +#if !defined(BH_PLATFORM_LINUX_SGX) + REG_NATIVE_FUNC(fopen, "($$)i"), + REG_NATIVE_FUNC(fread, "(*iii)i"), + REG_NATIVE_FUNC(emcc_fwrite, "(*iii)i"), + REG_NATIVE_FUNC(feof, "(i)i"), + REG_NATIVE_FUNC(fclose, "(i)i"), +#endif /* end of BH_PLATFORM_LINUX_SGX */ }; uint32 diff --git a/doc/build_wamr.md b/doc/build_wamr.md index 8872f2dab..57a65a738 100644 --- a/doc/build_wamr.md +++ b/doc/build_wamr.md @@ -86,6 +86,10 @@ Currently we only profile the memory consumption of module, module_instance and #### **Enable tail call feature** - **WAMR_BUILD_TAIL_CALL**=1/0, default to disable if not set +#### **Enable 128-bit SIMD feature** +- **WAMR_BUILD_SIMD**=1/0, default to disable if not set +> Note: only supported in AOT mode, and the 
*--enable-simd* flag should be added for wamrc when generating the AOT file. + **Combination of configurations:** We can combine the configurations. For example, if we want to disable interpreter, enable AOT and WASI, we can run command: diff --git a/product-mini/platforms/linux/CMakeLists.txt b/product-mini/platforms/linux/CMakeLists.txt index 6b1b6e753..63b49c049 100644 --- a/product-mini/platforms/linux/CMakeLists.txt +++ b/product-mini/platforms/linux/CMakeLists.txt @@ -75,6 +75,11 @@ if (NOT DEFINED WAMR_BUILD_MINI_LOADER) set (WAMR_BUILD_MINI_LOADER 0) endif () +if (NOT DEFINED WAMR_BUILD_SIMD) + # Disable SIMD by default + set (WAMR_BUILD_SIMD 0) +endif () + if (COLLECT_CODE_COVERAGE EQUAL 1) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage") endif () diff --git a/samples/basic/build.sh b/samples/basic/build.sh index 7e3442c5a..cb195efd7 100755 --- a/samples/basic/build.sh +++ b/samples/basic/build.sh @@ -1,3 +1,8 @@ +# +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + #!/bin/bash CURR_DIR=$PWD diff --git a/samples/gui/build.sh b/samples/gui/build.sh index a57aa48e8..f910f450b 100755 --- a/samples/gui/build.sh +++ b/samples/gui/build.sh @@ -1,3 +1,8 @@ +# +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + #!/bin/bash PROJECT_DIR=$PWD diff --git a/samples/littlevgl/build.sh b/samples/littlevgl/build.sh index 9e12fe1c3..64e8f9275 100755 --- a/samples/littlevgl/build.sh +++ b/samples/littlevgl/build.sh @@ -1,3 +1,8 @@ +# +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + #!/bin/bash PROJECT_DIR=$PWD diff --git a/samples/simple/build.sh b/samples/simple/build.sh index 51ec8bc28..ef67aea5d 100755 --- a/samples/simple/build.sh +++ b/samples/simple/build.sh @@ -1,3 +1,8 @@ +# +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + #!/bin/bash CURR_DIR=$PWD diff --git a/samples/workload/README.md b/samples/workload/README.md new file mode 100644 index 000000000..f32fb4b9c --- /dev/null +++ b/samples/workload/README.md @@ -0,0 +1,34 @@ +All workloads have a similar requirement of software dependencies. It includes +**wasi-sdk**, **clang-11**, **emsdk**, **wabt** and **binaryen**. + +> It might be slightly different when using macOS or a Linux distro other than Ubuntu. This document only targets +Ubuntu 18.04 as an example. + +## Installation instructions + +- **wasi-sdk**. Install + [latest release](https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-11/wasi-sdk-11.0-linux.tar.gz) + in */opt/wasi-sdk* or */opt/wasi-sdk-11* + +- **wabt**. Install + [latest release](https://github.com/WebAssembly/wabt/releases/download/1.0.19/wabt-1.0.19-ubuntu.tar.gz) + in */opt/wabt* or */opt/wabt-1.0.19* + +- **clang-11**. Refer to [the guide](https://apt.llvm.org/). + +- **emsdk**. Refer to [the guide](https://emscripten.org/docs/getting_started/downloads.html). Don't forget to activate + emsdk and set up environment variables. Verify it with `echo ${EMSDK}`. + +- **libclang_rt.builtins-wasm32.a**. *wasi* has its private rt library.
Put it under clang search path + +``` shell +# copy it +$ cp -r /opt/wasi-sdk-11.0/lib/clang/10.0.0/lib/wasi /usr/lib/llvm-11/lib/clang/11.0.0/lib/ + +# or just link it +$ ln -sf /opt/wasi-sdk-11.0/lib/clang/10.0.0/lib/wasi/ /usr/lib/llvm-11/lib/clang/11.0.0/lib/ +``` + +- **binaryen**. Install + [latest release](https://github.com/WebAssembly/binaryen/releases/download/version_97/binaryen-version_97-x86_64-linux.tar.gz) + in */opt/binaryen* or */opt/binaryen-version_97* diff --git a/samples/workload/bwa/.gitignore b/samples/workload/bwa/.gitignore new file mode 100644 index 000000000..cd7209590 --- /dev/null +++ b/samples/workload/bwa/.gitignore @@ -0,0 +1,4 @@ +build +libz +bwa +include diff --git a/samples/workload/bwa/CMakeLists.bwa_wasm.txt b/samples/workload/bwa/CMakeLists.bwa_wasm.txt new file mode 100644 index 000000000..8cda6b694 --- /dev/null +++ b/samples/workload/bwa/CMakeLists.bwa_wasm.txt @@ -0,0 +1,134 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required (VERSION 3.0) + +project(bwa_wasm C) + +################ LIBZ ################ +set(LIBZ_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../libz) +add_library(z_wasm STATIC + ${LIBZ_SRC_DIR}/adler32.c + ${LIBZ_SRC_DIR}/compress.c + ${LIBZ_SRC_DIR}/crc32.c + ${LIBZ_SRC_DIR}/deflate.c + ${LIBZ_SRC_DIR}/gzclose.c + ${LIBZ_SRC_DIR}/gzlib.c + ${LIBZ_SRC_DIR}/gzread.c + ${LIBZ_SRC_DIR}/gzwrite.c + ${LIBZ_SRC_DIR}/infback.c + ${LIBZ_SRC_DIR}/inffast.c + ${LIBZ_SRC_DIR}/inflate.c + ${LIBZ_SRC_DIR}/inftrees.c + ${LIBZ_SRC_DIR}/trees.c + ${LIBZ_SRC_DIR}/uncompr.c + ${LIBZ_SRC_DIR}/zutil.c +) + +set_target_properties(z_wasm PROPERTIES LINKER_LANGUAGE C) + +target_compile_definitions(z_wasm PRIVATE Z_HAVE_UNISTD_H _LARGEFILE64_SOURCE=1) + +target_compile_options(z_wasm + PRIVATE + -Wno-unused-function + -Wno-unused-variable +) + +target_include_directories(z_wasm + PUBLIC + ${LIBZ_SRC_DIR} +) + +################ BWA_WASM ################ +set(BWA_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(BWA_SOURCE + ${BWA_SRC_DIR}/utils.c + ${BWA_SRC_DIR}/kthread.c + ${BWA_SRC_DIR}/kstring.c + ${BWA_SRC_DIR}/ksw.c + ${BWA_SRC_DIR}/bwt.c + ${BWA_SRC_DIR}/bntseq.c + ${BWA_SRC_DIR}/bwa.c + ${BWA_SRC_DIR}/bwamem.c + ${BWA_SRC_DIR}/bwamem_pair.c + ${BWA_SRC_DIR}/bwamem_extra.c + ${BWA_SRC_DIR}/malloc_wrap.c + ${BWA_SRC_DIR}/QSufSort.c + ${BWA_SRC_DIR}/bwt_gen.c + ${BWA_SRC_DIR}/rope.c + ${BWA_SRC_DIR}/rle.c + ${BWA_SRC_DIR}/is.c + ${BWA_SRC_DIR}/bwtindex.c + ${BWA_SRC_DIR}/bwashm.c + ${BWA_SRC_DIR}/bwase.c + ${BWA_SRC_DIR}/bwaseqio.c + ${BWA_SRC_DIR}/bwtgap.c + ${BWA_SRC_DIR}/bwtaln.c + ${BWA_SRC_DIR}/bamlite.c + ${BWA_SRC_DIR}/bwape.c + ${BWA_SRC_DIR}/kopen.c + ${BWA_SRC_DIR}/pemerge.c + ${BWA_SRC_DIR}/maxk.c + ${BWA_SRC_DIR}/bwtsw2_core.c + ${BWA_SRC_DIR}/bwtsw2_main.c + ${BWA_SRC_DIR}/bwtsw2_aux.c + ${BWA_SRC_DIR}/bwt_lite.c + ${BWA_SRC_DIR}/bwtsw2_chain.c + ${BWA_SRC_DIR}/fastmap.c + ${BWA_SRC_DIR}/bwtsw2_pair.c + ${BWA_SRC_DIR}/main.c +) + +add_executable(${PROJECT_NAME} ${BWA_SOURCE}) + +set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME bwa.wasm) + +target_include_directories(${PROJECT_NAME} + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../include + ${CMAKE_CURRENT_SOURCE_DIR}/../include/SSE + ${CMAKE_CURRENT_SOURCE_DIR}/../include/pthread +) + +target_compile_definitions(${PROJECT_NAME} + PRIVATE + USE_MALLOC_WRAPPERS + __SSE__ __SSE2__ __SSE4_1__ + _WASI_EMULATED_MMAN _WASI_EMULATED_SIGNAL +) + +target_compile_options(${PROJECT_NAME} + PRIVATE + 
-Wno-unused-function + -Wno-unused-variable +) + +target_link_options(${PROJECT_NAME} + PRIVATE + -Wno-unused-command-line-argument + LINKER:--allow-undefined,--export=__heap_base,--export=__data_end + LINKER:-z,stack-size=1048576 +) + +target_link_libraries(${PROJECT_NAME} z_wasm) + +find_program(WASM_OPT + NAMES wasm-opt + PATHS /opt/binaryen-version_97/bin /opt/binaryen/bin +) + +if (NOT WASM_OPT) + message(WARNING "can not find wasm-opt and will not optimize any wasm module") +endif() + +add_custom_target(bwa_wasm_opt ALL + COMMAND + ${WASM_OPT} -Oz --enable-simd -o bwa.opt.wasm bwa.wasm + BYPRODUCTS + ${CMAKE_CURRENT_BINARY_DIR}/bwa.opt.wasm + WORKING_DIRECTORY + ${CMAKE_CURRENT_BINARY_DIR} +) + +add_dependencies(bwa_wasm_opt ${PROJECT_NAME}) diff --git a/samples/workload/bwa/CMakeLists.txt b/samples/workload/bwa/CMakeLists.txt new file mode 100644 index 000000000..6e785a149 --- /dev/null +++ b/samples/workload/bwa/CMakeLists.txt @@ -0,0 +1,91 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required (VERSION 3.0) + +project(bwa_wasm) + +################ EMCC ################ +if(NOT DEFINED ENV{EMSDK}) + message(FATAL_ERROR + "can not find emsdk. " + "please refer to https://emscripten.org/docs/getting_started/downloads.html " + "and install it, " + "or active emsdk by 'source ./emsdk_env.sh'" + ) +endif() + +################ BINARYEN ################ +find_program(WASM_OPT + NAMES wasm-opt + PATHS /opt/binaryen-version_97/bin /opt/binaryen/bin +) + +if (NOT WASM_OPT) + message(FATAL_ERROR + "can not find wasm-opt. " + "please download it from " + "https://github.com/WebAssembly/binaryen/releases/download/version_97/binaryen-version_97-x86_64-linux.tar.gz " + "and install it under /opt" + ) +endif() + +####################################### +include(ExternalProject) + +################ HEADERS ################ +ExternalProject_Add(headers_from_emcc + PREFIX headers + SOURCE_DIR "$ENV{EMSDK}/upstream/emscripten/system/include/SSE" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND mkdir -p ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE + && ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys + && ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/emscripten + # copy emscripten SSE header files + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/SSE/immintrin.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/ + # SSE + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/SSE/xmmintrin.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/ + # SSE2 + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/SSE/emmintrin.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/ + # SSE4.1 + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/SSE/smmintrin.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/ + # a fake empty header to aovid further depenency + && ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_SOURCE_DIR}/include/emscripten/emscripten.h + # copy emscripten pthread related header files + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/libc/pthread.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/ + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/libc/signal.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/ + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/libc/netdb.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/ + && 
${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/libc/sys/wait.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys/ + && ${CMAKE_COMMAND} -E copy $ENV{EMSDK}/upstream/emscripten/system/include/libc/sys/socket.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys/ +) + +################ libz ################ +ExternalProject_Add(libz_src + PREFIX libz + GIT_REPOSITORY https://github.com/madler/zlib.git + GIT_TAG master + GIT_PROGRESS ON + GIT_SHALLOW ON + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/libz + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" +) + +################ bwa ################ +ExternalProject_Add(bwa + PREFIX bwa + GIT_REPOSITORY https://github.com/lh3/bwa.git + GIT_TAG master + GIT_PROGRESS ON + GIT_SHALLOW ON + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/bwa + DEPENDS libz_src headers_from_emcc + UPDATE_COMMAND git clean -fd && git checkout -- * + && ${CMAKE_COMMAND} -E echo "Copying pre-installed CMakeLists.txt" + && ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.bwa_wasm.txt CMakeLists.txt + CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_CURRENT_SOURCE_DIR}/../cmake/toolchain.cmake ${CMAKE_CURRENT_SOURCE_DIR}/bwa + BUILD_COMMAND make bwa_wasm_opt + INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ./bwa.opt.wasm ${CMAKE_CURRENT_SOURCE_DIR}/build/bwa.wasm +) diff --git a/samples/workload/bwa/README.md b/samples/workload/bwa/README.md new file mode 100644 index 000000000..f9688f6ca --- /dev/null +++ b/samples/workload/bwa/README.md @@ -0,0 +1,47 @@ +"bwa" sample introduction +============== + +This sample demonstrates how to build [bwa](https://github.com/lh3/bwa) into +WebAssembly with SIMD support and run it with iwasm. + +## Preparation + +Please refer to the [installation instructions](../README.md). + +## Build + +``` shell +$ mkdir build && cd build +$ cmake .. +$ make +# to verify +$ ls bwa.wasm +``` + +## Download sample data + +Download the bwa-0.7.15 binary package from +[this address](https://sourceforge.net/projects/bio-bwa/files/bwakit/bwakit-0.7.15_x64-linux.tar.bz2/download); +the sample data file named **hs38DH.fa** in it will be used later. + +If you want more data, please refer to http://hgdownload.cse.ucsc.edu/goldenpath/hg19/bigZips/ + +## Run workload + +First, build iwasm with SIMD support: + +``` shell +$ cd /product-mini/platforms/linux/ +$ mkdir build && cd build +$ cmake .. -DWAMR_BUILD_SIMD=1 +$ make +``` + +Then compile the wasm file to an AOT file and run it: + +``` shell +$ cd /wamr-compiler/build +$ ./wamrc --enable-simd -o bwa.aot ./bwa.wasm +$ cd /product-mini/platforms/linux/ +$ ./iwasm --dir=. ./bwa.aot index hs38DH.fa +``` diff --git a/samples/workload/cmake/toolchain.cmake b/samples/workload/cmake/toolchain.cmake new file mode 100644 index 000000000..4b9ae8fd7 --- /dev/null +++ b/samples/workload/cmake/toolchain.cmake @@ -0,0 +1,100 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required (VERSION 3.0) + +if(DEFINED _WAMR_TOOLCHAIN_CMAKE_) + return() +else() + set(_WAMR_TOOLCHAIN_CMAKE_ 1) +endif() + +SET(CMAKE_SYSTEM_NAME Linux) + +################ COMPILER ################ +find_program(CLANG_11 NAMES clang clang-11 REQUIRED) +find_program(CLANG++_11 NAMES clang++ clang++-11 REQUIRED) + +if(NOT CLANG_11) + message(FATAL_ERROR "clang not found") +else() + message(STATUS "use ${CLANG_11} as the c compiler") +endif() + +if(NOT CLANG++_11) + message(FATAL_ERROR "clang++ not found") +else() + message(STATUS "use ${CLANG++_11} as the c++ compiler") +endif() + +set(CMAKE_C_COMPILER "${CLANG_11}" CACHE STRING "C compiler" FORCE) +set(CMAKE_C_COMPILER_ID Clang CACHE STRING "C compiler ID" FORCE) + +set(CMAKE_CXX_COMPILER "${CLANG++_11}" CACHE STRING "C++ compiler" FORCE) +set(CMAKE_CXX_COMPILER_ID Clang CACHE STRING "C++ compiler ID" FORCE) + +################ WASI AS SYSROOT ################ +find_path(WASI_SYSROOT + wasi-sysroot + PATHS /opt/wasi-sdk-11.0/share /opt/wasi-sdk/share + REQUIRED +) + +if(NOT WASI_SYSROOT) + message(FATAL_ERROR + "can not find wasi sysroot. " + "please download it from " + "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-11/wasi-sdk-11.0-linux.tar.gz " + "and install it under /opt" + ) +endif() + +set(CMAKE_SYSROOT ${WASI_SYSROOT}/wasi-sysroot CACHE STRING "--sysroot to compiler" FORCE) + +add_compile_options( + --target=wasm32-wasi + -msimd128 + $,-O0,-O3> + $<$:-g> + $<$:-v> +) + +################ AR ################ +find_program(LLVM_AR NAMES llvm-ar llvm-ar-11 REQUIRED) + +if(NOT LLVM_AR) + message(FATAL_ERROR "llvm-ar not found") +else() + message(STATUS "use ${LLVM_AR} as the AR") +endif() + +set(CMAKE_AR "${LLVM_AR}" CACHE STRING "AR" FORCE) + +################ RANLIB ################ +find_program(LLVM_RANLIB NAMES llvm-ranlib llvm-ranlib-11 REQUIRED) + +if(NOT LLVM_RANLIB) + message(FATAL_ERROR "llvm-ranlib not found") +else() + message(STATUS "use ${LLVM_RANLIB} as the ranlib") +endif() + +set(CMAKE_RANLIB "${LLVM_RANLIB}" CACHE STRING "RANLIB" FORCE) + +################ LD ################ +find_program(WASM_LD NAMES wasm-ld wasm-ld-11 REQUIRED) + +if(NOT WASM_LD) + message(FATAL_ERROR "wasm-ld not found") +else() + message(STATUS "use ${WASM_LD} as the linker") +endif() + +add_link_options( + --target=wasm32-wasi + -fuse-ld=${WASM_LD} + LINKER:--allow-undefined + $,-O0,-O3> + $<$:-g> + $<$:-v> +) diff --git a/samples/workload/docker/.gitignore b/samples/workload/docker/.gitignore new file mode 100644 index 000000000..9db0a2807 --- /dev/null +++ b/samples/workload/docker/.gitignore @@ -0,0 +1 @@ +build_scripts diff --git a/samples/workload/docker/Dockerfile b/samples/workload/docker/Dockerfile new file mode 100644 index 000000000..56f3adb22 --- /dev/null +++ b/samples/workload/docker/Dockerfile @@ -0,0 +1,77 @@ +FROM ubuntu:18.04 as builder + +# +# install clang and llvm +COPY llvm.sh /tmp +RUN apt update \ + && apt install -y lsb-release wget software-properties-common build-essential \ + && cd /tmp \ + && chmod a+x llvm.sh \ + && ./llvm.sh 11 + +ARG WASI_SDK_VER=11.0 +ARG WABT_VER=1.0.19 +ARG CMAKE_VER=3.16.2 +ARG BINARYEN_VER=version_97 + +# +# install wasi-sdk +ARG WASI_SDK_FILE="wasi-sdk-${WASI_SDK_VER}-linux.tar.gz" +COPY ${WASI_SDK_FILE} /opt +RUN cd /opt \ + && tar zxf ${WASI_SDK_FILE} \ + && rm ${WASI_SDK_FILE} \ + && ln -sf /opt/wasi-sdk-${WASI_SDK_VER} /opt/wasi-sdk \ + && ln -sf /opt/wasi-sdk/lib/clang/10.0.0/lib/wasi/ 
/usr/lib/llvm-11/lib/clang/11.0.0/lib/ + +# +# install wabt +ARG WABT_FILE="wabt-${WABT_VER}-ubuntu.tar.gz" +COPY ${WABT_FILE} /opt +RUN cd /opt \ + && tar zxf ${WABT_FILE} \ + && rm ${WABT_FILE} \ + && ln -sf /opt/wabt-${WABT_VER} /opt/wabt + +# +# install cmake +ARG CMAKE_FILE="cmake-${CMAKE_VER}-Linux-x86_64.sh" +COPY ${CMAKE_FILE} /tmp +RUN cd /tmp \ + && chmod a+x ${CMAKE_FILE} \ + && mkdir /opt/cmake \ + && ./${CMAKE_FILE} --prefix=/opt/cmake --skip-license \ + && ln -sf /opt/cmake/bin/cmake /usr/local/bin/cmake + +# +# install tools +RUN apt install -y git tree + +# +# install emsdk +RUN cd /opt \ + && git clone https://github.com/emscripten-core/emsdk.git \ + && cd emsdk \ + && git pull \ + && ./emsdk install latest \ + && ./emsdk activate latest \ + && echo "source /opt/emsdk/emsdk_env.sh" >> /root/.bashrc + +# +# install binaryen +ARG BINARYEN_FILE="binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz" +COPY ${BINARYEN_FILE} /opt +RUN cd /opt \ + && tar zxf ${BINARYEN_FILE} \ + && rm ${BINARYEN_FILE} \ + && ln -sf /opt/binaryen-${BINARYEN_VER} /opt/binaryen + +# +# Clean up +RUN apt-get autoremove -y \ + && apt-get clean -y \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /tmp/* + +VOLUME /data +WORKDIR /data diff --git a/samples/workload/docker/build.sh b/samples/workload/docker/build.sh new file mode 100755 index 000000000..c73c5bceb --- /dev/null +++ b/samples/workload/docker/build.sh @@ -0,0 +1,48 @@ +# +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + +#!/bin/bash + +if [[ ! -d build_scripts ]]; then + mkdir build_scripts +fi + +WASI_SDK_VER=11.0 +WABT_VER=1.0.19 +CMAKE_VER=3.16.2 +BINARYEN_VER=version_97 + +cd build_scripts +if [[ ! -f wasi-sdk-${WASI_SDK_VER}-linux.tar.gz ]]; then + wget https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-11/wasi-sdk-${WASI_SDK_VER}-linux.tar.gz +fi + +if [[ ! -f wabt-${WABT_VER}-ubuntu.tar.gz ]]; then + wget https://github.com/WebAssembly/wabt/releases/download/${WABT_VER}/wabt-${WABT_VER}-ubuntu.tar.gz +fi + +if [[ ! -f llvm.sh ]]; then + wget https://apt.llvm.org/llvm.sh +fi + +if [[ ! -f cmake-${CMAKE_VER}-Linux-x86_64.sh ]]; then + wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VER}/cmake-${CMAKE_VER}-Linux-x86_64.sh +fi + +if [[ ! 
-f binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz ]]; then + wget https://github.com/WebAssembly/binaryen/releases/download/${BINARYEN_VER}/binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz +fi +cd - + +docker build \ + --build-arg http_proxy=${http_proxy} \ + --build-arg https_proxy=${https_proxy} \ + --build-arg HTTP_PROXY=${http_proxy} \ + --build-arg HTTPS_PROXY=${https_proxy} \ + --build-arg WASI_SDK_VER=${WASI_SDK_VER} \ + --build-arg WABT_VER=${WABT_VER} \ + --build-arg CMAKE_VER=${CMAKE_VER} \ + --build-arg BINARYEN_VER=${BINARYEN_VER} \ + -t clang_env:0.1 -f Dockerfile build_scripts diff --git a/samples/workload/docker/run.sh b/samples/workload/docker/run.sh new file mode 100755 index 000000000..8c5943027 --- /dev/null +++ b/samples/workload/docker/run.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +docker run --rm -it \ + -e http_proxy=${http_proxy} \ + -e https_proxy=${https_proxy} \ + -e HTTP_PROXY=${http_proxy} \ + -e HTTPS_PROXY=${https_proxy} \ + --name workload_w_clang \ + --mount type=bind,source=$(pwd)/..,target=/data \ + clang_env:0.1 diff --git a/samples/workload/meshoptimizer/.gitignore b/samples/workload/meshoptimizer/.gitignore new file mode 100644 index 000000000..dd97754d4 --- /dev/null +++ b/samples/workload/meshoptimizer/.gitignore @@ -0,0 +1,2 @@ +build +meshoptimizer \ No newline at end of file diff --git a/samples/workload/meshoptimizer/CMakeLists.txt b/samples/workload/meshoptimizer/CMakeLists.txt new file mode 100644 index 000000000..1270582df --- /dev/null +++ b/samples/workload/meshoptimizer/CMakeLists.txt @@ -0,0 +1,39 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required (VERSION 3.0) + +project(bench-meshoptimizer) + +################ BINARYEN ################ +find_program(WASM_OPT + NAMES wasm-opt + PATHS /opt/binaryen-version_97/bin /opt/binaryen/bin +) + +if (NOT WASM_OPT) + message(FATAL_ERROR + "can not find wasm-opt. " + "please download it from " + "https://github.com/WebAssembly/binaryen/releases/download/version_97/binaryen-version_97-x86_64-linux.tar.gz " + "and install it under /opt" + ) +endif() + +################ MESHOPTIMIZER ################ +include(ExternalProject) + +ExternalProject_Add(codecbench + PREFIX codecbench + GIT_REPOSITORY https://github.com/zeux/meshoptimizer.git + GIT_TAG master + GIT_SHALLOW ON + GIT_PROGRESS ON + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/meshoptimizer + UPDATE_COMMAND git clean -fd && git checkout -- * + && ${CMAKE_COMMAND} -E echo "Applying patch" + && git apply ${CMAKE_CURRENT_SOURCE_DIR}/codecbench.patch + CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_CURRENT_SOURCE_DIR}/../cmake/toolchain.cmake ${CMAKE_CURRENT_SOURCE_DIR}/meshoptimizer + BUILD_COMMAND make codecbench.opt + INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ./codecbench.opt.wasm ${CMAKE_CURRENT_SOURCE_DIR}/build/codecbench.wasm +) diff --git a/samples/workload/meshoptimizer/README.md b/samples/workload/meshoptimizer/README.md new file mode 100644 index 000000000..c29a07afd --- /dev/null +++ b/samples/workload/meshoptimizer/README.md @@ -0,0 +1,59 @@ +"codecbench of meshoptimizer" sample introduction +============== + +This sample demonstrates how to build [codecbench of meshoptimizer](https://github.com/zeux/meshoptimizer) into +WebAssembly with SIMD support and run it with iwasm. + +## Preparation + +Please refer to the [installation instructions](../README.md). + +## Build with clang-11 and wasi-sdk + +``` shell +$ mkdir build && cd build +$ cmake ..
+$ make +# to verify +$ ls codecbench.wasm +``` + +## Or build with EMCC + +EMCC is another toolchain to compile C code to WASM. In this case, the build +with EMCC will have higher performance. + +``` shell +$ git clone https://github.com/zeux/meshoptimizer.git +$ cd meshoptimizer +$ emcc tools/codecbench.cpp src/vertexcodec.cpp src/vertexfilter.cpp \ + src/overdrawanalyzer.cpp src/indexgenerator.cpp src/vcacheoptimizer.cpp \ + src/clusterizer.cpp src/indexcodec.cpp src/vfetchanalyzer.cpp \ + src/spatialorder.cpp src/allocator.cpp src/vcacheanalyzer.cpp \ + src/vfetchoptimizer.cpp src/overdrawoptimizer.cpp src/simplifier.cpp \ + src/stripifier.cpp -O3 -msimd128 \ + -s TOTAL_MEMORY=268435456 -s "EXPORTED_FUNCTIONS=['_main']" \ + -o codecbench.wasm +$ ls -l codecbench.wasm +``` + +## Run workload + +First, build iwasm with SIMD support: + +``` shell +$ cd /product-mini/platforms/linux/ +$ mkdir build && cd build +$ cmake .. -DWAMR_BUILD_SIMD=1 +$ make +``` + +Then compile the wasm file to an AOT file and run it: + +``` shell +$ cd /wamr-compiler/build +$ ./wamrc --enable-simd -o codecbench.aot codecbench.wasm +$ cd /product-mini/platforms/linux/ +$ ./iwasm codecbench.aot +``` + diff --git a/samples/workload/meshoptimizer/codecbench.patch b/samples/workload/meshoptimizer/codecbench.patch new file mode 100644 index 000000000..4adebed4b --- /dev/null +++ b/samples/workload/meshoptimizer/codecbench.patch @@ -0,0 +1,47 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index eccc49e..dac126c 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -127,3 +127,42 @@ install(FILES + ${CMAKE_CURRENT_BINARY_DIR}/meshoptimizerConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/meshoptimizerConfigVersion.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/meshoptimizer) ++ ++################################################## ++# codecbench ++################################################## ++add_executable(codecbench tools/codecbench.cpp ${SOURCES}) ++ ++set_target_properties(codecbench PROPERTIES OUTPUT_NAME codecbench.wasm) ++ ++target_compile_options(codecbench ++ PUBLIC ++ -std=c++11 ++ -Wno-unused-function ++ -Wno-unused-variable ++) ++ ++target_link_options(codecbench ++ PUBLIC ++ LINKER:-allow-undefined,--demangle ++) ++ ++find_program(WASM_OPT ++ NAMES wasm-opt ++ PATHS /opt/binaryen-version_97/bin /opt/binaryen/bin ++) ++ ++if (NOT WASM_OPT) ++ message(WARNING "can not find wasm-opt and will not optimize any wasm module") ++endif() ++ ++add_custom_target(codecbench.opt ALL ++ COMMAND ++ ${WASM_OPT} -Oz --enable-simd -o codecbench.opt.wasm codecbench.wasm ++ BYPRODUCTS ++ ${CMAKE_CURRENT_BINARY_DIR}/codecbench.opt.wasm ++ WORKING_DIRECTORY ++ ${CMAKE_CURRENT_BINARY_DIR} ++) ++ ++add_dependencies(codecbench.opt codecbench) diff --git a/samples/workload/tensorflow/build.sh b/samples/workload/tensorflow/build.sh index d519ec5b0..ec9424842 100755 --- a/samples/workload/tensorflow/build.sh +++ b/samples/workload/tensorflow/build.sh @@ -1,8 +1,20 @@ +# +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + #!/bin/bash #################################### # build tensorflow-lite sample # #################################### +if [ ! -d "${EMSDK}" ]; then + echo "can not find emsdk. 
" + echo "please refer to https://emscripten.org/docs/getting_started/downloads.html " + echo "to install it, or active it by 'source emsdk_env.sh'" + exit +fi + set -xe EMSDK_WASM_DIR="$EM_CACHE/wasm" @@ -64,7 +76,15 @@ fi if [ -d "${TF_LITE_BUILD_DIR}/gen" ]; then rm -fr ${TF_LITE_BUILD_DIR}/gen fi -make -j 4 -C "${TENSORFLOW_DIR}" -f ${TF_LITE_BUILD_DIR}/Makefile +if [[ $1 == '--sgx' ]]; then + make -j 4 -C "${TENSORFLOW_DIR}" -f ${TF_LITE_BUILD_DIR}/Makefile +else + export BUILD_WITH_SIMD=true + make -j 4 -C "${TENSORFLOW_DIR}" -f ${TF_LITE_BUILD_DIR}/Makefile +fi + +# remove patch file and recover emcc libc.a after building +Clear_Before_Exit # 2.5 copy /make/gen target files to out/ rm -rf ${OUT_DIR} @@ -84,7 +104,7 @@ cd ${OUT_DIR} if [[ $1 == '--sgx' ]]; then ${WAMRC_CMD} -sgx -o benchmark_model.aot benchmark_model.wasm else - ${WAMRC_CMD} -o benchmark_model.aot benchmark_model.wasm + ${WAMRC_CMD} --enable-simd -o benchmark_model.aot benchmark_model.wasm fi # 4. build iwasm with pthread and libc_emcc enable @@ -101,7 +121,7 @@ if [[ $1 == '--sgx' ]]; then else cd ${WAMR_PLATFORM_DIR}/linux rm -fr build && mkdir build - cd build && cmake .. -DWAMR_BUILD_LIB_PTHREAD=1 -DWAMR_BUILD_LIBC_EMCC=1 + cd build && cmake .. -DWAMR_BUILD_SIMD=1 -DWAMR_BUILD_LIB_PTHREAD=1 -DWAMR_BUILD_LIBC_EMCC=1 make fi @@ -122,8 +142,6 @@ else fi ${IWASM_CMD} --heap-size=10475860 \ - ${OUT_DIR}/benchmark_model.aot \ - --graph=mobilenet_quant_v1_224.tflite --max_secs=300 - -Clear_Before_Exit + ${OUT_DIR}/benchmark_model.aot \ + --graph=mobilenet_quant_v1_224.tflite --max_secs=300 diff --git a/samples/workload/tensorflow/tf_lite.patch b/samples/workload/tensorflow/tf_lite.patch index 85700778a..b76dd5219 100644 --- a/samples/workload/tensorflow/tf_lite.patch +++ b/samples/workload/tensorflow/tf_lite.patch @@ -1,5 +1,5 @@ diff --git a/tensorflow/lite/tools/make/Makefile b/tensorflow/lite/tools/make/Makefile -index c7ddff5844..1082644043 100644 +index c7ddff5844..17146868f7 100644 --- a/tensorflow/lite/tools/make/Makefile +++ b/tensorflow/lite/tools/make/Makefile @@ -48,11 +48,7 @@ INCLUDES += -I/usr/local/include @@ -15,10 +15,16 @@ index c7ddff5844..1082644043 100644 -ldl # There are no rules for compiling objects for the host system (since we don't -@@ -84,14 +80,18 @@ endif # ifeq ($(HOST_ARCH),$(TARGET_ARCH)) +@@ -84,14 +80,24 @@ endif # ifeq ($(HOST_ARCH),$(TARGET_ARCH)) endif # ifeq ($(HOST_OS),$(TARGET)) endif ++BUILD_WITH_SIMD ?= false ++ifeq ($(BUILD_WITH_SIMD), true) ++CFLAGS+=-msimd128 ++CXXFLAGS+=-msimd128 ++endif ++ +LIBFLAGS += -s TOTAL_STACK=1048576 \ + -Wl,--export=__data_end -Wl,--export=__heap_base \ + -s ERROR_ON_UNDEFINED_SYMBOLS=0 @@ -36,7 +42,7 @@ index c7ddff5844..1082644043 100644 # A small example program that shows how to link against the library. MINIMAL_SRCS := \ -@@ -277,12 +277,16 @@ LIB_PATH := $(LIBDIR)$(LIB_NAME) +@@ -277,12 +283,16 @@ LIB_PATH := $(LIBDIR)$(LIB_NAME) BENCHMARK_LIB := $(LIBDIR)$(BENCHMARK_LIB_NAME) BENCHMARK_BINARY := $(BINDIR)$(BENCHMARK_BINARY_NAME) BENCHMARK_PERF_OPTIONS_BINARY := $(BINDIR)$(BENCHMARK_PERF_OPTIONS_BINARY_NAME) diff --git a/samples/workload/wasm-av1/README.md b/samples/workload/wasm-av1/README.md new file mode 100644 index 000000000..30cb6ca52 --- /dev/null +++ b/samples/workload/wasm-av1/README.md @@ -0,0 +1,22 @@ +"wasm-av1" sample introduction +============== +This sample demonstrates how to build [wasm-av1](https://github.com/GoogleChromeLabs/wasm-av1) into WebAssembly with emcc toolchain and run it with iwasm. 
Please first install [emsdk](https://github.com/emscripten-core/emsdk):
+```bash
+git clone https://github.com/emscripten-core/emsdk.git
+cd emsdk
+./emsdk install latest
+./emsdk activate latest
+```
+And set up the emsdk environment:
+```bash
+source emsdk_env.sh
+```
+Then run
+```bash
+./build.sh
+```
+to build wasm-av1 and run it with iwasm. The build script performs the following steps:
+- hack emcc to delete some objects in libc.a
+- patch wasm-av1 and build it with the emcc compiler
+- build iwasm with SIMD and libc-emcc support
+- run testavx.aot with iwasm
diff --git a/samples/workload/wasm-av1/build.sh b/samples/workload/wasm-av1/build.sh
new file mode 100755
index 000000000..2d52b20d8
--- /dev/null
+++ b/samples/workload/wasm-av1/build.sh
@@ -0,0 +1,100 @@
+#
+# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+
+#!/bin/bash
+
+####################################
+#      build wasm-av1 sample       #
+####################################
+if [ ! -d "${EMSDK}" ]; then
+    echo "cannot find emsdk. "
+    echo "please refer to https://emscripten.org/docs/getting_started/downloads.html "
+    echo "to install it, or activate it by 'source emsdk_env.sh'"
+    exit
+fi
+
+set -xe
+
+EMSDK_WASM_DIR="$EM_CACHE/wasm"
+BUILD_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+OUT_DIR="${BUILD_SCRIPT_DIR}/out"
+WASM_AV1_DIR="${BUILD_SCRIPT_DIR}/wasm-av1"
+
+WAMR_PLATFORM_DIR="${BUILD_SCRIPT_DIR}/../../../product-mini/platforms"
+IWASM_CMD="${WAMR_PLATFORM_DIR}/linux/build/iwasm"
+
+WAMRC_DIR="${BUILD_SCRIPT_DIR}/../../../wamr-compiler"
+WAMRC_CMD="${WAMRC_DIR}/build/wamrc"
+
+function Clear_Before_Exit
+{
+    [[ -f ${WASM_AV1_DIR}/wasm-av1.patch ]] &&
+       rm -f ${WASM_AV1_DIR}/wasm-av1.patch
+    # restore the libc.a under EMSDK_WASM_DIR
+    cd ${EMSDK_WASM_DIR}
+    mv libc.a.bak libc.a
+}
+
+# 1. hack emcc
+cd ${EMSDK_WASM_DIR}
+# back up libc.a
+cp libc.a libc.a.bak
+# delete some objects in libc.a
+emar d libc.a fopen.o
+emar d libc.a fread.o
+emar d libc.a feof.o
+emar d libc.a fclose.o
+
+# 2. build wasm-av1
+cd ${BUILD_SCRIPT_DIR}
+# 2.1 clone wasm-av1 repo from GitHub
+if [ ! -d "wasm-av1" ]; then
+    git clone https://github.com/GoogleChromeLabs/wasm-av1.git
+fi
+
+# 2.2 copy the wasm-av1.patch to wasm-av1 and apply the patch
+cd ${WASM_AV1_DIR}
+cp -a ${BUILD_SCRIPT_DIR}/wasm-av1.patch .
+git checkout Makefile
+git checkout test.c
+git checkout third_party/aom
+
+if [[ $(git apply wasm-av1.patch 2>&1) =~ "error" ]]; then
+    echo "git apply patch failed, please check wasm-av1 related changes..."
+    Clear_Before_Exit
+    exit 0
+fi
+
+make testavx -j 4
+
+# remove patch file and recover emcc libc.a after building
+Clear_Before_Exit
+
+# 2.3 copy the generated testavx.wasm to out/
+rm -rf ${OUT_DIR} && mkdir ${OUT_DIR}
+cp -a ${WASM_AV1_DIR}/testavx.wasm ${OUT_DIR}/
+
+# 3. compile testavx.wasm to testavx.aot with wamrc
+# 3.1 build wamr-compiler
+cd ${WAMRC_DIR}
+./build_llvm.sh
+rm -fr build && mkdir build
+cd build && cmake ..
+make
+# 3.2 compile testavx.wasm to testavx.aot
+cd ${OUT_DIR}
+${WAMRC_CMD} --enable-simd -o testavx.aot testavx.wasm
+
+# 4. build iwasm with pthread and libc_emcc enabled
+cd ${WAMR_PLATFORM_DIR}/linux
+rm -fr build && mkdir build
+cd build && cmake .. -DWAMR_BUILD_SIMD=1 -DWAMR_BUILD_LIB_PTHREAD=1 -DWAMR_BUILD_LIBC_EMCC=1
+make
+
+# 5. 
run wasm-av1 with iwasm +echo "---> run testav1.aot with iwasm" +cd ${OUT_DIR} +${IWASM_CMD} testavx.aot ../wasm-av1/third_party/samples/elephants_dream_480p24.ivf + diff --git a/samples/workload/wasm-av1/wasm-av1.patch b/samples/workload/wasm-av1/wasm-av1.patch new file mode 100644 index 000000000..1db8f7fb3 --- /dev/null +++ b/samples/workload/wasm-av1/wasm-av1.patch @@ -0,0 +1,696 @@ +diff --git a/Makefile b/Makefile +index c39fff6..4682d43 100644 +--- a/Makefile ++++ b/Makefile +@@ -59,11 +59,13 @@ $(TARGET): $(DEPS) blob-api.c yuv-to-rgb.c $(EMLIBAV1) + ]" \ + blob-api.c yuv-to-rgb.c $(SRCS) $(INC) -L $(LIBDIR) -l$(LIB) + +-$(TESTTARGET): test.c $(DEPS) $(X86LIBAV1) +- cc -o $@ -O3 test.c $(SRCS) $(INC) -L $(X86LIBDIR) -l$(LIB) ++$(TESTTARGET): test.c $(DEPS) $(EMLIBAV1) ++ emcc -o $@.wasm -O3 test.c $(SRCS) $(INC) -L $(LIBDIR) -l$(LIB) \ ++ -s TOTAL_MEMORY=104857600 -s ERROR_ON_UNDEFINED_SYMBOLS=0 + +-$(TESTTARGET)g: test.c $(DEPS) $(X86LIBAV1) +- cc -o $@ -g test.c $(SRCS) $(INC) -L $(X86LIBDIR) -l$(LIB) ++$(TESTTARGET)g: test.c $(DEPS) $(EMLIBAV1) ++ emcc -o $@.wasm -g test.c $(SRCS) $(INC) -L $(LIBDIR) -l$(LIB) \ ++ -s TOTAL_MEMORY=104857600 -s ERROR_ON_UNDEFINED_SYMBOLS=0 + + clean: + -rm $(TARGET) $(TESTTARGET) $(TESTTARGET)g +@@ -80,7 +82,7 @@ $(EMLIBAV1): $(LIBDIR) + -DCONFIG_RUNTIME_CPU_DETECT=0 \ + -DCONFIG_UNIT_TESTS=0 \ + -DCONFIG_WEBM_IO=0 \ +- -DCMAKE_TOOLCHAIN_FILE=`../../get-emcmake.sh`; \ ++ -DCMAKE_TOOLCHAIN_FILE=${EMSDK}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake; \ + make \ + ) + +diff --git a/test.c b/test.c +index df2d44b..8e81cdc 100644 +--- a/test.c ++++ b/test.c +@@ -18,6 +18,9 @@ + + #include "decode-av1-priv.h" + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + static void + dump_raw_frame(AVX_Video_Frame *avf, int id) { + FILE *f; +@@ -26,12 +29,13 @@ dump_raw_frame(AVX_Video_Frame *avf, int id) { + void *buf; + + sprintf(name, "frame%04d.yuv", id); ++ printf("writing %s ..\n", name); + if ((f = fopen(name, "wb")) == NULL) { + return; + } + buf = AVX_Video_Frame_get_buffer(avf); + size = AVX_Video_Frame_get_size(avf); +- fwrite(buf, size, 1, f); ++ emcc_fwrite(buf, size, 1, f); + fclose(f); + } + +@@ -63,6 +67,7 @@ main(int argc, char *argv[]) { + static int i = 0; + + ++i; ++ printf("##decode raw frame %d\n", i); + if (30 <= i && i < 40) { + dump_raw_frame(af, i); + } +diff --git a/third_party/aom/CMakeLists.txt b/third_party/aom/CMakeLists.txt +index 9dbe301..20c7be4 100644 +--- a/third_party/aom/CMakeLists.txt ++++ b/third_party/aom/CMakeLists.txt +@@ -56,6 +56,10 @@ option(BUILD_SHARED_LIBS "CMake should generate a shared library build." 
OFF) + + project(AOM C CXX) + ++set(CMAKE_C_FLAGS "-msimd128 -msse2 -msse3 -msse4.1 -msse4.2 ${CMAKE_C_FLAGS}") ++set(CMAKE_CXX_FLAGS "-msimd128 -msse2 -msse3 -msse4.1 -msse4.2 ${CMAKE_CXX_FLAGS}") ++set(CMAKE_VERBOSE_MAKEFILE on) ++ + set(AOM_ROOT "${CMAKE_CURRENT_SOURCE_DIR}") + set(AOM_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}") + set(INCLUDE_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/include" +@@ -347,7 +351,7 @@ if(CONFIG_AV1_DECODER AND ENABLE_EXAMPLES) + em_link_post_js(inspect "${AOM_ROOT}/tools/inspect-post.js") + # Force generation of Wasm instead of asm.js + append_link_flag_to_target("inspect" "-s WASM=1") +- append_compiler_flag("-s WASM=1") ++ append_compiler_flag("-O3 -s WASM=1 -s ERROR_ON_UNDEFINED_SYMBOLS=0") + endif() + endif() + +diff --git a/third_party/aom/aom/src/aom_codec.c b/third_party/aom/aom/src/aom_codec.c +index dbd6fa5..a8d2a49 100644 +--- a/third_party/aom/aom/src/aom_codec.c ++++ b/third_party/aom/aom/src/aom_codec.c +@@ -132,6 +132,7 @@ void aom_internal_error(struct aom_internal_error_info *info, + info->detail[sz - 1] = '\0'; + } + ++ printf("##aom internal error: %s\n", info->detail); + if (info->setjmp) longjmp(info->jmp, info->error_code); + } + +diff --git a/third_party/aom/aom_dsp/grain_table.c b/third_party/aom/aom_dsp/grain_table.c +index 0d6a73f..4b05833 100644 +--- a/third_party/aom/aom_dsp/grain_table.c ++++ b/third_party/aom/aom_dsp/grain_table.c +@@ -293,6 +293,9 @@ aom_codec_err_t aom_film_grain_table_read( + return error_info->error_code; + } + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + aom_codec_err_t aom_film_grain_table_write( + const aom_film_grain_table_t *t, const char *filename, + struct aom_internal_error_info *error_info) { +@@ -305,7 +308,7 @@ aom_codec_err_t aom_film_grain_table_write( + return error_info->error_code; + } + +- if (!fwrite(kFileMagic, 8, 1, file)) { ++ if (!emcc_fwrite(kFileMagic, 8, 1, file)) { + aom_internal_error(error_info, AOM_CODEC_ERROR, + "Unable to write file magic"); + fclose(file); +diff --git a/third_party/aom/aomdec.c b/third_party/aom/aomdec.c +index 4addee8..f850147 100644 +--- a/third_party/aom/aomdec.c ++++ b/third_party/aom/aomdec.c +@@ -274,6 +274,9 @@ static void update_image_md5(const aom_image_t *img, const int planes[3], + } + } + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + static void write_image_file(const aom_image_t *img, const int *planes, + const int num_planes, FILE *file) { + int i, y; +@@ -287,7 +290,7 @@ static void write_image_file(const aom_image_t *img, const int *planes, + const int h = aom_img_plane_height(img, plane); + + for (y = 0; y < h; ++y) { +- fwrite(buf, bytes_per_sample, w, file); ++ emcc_fwrite(buf, bytes_per_sample, w, file); + buf += stride; + } + } +diff --git a/third_party/aom/aomenc.c b/third_party/aom/aomenc.c +index 64155b0..3ed5080 100644 +--- a/third_party/aom/aomenc.c ++++ b/third_party/aom/aomenc.c +@@ -59,9 +59,12 @@ static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { + } + #define fread wrap_fread + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb, + FILE *stream) { +- return fwrite(ptr, size, nmemb, stream); ++ return emcc_fwrite(ptr, size, nmemb, stream); + } + #define fwrite wrap_fwrite + +diff --git a/third_party/aom/aomstats.c b/third_party/aom/aomstats.c +index 0cfeea2..6833776 100644 +--- a/third_party/aom/aomstats.c ++++ 
b/third_party/aom/aomstats.c +@@ -80,9 +80,12 @@ void stats_close(stats_io_t *stats, int last_pass) { + } + } + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + void stats_write(stats_io_t *stats, const void *pkt, size_t len) { + if (stats->file) { +- (void)fwrite(pkt, 1, len, stats->file); ++ (void)emcc_fwrite(pkt, 1, len, stats->file); + } else { + if (stats->buf.sz + len > stats->buf_alloc_sz) { + size_t new_sz = stats->buf_alloc_sz + 64 * 1024; +diff --git a/third_party/aom/av1/common/debugmodes.c b/third_party/aom/av1/common/debugmodes.c +index 868f341..c44258c 100644 +--- a/third_party/aom/av1/common/debugmodes.c ++++ b/third_party/aom/av1/common/debugmodes.c +@@ -89,10 +89,13 @@ void av1_print_modes_and_motion_vectors(AV1_COMMON *cm, const char *file) { + fclose(mvs); + } + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + void av1_print_uncompressed_frame_header(const uint8_t *data, int size, + const char *filename) { + FILE *hdrFile = fopen(filename, "w"); +- fwrite(data, size, sizeof(uint8_t), hdrFile); ++ emcc_fwrite(data, size, sizeof(uint8_t), hdrFile); + fclose(hdrFile); + } + +diff --git a/third_party/aom/av1/encoder/encoder.c b/third_party/aom/av1/encoder/encoder.c +index a557380..d709d26 100644 +--- a/third_party/aom/av1/encoder/encoder.c ++++ b/third_party/aom/av1/encoder/encoder.c +@@ -2799,6 +2799,9 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, + snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V)) + #endif // CONFIG_INTERNAL_STATS + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + void av1_remove_compressor(AV1_COMP *cpi) { + AV1_COMMON *cm; + unsigned int i; +@@ -2814,7 +2817,7 @@ void av1_remove_compressor(AV1_COMP *cpi) { + if (cpi->oxcf.pass != 1) { + fprintf(stderr, "Writing counts.stt\n"); + FILE *f = fopen("counts.stt", "wb"); +- fwrite(&aggregate_fc, sizeof(aggregate_fc), 1, f); ++ emcc_fwrite(&aggregate_fc, sizeof(aggregate_fc), 1, f); + fclose(f); + } + #endif // CONFIG_ENTROPY_STATS +@@ -3013,7 +3016,7 @@ void aom_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f) { + int h = s->y_height; + + do { +- fwrite(src, s->y_width, 1, f); ++ emcc_fwrite(src, s->y_width, 1, f); + src += s->y_stride; + } while (--h); + +@@ -3021,7 +3024,7 @@ void aom_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f) { + h = s->uv_height; + + do { +- fwrite(src, s->uv_width, 1, f); ++ emcc_fwrite(src, s->uv_width, 1, f); + src += s->uv_stride; + } while (--h); + +@@ -3029,7 +3032,7 @@ void aom_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f) { + h = s->uv_height; + + do { +- fwrite(src, s->uv_width, 1, f); ++ emcc_fwrite(src, s->uv_width, 1, f); + src += s->uv_stride; + } while (--h); + } +@@ -3121,7 +3124,7 @@ void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) { + uint16_t *src16 = CONVERT_TO_SHORTPTR(s->y_buffer); + + do { +- fwrite(src16, s->y_width, 2, yuv_rec_file); ++ emcc_fwrite(src16, s->y_width, 2, yuv_rec_file); + src16 += s->y_stride; + } while (--h); + +@@ -3129,7 +3132,7 @@ void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) { + h = s->uv_height; + + do { +- fwrite(src16, s->uv_width, 2, yuv_rec_file); ++ emcc_fwrite(src16, s->uv_width, 2, yuv_rec_file); + src16 += s->uv_stride; + } while (--h); + +@@ -3137,7 +3140,7 @@ void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) { + h = s->uv_height; + + do { +- fwrite(src16, s->uv_width, 2, yuv_rec_file); ++ emcc_fwrite(src16, s->uv_width, 2, 
yuv_rec_file); + src16 += s->uv_stride; + } while (--h); + +@@ -3146,7 +3149,7 @@ void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) { + } + + do { +- fwrite(src, s->y_width, 1, yuv_rec_file); ++ emcc_fwrite(src, s->y_width, 1, yuv_rec_file); + src += s->y_stride; + } while (--h); + +@@ -3154,7 +3157,7 @@ void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) { + h = s->uv_height; + + do { +- fwrite(src, s->uv_width, 1, yuv_rec_file); ++ emcc_fwrite(src, s->uv_width, 1, yuv_rec_file); + src += s->uv_stride; + } while (--h); + +@@ -3162,7 +3165,7 @@ void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) { + h = s->uv_height; + + do { +- fwrite(src, s->uv_width, 1, yuv_rec_file); ++ emcc_fwrite(src, s->uv_width, 1, yuv_rec_file); + src += s->uv_stride; + } while (--h); + +@@ -3241,16 +3244,16 @@ static int dump_one_image(AV1_COMMON *cm, + + // --- Y --- + for (h = 0; h < cm->height; ++h) { +- fwrite(&ref_buf->y_buffer[h * ref_buf->y_stride], 1, cm->width, f_ref); ++ emcc_fwrite(&ref_buf->y_buffer[h * ref_buf->y_stride], 1, cm->width, f_ref); + } + // --- U --- + for (h = 0; h < (cm->height >> 1); ++h) { +- fwrite(&ref_buf->u_buffer[h * ref_buf->uv_stride], 1, (cm->width >> 1), ++ emcc_fwrite(&ref_buf->u_buffer[h * ref_buf->uv_stride], 1, (cm->width >> 1), + f_ref); + } + // --- V --- + for (h = 0; h < (cm->height >> 1); ++h) { +- fwrite(&ref_buf->v_buffer[h * ref_buf->uv_stride], 1, (cm->width >> 1), ++ emcc_fwrite(&ref_buf->v_buffer[h * ref_buf->uv_stride], 1, (cm->width >> 1), + f_ref); + } + +@@ -4692,17 +4695,17 @@ static void dump_filtered_recon_frames(AV1_COMP *cpi) { + + // --- Y --- + for (h = 0; h < cm->height; ++h) { +- fwrite(&recon_buf->y_buffer[h * recon_buf->y_stride], 1, cm->width, ++ emcc_fwrite(&recon_buf->y_buffer[h * recon_buf->y_stride], 1, cm->width, + f_recon); + } + // --- U --- + for (h = 0; h < (cm->height >> 1); ++h) { +- fwrite(&recon_buf->u_buffer[h * recon_buf->uv_stride], 1, (cm->width >> 1), ++ emcc_fwrite(&recon_buf->u_buffer[h * recon_buf->uv_stride], 1, (cm->width >> 1), + f_recon); + } + // --- V --- + for (h = 0; h < (cm->height >> 1); ++h) { +- fwrite(&recon_buf->v_buffer[h * recon_buf->uv_stride], 1, (cm->width >> 1), ++ emcc_fwrite(&recon_buf->v_buffer[h * recon_buf->uv_stride], 1, (cm->width >> 1), + f_recon); + } + +diff --git a/third_party/aom/av1/encoder/firstpass.c b/third_party/aom/av1/encoder/firstpass.c +index bb73fde..b963043 100644 +--- a/third_party/aom/av1/encoder/firstpass.c ++++ b/third_party/aom/av1/encoder/firstpass.c +@@ -476,6 +476,9 @@ static double raw_motion_error_stdev(int *raw_motion_err_list, + return raw_err_stdev; + } + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + #define UL_INTRA_THRESH 50 + #define INVALID_ROW -1 + void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { +@@ -1077,7 +1080,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { + else + recon_file = fopen(filename, "ab"); + +- (void)fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file); ++ (void)emcc_fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file); + fclose(recon_file); + } + +diff --git a/third_party/aom/build/cmake/aom_configure.cmake b/third_party/aom/build/cmake/aom_configure.cmake +index 9220a32..fb8bf9f 100644 +--- a/third_party/aom/build/cmake/aom_configure.cmake ++++ b/third_party/aom/build/cmake/aom_configure.cmake +@@ -260,7 +260,7 @@ if(MSVC) + add_compiler_flag_if_supported("/WX") + endif() 
+ else() +- require_c_flag("-std=c99" YES) ++ #require_c_flag("-std=c99" YES) + add_compiler_flag_if_supported("-Wall") + add_compiler_flag_if_supported("-Wdisabled-optimization") + add_compiler_flag_if_supported("-Wextra") +diff --git a/third_party/aom/examples/resize_util.c b/third_party/aom/examples/resize_util.c +index 5485691..e60ed86 100644 +--- a/third_party/aom/examples/resize_util.c ++++ b/third_party/aom/examples/resize_util.c +@@ -45,6 +45,9 @@ static int parse_dim(char *v, int *width, int *height) { + return 1; + } + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + int main(int argc, char *argv[]) { + char *fin, *fout; + FILE *fpin, *fpout; +@@ -111,7 +114,7 @@ int main(int argc, char *argv[]) { + av1_resize_frame420(inbuf, width, inbuf_u, inbuf_v, width / 2, height, + width, outbuf, target_width, outbuf_u, outbuf_v, + target_width / 2, target_height, target_width); +- fwrite(outbuf, target_width * target_height * 3 / 2, 1, fpout); ++ emcc_fwrite(outbuf, target_width * target_height * 3 / 2, 1, fpout); + f++; + } + printf("%d frames processed\n", f); +diff --git a/third_party/aom/examples/scalable_encoder.c b/third_party/aom/examples/scalable_encoder.c +index 10d647e..fcf31e1 100644 +--- a/third_party/aom/examples/scalable_encoder.c ++++ b/third_party/aom/examples/scalable_encoder.c +@@ -91,6 +91,9 @@ void usage_exit(void) { + exit(EXIT_FAILURE); + } + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img, + int frame_index, int flags, FILE *outfile) { + int got_pkts = 0; +@@ -105,7 +108,7 @@ static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img, + + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; +- if (fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile) != ++ if (emcc_fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile) != + pkt->data.frame.sz) { + die_codec(codec, "Failed to write compressed frame"); + } +diff --git a/third_party/aom/ivfenc.c b/third_party/aom/ivfenc.c +index 80f4d14..d0e4e34 100644 +--- a/third_party/aom/ivfenc.c ++++ b/third_party/aom/ivfenc.c +@@ -14,6 +14,9 @@ + #include "aom/aom_encoder.h" + #include "aom_ports/mem_ops.h" + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + void ivf_write_file_header(FILE *outfile, const struct aom_codec_enc_cfg *cfg, + unsigned int fourcc, int frame_cnt) { + char header[32]; +@@ -32,7 +35,7 @@ void ivf_write_file_header(FILE *outfile, const struct aom_codec_enc_cfg *cfg, + mem_put_le32(header + 24, frame_cnt); // length + mem_put_le32(header + 28, 0); // unused + +- fwrite(header, 1, 32, outfile); ++ emcc_fwrite(header, 1, 32, outfile); + } + + void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size) { +@@ -41,12 +44,12 @@ void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size) { + mem_put_le32(header, (int)frame_size); + mem_put_le32(header + 4, (int)(pts & 0xFFFFFFFF)); + mem_put_le32(header + 8, (int)(pts >> 32)); +- fwrite(header, 1, 12, outfile); ++ emcc_fwrite(header, 1, 12, outfile); + } + + void ivf_write_frame_size(FILE *outfile, size_t frame_size) { + char header[4]; + + mem_put_le32(header, (int)frame_size); +- fwrite(header, 1, 4, outfile); ++ emcc_fwrite(header, 1, 4, outfile); + } +diff --git a/third_party/aom/test/decode_perf_test.cc b/third_party/aom/test/decode_perf_test.cc +index 3c93e7d..2d364ae 100644 
+--- a/third_party/aom/test/decode_perf_test.cc ++++ b/third_party/aom/test/decode_perf_test.cc +@@ -24,6 +24,11 @@ + + using ::testing::make_tuple; + ++extern "C" { ++ size_t ++ emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++} ++ + namespace { + + #define VIDEO_NAME 0 +@@ -153,7 +158,7 @@ class AV1NewEncodeDecodePerfTest + + // Write frame header and data. + ivf_write_frame_header(outfile_, out_frames_, pkt->data.frame.sz); +- ASSERT_EQ(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_), ++ ASSERT_EQ(emcc_fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_), + pkt->data.frame.sz); + } + +diff --git a/third_party/aom/test/film_grain_table_test.cc b/third_party/aom/test/film_grain_table_test.cc +index 0688146..dbb8e6b 100644 +--- a/third_party/aom/test/film_grain_table_test.cc ++++ b/third_party/aom/test/film_grain_table_test.cc +@@ -5,6 +5,11 @@ + #include "av1/encoder/grain_test_vectors.h" + #include "test/video_source.h" + ++extern "C" { ++ size_t ++ emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++} ++ + void grain_equal(const aom_film_grain_t *expected, + const aom_film_grain_t *actual) { + EXPECT_EQ(expected->apply_grain, actual->apply_grain); +@@ -168,7 +173,7 @@ TEST_F(FilmGrainTableIOTest, ReadTruncatedFile) { + + std::string grain_file; + FILE *file = libaom_test::GetTempOutFile(&grain_file); +- fwrite("deadbeef", 8, 1, file); ++ emcc_fwrite("deadbeef", 8, 1, file); + fclose(file); + ASSERT_EQ(AOM_CODEC_ERROR, + aom_film_grain_table_read(&table, grain_file.c_str(), &error_)); +diff --git a/third_party/aom/test/resize_test.cc b/third_party/aom/test/resize_test.cc +index e1c4e9f..9c2bce8 100644 +--- a/third_party/aom/test/resize_test.cc ++++ b/third_party/aom/test/resize_test.cc +@@ -22,6 +22,11 @@ + // Enable(1) or Disable(0) writing of the compressed bitstream. + #define WRITE_COMPRESSED_STREAM 0 + ++extern "C" { ++ size_t ++ emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++} ++ + namespace { + + #if WRITE_COMPRESSED_STREAM +@@ -55,13 +60,13 @@ static void write_ivf_file_header(const aom_codec_enc_cfg_t *const cfg, + mem_put_le32(header + 24, frame_cnt); /* length */ + mem_put_le32(header + 28, 0); /* unused */ + +- (void)fwrite(header, 1, 32, outfile); ++ (void)emcc_fwrite(header, 1, 32, outfile); + } + + static void write_ivf_frame_size(FILE *const outfile, const size_t size) { + char header[4]; + mem_put_le32(header, static_cast(size)); +- (void)fwrite(header, 1, 4, outfile); ++ (void)emcc_fwrite(header, 1, 4, outfile); + } + + static void write_ivf_frame_header(const aom_codec_cx_pkt_t *const pkt, +@@ -76,7 +81,7 @@ static void write_ivf_frame_header(const aom_codec_cx_pkt_t *const pkt, + mem_put_le32(header + 4, pts & 0xFFFFFFFF); + mem_put_le32(header + 8, pts >> 32); + +- (void)fwrite(header, 1, 12, outfile); ++ (void)emcc_fwrite(header, 1, 12, outfile); + } + #endif // WRITE_COMPRESSED_STREAM + +@@ -309,7 +314,7 @@ class ResizeInternalTestLarge : public ResizeTest { + + // Write frame header and data. + write_ivf_frame_header(pkt, outfile_); +- (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_); ++ (void)emcc_fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_); + } + #endif + +@@ -608,7 +613,7 @@ class ResizeCspTest : public ResizeTest { + + // Write frame header and data. 
+ write_ivf_frame_header(pkt, outfile_); +- (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_); ++ (void)emcc_fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_); + } + #endif + +diff --git a/third_party/aom/test/y4m_test.cc b/third_party/aom/test/y4m_test.cc +index ad901d9..f24093f 100644 +--- a/third_party/aom/test/y4m_test.cc ++++ b/third_party/aom/test/y4m_test.cc +@@ -19,6 +19,11 @@ + #include "test/util.h" + #include "test/y4m_video_source.h" + ++extern "C" { ++ size_t ++ emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++} ++ + namespace { + + using std::string; +@@ -68,7 +73,7 @@ static void write_image_file(const aom_image_t *img, FILE *file) { + (plane ? (img->d_w + img->x_chroma_shift) >> img->x_chroma_shift + : img->d_w); + for (y = 0; y < h; ++y) { +- fwrite(buf, bytes_per_sample, w, file); ++ emcc_fwrite(buf, bytes_per_sample, w, file); + buf += stride; + } + } +diff --git a/third_party/aom/third_party/googletest/src/googletest/src/gtest.cc b/third_party/aom/third_party/googletest/src/googletest/src/gtest.cc +index 5a8932c..ac2c435 100644 +--- a/third_party/aom/third_party/googletest/src/googletest/src/gtest.cc ++++ b/third_party/aom/third_party/googletest/src/googletest/src/gtest.cc +@@ -146,6 +146,11 @@ + # define vsnprintf _vsnprintf + #endif // GTEST_OS_WINDOWS + ++extern "C" { ++ size_t ++ emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++} ++ + namespace testing { + + using internal::CountIf; +@@ -3867,7 +3872,7 @@ class ScopedPrematureExitFile { + // errors are ignored as there's nothing better we can do and we + // don't want to fail the test because of this. + FILE* pfile = posix::FOpen(premature_exit_filepath, "w"); +- fwrite("0", 1, 1, pfile); ++ emcc_fwrite("0", 1, 1, pfile); + fclose(pfile); + } + } +diff --git a/third_party/aom/third_party/libwebm/mkvmuxer/mkvwriter.cc b/third_party/aom/third_party/libwebm/mkvmuxer/mkvwriter.cc +index 84655d8..0004093 100644 +--- a/third_party/aom/third_party/libwebm/mkvmuxer/mkvwriter.cc ++++ b/third_party/aom/third_party/libwebm/mkvmuxer/mkvwriter.cc +@@ -14,6 +14,11 @@ + #include // for _SH_DENYWR + #endif + ++extern "C" { ++ size_t ++ emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++} ++ + namespace mkvmuxer { + + MkvWriter::MkvWriter() : file_(NULL), writer_owns_file_(true) {} +@@ -32,7 +37,7 @@ int32 MkvWriter::Write(const void* buffer, uint32 length) { + if (buffer == NULL) + return -1; + +- const size_t bytes_written = fwrite(buffer, 1, length, file_); ++ const size_t bytes_written = emcc_fwrite(buffer, 1, length, file_); + + return (bytes_written == length) ? 
0 : -1; + } +diff --git a/third_party/aom/tools_common.c b/third_party/aom/tools_common.c +index 7abc20c..fbc30bc 100644 +--- a/third_party/aom/tools_common.c ++++ b/third_party/aom/tools_common.c +@@ -185,6 +185,9 @@ const AvxInterface *get_aom_decoder_by_fourcc(uint32_t fourcc) { + } + #endif // CONFIG_AV1_DECODER + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + void aom_img_write(const aom_image_t *img, FILE *file) { + int plane; + +@@ -197,7 +200,7 @@ void aom_img_write(const aom_image_t *img, FILE *file) { + int y; + + for (y = 0; y < h; ++y) { +- fwrite(buf, 1, w, file); ++ emcc_fwrite(buf, 1, w, file); + buf += stride; + } + } +diff --git a/third_party/aom/video_writer.c b/third_party/aom/video_writer.c +index 4e072c7..6b1ca54 100644 +--- a/third_party/aom/video_writer.c ++++ b/third_party/aom/video_writer.c +@@ -66,10 +66,13 @@ void aom_video_writer_close(AvxVideoWriter *writer) { + } + } + ++size_t ++emcc_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); ++ + int aom_video_writer_write_frame(AvxVideoWriter *writer, const uint8_t *buffer, + size_t size, int64_t pts) { + ivf_write_frame_header(writer->file, pts, size); +- if (fwrite(buffer, 1, size, writer->file) != size) return 0; ++ if (emcc_fwrite(buffer, 1, size, writer->file) != size) return 0; + + ++writer->frame_count; + diff --git a/wamr-compiler/CMakeLists.txt b/wamr-compiler/CMakeLists.txt index 114e186d2..c75049caf 100644 --- a/wamr-compiler/CMakeLists.txt +++ b/wamr-compiler/CMakeLists.txt @@ -27,6 +27,7 @@ add_definitions(-DWASM_DISABLE_HW_BOUND_CHECK=1) add_definitions(-DWASM_ENABLE_SHARED_MEMORY=1) add_definitions(-DWASM_ENABLE_THREAD_MGR=1) add_definitions(-DWASM_ENABLE_TAIL_CALL=1) +add_definitions(-DWASM_ENABLE_SIMD=1) # Set WAMR_BUILD_TARGET, currently values supported: # "X86_64", "AMD_64", "X86_32", "ARM_32", "MIPS_32", "XTENSA_32" diff --git a/wamr-compiler/main.c b/wamr-compiler/main.c index d3505ad01..835fea25a 100644 --- a/wamr-compiler/main.c +++ b/wamr-compiler/main.c @@ -43,6 +43,7 @@ print_help() printf(" --enable-multi-thread Enable multi-thread feature, the dependent features bulk-memory and\n"); printf(" --enable-tail-call Enable the post-MVP tail call feature\n"); printf(" thread-mgr will be enabled automatically\n"); + printf(" --enable-simd Enable the post-MVP 128-bit SIMD feature\n"); printf(" -v=n Set log verbose level (0 to 5, default is 2), larger with more log\n"); printf("Examples: wamrc -o test.aot test.wasm\n"); printf(" wamrc --target=i386 -o test.aot test.wasm\n"); @@ -70,6 +71,7 @@ main(int argc, char *argv[]) option.output_format = AOT_FORMAT_FILE; /* default value, enable or disable depends on the platform */ option.bounds_checks = 2; + option.enable_simd = false; /* Process options. */ for (argc--, argv++; argc > 0 && argv[0][0] == '-'; argc--, argv++) { @@ -150,6 +152,9 @@ main(int argc, char *argv[]) else if (!strcmp(argv[0], "--enable-tail-call")) { option.enable_tail_call = true; } + else if (!strcmp(argv[0], "--enable-simd")) { + option.enable_simd = true; + } else return print_help(); } @@ -158,8 +163,8 @@ main(int argc, char *argv[]) return print_help(); if (sgx_mode) { - option.size_level = 1; - option.is_sgx_platform = true; + option.size_level = 1; + option.is_sgx_platform = true; } wasm_file_name = argv[0];