diff --git a/core/iwasm/compilation/aot_compiler.c b/core/iwasm/compilation/aot_compiler.c index d02bcd758..a0ebec6e1 100644 --- a/core/iwasm/compilation/aot_compiler.c +++ b/core/iwasm/compilation/aot_compiler.c @@ -1741,6 +1741,55 @@ build_atomic_rmw: break; } + case SIMD_f32x4_ceil: + { + if (!aot_compile_simd_f32x4_ceil(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_f64x2_ceil: + { + if (!aot_compile_simd_f64x2_ceil(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_f32x4_floor: + { + if (!aot_compile_simd_f32x4_floor(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_f64x2_floor: + { + if (!aot_compile_simd_f64x2_floor(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_f32x4_trunc: + { + if (!aot_compile_simd_f32x4_trunc(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_f64x2_trunc: + { + if (!aot_compile_simd_f64x2_trunc(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_f32x4_nearest: + { + if (!aot_compile_simd_f32x4_nearest(comp_ctx, func_ctx)) + return false; + break; + } + case SIMD_f64x2_nearest: + { + if (!aot_compile_simd_f64x2_nearest(comp_ctx, func_ctx)) + return false; + break; + } + default: break; } diff --git a/core/iwasm/compilation/simd/simd_floating_point.c b/core/iwasm/compilation/simd/simd_floating_point.c index 24dc8fc51..e26b3a518 100644 --- a/core/iwasm/compilation/simd/simd_floating_point.c +++ b/core/iwasm/compilation/simd/simd_floating_point.c @@ -178,57 +178,10 @@ aot_compile_simd_f64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) } static bool -simd_v128_float_abs(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - LLVMTypeRef vector_type, - const char *intrinsic) -{ - LLVMValueRef vector, result; - LLVMTypeRef param_types[1] = { vector_type }; - - if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "vec"))) { - goto fail; - } - - if (!(result = aot_call_llvm_intrinsic(comp_ctx, intrinsic, vector_type, - param_types, 1, vector))) { - HANDLE_FAILURE("LLVMBuildCall"); - goto fail; - } - - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - /* push result into the stack */ - PUSH_V128(result); - return true; -fail: - return false; -} - -bool -aot_compile_simd_f32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) -{ - return simd_v128_float_abs(comp_ctx, func_ctx, V128_f32x4_TYPE, - "llvm.fabs.v4f32"); -} - -bool -aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) -{ - return simd_v128_float_abs(comp_ctx, func_ctx, V128_f64x2_TYPE, - "llvm.fabs.v2f64"); -} - -static bool -simd_v128_float_sqrt(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - LLVMTypeRef vector_type, - const char *intrinsic) +simd_v128_float_intrinsic(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type, + const char *intrinsic) { LLVMValueRef number, result; LLVMTypeRef param_types[1] = { vector_type }; @@ -258,16 +211,86 @@ fail: return false; } +bool +aot_compile_simd_f32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.fabs.v4f32"); +} + +bool +aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.fabs.v2f64"); +} + bool aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_v128_float_sqrt(comp_ctx, func_ctx, V128_f32x4_TYPE, - "llvm.sqrt.v4f32"); + return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.sqrt.v4f32"); } bool aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_v128_float_sqrt(comp_ctx, func_ctx, V128_f64x2_TYPE, - "llvm.sqrt.v2f64"); + return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.sqrt.v2f64"); +} + +bool +aot_compile_simd_f32x4_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.ceil.v4f32"); +} + +bool +aot_compile_simd_f64x2_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.ceil.v2f64"); +} + +bool +aot_compile_simd_f32x4_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.floor.v4f32"); +} + +bool +aot_compile_simd_f64x2_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.floor.v2f64"); +} + +bool +aot_compile_simd_f32x4_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.trunc.v4f32"); +} + +bool +aot_compile_simd_f64x2_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.trunc.v2f64"); +} + +bool +aot_compile_simd_f32x4_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.rint.v4f32"); +} + +bool +aot_compile_simd_f64x2_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.rint.v2f64"); } diff --git a/core/iwasm/compilation/simd/simd_floating_point.h b/core/iwasm/compilation/simd/simd_floating_point.h index cb254b614..e95cab6ee 100644 --- a/core/iwasm/compilation/simd/simd_floating_point.h +++ b/core/iwasm/compilation/simd/simd_floating_point.h @@ -35,12 +35,34 @@ bool aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); bool -aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx); +aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); bool -aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx); +aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f32x4_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f64x2_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f32x4_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f64x2_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f32x4_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f64x2_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f32x4_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f64x2_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); #ifdef __cplusplus } /* end of extern "C" */ diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 6dd97aef0..c68425c18 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -7481,6 +7481,14 @@ fail_data_cnt_sec_require: POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); break; + case SIMD_f32x4_ceil: + case SIMD_f32x4_floor: + case SIMD_f32x4_trunc: + case SIMD_f32x4_nearest: + case SIMD_f64x2_ceil: + case SIMD_f64x2_floor: + case SIMD_f64x2_trunc: + case SIMD_f64x2_nearest: case SIMD_v128_not: case SIMD_i8x16_abs: case SIMD_i8x16_neg: diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 650e9f25a..fa0fed74a 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -472,6 +472,16 @@ typedef enum WASMSimdEXTOpcode { SIMD_i64x2_sub = 0xd1, SIMD_i64x2_mul = 0xd5, + /* float ceil/floor/trunc/nearest */ + SIMD_f32x4_ceil = 0xd8, + SIMD_f32x4_floor = 0xd9, + SIMD_f32x4_trunc = 0xda, + SIMD_f32x4_nearest = 0xdb, + SIMD_f64x2_ceil = 0xdc, + SIMD_f64x2_floor = 0xdd, + SIMD_f64x2_trunc = 0xde, + SIMD_f64x2_nearest = 0xdf, + /* f32x4 operation */ SIMD_f32x4_abs = 0xe0, SIMD_f32x4_neg = 0xe1,