diff --git a/core/iwasm/compilation/simd/simd_access_lanes.c b/core/iwasm/compilation/simd/simd_access_lanes.c index 5abefbd47..8f1489c84 100644 --- a/core/iwasm/compilation/simd/simd_access_lanes.c +++ b/core/iwasm/compilation/simd/simd_access_lanes.c @@ -90,7 +90,6 @@ fail: return false; } -/* TODO: instructions for other CPUs */ /* shufflevector is not an option, since it requires *mask as a const */ bool aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) @@ -158,17 +157,13 @@ fail: return false; } -bool -aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +static bool +aot_compile_simd_swizzle_common(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { LLVMValueRef vector, mask, default_lane_value, condition, max_lane_id, result, idx, id, replace_with_zero, elem, elem_or_zero, undef; uint8 i; - if (is_target_x86(comp_ctx)) { - return aot_compile_simd_swizzle_x86(comp_ctx, func_ctx); - } - int const_lane_ids[16] = { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 }, const_zeors[16] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, @@ -261,6 +256,17 @@ fail: return false; } +bool +aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + if (is_target_x86(comp_ctx)) { + return aot_compile_simd_swizzle_x86(comp_ctx, func_ctx); + } + else { + return aot_compile_simd_swizzle_common(comp_ctx, func_ctx); + } +} + static bool aot_compile_simd_extract(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, diff --git a/core/iwasm/compilation/simd/simd_conversions.c b/core/iwasm/compilation/simd/simd_conversions.c index 1f725f429..d330c38a3 100644 --- a/core/iwasm/compilation/simd/simd_conversions.c +++ b/core/iwasm/compilation/simd/simd_conversions.c @@ -153,21 +153,16 @@ aot_compile_simd_i16x8_narrow_i32x4_x86(AOTCompContext *comp_ctx, is_signed ? "llvm.x86.sse2.packssdw.128" : "llvm.x86.sse41.packusdw"); } -bool -aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - bool is_signed) +static bool +aot_compile_simd_i8x16_narrow_i16x8_common(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed) { LLVMValueRef vector1, vector2, result, vector_min, vector_max, shuffle, vector1_clamped, vector2_clamped, vector1_trunced, vector2_trunced, shuffle_vector; LLVMValueRef v1_gt_max, v1_lt_min, v2_gt_max, v2_lt_min; - if (is_target_x86(comp_ctx)) { - return aot_compile_simd_i8x16_narrow_i16x8_x86(comp_ctx, func_ctx, - is_signed); - } - int min_s_array[8] = { 0xff80, 0xff80, 0xff80, 0xff80, 0xff80, 0xff80, 0xff80, 0xff80 }; int max_s_array[8] = { 0x007f, 0x007f, 0x007f, 0x007f, @@ -290,20 +285,30 @@ fail: } bool -aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx, +aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, bool is_signed) +{ + if (is_target_x86(comp_ctx)) { + return aot_compile_simd_i8x16_narrow_i16x8_x86(comp_ctx, func_ctx, + is_signed); + } + else { + return aot_compile_simd_i8x16_narrow_i16x8_common(comp_ctx, func_ctx, + is_signed); + } +} + +static bool +aot_compile_simd_i16x8_narrow_i32x4_common(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed) { LLVMValueRef vector1, vector2, result, vector_min, vector_max, shuffle, vector1_clamped, vector2_clamped, vector1_trunced, vector2_trunced, shuffle_vector; LLVMValueRef v1_gt_max, v1_lt_min, v2_gt_max, v2_lt_min; - if (is_target_x86(comp_ctx)) { - return aot_compile_simd_i16x8_narrow_i32x4_x86(comp_ctx, func_ctx, - is_signed); - } - int min_s_array[4] = { 0xffff8000, 0xffff8000, 0xffff8000, 0xffff8000 }; int32 max_s_array[4] = { 0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff }; @@ -420,6 +425,21 @@ fail: return false; } +bool +aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed) +{ + if (is_target_x86(comp_ctx)) { + return aot_compile_simd_i16x8_narrow_i32x4_x86(comp_ctx, func_ctx, + is_signed); + } + else { + return aot_compile_simd_i16x8_narrow_i32x4_common(comp_ctx, func_ctx, + is_signed); + } +} + bool aot_compile_simd_i16x8_widen_i8x16(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,