Add cmake flag to control aot intrinsics (#3261)

Add cmake variable `-DWAMR_BUILD_AOT_INTRINSICS=1/0` to enable/disable
the aot intrinsic functions, which are normally used by AOT XIP feature, and
can be disabled to reduce the aot runtime binary size.

And refactor the code in aot_intrinsics.h/.c.
This commit is contained in:
Wenyong Huang 2024-04-01 11:26:05 +08:00 committed by GitHub
parent b9740beb31
commit 9c8551cf75
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 116 additions and 84 deletions

View File

@ -529,3 +529,19 @@ else ()
# Disable quick aot/jit entries for interp and fast-jit
add_definitions (-DWASM_ENABLE_QUICK_AOT_ENTRY=0)
endif ()
if (WAMR_BUILD_AOT EQUAL 1)
if (NOT DEFINED WAMR_BUILD_AOT_INTRINSICS)
# Enable aot intrinsics by default
set (WAMR_BUILD_AOT_INTRINSICS 1)
endif ()
if (WAMR_BUILD_AOT_INTRINSICS EQUAL 1)
add_definitions (-DWASM_ENABLE_AOT_INTRINSICS=1)
message (" AOT intrinsics enabled")
else ()
add_definitions (-DWASM_ENABLE_AOT_INTRINSICS=0)
message (" AOT intrinsics disabled")
endif ()
else ()
# Disable aot intrinsics for interp, fast-jit and llvm-jit
add_definitions (-DWASM_ENABLE_AOT_INTRINSICS=0)
endif ()

View File

@ -570,6 +570,14 @@
#define WASM_ENABLE_QUICK_AOT_ENTRY 1
#endif
/* Support AOT intrinsic functions which can be called from the AOT code
when `--disable-llvm-intrinsics` flag or
`--enable-builtin-intrinsics=<intr1,intr2,...>` is used by wamrc to
generate the AOT file */
#ifndef WASM_ENABLE_AOT_INTRINSICS
#define WASM_ENABLE_AOT_INTRINSICS 1
#endif
#ifndef WASM_TABLE_MAX_SIZE
#define WASM_TABLE_MAX_SIZE 1024
#endif

View File

@ -5,86 +5,6 @@
#include "aot_intrinsic.h"
typedef struct {
const char *llvm_intrinsic;
const char *native_intrinsic;
uint64 flag;
} aot_intrinsic;
/* clang-format off */
static const aot_intrinsic g_intrinsic_mapping[] = {
{ "llvm.experimental.constrained.fadd.f32", "aot_intrinsic_fadd_f32", AOT_INTRINSIC_FLAG_F32_FADD },
{ "llvm.experimental.constrained.fadd.f64", "aot_intrinsic_fadd_f64", AOT_INTRINSIC_FLAG_F64_FADD },
{ "llvm.experimental.constrained.fsub.f32", "aot_intrinsic_fsub_f32", AOT_INTRINSIC_FLAG_F32_FSUB },
{ "llvm.experimental.constrained.fsub.f64", "aot_intrinsic_fsub_f64", AOT_INTRINSIC_FLAG_F64_FSUB },
{ "llvm.experimental.constrained.fmul.f32", "aot_intrinsic_fmul_f32", AOT_INTRINSIC_FLAG_F32_FMUL },
{ "llvm.experimental.constrained.fmul.f64", "aot_intrinsic_fmul_f64", AOT_INTRINSIC_FLAG_F64_FMUL },
{ "llvm.experimental.constrained.fdiv.f32", "aot_intrinsic_fdiv_f32", AOT_INTRINSIC_FLAG_F32_FDIV },
{ "llvm.experimental.constrained.fdiv.f64", "aot_intrinsic_fdiv_f64", AOT_INTRINSIC_FLAG_F64_FDIV },
{ "llvm.fabs.f32", "aot_intrinsic_fabs_f32", AOT_INTRINSIC_FLAG_F32_FABS },
{ "llvm.fabs.f64", "aot_intrinsic_fabs_f64", AOT_INTRINSIC_FLAG_F64_FABS },
{ "llvm.ceil.f32", "aot_intrinsic_ceil_f32", AOT_INTRINSIC_FLAG_F32_CEIL },
{ "llvm.ceil.f64", "aot_intrinsic_ceil_f64", AOT_INTRINSIC_FLAG_F64_CEIL },
{ "llvm.floor.f32", "aot_intrinsic_floor_f32", AOT_INTRINSIC_FLAG_F32_FLOOR },
{ "llvm.floor.f64", "aot_intrinsic_floor_f64", AOT_INTRINSIC_FLAG_F64_FLOOR },
{ "llvm.trunc.f32", "aot_intrinsic_trunc_f32", AOT_INTRINSIC_FLAG_F32_TRUNC },
{ "llvm.trunc.f64", "aot_intrinsic_trunc_f64", AOT_INTRINSIC_FLAG_F64_TRUNC },
{ "llvm.rint.f32", "aot_intrinsic_rint_f32", AOT_INTRINSIC_FLAG_F32_RINT },
{ "llvm.rint.f64", "aot_intrinsic_rint_f64", AOT_INTRINSIC_FLAG_F64_RINT },
{ "llvm.sqrt.f32", "aot_intrinsic_sqrt_f32", AOT_INTRINSIC_FLAG_F32_SQRT },
{ "llvm.sqrt.f64", "aot_intrinsic_sqrt_f64", AOT_INTRINSIC_FLAG_F64_SQRT },
{ "llvm.copysign.f32", "aot_intrinsic_copysign_f32", AOT_INTRINSIC_FLAG_F32_COPYSIGN },
{ "llvm.copysign.f64", "aot_intrinsic_copysign_f64", AOT_INTRINSIC_FLAG_F64_COPYSIGN },
{ "llvm.minnum.f32", "aot_intrinsic_fmin_f32", AOT_INTRINSIC_FLAG_F32_MIN },
{ "llvm.minnum.f64", "aot_intrinsic_fmin_f64", AOT_INTRINSIC_FLAG_F64_MIN },
{ "llvm.maxnum.f32", "aot_intrinsic_fmax_f32", AOT_INTRINSIC_FLAG_F32_MAX },
{ "llvm.maxnum.f64", "aot_intrinsic_fmax_f64", AOT_INTRINSIC_FLAG_F64_MAX },
{ "llvm.ctlz.i32", "aot_intrinsic_clz_i32", AOT_INTRINSIC_FLAG_I32_CLZ },
{ "llvm.ctlz.i64", "aot_intrinsic_clz_i64", AOT_INTRINSIC_FLAG_I64_CLZ },
{ "llvm.cttz.i32", "aot_intrinsic_ctz_i32", AOT_INTRINSIC_FLAG_I32_CTZ },
{ "llvm.cttz.i64", "aot_intrinsic_ctz_i64", AOT_INTRINSIC_FLAG_I64_CTZ },
{ "llvm.ctpop.i32", "aot_intrinsic_popcnt_i32", AOT_INTRINSIC_FLAG_I32_POPCNT },
{ "llvm.ctpop.i64", "aot_intrinsic_popcnt_i64", AOT_INTRINSIC_FLAG_I64_POPCNT },
{ "f64_convert_i32_s", "aot_intrinsic_i32_to_f64", AOT_INTRINSIC_FLAG_I32_TO_F64 },
{ "f64_convert_i32_u", "aot_intrinsic_u32_to_f64", AOT_INTRINSIC_FLAG_U32_TO_F64 },
{ "f32_convert_i32_s", "aot_intrinsic_i32_to_f32", AOT_INTRINSIC_FLAG_I32_TO_F32 },
{ "f32_convert_i32_u", "aot_intrinsic_u32_to_f32", AOT_INTRINSIC_FLAG_U32_TO_F32 },
{ "f64_convert_i64_s", "aot_intrinsic_i64_to_f64", AOT_INTRINSIC_FLAG_I32_TO_F64 },
{ "f64_convert_i64_u", "aot_intrinsic_u64_to_f64", AOT_INTRINSIC_FLAG_U64_TO_F64 },
{ "f32_convert_i64_s", "aot_intrinsic_i64_to_f32", AOT_INTRINSIC_FLAG_I64_TO_F32 },
{ "f32_convert_i64_u", "aot_intrinsic_u64_to_f32", AOT_INTRINSIC_FLAG_U64_TO_F32 },
{ "i32_trunc_f32_u", "aot_intrinsic_f32_to_u32", AOT_INTRINSIC_FLAG_F32_TO_U32 },
{ "i32_trunc_f32_s", "aot_intrinsic_f32_to_i32", AOT_INTRINSIC_FLAG_F32_TO_I32 },
{ "i32_trunc_f64_u", "aot_intrinsic_f64_to_u32", AOT_INTRINSIC_FLAG_F64_TO_U32 },
{ "i32_trunc_f64_s", "aot_intrinsic_f64_to_i32", AOT_INTRINSIC_FLAG_F64_TO_I32 },
{ "i64_trunc_f64_u", "aot_intrinsic_f64_to_u64", AOT_INTRINSIC_FLAG_F64_TO_U64 },
{ "i64_trunc_f32_s", "aot_intrinsic_f32_to_i64", AOT_INTRINSIC_FLAG_F32_TO_I64 },
{ "i64_trunc_f32_u", "aot_intrinsic_f32_to_u64", AOT_INTRINSIC_FLAG_F32_TO_U64 },
{ "i64_trunc_f64_s", "aot_intrinsic_f64_to_i64", AOT_INTRINSIC_FLAG_F64_TO_I64 },
{ "f32_demote_f64", "aot_intrinsic_f64_to_f32", AOT_INTRINSIC_FLAG_F64_TO_F32 },
{ "f64_promote_f32", "aot_intrinsic_f32_to_f64", AOT_INTRINSIC_FLAG_F32_TO_F64 },
{ "f32_cmp", "aot_intrinsic_f32_cmp", AOT_INTRINSIC_FLAG_F32_CMP },
{ "f64_cmp", "aot_intrinsic_f64_cmp", AOT_INTRINSIC_FLAG_F64_CMP },
{ "i32.const", NULL, AOT_INTRINSIC_FLAG_I32_CONST },
{ "i64.const", NULL, AOT_INTRINSIC_FLAG_I64_CONST },
{ "f32.const", NULL, AOT_INTRINSIC_FLAG_F32_CONST },
{ "f64.const", NULL, AOT_INTRINSIC_FLAG_F64_CONST },
{ "i64.div_s", "aot_intrinsic_i64_div_s", AOT_INTRINSIC_FLAG_I64_DIV_S},
{ "i32.div_s", "aot_intrinsic_i32_div_s", AOT_INTRINSIC_FLAG_I32_DIV_S},
{ "i32.div_u", "aot_intrinsic_i32_div_u", AOT_INTRINSIC_FLAG_I32_DIV_U},
{ "i32.rem_s", "aot_intrinsic_i32_rem_s", AOT_INTRINSIC_FLAG_I32_REM_S},
{ "i32.rem_u", "aot_intrinsic_i32_rem_u", AOT_INTRINSIC_FLAG_I32_REM_U},
{ "i64.div_u", "aot_intrinsic_i64_div_u", AOT_INTRINSIC_FLAG_I64_DIV_U},
{ "i64.rem_s", "aot_intrinsic_i64_rem_s", AOT_INTRINSIC_FLAG_I64_REM_S},
{ "i64.rem_u", "aot_intrinsic_i64_rem_u", AOT_INTRINSIC_FLAG_I64_REM_U},
{ "i64.or", "aot_intrinsic_i64_bit_or", AOT_INTRINSIC_FLAG_I64_BIT_OR},
{ "i64.and", "aot_intrinsic_i64_bit_and", AOT_INTRINSIC_FLAG_I64_BIT_AND},
};
/* clang-format on */
static const uint32 g_intrinsic_count =
sizeof(g_intrinsic_mapping) / sizeof(aot_intrinsic);
float32
aot_intrinsic_fadd_f32(float32 a, float32 b)
{
@ -565,6 +485,88 @@ aot_intrinsic_i64_bit_and(uint64 l, uint64 r)
return l & r;
}
#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0
typedef struct {
const char *llvm_intrinsic;
const char *native_intrinsic;
uint64 flag;
} aot_intrinsic;
/* clang-format off */
static const aot_intrinsic g_intrinsic_mapping[] = {
{ "llvm.experimental.constrained.fadd.f32", "aot_intrinsic_fadd_f32", AOT_INTRINSIC_FLAG_F32_FADD },
{ "llvm.experimental.constrained.fadd.f64", "aot_intrinsic_fadd_f64", AOT_INTRINSIC_FLAG_F64_FADD },
{ "llvm.experimental.constrained.fsub.f32", "aot_intrinsic_fsub_f32", AOT_INTRINSIC_FLAG_F32_FSUB },
{ "llvm.experimental.constrained.fsub.f64", "aot_intrinsic_fsub_f64", AOT_INTRINSIC_FLAG_F64_FSUB },
{ "llvm.experimental.constrained.fmul.f32", "aot_intrinsic_fmul_f32", AOT_INTRINSIC_FLAG_F32_FMUL },
{ "llvm.experimental.constrained.fmul.f64", "aot_intrinsic_fmul_f64", AOT_INTRINSIC_FLAG_F64_FMUL },
{ "llvm.experimental.constrained.fdiv.f32", "aot_intrinsic_fdiv_f32", AOT_INTRINSIC_FLAG_F32_FDIV },
{ "llvm.experimental.constrained.fdiv.f64", "aot_intrinsic_fdiv_f64", AOT_INTRINSIC_FLAG_F64_FDIV },
{ "llvm.fabs.f32", "aot_intrinsic_fabs_f32", AOT_INTRINSIC_FLAG_F32_FABS },
{ "llvm.fabs.f64", "aot_intrinsic_fabs_f64", AOT_INTRINSIC_FLAG_F64_FABS },
{ "llvm.ceil.f32", "aot_intrinsic_ceil_f32", AOT_INTRINSIC_FLAG_F32_CEIL },
{ "llvm.ceil.f64", "aot_intrinsic_ceil_f64", AOT_INTRINSIC_FLAG_F64_CEIL },
{ "llvm.floor.f32", "aot_intrinsic_floor_f32", AOT_INTRINSIC_FLAG_F32_FLOOR },
{ "llvm.floor.f64", "aot_intrinsic_floor_f64", AOT_INTRINSIC_FLAG_F64_FLOOR },
{ "llvm.trunc.f32", "aot_intrinsic_trunc_f32", AOT_INTRINSIC_FLAG_F32_TRUNC },
{ "llvm.trunc.f64", "aot_intrinsic_trunc_f64", AOT_INTRINSIC_FLAG_F64_TRUNC },
{ "llvm.rint.f32", "aot_intrinsic_rint_f32", AOT_INTRINSIC_FLAG_F32_RINT },
{ "llvm.rint.f64", "aot_intrinsic_rint_f64", AOT_INTRINSIC_FLAG_F64_RINT },
{ "llvm.sqrt.f32", "aot_intrinsic_sqrt_f32", AOT_INTRINSIC_FLAG_F32_SQRT },
{ "llvm.sqrt.f64", "aot_intrinsic_sqrt_f64", AOT_INTRINSIC_FLAG_F64_SQRT },
{ "llvm.copysign.f32", "aot_intrinsic_copysign_f32", AOT_INTRINSIC_FLAG_F32_COPYSIGN },
{ "llvm.copysign.f64", "aot_intrinsic_copysign_f64", AOT_INTRINSIC_FLAG_F64_COPYSIGN },
{ "llvm.minnum.f32", "aot_intrinsic_fmin_f32", AOT_INTRINSIC_FLAG_F32_MIN },
{ "llvm.minnum.f64", "aot_intrinsic_fmin_f64", AOT_INTRINSIC_FLAG_F64_MIN },
{ "llvm.maxnum.f32", "aot_intrinsic_fmax_f32", AOT_INTRINSIC_FLAG_F32_MAX },
{ "llvm.maxnum.f64", "aot_intrinsic_fmax_f64", AOT_INTRINSIC_FLAG_F64_MAX },
{ "llvm.ctlz.i32", "aot_intrinsic_clz_i32", AOT_INTRINSIC_FLAG_I32_CLZ },
{ "llvm.ctlz.i64", "aot_intrinsic_clz_i64", AOT_INTRINSIC_FLAG_I64_CLZ },
{ "llvm.cttz.i32", "aot_intrinsic_ctz_i32", AOT_INTRINSIC_FLAG_I32_CTZ },
{ "llvm.cttz.i64", "aot_intrinsic_ctz_i64", AOT_INTRINSIC_FLAG_I64_CTZ },
{ "llvm.ctpop.i32", "aot_intrinsic_popcnt_i32", AOT_INTRINSIC_FLAG_I32_POPCNT },
{ "llvm.ctpop.i64", "aot_intrinsic_popcnt_i64", AOT_INTRINSIC_FLAG_I64_POPCNT },
{ "f64_convert_i32_s", "aot_intrinsic_i32_to_f64", AOT_INTRINSIC_FLAG_I32_TO_F64 },
{ "f64_convert_i32_u", "aot_intrinsic_u32_to_f64", AOT_INTRINSIC_FLAG_U32_TO_F64 },
{ "f32_convert_i32_s", "aot_intrinsic_i32_to_f32", AOT_INTRINSIC_FLAG_I32_TO_F32 },
{ "f32_convert_i32_u", "aot_intrinsic_u32_to_f32", AOT_INTRINSIC_FLAG_U32_TO_F32 },
{ "f64_convert_i64_s", "aot_intrinsic_i64_to_f64", AOT_INTRINSIC_FLAG_I32_TO_F64 },
{ "f64_convert_i64_u", "aot_intrinsic_u64_to_f64", AOT_INTRINSIC_FLAG_U64_TO_F64 },
{ "f32_convert_i64_s", "aot_intrinsic_i64_to_f32", AOT_INTRINSIC_FLAG_I64_TO_F32 },
{ "f32_convert_i64_u", "aot_intrinsic_u64_to_f32", AOT_INTRINSIC_FLAG_U64_TO_F32 },
{ "i32_trunc_f32_u", "aot_intrinsic_f32_to_u32", AOT_INTRINSIC_FLAG_F32_TO_U32 },
{ "i32_trunc_f32_s", "aot_intrinsic_f32_to_i32", AOT_INTRINSIC_FLAG_F32_TO_I32 },
{ "i32_trunc_f64_u", "aot_intrinsic_f64_to_u32", AOT_INTRINSIC_FLAG_F64_TO_U32 },
{ "i32_trunc_f64_s", "aot_intrinsic_f64_to_i32", AOT_INTRINSIC_FLAG_F64_TO_I32 },
{ "i64_trunc_f64_u", "aot_intrinsic_f64_to_u64", AOT_INTRINSIC_FLAG_F64_TO_U64 },
{ "i64_trunc_f32_s", "aot_intrinsic_f32_to_i64", AOT_INTRINSIC_FLAG_F32_TO_I64 },
{ "i64_trunc_f32_u", "aot_intrinsic_f32_to_u64", AOT_INTRINSIC_FLAG_F32_TO_U64 },
{ "i64_trunc_f64_s", "aot_intrinsic_f64_to_i64", AOT_INTRINSIC_FLAG_F64_TO_I64 },
{ "f32_demote_f64", "aot_intrinsic_f64_to_f32", AOT_INTRINSIC_FLAG_F64_TO_F32 },
{ "f64_promote_f32", "aot_intrinsic_f32_to_f64", AOT_INTRINSIC_FLAG_F32_TO_F64 },
{ "f32_cmp", "aot_intrinsic_f32_cmp", AOT_INTRINSIC_FLAG_F32_CMP },
{ "f64_cmp", "aot_intrinsic_f64_cmp", AOT_INTRINSIC_FLAG_F64_CMP },
{ "i32.const", NULL, AOT_INTRINSIC_FLAG_I32_CONST },
{ "i64.const", NULL, AOT_INTRINSIC_FLAG_I64_CONST },
{ "f32.const", NULL, AOT_INTRINSIC_FLAG_F32_CONST },
{ "f64.const", NULL, AOT_INTRINSIC_FLAG_F64_CONST },
{ "i64.div_s", "aot_intrinsic_i64_div_s", AOT_INTRINSIC_FLAG_I64_DIV_S},
{ "i32.div_s", "aot_intrinsic_i32_div_s", AOT_INTRINSIC_FLAG_I32_DIV_S},
{ "i32.div_u", "aot_intrinsic_i32_div_u", AOT_INTRINSIC_FLAG_I32_DIV_U},
{ "i32.rem_s", "aot_intrinsic_i32_rem_s", AOT_INTRINSIC_FLAG_I32_REM_S},
{ "i32.rem_u", "aot_intrinsic_i32_rem_u", AOT_INTRINSIC_FLAG_I32_REM_U},
{ "i64.div_u", "aot_intrinsic_i64_div_u", AOT_INTRINSIC_FLAG_I64_DIV_U},
{ "i64.rem_s", "aot_intrinsic_i64_rem_s", AOT_INTRINSIC_FLAG_I64_REM_S},
{ "i64.rem_u", "aot_intrinsic_i64_rem_u", AOT_INTRINSIC_FLAG_I64_REM_U},
{ "i64.or", "aot_intrinsic_i64_bit_or", AOT_INTRINSIC_FLAG_I64_BIT_OR},
{ "i64.and", "aot_intrinsic_i64_bit_and", AOT_INTRINSIC_FLAG_I64_BIT_AND},
};
/* clang-format on */
static const uint32 g_intrinsic_count =
sizeof(g_intrinsic_mapping) / sizeof(aot_intrinsic);
const char *
aot_intrinsic_get_symbol(const char *llvm_intrinsic)
{
@ -577,8 +579,6 @@ aot_intrinsic_get_symbol(const char *llvm_intrinsic)
return NULL;
}
#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0
static void
add_intrinsic_capability(AOTCompContext *comp_ctx, uint64 flag)
{

View File

@ -287,10 +287,10 @@ aot_intrinsic_i64_bit_or(uint64 l, uint64 r);
uint64
aot_intrinsic_i64_bit_and(uint64 l, uint64 r);
#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0
const char *
aot_intrinsic_get_symbol(const char *llvm_intrinsic);
#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0
bool
aot_intrinsic_check_capability(const AOTCompContext *comp_ctx,
const char *llvm_intrinsic);

View File

@ -62,6 +62,7 @@ typedef struct {
#define REG_AOT_TRACE_SYM()
#endif
#if WASM_ENABLE_AOT_INTRINSICS != 0
#define REG_INTRINSIC_SYM() \
REG_SYM(aot_intrinsic_fabs_f32), \
REG_SYM(aot_intrinsic_fabs_f64), \
@ -124,7 +125,10 @@ typedef struct {
REG_SYM(aot_intrinsic_i32_div_s), \
REG_SYM(aot_intrinsic_i32_div_u), \
REG_SYM(aot_intrinsic_i32_rem_s), \
REG_SYM(aot_intrinsic_i32_rem_u), \
REG_SYM(aot_intrinsic_i32_rem_u),
#else
#define REG_INTRINSIC_SYM()
#endif
#if WASM_ENABLE_STATIC_PGO != 0
#define REG_LLVM_PGO_SYM() \

View File

@ -264,6 +264,10 @@ Currently we only profile the memory consumption of module, module_instance and
- **WAMR_BUILD_QUICK_AOT_ENTRY**=1/0, enable registering quick call entries to speedup the aot/jit func call process, default to enable if not set
> Note: See [Refine callings to AOT/JIT functions from host native](./perf_tune.md#83-refine-callings-to-aotjit-functions-from-host-native) for more details.
#### **Enable AOT intrinsics**
- **WAMR_BUILD_AOT_INTRINSICS**=1/0, enable the AOT intrinsic functions, default to enable if not set. These functions can be called from the AOT code when `--disable-llvm-intrinsics` flag or `--enable-builtin-intrinsics=<intr1,intr2,...>` flag is used by wamrc to generate the AOT file.
> Note: See [Tuning the XIP intrinsic functions](./xip.md#tuning-the-xip-intrinsic-functions) for more details.
#### **Configurale memory access boundary check**
- **WAMR_CONFIGUABLE_BOUNDS_CHECKS**=1/0, default to disable if not set
> Note: If it is enabled, allow to run `iwasm --disable-bounds-checks` to disable the memory access boundary checks for interpreter mode.