diff --git a/core/config.h b/core/config.h index 69a12799b..96c809094 100644 --- a/core/config.h +++ b/core/config.h @@ -70,6 +70,10 @@ #define WASM_ENABLE_AOT 0 #endif +/* Defaults to 0 when the build does not define it; the word-aligned read code is compiled only under #if (WASM_ENABLE_WORD_ALIGN_READ != 0). */#ifndef WASM_ENABLE_WORD_ALIGN_READ +#define WASM_ENABLE_WORD_ALIGN_READ 0 +#endif + #define AOT_MAGIC_NUMBER 0x746f6100 #define AOT_CURRENT_VERSION 3 diff --git a/core/iwasm/aot/aot_intrinsic.c b/core/iwasm/aot/aot_intrinsic.c index b98960808..5ce9919a0 100644 --- a/core/iwasm/aot/aot_intrinsic.c +++ b/core/iwasm/aot/aot_intrinsic.c @@ -66,6 +66,7 @@ static const aot_intrinsic g_intrinsic_mapping[] = { { "f32.const", NULL, AOT_INTRINSIC_FLAG_F32_CONST }, { "f64.const", NULL, AOT_INTRINSIC_FLAG_F64_CONST }, { "i64.div_s", "aot_intrinsic_i64_div_s", AOT_INTRINSIC_FLAG_I64_DIV_S}, + { "i32.div_u", "aot_intrinsic_i32_div_u", AOT_INTRINSIC_FLAG_I32_DIV_U}, { "i64.div_u", "aot_intrinsic_i64_div_u", AOT_INTRINSIC_FLAG_I64_DIV_U}, { "i64.rem_s", "aot_intrinsic_i64_rem_s", AOT_INTRINSIC_FLAG_I64_REM_S}, { "i64.rem_u", "aot_intrinsic_i64_rem_u", AOT_INTRINSIC_FLAG_I64_REM_U}, @@ -497,6 +498,12 @@ aot_intrinsic_i64_div_s(int64 l, int64 r) return l / r; } +/* Unsigned 32-bit division listed in g_intrinsic_mapping under the name "i32.div_u". Returns l / r; r is not checked for zero in this function. */uint32 +aot_intrinsic_i32_div_u(uint32 l, uint32 r) +{ + return l / r; +} + uint64 aot_intrinsic_i64_div_u(uint64 l, uint64 r) { @@ -551,6 +558,12 @@ add_i64_common_intrinsics(AOTCompContext *comp_ctx) add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I64_REM_U); } +/* Adds AOT_INTRINSIC_FLAG_I32_DIV_U to comp_ctx through add_intrinsic_capability(); it is the only i32 flag added here. */static void +add_i32_common_intrinsics(AOTCompContext *comp_ctx) +{ + add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I32_DIV_U); +} + static void add_f32_common_intrinsics(AOTCompContext *comp_ctx) { @@ -561,6 +574,12 @@ add_f32_common_intrinsics(AOTCompContext *comp_ctx) add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_FDIV); add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_SQRT); add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CMP); + add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_MIN); + add_intrinsic_capability(comp_ctx, 
AOT_INTRINSIC_FLAG_F32_MAX); + add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CEIL); + add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_FLOOR); + add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_TRUNC); + add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_RINT); } static void @@ -667,7 +686,9 @@ aot_intrinsic_fill_capability_flags(AOTCompContext *comp_ctx) * will cause rodata relocation */ add_f32_common_intrinsics(comp_ctx); + add_i32_common_intrinsics(comp_ctx); add_f64_common_intrinsics(comp_ctx); + add_i64_common_intrinsics(comp_ctx); add_common_float_integer_convertion(comp_ctx); add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CONST); add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_CONST); diff --git a/core/iwasm/aot/aot_intrinsic.h b/core/iwasm/aot/aot_intrinsic.h index c6765ee0c..8962445b7 100644 --- a/core/iwasm/aot/aot_intrinsic.h +++ b/core/iwasm/aot/aot_intrinsic.h @@ -59,6 +59,7 @@ extern "C" { #define AOT_INTRINSIC_FLAG_F32_CMP AOT_INTRINSIC_FLAG(0, 25) #define AOT_INTRINSIC_FLAG_F32_CONST AOT_INTRINSIC_FLAG(0, 26) #define AOT_INTRINSIC_FLAG_I32_CONST AOT_INTRINSIC_FLAG(0, 27) +#define AOT_INTRINSIC_FLAG_I32_DIV_U AOT_INTRINSIC_FLAG(0, 28) #define AOT_INTRINSIC_FLAG_F64_FADD AOT_INTRINSIC_FLAG(1, 0) #define AOT_INTRINSIC_FLAG_F64_FSUB AOT_INTRINSIC_FLAG(1, 1) @@ -254,6 +255,9 @@ aot_intrinsic_f64_cmp(AOTFloatCond cond, float64 lhs, float64 rhs); int64 aot_intrinsic_i64_div_s(int64 l, int64 r); +uint32 +aot_intrinsic_i32_div_u(uint32 l, uint32 r); + uint64 aot_intrinsic_i64_div_u(uint64 l, uint64 r); diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index dca35685a..a15593b90 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -123,6 +123,80 @@ GET_U64_FROM_ADDR(uint32 *addr) return u.val; } +#if (WASM_ENABLE_WORD_ALIGN_READ != 0) + +/* Return the byte at p. As written, the only access to the buffer is a 32-bit load of the word that contains p; the byte is then taken from a local copy of that word. */static inline uint8 +GET_U8_FROM_ADDR(const uint8 *p) +{ + uint8 res = 0; + bh_assert(p); + + const uint8 *p_aligned = 
align_ptr(p, 4); +/* If align_ptr returned an address past p, step back 4 bytes to the word that contains p. */ p_aligned = (p_aligned > p) ? p_aligned - 4 : p_aligned; + + uint32 buf32 = *(const uint32 *)p_aligned; + const uint8 *pbuf = (const uint8 *)&buf32; + + res = *(uint8 *)(pbuf + (p - p_aligned)); + + return res; +} + +/* Return the 16-bit value at p: load the 32-bit word that contains p into a local, then read a uint16 at the matching offset inside that copy. NOTE(review): with an offset of 3 the uint16 read would extend past buf32. The TEMPLATE_READ defined right after this function calls align_ptr(p, sizeof(type)) first, which presumably keeps p 2-byte aligned (offset 0 or 2) - confirm no other caller passes an odd address. */static inline uint16 +GET_U16_FROM_ADDR(const uint8 *p) +{ + uint16 res = 0; + bh_assert(p); + + const uint8 *p_aligned = align_ptr(p, 4); + p_aligned = (p_aligned > p) ? p_aligned - 4 : p_aligned; + + uint32 buf32 = *(const uint32 *)p_aligned; + const uint8 *pbuf = (const uint8 *)&buf32; + + res = *(uint16 *)(pbuf + (p - p_aligned)); + + return res; +} + +/* Variant used when WASM_ENABLE_WORD_ALIGN_READ != 0. Aligns p with align_ptr (to sizeof(uint32) when type is uint64), checks the range with CHECK_BUF, reads uint8/uint16 values through GET_U8_FROM_ADDR/GET_U16_FROM_ADDR, calls exchange_<type> when is_little_endian() is false, then advances p by sizeof(type). */#define TEMPLATE_READ(p, p_end, res, type) \ + do { \ + if (sizeof(type) != sizeof(uint64)) \ + p = (uint8 *)align_ptr(p, sizeof(type)); \ + else \ + /* align 4 bytes if type is uint64 */ \ + p = (uint8 *)align_ptr(p, sizeof(uint32)); \ + CHECK_BUF(p, p_end, sizeof(type)); \ + if (sizeof(type) == sizeof(uint8)) \ + res = GET_U8_FROM_ADDR(p); \ + else if (sizeof(type) == sizeof(uint16)) \ + res = GET_U16_FROM_ADDR(p); \ + else if (sizeof(type) == sizeof(uint32)) \ + res = *(type *)p; \ + else \ + res = (type)GET_U64_FROM_ADDR((uint32 *)p); \ + if (!is_little_endian()) \ + exchange_##type((uint8 *)&res); \ + p += sizeof(type); \ + } while (0) + +/* Variant used when WASM_ENABLE_WORD_ALIGN_READ != 0: after CHECK_BUF, copies len bytes from p to addr with bh_memcpy_wa and advances p by len. */#define read_byte_array(p, p_end, addr, len) \ + do { \ + CHECK_BUF(p, p_end, len); \ + bh_memcpy_wa(addr, len, p, len); \ + p += len; \ + } while (0) + +/* Variant used when WASM_ENABLE_WORD_ALIGN_READ != 0: calls load_string with true in the argument slot that follows is_load_from_file_buf and jumps to fail when the result is NULL. Expects module, is_load_from_file_buf, error_buf and error_buf_size to be in scope where the macro is expanded. */#define read_string(p, p_end, str) \ + do { \ + if (!(str = load_string((uint8 **)&p, p_end, module, \ + is_load_from_file_buf, true, error_buf, \ + error_buf_size))) \ + goto fail; \ + } while (0) + +#else /* else of (WASM_ENABLE_WORD_ALIGN_READ != 0) */ + #define TEMPLATE_READ(p, p_end, res, type) \ do { \ if (sizeof(type) != sizeof(uint64)) \ @@ -140,11 +214,6 @@ GET_U64_FROM_ADDR(uint32 *addr) p += sizeof(type); \ } while (0) -#define read_uint8(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint8) -#define read_uint16(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint16) -#define 
read_uint32(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint32) -#define read_uint64(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint64) - #define read_byte_array(p, p_end, addr, len) \ do { \ CHECK_BUF(p, p_end, len); \ @@ -160,6 +229,13 @@ GET_U64_FROM_ADDR(uint32 *addr) goto fail; \ } while (0) +#endif /* end of (WASM_ENABLE_WORD_ALIGN_READ != 0) */ + +/* Fixed-width readers. They are defined after the #endif, so each one expands to whichever TEMPLATE_READ definition was selected by WASM_ENABLE_WORD_ALIGN_READ. */#define read_uint8(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint8) +#define read_uint16(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint16) +#define read_uint32(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint32) +#define read_uint64(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint64) + /* Legal values for bin_type */ #define BIN_TYPE_ELF32L 0 /* 32-bit little endian */ #define BIN_TYPE_ELF32B 1 /* 32-bit big endian */ @@ -211,6 +287,9 @@ loader_malloc(uint64 size, char *error_buf, uint32 error_buf_size) static char * const_str_set_insert(const uint8 *str, int32 len, AOTModule *module, +#if (WASM_ENABLE_WORD_ALIGN_READ != 0) + bool is_vram_word_align, +#endif char *error_buf, uint32 error_buf_size) { HashMap *set = module->const_str_set; @@ -230,8 +309,15 @@ const_str_set_insert(const uint8 *str, int32 len, AOTModule *module, if (!(c_str = loader_malloc((uint32)len + 1, error_buf, error_buf_size))) { return NULL; } - - bh_memcpy_s(c_str, (uint32)(len + 1), str, (uint32)len); +#if (WASM_ENABLE_WORD_ALIGN_READ != 0) + if (is_vram_word_align) { + bh_memcpy_wa(c_str, (uint32)(len + 1), str, (uint32)len); + } + else +#endif + { + bh_memcpy_s(c_str, (uint32)(len + 1), str, (uint32)len); + } c_str[len] = '\0'; if ((value = bh_hash_map_find(set, c_str))) { @@ -251,7 +337,11 @@ const_str_set_insert(const uint8 *str, int32 len, AOTModule *module, static char * load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module, - bool is_load_from_file_buf, char *error_buf, uint32 error_buf_size) + bool is_load_from_file_buf, +#if (WASM_ENABLE_WORD_ALIGN_READ != 0) + bool is_vram_word_align, +#endif + char *error_buf, 
uint32 error_buf_size) { uint8 *p = *p_buf; const uint8 *p_end = buf_end; @@ -264,6 +354,15 @@ load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module, if (str_len == 0) { str = ""; } +#if (WASM_ENABLE_WORD_ALIGN_READ != 0) + else if (is_vram_word_align) { + if (!(str = const_str_set_insert((uint8 *)p, str_len, module, + is_vram_word_align, error_buf, + error_buf_size))) { + goto fail; + } + } +#endif else if (p[str_len - 1] == '\0') { /* The string is terminated with '\0', use it directly */ str = (char *)p; @@ -280,8 +379,11 @@ load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module, /* Load from sections, the file buffer cannot be reffered to after loading, we must create another string and insert it into const string set */ - if (!(str = const_str_set_insert((uint8 *)p, str_len, module, error_buf, - error_buf_size))) { + if (!(str = const_str_set_insert((uint8 *)p, str_len, module, +#if (WASM_ENABLE_WORD_ALIGN_READ != 0) + is_vram_word_align, +#endif + error_buf, error_buf_size))) { goto fail; } } diff --git a/core/iwasm/aot/aot_reloc.h b/core/iwasm/aot/aot_reloc.h index f97cea723..3009af292 100644 --- a/core/iwasm/aot/aot_reloc.h +++ b/core/iwasm/aot/aot_reloc.h @@ -110,6 +110,7 @@ typedef struct { REG_SYM(aot_intrinsic_i64_div_u), \ REG_SYM(aot_intrinsic_i64_rem_s), \ REG_SYM(aot_intrinsic_i64_rem_u), \ + REG_SYM(aot_intrinsic_i32_div_u), \ #define REG_COMMON_SYMBOLS \ REG_SYM(aot_set_exception_with_id), \ diff --git a/core/iwasm/common/wasm_runtime_common.c b/core/iwasm/common/wasm_runtime_common.c index 803603649..2896fac8c 100644 --- a/core/iwasm/common/wasm_runtime_common.c +++ b/core/iwasm/common/wasm_runtime_common.c @@ -506,6 +506,10 @@ wasm_runtime_full_init(RuntimeInitArgs *init_args) PackageType get_package_type(const uint8 *buf, uint32 size) { +#if (WASM_ENABLE_WORD_ALIGN_READ != 0) + uint32 buf32 = *(uint32 *)buf; + buf = (const uint8 *)&buf32; +#endif if (buf && size >= 4) { if (buf[0] == '\0' && buf[1] == 'a' && buf[2] == 
/*
 * Round src up to the nearest multiple of b and return it as a pointer.
 * The mask arithmetic only works when b is a non-zero power of two.
 */
static char *
align_ptr(char *src, unsigned int b)
{
    uintptr_t v = (uintptr_t)src;
    uintptr_t m = b - 1;
    return (char *)((v + m) & ~m);
}

/*
 * Memory copy, with word alignment
 *
 * Copies n bytes from s2 to s1. The source is only ever loaded as whole,
 * 4-byte-aligned words: every aligned word that overlaps [s2, s2 + n) is
 * loaded once (so the first and last load may cover up to 3 bytes outside
 * that range) and the wanted bytes are taken from a local copy of the word.
 * The destination is written one byte at a time, so it may have any
 * alignment.
 *
 * @param s1    destination buffer
 * @param s1max capacity of the destination buffer in bytes
 * @param s2    source buffer
 * @param n     number of bytes to copy
 *
 * @return 0 on success (including n == 0); -1 if s1 or s2 is a null
 *         pointer or n is larger than s1max, in which case nothing is
 *         written
 */
int
b_memcpy_wa(void *s1, unsigned int s1max, const void *s2, unsigned int n)
{
    char *dest = (char *)s1;
    char *src = (char *)s2;
    char *src_end;
    char *word;

    if (n == 0) {
        return 0;
    }

    /* Refuse to dereference null pointers or overrun the destination */
    if (!s1 || !s2 || n > s1max) {
        return -1;
    }

    src_end = src + n;

    /* First aligned word overlapping the source: round src down to 4 */
    word = align_ptr(src, 4);
    if (word > src) {
        word -= 4;
    }

    for (; word < src_end; word += 4) {
        /* volatile: perform the 32-bit load as written instead of letting
           the compiler fold it into the byte extraction below */
        unsigned int buff = *(const volatile unsigned int *)word;
        const char *bytes = (const char *)&buff;
        /* Byte window [first, last) of this word that lies inside the
           requested source range */
        unsigned int first = (word < src) ? (unsigned int)(src - word) : 0;
        unsigned int last =
            (src_end - word < 4) ? (unsigned int)(src_end - word) : 4;
        unsigned int i;

        /* Byte stores keep this safe for a destination of any alignment */
        for (i = first; i < last; i++) {
            *dest++ = bytes[i];
        }
    }

    return 0;
}
0 : b_memmove_s(dest, dlen, src, slen); \ @@ -43,6 +50,8 @@ extern "C" { int b_memcpy_s(void *s1, unsigned int s1max, const void *s2, unsigned int n); int +b_memcpy_wa(void *s1, unsigned int s1max, const void *s2, unsigned int n); +int b_memmove_s(void *s1, unsigned int s1max, const void *s2, unsigned int n); int b_strcat_s(char *s1, unsigned int s1max, const char *s2); diff --git a/product-mini/platforms/nuttx/wamr.mk b/product-mini/platforms/nuttx/wamr.mk index 06c40d68c..782844247 100644 --- a/product-mini/platforms/nuttx/wamr.mk +++ b/product-mini/platforms/nuttx/wamr.mk @@ -135,6 +135,12 @@ else CFLAGS += -DWASM_ENABLE_AOT=0 endif +ifeq ($(CONFIG_INTERPRETERS_WAMR_AOT_WORD_ALIGN_READ),y) +CFLAGS += -DWASM_ENABLE_WORD_ALIGN_READ=1 +else +CFLAGS += -DWASM_ENABLE_WORD_ALIGN_READ=0 +endif + ifeq ($(CONFIG_INTERPRETERS_WAMR_FAST), y) CFLAGS += -DWASM_ENABLE_FAST_INTERP=1 CFLAGS += -DWASM_ENABLE_INTERP=1