XIP adaptation for xtensa platform (#1636)

Add macro WASM_ENABLE_WORD_ALIGN_READ to enable reading
1/2/4 and n bytes of data from a vram buffer, which requires reads
from 4-byte-aligned addresses.

Eliminate XIP AOT relocations related to the below ones:
   i32_div_u, f32_min, f32_max, f32_ceil, f32_floor, f32_trunc, f32_rint
This commit is contained in:
dongsheng28849455 2022-10-31 17:25:24 +08:00 committed by GitHub
parent dba9e52f2f
commit e517dbc7b2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 244 additions and 11 deletions

View File

@ -70,6 +70,10 @@
#define WASM_ENABLE_AOT 0 #define WASM_ENABLE_AOT 0
#endif #endif
#ifndef WASM_ENABLE_WORD_ALIGN_READ
#define WASM_ENABLE_WORD_ALIGN_READ 0
#endif
#define AOT_MAGIC_NUMBER 0x746f6100 #define AOT_MAGIC_NUMBER 0x746f6100
#define AOT_CURRENT_VERSION 3 #define AOT_CURRENT_VERSION 3

View File

@ -66,6 +66,7 @@ static const aot_intrinsic g_intrinsic_mapping[] = {
{ "f32.const", NULL, AOT_INTRINSIC_FLAG_F32_CONST }, { "f32.const", NULL, AOT_INTRINSIC_FLAG_F32_CONST },
{ "f64.const", NULL, AOT_INTRINSIC_FLAG_F64_CONST }, { "f64.const", NULL, AOT_INTRINSIC_FLAG_F64_CONST },
{ "i64.div_s", "aot_intrinsic_i64_div_s", AOT_INTRINSIC_FLAG_I64_DIV_S}, { "i64.div_s", "aot_intrinsic_i64_div_s", AOT_INTRINSIC_FLAG_I64_DIV_S},
{ "i32.div_u", "aot_intrinsic_i32_div_u", AOT_INTRINSIC_FLAG_I32_DIV_U},
{ "i64.div_u", "aot_intrinsic_i64_div_u", AOT_INTRINSIC_FLAG_I64_DIV_U}, { "i64.div_u", "aot_intrinsic_i64_div_u", AOT_INTRINSIC_FLAG_I64_DIV_U},
{ "i64.rem_s", "aot_intrinsic_i64_rem_s", AOT_INTRINSIC_FLAG_I64_REM_S}, { "i64.rem_s", "aot_intrinsic_i64_rem_s", AOT_INTRINSIC_FLAG_I64_REM_S},
{ "i64.rem_u", "aot_intrinsic_i64_rem_u", AOT_INTRINSIC_FLAG_I64_REM_U}, { "i64.rem_u", "aot_intrinsic_i64_rem_u", AOT_INTRINSIC_FLAG_I64_REM_U},
@ -497,6 +498,12 @@ aot_intrinsic_i64_div_s(int64 l, int64 r)
return l / r; return l / r;
} }
/**
 * Unsigned 32-bit integer division helper.
 *
 * The divisor is not checked here; the caller must make sure that
 * r is non-zero.
 *
 * @param l the dividend
 * @param r the divisor
 *
 * @return the quotient l / r
 */
uint32
aot_intrinsic_i32_div_u(uint32 l, uint32 r)
{
    uint32 quotient = l / r;

    return quotient;
}
uint64 uint64
aot_intrinsic_i64_div_u(uint64 l, uint64 r) aot_intrinsic_i64_div_u(uint64 l, uint64 r)
{ {
@ -551,6 +558,12 @@ add_i64_common_intrinsics(AOTCompContext *comp_ctx)
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I64_REM_U); add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I64_REM_U);
} }
/* Add the common i32 intrinsic capabilities to comp_ctx; the only
   flag added here is AOT_INTRINSIC_FLAG_I32_DIV_U. */
static void
add_i32_common_intrinsics(AOTCompContext *comp_ctx)
{
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I32_DIV_U);
}
static void static void
add_f32_common_intrinsics(AOTCompContext *comp_ctx) add_f32_common_intrinsics(AOTCompContext *comp_ctx)
{ {
@ -561,6 +574,12 @@ add_f32_common_intrinsics(AOTCompContext *comp_ctx)
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_FDIV); add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_FDIV);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_SQRT); add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_SQRT);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CMP); add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CMP);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_MIN);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_MAX);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CEIL);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_FLOOR);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_TRUNC);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_RINT);
} }
static void static void
@ -667,7 +686,9 @@ aot_intrinsic_fill_capability_flags(AOTCompContext *comp_ctx)
* will cause rodata relocation * will cause rodata relocation
*/ */
add_f32_common_intrinsics(comp_ctx); add_f32_common_intrinsics(comp_ctx);
add_i32_common_intrinsics(comp_ctx);
add_f64_common_intrinsics(comp_ctx); add_f64_common_intrinsics(comp_ctx);
add_i64_common_intrinsics(comp_ctx);
add_common_float_integer_convertion(comp_ctx); add_common_float_integer_convertion(comp_ctx);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CONST); add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CONST);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_CONST); add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_CONST);

View File

@ -59,6 +59,7 @@ extern "C" {
#define AOT_INTRINSIC_FLAG_F32_CMP AOT_INTRINSIC_FLAG(0, 25) #define AOT_INTRINSIC_FLAG_F32_CMP AOT_INTRINSIC_FLAG(0, 25)
#define AOT_INTRINSIC_FLAG_F32_CONST AOT_INTRINSIC_FLAG(0, 26) #define AOT_INTRINSIC_FLAG_F32_CONST AOT_INTRINSIC_FLAG(0, 26)
#define AOT_INTRINSIC_FLAG_I32_CONST AOT_INTRINSIC_FLAG(0, 27) #define AOT_INTRINSIC_FLAG_I32_CONST AOT_INTRINSIC_FLAG(0, 27)
#define AOT_INTRINSIC_FLAG_I32_DIV_U AOT_INTRINSIC_FLAG(0, 28)
#define AOT_INTRINSIC_FLAG_F64_FADD AOT_INTRINSIC_FLAG(1, 0) #define AOT_INTRINSIC_FLAG_F64_FADD AOT_INTRINSIC_FLAG(1, 0)
#define AOT_INTRINSIC_FLAG_F64_FSUB AOT_INTRINSIC_FLAG(1, 1) #define AOT_INTRINSIC_FLAG_F64_FSUB AOT_INTRINSIC_FLAG(1, 1)
@ -254,6 +255,9 @@ aot_intrinsic_f64_cmp(AOTFloatCond cond, float64 lhs, float64 rhs);
int64 int64
aot_intrinsic_i64_div_s(int64 l, int64 r); aot_intrinsic_i64_div_s(int64 l, int64 r);
uint32
aot_intrinsic_i32_div_u(uint32 l, uint32 r);
uint64 uint64
aot_intrinsic_i64_div_u(uint64 l, uint64 r); aot_intrinsic_i64_div_u(uint64 l, uint64 r);

View File

@ -123,6 +123,80 @@ GET_U64_FROM_ADDR(uint32 *addr)
return u.val; return u.val;
} }
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
/*
 * Read the byte at p by loading the whole 4-byte aligned word that
 * contains it into a local and then picking the byte out of that local,
 * so that the source buffer is only touched with a 32-bit aligned load.
 */
static inline uint8
GET_U8_FROM_ADDR(const uint8 *p)
{
    const uint8 *word_addr;
    const uint8 *word_bytes;
    uint32 word;

    bh_assert(p);

    /* If align_ptr moved past p, step back one word so that
       word_addr <= p < word_addr + 4 */
    word_addr = align_ptr(p, 4);
    if (word_addr > p)
        word_addr -= 4;

    word = *(const uint32 *)word_addr;
    word_bytes = (const uint8 *)&word;

    return word_bytes[p - word_addr];
}
static inline uint16
GET_U16_FROM_ADDR(const uint8 *p)
{
uint16 res = 0;
bh_assert(p);
const uint8 *p_aligned = align_ptr(p, 4);
p_aligned = (p_aligned > p) ? p_aligned - 4 : p_aligned;
uint32 buf32 = *(const uint32 *)p_aligned;
const uint8 *pbuf = (const uint8 *)&buf32;
res = *(uint16 *)(pbuf + (p - p_aligned));
return res;
}
/*
 * Word-aligned-read variant of TEMPLATE_READ: read one value of the given
 * integer type from p into res and advance p past it.
 *
 * p is first aligned to sizeof(type) (to sizeof(uint32) when type is
 * uint64), then CHECK_BUF is applied for sizeof(type) bytes against p_end.
 * 1-byte and 2-byte values are fetched with GET_U8_FROM_ADDR and
 * GET_U16_FROM_ADDR, 4-byte values are loaded directly from p, and 8-byte
 * values go through GET_U64_FROM_ADDR. When is_little_endian() returns
 * false, the result is passed to exchange_<type>().
 *
 * Note that p and res are evaluated several times and p is modified.
 */
#define TEMPLATE_READ(p, p_end, res, type) \
do { \
if (sizeof(type) != sizeof(uint64)) \
p = (uint8 *)align_ptr(p, sizeof(type)); \
else \
/* align 4 bytes if type is uint64 */ \
p = (uint8 *)align_ptr(p, sizeof(uint32)); \
CHECK_BUF(p, p_end, sizeof(type)); \
if (sizeof(type) == sizeof(uint8)) \
res = GET_U8_FROM_ADDR(p); \
else if (sizeof(type) == sizeof(uint16)) \
res = GET_U16_FROM_ADDR(p); \
else if (sizeof(type) == sizeof(uint32)) \
res = *(type *)p; \
else \
res = (type)GET_U64_FROM_ADDR((uint32 *)p); \
if (!is_little_endian()) \
exchange_##type((uint8 *)&res); \
p += sizeof(type); \
} while (0)
/*
 * Word-aligned-read variant: apply CHECK_BUF for len bytes at p against
 * p_end, copy len bytes from p to addr with bh_memcpy_wa, then advance p
 * by len.
 */
#define read_byte_array(p, p_end, addr, len) \
do { \
CHECK_BUF(p, p_end, len); \
bh_memcpy_wa(addr, len, p, len); \
p += len; \
} while (0)
/*
 * Word-aligned-read variant: load a string through load_string(), passing
 * true for the argument that follows is_load_from_file_buf, and store the
 * result in str. Jumps to the fail label when load_string() returns NULL.
 *
 * The expansion site must have module, is_load_from_file_buf, error_buf,
 * error_buf_size and a fail label in scope.
 */
#define read_string(p, p_end, str) \
do { \
if (!(str = load_string((uint8 **)&p, p_end, module, \
is_load_from_file_buf, true, error_buf, \
error_buf_size))) \
goto fail; \
} while (0)
#else /* else of (WASM_ENABLE_WORD_ALIGN_READ != 0) */
#define TEMPLATE_READ(p, p_end, res, type) \ #define TEMPLATE_READ(p, p_end, res, type) \
do { \ do { \
if (sizeof(type) != sizeof(uint64)) \ if (sizeof(type) != sizeof(uint64)) \
@ -140,11 +214,6 @@ GET_U64_FROM_ADDR(uint32 *addr)
p += sizeof(type); \ p += sizeof(type); \
} while (0) } while (0)
#define read_uint8(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint8)
#define read_uint16(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint16)
#define read_uint32(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint32)
#define read_uint64(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint64)
#define read_byte_array(p, p_end, addr, len) \ #define read_byte_array(p, p_end, addr, len) \
do { \ do { \
CHECK_BUF(p, p_end, len); \ CHECK_BUF(p, p_end, len); \
@ -160,6 +229,13 @@ GET_U64_FROM_ADDR(uint32 *addr)
goto fail; \ goto fail; \
} while (0) } while (0)
#endif /* end of (WASM_ENABLE_WORD_ALIGN_READ != 0) */
/* Typed wrappers: each one expands to TEMPLATE_READ with the matching
   unsigned integer type. */
#define read_uint8(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint8)
#define read_uint16(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint16)
#define read_uint32(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint32)
#define read_uint64(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint64)
/* Legal values for bin_type */ /* Legal values for bin_type */
#define BIN_TYPE_ELF32L 0 /* 32-bit little endian */ #define BIN_TYPE_ELF32L 0 /* 32-bit little endian */
#define BIN_TYPE_ELF32B 1 /* 32-bit big endian */ #define BIN_TYPE_ELF32B 1 /* 32-bit big endian */
@ -211,6 +287,9 @@ loader_malloc(uint64 size, char *error_buf, uint32 error_buf_size)
static char * static char *
const_str_set_insert(const uint8 *str, int32 len, AOTModule *module, const_str_set_insert(const uint8 *str, int32 len, AOTModule *module,
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
bool is_vram_word_align,
#endif
char *error_buf, uint32 error_buf_size) char *error_buf, uint32 error_buf_size)
{ {
HashMap *set = module->const_str_set; HashMap *set = module->const_str_set;
@ -230,8 +309,15 @@ const_str_set_insert(const uint8 *str, int32 len, AOTModule *module,
if (!(c_str = loader_malloc((uint32)len + 1, error_buf, error_buf_size))) { if (!(c_str = loader_malloc((uint32)len + 1, error_buf, error_buf_size))) {
return NULL; return NULL;
} }
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
bh_memcpy_s(c_str, (uint32)(len + 1), str, (uint32)len); if (is_vram_word_align) {
bh_memcpy_wa(c_str, (uint32)(len + 1), str, (uint32)len);
}
else
#endif
{
bh_memcpy_s(c_str, (uint32)(len + 1), str, (uint32)len);
}
c_str[len] = '\0'; c_str[len] = '\0';
if ((value = bh_hash_map_find(set, c_str))) { if ((value = bh_hash_map_find(set, c_str))) {
@ -251,7 +337,11 @@ const_str_set_insert(const uint8 *str, int32 len, AOTModule *module,
static char * static char *
load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module, load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
bool is_load_from_file_buf, char *error_buf, uint32 error_buf_size) bool is_load_from_file_buf,
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
bool is_vram_word_align,
#endif
char *error_buf, uint32 error_buf_size)
{ {
uint8 *p = *p_buf; uint8 *p = *p_buf;
const uint8 *p_end = buf_end; const uint8 *p_end = buf_end;
@ -264,6 +354,15 @@ load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
if (str_len == 0) { if (str_len == 0) {
str = ""; str = "";
} }
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
else if (is_vram_word_align) {
if (!(str = const_str_set_insert((uint8 *)p, str_len, module,
is_vram_word_align, error_buf,
error_buf_size))) {
goto fail;
}
}
#endif
else if (p[str_len - 1] == '\0') { else if (p[str_len - 1] == '\0') {
/* The string is terminated with '\0', use it directly */ /* The string is terminated with '\0', use it directly */
str = (char *)p; str = (char *)p;
@ -280,8 +379,11 @@ load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
/* Load from sections, the file buffer cannot be reffered to /* Load from sections, the file buffer cannot be reffered to
after loading, we must create another string and insert it after loading, we must create another string and insert it
into const string set */ into const string set */
if (!(str = const_str_set_insert((uint8 *)p, str_len, module, error_buf, if (!(str = const_str_set_insert((uint8 *)p, str_len, module,
error_buf_size))) { #if (WASM_ENABLE_WORD_ALIGN_READ != 0)
is_vram_word_align,
#endif
error_buf, error_buf_size))) {
goto fail; goto fail;
} }
} }

View File

@ -110,6 +110,7 @@ typedef struct {
REG_SYM(aot_intrinsic_i64_div_u), \ REG_SYM(aot_intrinsic_i64_div_u), \
REG_SYM(aot_intrinsic_i64_rem_s), \ REG_SYM(aot_intrinsic_i64_rem_s), \
REG_SYM(aot_intrinsic_i64_rem_u), \ REG_SYM(aot_intrinsic_i64_rem_u), \
REG_SYM(aot_intrinsic_i32_div_u), \
#define REG_COMMON_SYMBOLS \ #define REG_COMMON_SYMBOLS \
REG_SYM(aot_set_exception_with_id), \ REG_SYM(aot_set_exception_with_id), \

View File

@ -506,6 +506,10 @@ wasm_runtime_full_init(RuntimeInitArgs *init_args)
PackageType PackageType
get_package_type(const uint8 *buf, uint32 size) get_package_type(const uint8 *buf, uint32 size)
{ {
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
uint32 buf32 = *(uint32 *)buf;
buf = (const uint8 *)&buf32;
#endif
if (buf && size >= 4) { if (buf && size >= 4) {
if (buf[0] == '\0' && buf[1] == 'a' && buf[2] == 's' && buf[3] == 'm') if (buf[0] == '\0' && buf[1] == 'a' && buf[2] == 's' && buf[3] == 'm')
return Wasm_Module_Bytecode; return Wasm_Module_Bytecode;

View File

@ -565,7 +565,22 @@ compile_int_div(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
PUSH_INT(res); PUSH_INT(res);
return true; return true;
case INT_DIV_U: case INT_DIV_U:
LLVM_BUILD_OP(UDiv, left, right, res, "div_u", false); if (comp_ctx->disable_llvm_intrinsics && is_i32
&& aot_intrinsic_check_capability(comp_ctx, "i32.div_u")) {
res = aot_call_llvm_intrinsic(comp_ctx, func_ctx,
"i32.div_u", param_types[0],
param_types, 2, left, right);
}
else if (comp_ctx->disable_llvm_intrinsics && !is_i32
&& aot_intrinsic_check_capability(comp_ctx,
"i64.div_u")) {
res = aot_call_llvm_intrinsic(comp_ctx, func_ctx,
"i64.div_u", param_types[0],
param_types, 2, left, right);
}
else {
LLVM_BUILD_OP(UDiv, left, right, res, "div_u", false);
}
PUSH_INT(res); PUSH_INT(res);
return true; return true;
case INT_REM_S: case INT_REM_S:

View File

@ -5,6 +5,73 @@
#include "bh_common.h" #include "bh_common.h"
/* Round src up to the next multiple of b; b must be a power of two */
static char *
align_ptr(char *src, unsigned int b)
{
    uintptr_t v = (uintptr_t)src;
    uintptr_t m = b - 1;
    return (char *)((v + m) & ~m);
}

/*
  Memory copy, with word alignment

  Copies n bytes from s2 to s1 while reading the source only through
  4-byte aligned word loads. The words covering [s2, s2 + n) are loaded
  one at a time into a local, and only the bytes that fall inside the
  requested range are stored to the destination. The destination is
  written byte by byte, so it may have any alignment.

  Note that the whole aligned words enclosing the source range are read,
  which can include up to 3 bytes before s2 and up to 3 bytes after
  s2 + n.

  @param s1 the destination buffer
  @param s1max the size of the destination buffer in bytes
  @param s2 the source buffer
  @param n the number of bytes to copy

  @return 0 on success (including n == 0), -1 if s1 or s2 is NULL or
          if n is larger than s1max; nothing is copied on failure
*/
int
b_memcpy_wa(void *s1, unsigned int s1max, const void *s2, unsigned int n)
{
    char *dest = (char *)s1;
    char *src = (char *)s2;
    char *src_end;
    char *pa;
    char *pb;
    char *word_start;
    char *from;
    char *to;
    char *ps;
    const char *word_bytes;
    unsigned int buff;
    /* volatile keeps every source access a full word load */
    const volatile unsigned int *p;

    if (n == 0) {
        return 0;
    }

    if (s1 == NULL || s2 == NULL || n > s1max) {
        return -1;
    }

    src_end = src + n;

    /* pa: aligned word containing the first byte,
       pb: end of the aligned word containing the last byte */
    pa = align_ptr(src, 4);
    pb = align_ptr(src_end, 4);
    if (pa > src) {
        pa -= 4;
    }

    word_bytes = (const char *)&buff;

    for (p = (const volatile unsigned int *)pa;
         p < (const volatile unsigned int *)pb; p++) {
        buff = *p;
        word_start = (char *)p;

        /* Clip the current word to the requested source range, this
           handles the leading, middle and trailing words uniformly */
        from = word_start < src ? src : word_start;
        to = word_start + 4 > src_end ? src_end : word_start + 4;

        for (ps = from; ps < to; ps++) {
            *dest++ = word_bytes[ps - word_start];
        }
    }

    return 0;
}
int int
b_memcpy_s(void *s1, unsigned int s1max, const void *s2, unsigned int n) b_memcpy_s(void *s1, unsigned int s1max, const void *s2, unsigned int n)
{ {

View File

@ -19,6 +19,13 @@ extern "C" {
bh_assert(_ret == 0); \ bh_assert(_ret == 0); \
} while (0) } while (0)
/*
 * Copy slen bytes from src to dest through b_memcpy_wa() and assert that
 * it returned 0. b_memcpy_wa() is not called at all when slen is 0.
 * Note that slen is evaluated twice.
 */
#define bh_memcpy_wa(dest, dlen, src, slen) \
do { \
int _ret = slen == 0 ? 0 : b_memcpy_wa(dest, dlen, src, slen); \
(void)_ret; \
bh_assert(_ret == 0); \
} while (0)
#define bh_memmove_s(dest, dlen, src, slen) \ #define bh_memmove_s(dest, dlen, src, slen) \
do { \ do { \
int _ret = slen == 0 ? 0 : b_memmove_s(dest, dlen, src, slen); \ int _ret = slen == 0 ? 0 : b_memmove_s(dest, dlen, src, slen); \
@ -43,6 +50,8 @@ extern "C" {
int int
b_memcpy_s(void *s1, unsigned int s1max, const void *s2, unsigned int n); b_memcpy_s(void *s1, unsigned int s1max, const void *s2, unsigned int n);
int int
b_memcpy_wa(void *s1, unsigned int s1max, const void *s2, unsigned int n);
int
b_memmove_s(void *s1, unsigned int s1max, const void *s2, unsigned int n); b_memmove_s(void *s1, unsigned int s1max, const void *s2, unsigned int n);
int int
b_strcat_s(char *s1, unsigned int s1max, const char *s2); b_strcat_s(char *s1, unsigned int s1max, const char *s2);

View File

@ -135,6 +135,12 @@ else
CFLAGS += -DWASM_ENABLE_AOT=0 CFLAGS += -DWASM_ENABLE_AOT=0
endif endif
ifeq ($(CONFIG_INTERPRETERS_WAMR_AOT_WORD_ALIGN_READ),y)
CFLAGS += -DWASM_ENABLE_WORD_ALIGN_READ=1
else
CFLAGS += -DWASM_ENABLE_WORD_ALIGN_READ=0
endif
ifeq ($(CONFIG_INTERPRETERS_WAMR_FAST), y) ifeq ($(CONFIG_INTERPRETERS_WAMR_FAST), y)
CFLAGS += -DWASM_ENABLE_FAST_INTERP=1 CFLAGS += -DWASM_ENABLE_FAST_INTERP=1
CFLAGS += -DWASM_ENABLE_INTERP=1 CFLAGS += -DWASM_ENABLE_INTERP=1