Use indirect call in pre-checker function to avoid relocation in XIP mode (#3142)

The stack profiler `aot_func#xxx` calls the wrapped function `aot_func_internal#xxx`
by symbol reference, but on some platforms such as Xtensa this is translated into a native
long call, which needs a relocation to resolve the target address and therefore breaks the
XIP feature, which requires eliminating relocations.

The solution is to replace the symbol reference with an indirect call through the lookup
table; the generated code looks like this:
```llvm
call_wrapped_func:                                ; preds = %stack_bound_check_block
  %func_addr1 = getelementptr inbounds ptr, ptr %func_ptrs_ptr, i32 75
  %func_tmp2 = load ptr, ptr %func_addr1, align 4
  tail call void %func_tmp2(ptr %exec_env)
  ret void
```
This commit is contained in:
dongsheng28849455 2024-02-27 11:17:57 +08:00 committed by GitHub
parent 2349df1271
commit 4f6d70bc52
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 132 additions and 17 deletions

View File

@ -2500,15 +2500,26 @@ load_function_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
const uint8 *p = buf, *p_end = buf_end;
uint32 i;
uint64 size, text_offset;
uint32 func_count = module->func_count;
size = sizeof(void *) * (uint64)module->func_count;
#if defined(BUILD_TARGET_XTENSA)
/*
* For Xtensa XIP, real func_count is doubled, including aot_func and
* aot_func_internal, so need to multiply func_count by 2 here.
*/
if (module->is_indirect_mode) {
func_count *= 2;
}
#endif
size = sizeof(void *) * (uint64)func_count;
if (size > 0
&& !(module->func_ptrs =
loader_malloc(size, error_buf, error_buf_size))) {
return false;
}
for (i = 0; i < module->func_count; i++) {
for (i = 0; i < func_count; i++) {
if (sizeof(void *) == 8) {
read_uint64(p, p_end, text_offset);
}
@ -2543,14 +2554,14 @@ load_function_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
module->start_function = NULL;
}
size = sizeof(uint32) * (uint64)module->func_count;
size = sizeof(uint32) * (uint64)func_count;
if (size > 0
&& !(module->func_type_indexes =
loader_malloc(size, error_buf, error_buf_size))) {
return false;
}
for (i = 0; i < module->func_count; i++) {
for (i = 0; i < func_count; i++) {
read_uint32(p, p_end, module->func_type_indexes[i]);
if (module->func_type_indexes[i] >= module->type_count) {
set_error_buf(error_buf, error_buf_size, "unknown type");

View File

@ -1108,10 +1108,21 @@ init_func_ptrs(AOTModuleInstance *module_inst, AOTModule *module,
{
uint32 i;
void **func_ptrs;
uint64 total_size = ((uint64)module->import_func_count + module->func_count)
* sizeof(void *);
uint32 func_count = module->func_count;
#if defined(BUILD_TARGET_XTENSA)
/*
* For Xtensa XIP, real func_count is doubled, including aot_func and
* aot_func_internal, so need to multiply func_count by 2 here.
*/
if (module->is_indirect_mode) {
func_count *= 2;
}
#endif
if (module->import_func_count + module->func_count == 0)
uint64 total_size =
((uint64)module->import_func_count + func_count) * sizeof(void *);
if (module->import_func_count + func_count == 0)
return true;
/* Allocate memory */
@ -1133,8 +1144,8 @@ init_func_ptrs(AOTModuleInstance *module_inst, AOTModule *module,
}
/* Set defined function pointers */
bh_memcpy_s(func_ptrs, sizeof(void *) * module->func_count,
module->func_ptrs, sizeof(void *) * module->func_count);
bh_memcpy_s(func_ptrs, sizeof(void *) * func_count, module->func_ptrs,
sizeof(void *) * func_count);
return true;
}
@ -1144,10 +1155,21 @@ init_func_type_indexes(AOTModuleInstance *module_inst, AOTModule *module,
{
uint32 i;
uint32 *func_type_index;
uint64 total_size = ((uint64)module->import_func_count + module->func_count)
* sizeof(uint32);
uint32 func_count = module->func_count;
#if defined(BUILD_TARGET_XTENSA)
/*
* For Xtensa XIP, real func_count is doubled, including aot_func and
* aot_func_internal, so need to multiply func_count by 2 here.
*/
if (module->is_indirect_mode) {
func_count *= 2;
}
#endif
if (module->import_func_count + module->func_count == 0)
uint64 total_size =
((uint64)module->import_func_count + func_count) * sizeof(uint32);
if (module->import_func_count + func_count == 0)
return true;
/* Allocate memory */
@ -1161,8 +1183,8 @@ init_func_type_indexes(AOTModuleInstance *module_inst, AOTModule *module,
for (i = 0; i < module->import_func_count; i++, func_type_index++)
*func_type_index = module->import_funcs[i].func_type_index;
bh_memcpy_s(func_type_index, sizeof(uint32) * module->func_count,
module->func_type_indexes, sizeof(uint32) * module->func_count);
bh_memcpy_s(func_type_index, sizeof(uint32) * func_count,
module->func_type_indexes, sizeof(uint32) * func_count);
return true;
}

View File

@ -179,6 +179,16 @@ is_little_endian_binary(const AOTObjectData *obj_data)
return obj_data->target_info.bin_type & 1 ? false : true;
}
/**
 * Report whether the "call wrapped function indirectly" layout applies to
 * this object.
 *
 * The answer is true only when all of the following hold for the
 * compilation context attached to obj_data:
 *   - indirect mode is enabled,
 *   - stack bound check or stack estimation is enabled,
 *   - the target arch name starts with "xtensa".
 *
 * @param obj_data the object data whose comp_ctx is inspected
 *
 * @return true if all conditions above are met, false otherwise
 */
static bool
need_call_wrapped_indirect(const AOTObjectData *obj_data)
{
    /* Only indirect mode uses the doubled function table */
    if (!obj_data->comp_ctx->is_indirect_mode) {
        return false;
    }

    /* Neither feature enabled: no precheck is requested */
    if (!obj_data->comp_ctx->enable_stack_bound_check
        && !obj_data->comp_ctx->enable_stack_estimation) {
        return false;
    }

    /* Prefix match on the first 6 characters of the arch name */
    return strncmp(obj_data->comp_ctx->target_arch, "xtensa", 6) == 0;
}
static bool
str_starts_with(const char *str, const char *prefix)
{
@ -870,6 +880,10 @@ get_func_section_size(AOTCompContext *comp_ctx, AOTCompData *comp_data,
/* function type indexes */
size += (uint32)sizeof(uint32) * comp_data->func_count;
/* aot_func#xxx + aot_func_internal#xxx in XIP mode for xtensa */
if (need_call_wrapped_indirect(obj_data))
size *= 2;
/* max_local_cell_nums */
size += (uint32)sizeof(uint32) * comp_data->func_count;
@ -2595,9 +2609,30 @@ aot_emit_func_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
EMIT_U64(func->text_offset);
}
if (need_call_wrapped_indirect(obj_data)) {
/*
* Explicitly emit aot_func_internal#xxx for Xtensa XIP, therefore,
* for aot_func#xxx, func_indexes range from 0 ~ func_count - 1,
* for aot_func_internal#xxx, from func_count ~ 2 * func_count - 1.
*/
for (i = 0, func = obj_data->funcs; i < obj_data->func_count;
i++, func++) {
if (is_32bit_binary(obj_data))
EMIT_U32(func->text_offset_of_aot_func_internal);
else
EMIT_U64(func->text_offset_of_aot_func_internal);
}
}
for (i = 0; i < comp_data->func_count; i++)
EMIT_U32(funcs[i]->func_type_index);
if (need_call_wrapped_indirect(obj_data)) {
/* func_type_index for aot_func_internal#xxxx */
for (i = 0; i < comp_data->func_count; i++)
EMIT_U32(funcs[i]->func_type_index);
}
for (i = 0; i < comp_data->func_count; i++) {
uint32 max_local_cell_num =
funcs[i]->param_cell_num + funcs[i]->local_cell_num;

View File

@ -24,6 +24,8 @@ create_native_stack_bound(const AOTCompContext *comp_ctx,
static bool
create_native_stack_top_min(const AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx);
static bool
create_func_ptrs(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
LLVMTypeRef
wasm_type_to_llvm_type(const AOTCompContext *comp_ctx,
@ -537,8 +539,51 @@ aot_build_precheck_function(AOTCompContext *comp_ctx, LLVMModuleRef module,
if (ret_type == VOID_TYPE) {
name = "";
}
LLVMValueRef retval =
LLVMBuildCall2(b, func_type, wrapped_func, params, param_count, name);
LLVMValueRef retval;
if (comp_ctx->is_indirect_mode
&& !strncmp(comp_ctx->target_arch, "xtensa", 6)) {
/* call wrapped_func indirectly */
if (!create_func_ptrs(comp_ctx, func_ctx)) {
goto fail;
}
LLVMTypeRef func_ptr_type;
LLVMValueRef wrapped_func_indirect;
uint32 import_func_count = comp_ctx->comp_data->import_func_count;
uint32 func_count = comp_ctx->func_ctx_count;
/* Check function index */
if (func_index >= import_func_count + func_count) {
aot_set_last_error("Function index out of range.");
goto fail;
}
/* Get function type */
if (!(func_ptr_type = LLVMPointerType(func_type, 0))) {
aot_set_last_error("create LLVM function type failed.");
goto fail;
}
/*
* func_index layout :
* aot_func#xxx, range from 0 ~ func_count - 1;
* aot_func_internal#xxx, range from func_count ~ 2 * func_count - 1;
*/
if (!(wrapped_func_indirect = aot_get_func_from_table(
comp_ctx, func_ctx->func_ptrs, func_ptr_type,
func_index + func_count + import_func_count))) {
goto fail;
}
/* Call the function indirectly */
retval = LLVMBuildCall2(b, func_type, wrapped_func_indirect, params,
param_count, name);
}
else
retval = LLVMBuildCall2(b, func_type, wrapped_func, params, param_count,
name);
if (!retval) {
goto fail;
}
@ -734,7 +779,9 @@ aot_add_llvm_func(AOTCompContext *comp_ctx, LLVMModuleRef module,
}
if (need_precheck) {
if (!comp_ctx->is_jit_mode)
if (!comp_ctx->is_jit_mode
&& !(comp_ctx->is_indirect_mode
&& !strncmp(comp_ctx->target_arch, "xtensa", 6)))
LLVMSetLinkage(func, LLVMInternalLinkage);
unsigned int kind =
LLVMGetEnumAttributeKindForName("noinline", strlen("noinline"));