From 4f6d70bc523cc62637e2db2bc5ea164a7a455be6 Mon Sep 17 00:00:00 2001 From: dongsheng28849455 <68947925+dongsheng28849455@users.noreply.github.com> Date: Tue, 27 Feb 2024 11:17:57 +0800 Subject: [PATCH] Use indirect call in pre-checker function to avoid relocation in XIP mode (#3142) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stack profiler `aot_func#xxx` calls the wrapped function `aot_func_internal#xxx` through a symbol reference, but on some platforms such as xtensa this is translated into a native long call, which needs a relocation to resolve the indirect address and breaks the XIP feature, which requires eliminating relocations. The solution is to change the symbol reference into an indirect call through the lookup table; the code will look like this: ```llvm call_wrapped_func: ; preds = %stack_bound_check_block %func_addr1 = getelementptr inbounds ptr, ptr %func_ptrs_ptr, i32 75 %func_tmp2 = load ptr, ptr %func_addr1, align 4 tail call void %func_tmp2(ptr %exec_env) ret void ``` --- core/iwasm/aot/aot_loader.c | 19 ++++++-- core/iwasm/aot/aot_runtime.c | 42 +++++++++++++---- core/iwasm/compilation/aot_emit_aot_file.c | 35 ++++++++++++++ core/iwasm/compilation/aot_llvm.c | 53 ++++++++++++++++++++-- 4 files changed, 132 insertions(+), 17 deletions(-) diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index 5803f5391..85fa1f89d 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -2500,15 +2500,26 @@ load_function_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module, const uint8 *p = buf, *p_end = buf_end; uint32 i; uint64 size, text_offset; + uint32 func_count = module->func_count; - size = sizeof(void *) * (uint64)module->func_count; +#if defined(BUILD_TARGET_XTENSA) + /* + * For Xtensa XIP, real func_count is doubled, including aot_func and + * aot_func_internal, so need to multiply func_count by 2 here. 
+ */ + if (module->is_indirect_mode) { + func_count *= 2; + } +#endif + + size = sizeof(void *) * (uint64)func_count; if (size > 0 && !(module->func_ptrs = loader_malloc(size, error_buf, error_buf_size))) { return false; } - for (i = 0; i < module->func_count; i++) { + for (i = 0; i < func_count; i++) { if (sizeof(void *) == 8) { read_uint64(p, p_end, text_offset); } @@ -2543,14 +2554,14 @@ load_function_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module, module->start_function = NULL; } - size = sizeof(uint32) * (uint64)module->func_count; + size = sizeof(uint32) * (uint64)func_count; if (size > 0 && !(module->func_type_indexes = loader_malloc(size, error_buf, error_buf_size))) { return false; } - for (i = 0; i < module->func_count; i++) { + for (i = 0; i < func_count; i++) { read_uint32(p, p_end, module->func_type_indexes[i]); if (module->func_type_indexes[i] >= module->type_count) { set_error_buf(error_buf, error_buf_size, "unknown type"); diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c index abfccc7b7..cc5d7fd00 100644 --- a/core/iwasm/aot/aot_runtime.c +++ b/core/iwasm/aot/aot_runtime.c @@ -1108,10 +1108,21 @@ init_func_ptrs(AOTModuleInstance *module_inst, AOTModule *module, { uint32 i; void **func_ptrs; - uint64 total_size = ((uint64)module->import_func_count + module->func_count) - * sizeof(void *); + uint32 func_count = module->func_count; +#if defined(BUILD_TARGET_XTENSA) + /* + * For Xtensa XIP, real func_count is doubled, including aot_func and + * aot_func_internal, so need to multiply func_count by 2 here. 
+ */ + if (module->is_indirect_mode) { + func_count *= 2; + } +#endif - if (module->import_func_count + module->func_count == 0) + uint64 total_size = + ((uint64)module->import_func_count + func_count) * sizeof(void *); + + if (module->import_func_count + func_count == 0) return true; /* Allocate memory */ @@ -1133,8 +1144,8 @@ init_func_ptrs(AOTModuleInstance *module_inst, AOTModule *module, } /* Set defined function pointers */ - bh_memcpy_s(func_ptrs, sizeof(void *) * module->func_count, - module->func_ptrs, sizeof(void *) * module->func_count); + bh_memcpy_s(func_ptrs, sizeof(void *) * func_count, module->func_ptrs, + sizeof(void *) * func_count); return true; } @@ -1144,10 +1155,21 @@ init_func_type_indexes(AOTModuleInstance *module_inst, AOTModule *module, { uint32 i; uint32 *func_type_index; - uint64 total_size = ((uint64)module->import_func_count + module->func_count) - * sizeof(uint32); + uint32 func_count = module->func_count; +#if defined(BUILD_TARGET_XTENSA) + /* + * For Xtensa XIP, real func_count is doubled, including aot_func and + * aot_func_internal, so need to multiply func_count by 2 here. 
+ */ + if (module->is_indirect_mode) { + func_count *= 2; + } +#endif - if (module->import_func_count + module->func_count == 0) + uint64 total_size = + ((uint64)module->import_func_count + func_count) * sizeof(uint32); + + if (module->import_func_count + func_count == 0) return true; /* Allocate memory */ @@ -1161,8 +1183,8 @@ init_func_type_indexes(AOTModuleInstance *module_inst, AOTModule *module, for (i = 0; i < module->import_func_count; i++, func_type_index++) *func_type_index = module->import_funcs[i].func_type_index; - bh_memcpy_s(func_type_index, sizeof(uint32) * module->func_count, - module->func_type_indexes, sizeof(uint32) * module->func_count); + bh_memcpy_s(func_type_index, sizeof(uint32) * func_count, + module->func_type_indexes, sizeof(uint32) * func_count); return true; } diff --git a/core/iwasm/compilation/aot_emit_aot_file.c b/core/iwasm/compilation/aot_emit_aot_file.c index b7f3a2e47..64947281a 100644 --- a/core/iwasm/compilation/aot_emit_aot_file.c +++ b/core/iwasm/compilation/aot_emit_aot_file.c @@ -179,6 +179,16 @@ is_little_endian_binary(const AOTObjectData *obj_data) return obj_data->target_info.bin_type & 1 ? 
false : true; } +static bool +need_call_wrapped_indirect(const AOTObjectData *obj_data) +{ + const bool need_precheck = obj_data->comp_ctx->enable_stack_bound_check + || obj_data->comp_ctx->enable_stack_estimation; + + return obj_data->comp_ctx->is_indirect_mode && need_precheck + && !strncmp(obj_data->comp_ctx->target_arch, "xtensa", 6); +} + static bool str_starts_with(const char *str, const char *prefix) { @@ -870,6 +880,10 @@ get_func_section_size(AOTCompContext *comp_ctx, AOTCompData *comp_data, /* function type indexes */ size += (uint32)sizeof(uint32) * comp_data->func_count; + /* aot_func#xxx + aot_func_internal#xxx in XIP mode for xtensa */ + if (need_call_wrapped_indirect(obj_data)) + size *= 2; + /* max_local_cell_nums */ size += (uint32)sizeof(uint32) * comp_data->func_count; @@ -2595,9 +2609,30 @@ aot_emit_func_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset, EMIT_U64(func->text_offset); } + if (need_call_wrapped_indirect(obj_data)) { + /* + * Explicitly emit aot_func_internal#xxx for Xtensa XIP, therefore, + * for aot_func#xxx, func_indexes range from 0 ~ func_count - 1, + * for aot_func_internal#xxx, from func_count ~ 2 * func_count - 1. 
+ */ + for (i = 0, func = obj_data->funcs; i < obj_data->func_count; + i++, func++) { + if (is_32bit_binary(obj_data)) + EMIT_U32(func->text_offset_of_aot_func_internal); + else + EMIT_U64(func->text_offset_of_aot_func_internal); + } + } + for (i = 0; i < comp_data->func_count; i++) EMIT_U32(funcs[i]->func_type_index); + if (need_call_wrapped_indirect(obj_data)) { + /* func_type_index for aot_func_internal#xxxx */ + for (i = 0; i < comp_data->func_count; i++) + EMIT_U32(funcs[i]->func_type_index); + } + for (i = 0; i < comp_data->func_count; i++) { uint32 max_local_cell_num = funcs[i]->param_cell_num + funcs[i]->local_cell_num; diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index f287b9719..c8417e6d6 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -24,6 +24,8 @@ create_native_stack_bound(const AOTCompContext *comp_ctx, static bool create_native_stack_top_min(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +static bool +create_func_ptrs(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); LLVMTypeRef wasm_type_to_llvm_type(const AOTCompContext *comp_ctx, @@ -537,8 +539,51 @@ aot_build_precheck_function(AOTCompContext *comp_ctx, LLVMModuleRef module, if (ret_type == VOID_TYPE) { name = ""; } - LLVMValueRef retval = - LLVMBuildCall2(b, func_type, wrapped_func, params, param_count, name); + + LLVMValueRef retval; + if (comp_ctx->is_indirect_mode + && !strncmp(comp_ctx->target_arch, "xtensa", 6)) { + /* call wrapped_func indirectly */ + if (!create_func_ptrs(comp_ctx, func_ctx)) { + goto fail; + } + + LLVMTypeRef func_ptr_type; + LLVMValueRef wrapped_func_indirect; + uint32 import_func_count = comp_ctx->comp_data->import_func_count; + uint32 func_count = comp_ctx->func_ctx_count; + + /* Check function index */ + if (func_index >= import_func_count + func_count) { + aot_set_last_error("Function index out of range."); + goto fail; + } + + /* Get function type */ + if 
(!(func_ptr_type = LLVMPointerType(func_type, 0))) { + aot_set_last_error("create LLVM function type failed."); + goto fail; + } + + /* + * func_index layout : + * aot_func#xxx, range from 0 ~ func_count - 1; + * aot_func_internal#xxx, range from func_count ~ 2 * func_count - 1; + */ + if (!(wrapped_func_indirect = aot_get_func_from_table( + comp_ctx, func_ctx->func_ptrs, func_ptr_type, + func_index + func_count + import_func_count))) { + goto fail; + } + + /* Call the function indirectly */ + retval = LLVMBuildCall2(b, func_type, wrapped_func_indirect, params, + param_count, name); + } + else + retval = LLVMBuildCall2(b, func_type, wrapped_func, params, param_count, + name); + if (!retval) { goto fail; } @@ -734,7 +779,9 @@ aot_add_llvm_func(AOTCompContext *comp_ctx, LLVMModuleRef module, } if (need_precheck) { - if (!comp_ctx->is_jit_mode) + if (!comp_ctx->is_jit_mode + && !(comp_ctx->is_indirect_mode + && !strncmp(comp_ctx->target_arch, "xtensa", 6))) LLVMSetLinkage(func, LLVMInternalLinkage); unsigned int kind = LLVMGetEnumAttributeKindForName("noinline", strlen("noinline"));