AOT/JIT native stack bound check improvement (#2244)

Move the native stack overflow check from the caller to the callee because the
former doesn't work for call_indirect and imported functions.

Make the stack usage estimation more accurate. Instead of making a guess from
the number of wasm locals in the function, use the LLVM's idea of the stack size
of each MachineFunction. The former is inaccurate because a) it doesn't reflect
optimization passes, and b) wasm locals are not the only reason to use stack.

To use the post-compilation stack usage information without requiring 2-pass
compilation or machine-code imm rewriting, introduce a global array to store
the stack consumption of each function:
For JIT, use a custom IRCompiler with an extra pass to fill the array.
For AOT, use `clang -fstack-usage` equivalent because we support external llc.

Re-implement function call stack usage estimation to reflect the real calling
conventions better. (aot_estimate_stack_usage_for_function_call)

Re-implement stack estimation logic (--enable-memory-profiling) based on the new
machinery.

Discussions: #2105.
This commit is contained in:
YAMAMOTO Takashi 2023-06-22 08:27:07 +09:00 committed by GitHub
parent 8797c751a5
commit cd7941cc39
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 1348 additions and 252 deletions

View File

@ -19,6 +19,15 @@ extern "C" {
#define AOT_FUNC_PREFIX "aot_func#"
#endif
#ifndef AOT_FUNC_INTERNAL_PREFIX
#define AOT_FUNC_INTERNAL_PREFIX "aot_func_internal#"
#endif
#ifndef AOT_STACK_SIZES_NAME
#define AOT_STACK_SIZES_NAME "aot_stack_sizes"
#endif
extern const char *aot_stack_sizes_name;
typedef InitializerExpression AOTInitExpr;
typedef WASMType AOTFuncType;
typedef WASMExport AOTExport;

View File

@ -2761,6 +2761,16 @@ aot_compile_wasm(AOTCompContext *comp_ctx)
aot_handle_llvm_errmsg("failed to addIRModule", err);
return false;
}
if (comp_ctx->stack_sizes != NULL) {
LLVMOrcJITTargetAddress addr;
if ((err = LLVMOrcLLLazyJITLookup(comp_ctx->orc_jit, &addr,
aot_stack_sizes_name))) {
aot_handle_llvm_errmsg("failed to look up stack_sizes", err);
return false;
}
comp_ctx->jit_stack_sizes = (uint32 *)addr;
}
}
return true;
@ -2815,6 +2825,55 @@ aot_emit_llvm_file(AOTCompContext *comp_ctx, const char *file_name)
return true;
}
/*
 * Move a file by copying the contents of `src` into `dest` and then
 * removing `src`.
 *
 * The source is opened before the destination, so that a missing or
 * unreadable source does not truncate (and subsequently remove) an
 * already existing destination file.
 *
 * If an error happens after the destination has been opened, the
 * possibly incomplete destination is removed and the source is kept.
 *
 * Returns true on success, false otherwise.
 */
static bool
aot_move_file(const char *dest, const char *src)
{
    FILE *dfp = NULL;
    FILE *sfp;
    size_t rsz;
    char buf[128];
    bool success = false;

    sfp = fopen(src, "r");
    if (sfp == NULL) {
        LOG_DEBUG("open error %s %s", dest, src);
        goto fail;
    }
    /* only touch the destination once we know there is something to copy */
    dfp = fopen(dest, "w");
    if (dfp == NULL) {
        LOG_DEBUG("open error %s %s", dest, src);
        goto fail;
    }

    do {
        rsz = fread(buf, 1, sizeof(buf), sfp);
        if (rsz > 0) {
            size_t wsz = fwrite(buf, 1, rsz, dfp);
            if (wsz < rsz) {
                LOG_DEBUG("write error");
                goto fail;
            }
        }
        /* a short read is either EOF or an error; tell them apart */
        if (rsz < sizeof(buf) && ferror(sfp)) {
            LOG_DEBUG("read error");
            goto fail;
        }
    } while (rsz > 0);
    success = true;

fail:
    if (dfp != NULL) {
        /* fclose flushes buffered data, thus it can fail as well */
        if (fclose(dfp)) {
            LOG_DEBUG("close error");
            success = false;
        }
        if (!success) {
            (void)unlink(dest);
        }
    }
    if (sfp != NULL) {
        (void)fclose(sfp);
    }
    if (success) {
        /* the copy is complete; drop the original to finish the "move" */
        (void)unlink(src);
    }
    return success;
}
bool
aot_emit_object_file(AOTCompContext *comp_ctx, char *file_name)
{
@ -2830,7 +2889,25 @@ aot_emit_object_file(AOTCompContext *comp_ctx, char *file_name)
int ret;
if (comp_ctx->external_llc_compiler) {
const char *stack_usage_flag = "";
char bc_file_name[64];
char su_file_name[65]; /* See the comment below */
if (comp_ctx->stack_usage_file != NULL) {
/*
* Note: we know the caller uses 64 byte buffer for
* file_name. It will get 1 byte longer because we
* replace ".o" with ".su".
*/
size_t len = strlen(file_name);
bh_assert(len + 1 <= sizeof(su_file_name));
bh_assert(len > 3);
bh_assert(file_name[len - 2] == '.');
bh_assert(file_name[len - 1] == 'o');
snprintf(su_file_name, sizeof(su_file_name), "%.*s.su",
(int)(len - 2), file_name);
stack_usage_flag = " -fstack-usage";
}
if (!aot_generate_tempfile_name("wamrc-bc", "bc", bc_file_name,
sizeof(bc_file_name))) {
@ -2842,8 +2919,8 @@ aot_emit_object_file(AOTCompContext *comp_ctx, char *file_name)
return false;
}
snprintf(cmd, sizeof(cmd), "%s %s -o %s %s",
comp_ctx->external_llc_compiler,
snprintf(cmd, sizeof(cmd), "%s%s %s -o %s %s",
comp_ctx->external_llc_compiler, stack_usage_flag,
comp_ctx->llc_compiler_flags ? comp_ctx->llc_compiler_flags
: "-O3 -c",
file_name, bc_file_name);
@ -2858,6 +2935,22 @@ aot_emit_object_file(AOTCompContext *comp_ctx, char *file_name)
"with external LLC compiler.");
return false;
}
if (comp_ctx->stack_usage_file != NULL) {
/*
* move the temporary .su file to the specified location.
*
* Note: the former is automatically inferred from the output
* filename (file_name here) by clang.
*
* Note: the latter might be user-specified.
* (wamrc --stack-usage=<file>)
*/
if (!aot_move_file(comp_ctx->stack_usage_file, su_file_name)) {
aot_set_last_error("failed to move su file.");
(void)unlink(su_file_name);
return false;
}
}
}
else if (comp_ctx->external_asm_compiler) {
char asm_file_name[64];

View File

@ -140,6 +140,10 @@ typedef struct AOTObjectData {
AOTSymbolList symbol_list;
AOTRelocationGroup *relocation_groups;
uint32 relocation_group_count;
const char *stack_sizes_section_name;
uint32 stack_sizes_offset;
uint32 *stack_sizes;
} AOTObjectData;
#if 0
@ -1634,7 +1638,31 @@ aot_emit_object_data_section_info(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
EMIT_STR(data_section->name);
offset = align_uint(offset, 4);
EMIT_U32(data_section->size);
EMIT_BUF(data_section->data, data_section->size);
if (obj_data->stack_sizes_section_name != NULL
&& !strcmp(obj_data->stack_sizes_section_name,
data_section->name)) {
uint32 ss_offset = obj_data->stack_sizes_offset;
uint32 ss_size =
obj_data->func_count * sizeof(*obj_data->stack_sizes);
LOG_VERBOSE("Replacing stack_sizes in %s section, offset %" PRIu32
", size %" PRIu32,
obj_data->stack_sizes_section_name, ss_offset, ss_size);
bh_assert(ss_offset + ss_size <= data_section->size);
/* 0 .. ss_offset */
if (ss_offset > 0) {
EMIT_BUF(data_section->data, ss_offset);
}
/* ss_offset .. ss_offset+ss_size */
EMIT_BUF(obj_data->stack_sizes, ss_size);
/* ss_offset+ss_size .. data_section->size */
if (data_section->size > ss_offset + ss_size) {
EMIT_BUF(data_section->data + ss_offset + ss_size,
data_section->size - (ss_offset + ss_size));
}
}
else {
EMIT_BUF(data_section->data, data_section->size);
}
}
if (offset - *p_offset
@ -2418,6 +2446,293 @@ aot_resolve_object_data_sections(AOTObjectData *obj_data)
return true;
}
/*
 * Parse a stack usage file and collect the stack size of each function.
 *
 * The file is expected to consist of lines like:
 *
 *   WASM Module:aot_func#9 72 static
 *
 * Lines naming a function with AOT_FUNC_INTERNAL_PREFIX are stored into
 * sizes[func_idx]. Lines naming a function with AOT_FUNC_PREFIX (the
 * "precheck" functions) are only counted and used to track the min/max
 * stack size for diagnostics. Entries of `sizes` for which no line is
 * found are left untouched.
 *
 * comp_ctx: only used to query aot_target_precheck_can_use_musttail
 * filename: path of the stack usage file (must not be NULL)
 * sizes:    output array with `count` elements
 * count:    the expected number of functions
 *
 * Returns true on success. On failure, returns false with the last error
 * set via aot_set_last_error.
 */
static bool
read_stack_usage_file(const AOTCompContext *comp_ctx, const char *filename,
                      uint32 *sizes, uint32 count)
{
    const char *aot_func_prefix = AOT_FUNC_PREFIX;
    const char *aot_func_internal_prefix = AOT_FUNC_INTERNAL_PREFIX;
    uint32 precheck_found = 0;
    uint32 precheck_stack_size_max = 0;
    uint32 precheck_stack_size_min = UINT32_MAX;
    uint32 found = 0;
    FILE *fp = NULL;

    if (filename == NULL) {
        aot_set_last_error("no stack usage file is specified.");
        return false;
    }
    fp = fopen(filename, "r");
    if (fp == NULL) {
        LOG_ERROR("failed to open stack usage file: %s", filename);
        goto fail;
    }

    while (true) {
        const char *prefix;
        char line[100];
        char *cp = fgets(line, sizeof(line), fp);
        char *fn;
        char *colon;
        uintmax_t func_idx;
        uintmax_t sz;
        int ret;

        if (cp == NULL) {
            break;
        }
        /*
         * Note: strrchr (not strchr) because a module name can contain
         * colons.
         */
        colon = strrchr(cp, ':');
        if (colon == NULL) {
            goto fail;
        }
        fn = strstr(colon, aot_func_prefix);
        if (fn != NULL) {
            prefix = aot_func_prefix;
        }
        else {
            fn = strstr(colon, aot_func_internal_prefix);
            if (fn == NULL) {
                LOG_ERROR("failed to parse stack usage line: %s", cp);
                goto fail;
            }
            prefix = aot_func_internal_prefix;
        }
        ret = sscanf(fn + strlen(prefix), "%ju %ju static", &func_idx, &sz);
        if (ret != 2) {
            goto fail;
        }
        /* both values must fit in uint32 and the index must be in range */
        if (sz > UINT32_MAX || func_idx > UINT32_MAX || func_idx >= count) {
            goto fail;
        }
        if (prefix == aot_func_prefix) {
            /* a precheck function: only track statistics */
            if (sz < precheck_stack_size_min) {
                precheck_stack_size_min = (uint32)sz;
            }
            if (sz > precheck_stack_size_max) {
                precheck_stack_size_max = (uint32)sz;
            }
            precheck_found++;
            continue;
        }
        sizes[func_idx] = (uint32)sz;
        found++;
    }
    fclose(fp);
    fp = NULL; /* prevent the fail path from closing it again */

    if (precheck_found != count) {
        LOG_ERROR("%" PRIu32 " precheck entries found while %" PRIu32
                  " entries are expected",
                  precheck_found, count);
        /* report this via the last error as well, like other failures */
        goto fail;
    }
    if (found != count) {
        /*
         * LLVM seems to eliminate calls to an empty function
         * (and eliminate the function) even if it's marked noinline.
         */
        LOG_VERBOSE("%" PRIu32 " entries found while %" PRIu32
                    " entries are expected. Maybe LLVM optimization eliminated "
                    "some functions.",
                    found, count);
    }
    if (precheck_stack_size_min != precheck_stack_size_max) {
        /*
         * Note: this is too strict.
         *
         * actually, the stack consumption of the precheck functions
         * can depend on the type of them.
         * that is, depending on various factors including
         * calling conventions and compilers, a function with many
         * parameters can consume more stack, even if it merely does
         * a tail-call to another function.
         */
        bool musttail = aot_target_precheck_can_use_musttail(comp_ctx);
        if (musttail) {
            LOG_WARNING(
                "precheck functions use variable amount of stack. (%" PRIu32
                " - %" PRIu32 ")",
                precheck_stack_size_min, precheck_stack_size_max);
        }
        else {
            LOG_VERBOSE("precheck functions use %" PRIu32 " - %" PRIu32
                        " bytes of stack.",
                        precheck_stack_size_min, precheck_stack_size_max);
        }
    }
    else {
        LOG_VERBOSE("precheck functions use %" PRIu32 " bytes of stack.",
                    precheck_stack_size_max);
    }
    if (precheck_stack_size_max >= 1024) {
        LOG_WARNING("precheck functions themselves consume relatively large "
                    "amount of stack (%" PRIu32
                    "). Please ensure the runtime has large enough "
                    "WASM_STACK_GUARD_SIZE.",
                    precheck_stack_size_max);
    }
    return true;

fail:
    if (fp != NULL)
        fclose(fp);
    aot_set_last_error("failed to read stack usage file.");
    return false;
}
/*
 * Locate the stack_sizes array (the symbol named aot_stack_sizes_name) in
 * the object file and build the final contents for it.
 *
 * On success, obj_data records the name of the containing section, the
 * offset of the array, and a freshly allocated copy of the array
 * (obj_data->stack_sizes) filled with:
 *
 *   the value from the stack usage file
 *     + func_ctx->stack_consumption_for_func_call
 *     + the estimated cost to call the wrapped function
 *       (0 if the precheck function can use musttail)
 *
 * or 0 for functions which have no entry in the stack usage file.
 *
 * The copy is released by the owner of obj_data, not by this function,
 * even when this function fails after the allocation.
 *
 * Returns true on success. Otherwise, returns false with the last error
 * set.
 */
static bool
aot_resolve_stack_sizes(AOTCompContext *comp_ctx, AOTObjectData *obj_data)
{
    LLVMSectionIteratorRef sec_itr = NULL;
    LLVMSymbolIteratorRef sym_itr;
    const char *name;

    if (!(sym_itr = LLVMObjectFileCopySymbolIterator(obj_data->binary))) {
        aot_set_last_error("llvm get symbol iterator failed.");
        return false;
    }

    while (!LLVMObjectFileIsSymbolIteratorAtEnd(obj_data->binary, sym_itr)) {
        if ((name = LLVMGetSymbolName(sym_itr))
            && !strcmp(name, aot_stack_sizes_name)) {
            uint64 sz = LLVMGetSymbolSize(sym_itr);
            if (sz != sizeof(uint32) * obj_data->func_count) {
                aot_set_last_error("stack_sizes had unexpected size.");
                goto fail;
            }
            uint64 addr = LLVMGetSymbolAddress(sym_itr);
            if (!(sec_itr =
                      LLVMObjectFileCopySectionIterator(obj_data->binary))) {
                aot_set_last_error("llvm get section iterator failed.");
                goto fail;
            }
            LLVMMoveToContainingSection(sec_itr, sym_itr);
            const char *sec_name = LLVMGetSectionName(sec_itr);
            LOG_VERBOSE("stack_sizes found in section %s offset %" PRIu64 ".",
                        sec_name, addr);

            /*
             * Validate the location before touching the section contents.
             * aot_emit_object_data_section_info relies on
             * offset + size <= section size as well.
             */
            if (addr > UINT32_MAX) {
                aot_set_last_error("too large stack_sizes offset.");
                goto fail;
            }
            uint64 sec_size = LLVMGetSectionSize(sec_itr);
            if (addr > sec_size || sz > sec_size - addr) {
                aot_set_last_error("stack_sizes is out of section bounds.");
                goto fail;
            }

            /*
             * Note: We can't always modify stack_sizes in-place.
             * Eg. When WAMRC_LLC_COMPILER is used, LLVM sometimes uses
             * read-only mmap of the temporary file to back
             * LLVMGetSectionContents.
             */
            const uint32 *ro_stack_sizes =
                (const uint32 *)(LLVMGetSectionContents(sec_itr) + addr);
            uint32 i;
            for (i = 0; i < obj_data->func_count; i++) {
                /* Note: -1 == AOT_NEG_ONE from aot_create_stack_sizes */
                if (ro_stack_sizes[i] != (uint32)-1) {
                    aot_set_last_error("unexpected data in stack_sizes.");
                    goto fail;
                }
            }

            /*
             * Record section/offset and construct a copy of stack_sizes.
             * aot_emit_object_data_section_info will emit this copy.
             */
            obj_data->stack_sizes_section_name = sec_name;
            obj_data->stack_sizes_offset = (uint32)addr;
            obj_data->stack_sizes = wasm_runtime_malloc(
                obj_data->func_count * sizeof(*obj_data->stack_sizes));
            if (obj_data->stack_sizes == NULL) {
                aot_set_last_error("failed to allocate memory.");
                goto fail;
            }
            uint32 *stack_sizes = obj_data->stack_sizes;
            for (i = 0; i < obj_data->func_count; i++) {
                stack_sizes[i] = (uint32)-1;
            }
            if (!read_stack_usage_file(comp_ctx, comp_ctx->stack_usage_file,
                                       stack_sizes, obj_data->func_count)) {
                goto fail;
            }

            /* this only depends on the target; no need to ask per function */
            const bool musttail =
                aot_target_precheck_can_use_musttail(comp_ctx);

            for (i = 0; i < obj_data->func_count; i++) {
                const AOTFuncContext *func_ctx = comp_ctx->func_ctxes[i];
                unsigned int stack_consumption_to_call_wrapped_func =
                    musttail ? 0
                             : aot_estimate_stack_usage_for_function_call(
                                 comp_ctx, func_ctx->aot_func->func_type);
                /*
                 * LLVM seems to eliminate calls to an empty function
                 * (and eliminate the function) even if it's marked noinline.
                 *
                 * Note: -1 == AOT_NEG_ONE from aot_create_stack_sizes
                 */
                if (stack_sizes[i] == (uint32)-1) {
                    if (func_ctx->stack_consumption_for_func_call != 0) {
                        /*
                         * This happens if a function calling another
                         * function has been optimized out.
                         *
                         * for example,
                         *
                         *    (func $func
                         *        (local i32)
                         *        local.get 0
                         *        if
                         *            call $another
                         *        end
                         *    )
                         */
                        LOG_VERBOSE("AOT func#%" PRIu32
                                    " had call(s) but eliminated?",
                                    i);
                    }
                    else {
                        LOG_VERBOSE("AOT func#%" PRIu32 " eliminated?", i);
                    }
                    stack_sizes[i] = 0;
                }
                else {
                    LOG_VERBOSE("AOT func#%" PRIu32 " stack_size %u + %" PRIu32
                                " + %u",
                                i, stack_consumption_to_call_wrapped_func,
                                stack_sizes[i],
                                func_ctx->stack_consumption_for_func_call);
                    /* add with explicit uint32 overflow checks */
                    if (UINT32_MAX - stack_sizes[i]
                        < func_ctx->stack_consumption_for_func_call) {
                        aot_set_last_error("stack size overflow.");
                        goto fail;
                    }
                    stack_sizes[i] += func_ctx->stack_consumption_for_func_call;
                    if (UINT32_MAX - stack_sizes[i]
                        < stack_consumption_to_call_wrapped_func) {
                        aot_set_last_error("stack size overflow.");
                        goto fail;
                    }
                    stack_sizes[i] += stack_consumption_to_call_wrapped_func;
                }
            }
            LLVMDisposeSectionIterator(sec_itr);
            LLVMDisposeSymbolIterator(sym_itr);
            return true;
        }
        LLVMMoveToNextSymbol(sym_itr);
    }
    aot_set_last_error("stack_sizes not found.");

fail:
    if (sec_itr)
        LLVMDisposeSectionIterator(sec_itr);
    LLVMDisposeSymbolIterator(sym_itr);
    return false;
}
static bool
aot_resolve_functions(AOTCompContext *comp_ctx, AOTObjectData *obj_data)
{
@ -2429,6 +2744,10 @@ aot_resolve_functions(AOTCompContext *comp_ctx, AOTObjectData *obj_data)
/* allocate memory for aot function */
obj_data->func_count = comp_ctx->comp_data->func_count;
if (obj_data->func_count) {
if ((comp_ctx->enable_stack_bound_check
|| comp_ctx->enable_stack_estimation)
&& !aot_resolve_stack_sizes(comp_ctx, obj_data))
return false;
total_size = (uint32)sizeof(AOTObjectFunc) * obj_data->func_count;
if (!(obj_data->funcs = wasm_runtime_malloc(total_size))) {
aot_set_last_error("allocate memory for functions failed.");
@ -2922,6 +3241,8 @@ aot_obj_data_destroy(AOTObjectData *obj_data)
obj_data->relocation_group_count);
if (obj_data->symbol_list.len)
destroy_relocation_symbol_list(&obj_data->symbol_list);
if (obj_data->stack_sizes)
wasm_runtime_free(obj_data->stack_sizes);
wasm_runtime_free(obj_data);
}

View File

@ -366,143 +366,6 @@ fail:
#endif /* end of (WASM_ENABLE_DUMP_CALL_STACK != 0) \
|| (WASM_ENABLE_PERF_PROFILING != 0) */
/*
 * Emit code which lowers *native_stack_top_min_addr if the callee would
 * push the stack below the currently recorded minimum:
 *
 *   new_sp = last_alloca - callee_cell_num * 4;
 *   if (new_sp < *native_stack_top_min_addr) {
 *       *native_stack_top_min_addr = new_sp;
 *   }
 *
 * On success, the builder is left positioned at the end of the block
 * following the conditional update.
 *
 * Returns false (with the last error set) if any LLVM object can't be
 * created.
 */
static bool
record_stack_usage(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                   uint32 callee_cell_num)
{
    LLVMBasicBlockRef origin_block = LLVMGetInsertBlock(comp_ctx->builder);
    LLVMBasicBlockRef store_block;
    LLVMBasicBlockRef join_block;
    LLVMValueRef neg_frame_size, candidate_sp, is_lower;
    LLVMValueRef recorded_min;
    /* the offset type has the same width as a pointer on the target */
    LLVMTypeRef offset_type =
        (comp_ctx->pointer_size == sizeof(uint64_t)) ? I64_TYPE : I32_TYPE;

    /* a negative offset, as the stack is assumed to grow downward */
    neg_frame_size =
        LLVMConstInt(offset_type, -(int64_t)callee_cell_num * 4, true);
    if (!neg_frame_size) {
        aot_set_last_error("llvm build const failed.");
        return false;
    }

    candidate_sp = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE,
                                         func_ctx->last_alloca,
                                         &neg_frame_size, 1, "new_sp");
    if (!candidate_sp) {
        aot_set_last_error("llvm build gep failed");
        return false;
    }

    recorded_min = LLVMBuildLoad2(comp_ctx->builder, OPQ_PTR_TYPE,
                                  func_ctx->native_stack_top_min_addr,
                                  "native_stack_top_min");
    if (!recorded_min) {
        aot_set_last_error("llvm build load failed");
        return false;
    }

    is_lower = LLVMBuildICmp(comp_ctx->builder, LLVMIntULT, candidate_sp,
                             recorded_min, "cmp");
    if (!is_lower) {
        aot_set_last_error("llvm build icmp failed.");
        return false;
    }

    store_block = LLVMAppendBasicBlockInContext(
        comp_ctx->context, func_ctx->func, "block_update");
    if (!store_block) {
        aot_set_last_error("llvm add basic block failed.");
        return false;
    }
    join_block = LLVMAppendBasicBlockInContext(
        comp_ctx->context, func_ctx->func, "block_after_update");
    if (!join_block) {
        aot_set_last_error("llvm add basic block failed.");
        return false;
    }
    /* layout: origin_block -> store_block -> join_block */
    LLVMMoveBasicBlockAfter(store_block, origin_block);
    LLVMMoveBasicBlockAfter(join_block, store_block);

    if (!LLVMBuildCondBr(comp_ctx->builder, is_lower, store_block,
                         join_block)) {
        aot_set_last_error("llvm build cond br failed.");
        return false;
    }

    /* the update path: store the new minimum and rejoin */
    LLVMPositionBuilderAtEnd(comp_ctx->builder, store_block);
    if (!LLVMBuildStore(comp_ctx->builder, candidate_sp,
                        func_ctx->native_stack_top_min_addr)) {
        aot_set_last_error("llvm build store failed");
        return false;
    }
    if (!LLVMBuildBr(comp_ctx->builder, join_block)) {
        aot_set_last_error("llvm build br failed.");
        return false;
    }

    LLVMPositionBuilderAtEnd(comp_ctx->builder, join_block);
    return true;
}
/*
 * Emit a native stack overflow check for a call to a callee which needs
 * callee_cell_num cells (4 bytes each):
 *
 *   if (last_alloca < native_stack_bound + callee_cell_num * 4)
 *       raise EXCE_NATIVE_STACK_OVERFLOW;
 *
 * On success, the builder is left positioned at the end of the block
 * passed to aot_emit_exception as the continuation.
 *
 * Returns false if any LLVM object can't be created or
 * aot_emit_exception fails.
 */
static bool
check_stack_boundary(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                     uint32 callee_cell_num)
{
    LLVMBasicBlockRef origin_block = LLVMGetInsertBlock(comp_ctx->builder);
    LLVMBasicBlockRef resume_block;
    LLVMValueRef frame_bytes, limit, overflowed;

    frame_bytes = I32_CONST(callee_cell_num * 4);
    if (!frame_bytes) {
        aot_set_last_error("llvm build const failed.");
        return false;
    }

    /* the lowest address the stack pointer is allowed to be at */
    limit = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE,
                                  func_ctx->native_stack_bound, &frame_bytes,
                                  1, "stack_bound");
    if (!limit) {
        aot_set_last_error("llvm build inbound gep failed.");
        return false;
    }

    resume_block = LLVMAppendBasicBlockInContext(
        comp_ctx->context, func_ctx->func, "check_stack");
    if (!resume_block) {
        aot_set_last_error("llvm add basic block failed.");
        return false;
    }
    LLVMMoveBasicBlockAfter(resume_block, origin_block);

    overflowed = LLVMBuildICmp(comp_ctx->builder, LLVMIntULT,
                               func_ctx->last_alloca, limit, "cmp");
    if (!overflowed) {
        aot_set_last_error("llvm build icmp failed.");
        return false;
    }

    if (!aot_emit_exception(comp_ctx, func_ctx, EXCE_NATIVE_STACK_OVERFLOW,
                            true, overflowed, resume_block)) {
        return false;
    }

    LLVMPositionBuilderAtEnd(comp_ctx->builder, resume_block);
    return true;
}
/*
 * Emit the stack related code for a call, depending on the compilation
 * options:
 * - enable_stack_estimation: record the stack usage
 * - enable_stack_bound_check: check the stack boundary
 *
 * Returns false as soon as one of the enabled steps fails.
 */
static bool
check_stack(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
            uint32 callee_cell_num)
{
    if (comp_ctx->enable_stack_estimation) {
        if (!record_stack_usage(comp_ctx, func_ctx, callee_cell_num)) {
            return false;
        }
    }

    if (comp_ctx->enable_stack_bound_check) {
        if (!check_stack_boundary(comp_ctx, func_ctx, callee_cell_num)) {
            return false;
        }
    }

    return true;
}
/**
* Check whether the app address and its buffer are inside the linear memory,
* if no, throw exception
@ -610,6 +473,30 @@ check_app_addr_and_convert(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return true;
}
/*
 * Estimate the stack necessary to call a function of callee_func_type and
 * remember it in caller_func_ctx->stack_consumption_for_func_call if it's
 * larger than the value recorded so far.
 *
 * This is a no-op unless stack bound check or stack estimation is
 * enabled.
 */
static void
aot_estimate_and_record_stack_usage_for_function_call(
    const AOTCompContext *comp_ctx, AOTFuncContext *caller_func_ctx,
    const AOTFuncType *callee_func_type)
{
    if (comp_ctx->enable_stack_bound_check
        || comp_ctx->enable_stack_estimation) {
        const unsigned int needed = aot_estimate_stack_usage_for_function_call(
            comp_ctx, callee_func_type);

        /*
         * Keep the maximum only. The assumption here is that the machine
         * code emitted by LLVM rewinds the stack before making the next
         * call in the function.
         */
        if (needed > caller_func_ctx->stack_consumption_for_func_call) {
            caller_func_ctx->stack_consumption_for_func_call = needed;
        }
    }
}
bool
aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 func_idx, bool tail_call)
@ -620,7 +507,6 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 ext_ret_cell_num = 0, cell_num = 0;
AOTFuncContext **func_ctxes = comp_ctx->func_ctxes;
AOTFuncType *func_type;
AOTFunc *aot_func;
LLVMTypeRef *param_types = NULL, ret_type;
LLVMTypeRef ext_ret_ptr_type;
LLVMValueRef *param_values = NULL, value_ret = NULL, func;
@ -628,7 +514,6 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMValueRef ext_ret, ext_ret_ptr, ext_ret_idx;
int32 i, j = 0, param_count, result_count, ext_ret_count;
uint64 total_size;
uint32 callee_cell_num;
uint8 wasm_ret_type;
uint8 *ext_ret_types = NULL;
const char *signature = NULL;
@ -658,6 +543,8 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
func_type =
func_ctxes[func_idx - import_func_count]->aot_func->func_type;
}
aot_estimate_and_record_stack_usage_for_function_call(comp_ctx, func_ctx,
func_type);
/* Get param cell number */
param_cell_num = func_type->param_cell_num;
@ -885,15 +772,17 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
else {
if (func_ctxes[func_idx - import_func_count] == func_ctx) {
/* recursive call */
func = func_ctx->func;
func = func_ctx->precheck_func;
}
else {
if (!comp_ctx->is_jit_mode) {
func = func_ctxes[func_idx - import_func_count]->func;
func =
func_ctxes[func_idx - import_func_count]->precheck_func;
}
else {
#if !(WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LAZY_JIT != 0)
func = func_ctxes[func_idx - import_func_count]->func;
func =
func_ctxes[func_idx - import_func_count]->precheck_func;
#else
/* JIT tier-up, load func ptr from func_ptrs[func_idx] */
LLVMValueRef func_ptr, func_idx_const;
@ -938,13 +827,6 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
}
}
aot_func = func_ctxes[func_idx - import_func_count]->aot_func;
callee_cell_num =
aot_func->param_cell_num + aot_func->local_cell_num + 1;
if (!check_stack(comp_ctx, func_ctx, callee_cell_num))
goto fail;
#if LLVM_VERSION_MAJOR >= 14
llvm_func_type = func_ctxes[func_idx - import_func_count]->func_type;
#endif
@ -1213,6 +1095,8 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
CHECK_LLVM_CONST(ftype_idx_const);
func_type = comp_ctx->comp_data->func_types[type_idx];
aot_estimate_and_record_stack_usage_for_function_call(comp_ctx, func_ctx,
func_type);
func_param_count = func_type->param_count;
func_result_count = func_type->result_count;
@ -1564,13 +1448,6 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
/* Translate call non-import block */
LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_non_import);
if (!check_stack(comp_ctx, func_ctx,
param_cell_num + ext_cell_num
+ 1
/* Reserve some local variables */
+ 16))
goto fail;
/* Load function pointer */
if (!(func_ptr = LLVMBuildInBoundsGEP2(comp_ctx->builder, OPQ_PTR_TYPE,
func_ctx->func_ptrs, &func_idx, 1,

View File

@ -29,6 +29,7 @@ aot_compile_op_ref_is_null(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
bool
aot_compile_op_ref_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 func_idx);
#ifdef __cplusplus
} /* end of extern "C" */
#endif

View File

@ -14,6 +14,15 @@
#include "debug/dwarf_extractor.h"
#endif
static bool
create_native_symbol(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
static bool
create_native_stack_bound(const AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx);
static bool
create_native_stack_top_min(const AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx);
LLVMTypeRef
wasm_type_to_llvm_type(const AOTLLVMTypes *llvm_types, uint8 wasm_type)
{
@ -38,17 +47,472 @@ wasm_type_to_llvm_type(const AOTLLVMTypes *llvm_types, uint8 wasm_type)
return NULL;
}
/*
 * Add an LLVM function named "<prefix><func_index>" with the given type to
 * the module and name its parameters.
 *
 * The first parameter is named "exec_env". The following param_count
 * parameters get an empty name.
 *
 * comp_ctx is not used by this function.
 *
 * Returns the function, or NULL (with the last error set) on failure.
 */
static LLVMValueRef
aot_add_llvm_func1(const AOTCompContext *comp_ctx, LLVMModuleRef module,
                   uint32 func_index, uint32 param_count, LLVMTypeRef func_type,
                   const char *prefix)
{
    char func_name[48];
    LLVMValueRef func;
    LLVMValueRef local_value;
    uint32 i, j;

    /*
     * Add LLVM function
     *
     * Note: func_index is unsigned. Format it as such so that a large
     * index doesn't end up as a negative number in the symbol name.
     */
    snprintf(func_name, sizeof(func_name), "%s%u", prefix,
             (unsigned int)func_index);
    if (!(func = LLVMAddFunction(module, func_name, func_type))) {
        aot_set_last_error("add LLVM function failed.");
        return NULL;
    }

    j = 0;
    local_value = LLVMGetParam(func, j++);
    LLVMSetValueName(local_value, "exec_env");

    /* Set parameter names */
    for (i = 0; i < param_count; i++) {
        local_value = LLVMGetParam(func, j++);
        LLVMSetValueName(local_value, "");
    }

    return func;
}
/*
 * Set up the minimum part of func_ctx which is necessary to call
 * aot_emit_exception:
 *
 * - exec_env
 * - aot_inst
 * - native_symbol (only if is_indirect_mode)
 *
 * Returns false on failure.
 */
static bool
create_basic_func_context(const AOTCompContext *comp_ctx,
                          AOTFuncContext *func_ctx)
{
    LLVMValueRef inst_slot_index = I32_TWO;
    LLVMValueRef inst_slot_addr;

    /* The exec_env is the first parameter of the function */
    func_ctx->exec_env = LLVMGetParam(func_ctx->func, 0);

    /*
     * The layout of exec_env is:
     *   exec_env->next, exec_env->prev, exec_env->module_inst, and argv_buf
     * thus the aot inst pointer lives in the pointer-sized slot #2.
     */
    inst_slot_addr = LLVMBuildInBoundsGEP2(comp_ctx->builder, OPQ_PTR_TYPE,
                                           func_ctx->exec_env,
                                           &inst_slot_index, 1,
                                           "aot_inst_addr");
    if (!inst_slot_addr) {
        aot_set_last_error("llvm build in bounds gep failed");
        return false;
    }

    /* Load aot inst */
    func_ctx->aot_inst = LLVMBuildLoad2(comp_ctx->builder, OPQ_PTR_TYPE,
                                        inst_slot_addr, "aot_inst");
    if (!func_ctx->aot_inst) {
        aot_set_last_error("llvm build load failed");
        return false;
    }

    if (comp_ctx->is_indirect_mode) {
        if (!create_native_symbol(comp_ctx, func_ctx)) {
            return false;
        }
    }

    return true;
}
/*
 * Tell whether the "precheck" wrapper function can rely on tail call
 * optimization (musttail) for the given target.
 *
 * The decision is made purely by comp_ctx->target_arch.
 */
bool
aot_target_precheck_can_use_musttail(const AOTCompContext *comp_ctx)
{
    /*
     * Targets known not to be usable with musttail:
     *
     * xtensa:
     *   xtensa windowed ABI doesn't have tail call optimization.
     *   Note: as of writing this, the xtensa version of LLVM
     *   simply ignores the musttail attribute.
     *   https://github.com/espressif/llvm-project/pull/73
     *
     * riscv32/riscv64:
     *   REVISIT: actually, riscv can use tail call optimization
     *   in some cases. I (yamamoto) don't know the exact conditions
     *   though.
     */
    static const char *const no_musttail_archs[] = { "xtensa", "riscv32",
                                                     "riscv64" };
    const unsigned int n =
        sizeof(no_musttail_archs) / sizeof(no_musttail_archs[0]);
    unsigned int k;

    for (k = 0; k < n; k++) {
        if (strcmp(comp_ctx->target_arch, no_musttail_archs[k]) == 0) {
            return false;
        }
    }

    /*
     * x86-64/i386: true
     *
     * others: assume true for now
     */
    return true;
}
/*
 * Estimate how much stack is necessary to make a call to a function of
 * callee_func_type. The stack consumption of the callee function itself
 * is not included.
 *
 * Ideally this should be target-specific to be precise. This
 * implementation is meant to be target-independent instead, accepting a
 * small overestimation, which is probably ok for our purpose (overflow
 * detection and memory profiling). An underestimation, on the other hand,
 * should be avoided as it can cause more serious problems like silent
 * data corruptions.
 *
 * Assumptions:
 *
 * - the first result is returned via a register.
 *
 * - all parameters, including exec_env and pointers to non-first
 *   results, are passed via stack.
 *   (this is more pessimistic than many of real calling conventions,
 *   where some of parameters are passed via register.)
 *
 * - N-byte value needs N-byte alignment on stack.
 *
 * - a value smaller than a pointer is extended.
 *   (eg. 4 byte values are extended to 8 byte on x86-64.)
 *
 * Returns the estimated size in bytes.
 */
unsigned int
aot_estimate_stack_usage_for_function_call(const AOTCompContext *comp_ctx,
                                           const AOTFuncType *callee_func_type)
{
    const unsigned int n_params = callee_func_type->param_count;
    const unsigned int n_results = callee_func_type->result_count;
    unsigned int total = 0;
    unsigned int slot;
    unsigned int idx;

    if (strcmp(comp_ctx->target_arch, "xtensa") == 0) {
        /*
         * In the xtensa windowed ABI, outgoing arguments are already
         * included in the callee's stack frame size, which equals to
         * the operand of the ENTRY instruction and what LLVM
         * MFI->getStackSize returns.
         */
        return 0;
    }

    /* exec_env */
    total = comp_ctx->pointer_size;

    /* parameters; each is at least as large as a pointer */
    for (idx = 0; idx < n_params; idx++) {
        slot = wasm_value_type_cell_num(callee_func_type->types[idx]) * 4;
        if (slot < comp_ctx->pointer_size) {
            slot = comp_ctx->pointer_size;
        }
        total = align_uint(total, slot) + slot;
    }

    /* pointers to results, except the first one */
    slot = comp_ctx->pointer_size;
    for (idx = 1; idx < n_results; idx++) {
        total = align_uint(total, slot) + slot;
    }

    /* return address */
    slot = comp_ctx->pointer_size;
    total = align_uint(total, slot) + slot;

    /*
     * some extra for possible arch-dependent things like
     * 16-byte alignment for x86_64.
     */
    total += 16;

    return total;
}
/*
* a "precheck" function performs a few things before calling wrapped_func.
*
* - update native_stack_top_min if necessary
* - stack overflow check (if it does, trap)
*/
static LLVMValueRef
aot_add_precheck_function(AOTCompContext *comp_ctx, LLVMModuleRef module,
uint32 func_index, uint32 orig_param_count,
LLVMTypeRef func_type, LLVMValueRef wrapped_func)
{
LLVMValueRef precheck_func;
LLVMBasicBlockRef begin;
LLVMBasicBlockRef check_top_block;
LLVMBasicBlockRef update_top_block;
LLVMBasicBlockRef stack_bound_check_block;
LLVMBasicBlockRef call_wrapped_func_block;
LLVMValueRef *params = NULL;
precheck_func =
aot_add_llvm_func1(comp_ctx, module, func_index, orig_param_count,
func_type, AOT_FUNC_PREFIX);
if (!precheck_func) {
goto fail;
}
begin = LLVMAppendBasicBlockInContext(comp_ctx->context, precheck_func,
"begin");
check_top_block = LLVMAppendBasicBlockInContext(
comp_ctx->context, precheck_func, "check_top_block");
if (comp_ctx->enable_stack_estimation) {
update_top_block = LLVMAppendBasicBlockInContext(
comp_ctx->context, precheck_func, "update_top_block");
if (!update_top_block) {
goto fail;
}
}
stack_bound_check_block = LLVMAppendBasicBlockInContext(
comp_ctx->context, precheck_func, "stack_bound_check_block");
call_wrapped_func_block = LLVMAppendBasicBlockInContext(
comp_ctx->context, precheck_func, "call_wrapped_func");
if (!begin || !check_top_block || !stack_bound_check_block
|| !call_wrapped_func_block) {
goto fail;
}
LLVMBuilderRef b = comp_ctx->builder;
LLVMPositionBuilderAtEnd(b, begin);
/* create a temporary minimum func_ctx */
AOTFuncContext tmp;
AOTFuncContext *func_ctx = &tmp;
memset(func_ctx, 0, sizeof(*func_ctx));
func_ctx->func = precheck_func;
func_ctx->module = module;
func_ctx->aot_func = comp_ctx->comp_data->funcs[func_index];
#if WASM_ENABLE_DEBUG_AOT != 0
func_ctx->debug_func = NULL;
#endif
if (!create_basic_func_context(comp_ctx, func_ctx))
goto fail;
if (comp_ctx->enable_stack_bound_check
&& !create_native_stack_bound(comp_ctx, func_ctx))
goto fail;
if (comp_ctx->enable_stack_estimation
&& !create_native_stack_top_min(comp_ctx, func_ctx)) {
goto fail;
}
unsigned int param_count = LLVMCountParams(precheck_func);
uint64 sz = param_count * sizeof(LLVMValueRef);
params = wasm_runtime_malloc(sz);
if (params == NULL) {
goto fail;
}
LLVMGetParams(precheck_func, params);
const bool is_64bit = comp_ctx->pointer_size == sizeof(uint64);
LLVMTypeRef uintptr_type;
if (is_64bit)
uintptr_type = I64_TYPE;
else
uintptr_type = I32_TYPE;
/*
* load the stack pointer
*/
LLVMValueRef sp_ptr = LLVMBuildAlloca(b, I32_TYPE, "sp_ptr");
if (!sp_ptr) {
goto fail;
}
LLVMValueRef sp = LLVMBuildPtrToInt(b, sp_ptr, uintptr_type, "sp");
if (!sp) {
goto fail;
}
/*
* load the value for this wrapped function from the stack_sizes array
*/
LLVMValueRef func_index_const = I32_CONST(func_index);
LLVMValueRef sizes =
LLVMBuildBitCast(b, comp_ctx->stack_sizes, INT32_PTR_TYPE, "sizes");
if (!sizes) {
goto fail;
}
LLVMValueRef sizep = LLVMBuildInBoundsGEP2(b, I32_TYPE, sizes,
&func_index_const, 1, "sizep");
if (!sizep) {
goto fail;
}
LLVMValueRef size32 = LLVMBuildLoad2(b, I32_TYPE, sizep, "size32");
if (!size32) {
goto fail;
}
LLVMValueRef size;
if (is_64bit) {
size = LLVMBuildZExt(b, size32, uintptr_type, "size");
if (!size) {
goto fail;
}
}
else {
size = size32;
}
/*
* calculate new sp
*/
LLVMValueRef underflow =
LLVMBuildICmp(b, LLVMIntULT, sp, size, "underflow");
if (!underflow) {
goto fail;
}
LLVMValueRef new_sp = LLVMBuildSub(b, sp, size, "new_sp");
if (!new_sp) {
goto fail;
}
if (!LLVMBuildBr(b, check_top_block)) {
goto fail;
}
LLVMPositionBuilderAtEnd(b, check_top_block);
if (comp_ctx->enable_stack_estimation) {
/*
* load native_stack_top_min from the exec_env
*/
LLVMValueRef top_min =
LLVMBuildLoad2(b, OPQ_PTR_TYPE, func_ctx->native_stack_top_min_addr,
"native_stack_top_min");
if (!top_min) {
goto fail;
}
LLVMValueRef top_min_int = LLVMBuildPtrToInt(
b, top_min, uintptr_type, "native_stack_top_min_int");
if (!top_min_int) {
goto fail;
}
/*
* update native_stack_top_min if
* new_sp = sp - size < native_stack_top_min
*
* Note: unless the stack has already overflown in this exec_env,
* native_stack_bound <= native_stack_top_min
*/
LLVMValueRef cmp_top =
LLVMBuildICmp(b, LLVMIntULT, new_sp, top_min_int, "cmp_top");
if (!cmp_top) {
goto fail;
}
cmp_top = LLVMBuildOr(b, underflow, cmp_top, "cmp_top2");
if (!cmp_top) {
goto fail;
}
if (!LLVMBuildCondBr(b, cmp_top, update_top_block,
call_wrapped_func_block)) {
aot_set_last_error("llvm build cond br failed.");
goto fail;
}
/*
* update native_stack_top_min
*/
LLVMPositionBuilderAtEnd(b, update_top_block);
LLVMValueRef new_sp_ptr =
LLVMBuildIntToPtr(b, new_sp, OPQ_PTR_TYPE, "new_sp_ptr");
if (!new_sp_ptr) {
goto fail;
}
if (!LLVMBuildStore(b, new_sp_ptr,
func_ctx->native_stack_top_min_addr)) {
goto fail;
}
if (!LLVMBuildBr(b, stack_bound_check_block)) {
goto fail;
}
}
else {
if (!LLVMBuildBr(b, stack_bound_check_block)) {
goto fail;
}
}
LLVMPositionBuilderAtEnd(b, stack_bound_check_block);
if (comp_ctx->enable_stack_bound_check) {
/*
* trap if new_sp < native_stack_bound
*/
LLVMValueRef bound_int = LLVMBuildPtrToInt(
b, func_ctx->native_stack_bound, uintptr_type, "bound_base_int");
if (!bound_int) {
goto fail;
}
LLVMValueRef cmp =
LLVMBuildICmp(b, LLVMIntULT, new_sp, bound_int, "cmp");
if (!cmp) {
goto fail;
}
cmp = LLVMBuildOr(b, underflow, cmp, "cmp2");
if (!cmp) {
goto fail;
}
/* todo: @llvm.expect.i1(i1 %cmp, i1 0) */
if (!aot_emit_exception(comp_ctx, func_ctx, EXCE_NATIVE_STACK_OVERFLOW,
true, cmp, call_wrapped_func_block))
goto fail;
}
else {
if (!LLVMBuildBr(b, call_wrapped_func_block)) {
goto fail;
}
}
/*
* call the wrapped function
* use a tail-call if possible
*/
LLVMPositionBuilderAtEnd(b, call_wrapped_func_block);
const char *name = "tail_call";
LLVMTypeRef ret_type = LLVMGetReturnType(func_type);
if (ret_type == VOID_TYPE) {
name = "";
}
LLVMValueRef retval =
LLVMBuildCall2(b, func_type, wrapped_func, params, param_count, name);
if (!retval) {
goto fail;
}
wasm_runtime_free(params);
params = NULL;
if (aot_target_precheck_can_use_musttail(comp_ctx)) {
LLVMSetTailCallKind(retval, LLVMTailCallKindMustTail);
}
else {
LLVMSetTailCallKind(retval, LLVMTailCallKindTail);
}
if (ret_type == VOID_TYPE) {
if (!LLVMBuildRetVoid(b)) {
goto fail;
}
}
else {
if (!LLVMBuildRet(b, retval)) {
goto fail;
}
}
return precheck_func;
fail:
if (params != NULL) {
wasm_runtime_free(params);
}
aot_set_last_error("failed to build precheck wrapper function.");
return NULL;
}
/**
* Add LLVM function
*/
static LLVMValueRef
aot_add_llvm_func(const AOTCompContext *comp_ctx, LLVMModuleRef module,
aot_add_llvm_func(AOTCompContext *comp_ctx, LLVMModuleRef module,
const AOTFuncType *aot_func_type, uint32 func_index,
LLVMTypeRef *p_func_type)
LLVMTypeRef *p_func_type, LLVMValueRef *p_precheck_func)
{
LLVMValueRef func = NULL;
LLVMTypeRef *param_types, ret_type, func_type;
LLVMValueRef local_value;
LLVMTypeRef func_type_wrapper;
LLVMValueRef func_wrapper;
LLVMBasicBlockRef func_begin;
@ -101,21 +565,44 @@ aot_add_llvm_func(const AOTCompContext *comp_ctx, LLVMModuleRef module,
goto fail;
}
/* Add LLVM function */
snprintf(func_name, sizeof(func_name), "%s%d", AOT_FUNC_PREFIX, func_index);
if (!(func = LLVMAddFunction(module, func_name, func_type))) {
aot_set_last_error("add LLVM function failed.");
goto fail;
bh_assert(func_index < comp_ctx->func_ctx_count);
bh_assert(LLVMGetReturnType(func_type) == ret_type);
const char *prefix = AOT_FUNC_PREFIX;
const bool need_precheck =
comp_ctx->enable_stack_bound_check || comp_ctx->enable_stack_estimation;
if (need_precheck) {
/*
* REVISIT: probably this breaks windows hw bound check
* (the RtlAddFunctionTable stuff)
*/
prefix = AOT_FUNC_INTERNAL_PREFIX;
}
if (!(func = aot_add_llvm_func1(comp_ctx, module, func_index,
aot_func_type->param_count, func_type,
prefix)))
goto fail;
j = 0;
local_value = LLVMGetParam(func, j++);
LLVMSetValueName(local_value, "exec_env");
if (need_precheck) {
if (!comp_ctx->is_jit_mode)
LLVMSetLinkage(func, LLVMInternalLinkage);
unsigned int kind =
LLVMGetEnumAttributeKindForName("noinline", strlen("noinline"));
LLVMAttributeRef attr_noinline =
LLVMCreateEnumAttribute(comp_ctx->context, kind, 0);
LLVMAddAttributeAtIndex(func, LLVMAttributeFunctionIndex,
attr_noinline);
/* Set parameter names */
for (i = 0; i < aot_func_type->param_count; i++) {
local_value = LLVMGetParam(func, j++);
LLVMSetValueName(local_value, "");
LLVMValueRef precheck_func = aot_add_precheck_function(
comp_ctx, module, func_index, aot_func_type->param_count, func_type,
func);
if (!precheck_func)
goto fail;
LLVMAddAttributeAtIndex(precheck_func, LLVMAttributeFunctionIndex,
attr_noinline);
*p_precheck_func = precheck_func;
}
else {
*p_precheck_func = func;
}
if (p_func_type)
@ -454,27 +941,6 @@ create_local_variables(const AOTCompData *comp_data,
}
}
if (comp_ctx->enable_stack_bound_check
|| comp_ctx->enable_stack_estimation) {
if (aot_func_type->param_count + func->local_count > 0) {
func_ctx->last_alloca = func_ctx->locals[aot_func_type->param_count
+ func->local_count - 1];
if (!(func_ctx->last_alloca =
LLVMBuildBitCast(comp_ctx->builder, func_ctx->last_alloca,
INT8_PTR_TYPE, "stack_ptr"))) {
aot_set_last_error("llvm build bit cast failed.");
return false;
}
}
else {
if (!(func_ctx->last_alloca = LLVMBuildAlloca(
comp_ctx->builder, INT8_TYPE, "stack_ptr"))) {
aot_set_last_error("llvm build alloca failed.");
return false;
}
}
}
return true;
}
@ -904,6 +1370,68 @@ create_func_ptrs(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
return true;
}
const char *aot_stack_sizes_name = AOT_STACK_SIZES_NAME;
/**
 * Create the per-module "stack_sizes" global: an array of i32 with one
 * slot per function in comp_data, every slot initialized to the -1
 * placeholder (I32_NEG_ONE).
 *
 * An alias named aot_stack_sizes_name is created for the array and the
 * original symbol is given internal linkage. On success the array type
 * and value are recorded in comp_ctx->stack_sizes_type and
 * comp_ctx->stack_sizes.
 *
 * @param comp_data the compilation data (provides func_count)
 * @param comp_ctx the compilation context owning the LLVM module
 *
 * @return true on success, false on failure (last error is set)
 */
static bool
aot_create_stack_sizes(const AOTCompData *comp_data, AOTCompContext *comp_ctx)
{
    const char *stack_sizes_name = "stack_sizes";
    LLVMTypeRef stack_sizes_type =
        LLVMArrayType(I32_TYPE, comp_data->func_count);
    if (!stack_sizes_type) {
        aot_set_last_error("failed to create stack_sizes type.");
        return false;
    }

    LLVMValueRef stack_sizes =
        LLVMAddGlobal(comp_ctx->module, stack_sizes_type, stack_sizes_name);
    if (!stack_sizes) {
        aot_set_last_error("failed to create stack_sizes global.");
        return false;
    }

    LLVMValueRef *values;
    /*
     * Widen to 64 bits before multiplying so that the product cannot wrap
     * on hosts where size_t is 32-bit; otherwise the range check below
     * could be bypassed and the buffer under-allocated.
     */
    uint64 size = sizeof(LLVMValueRef) * (uint64)comp_data->func_count;
    if (size >= UINT32_MAX || !(values = wasm_runtime_malloc((uint32)size))) {
        aot_set_last_error("allocate memory failed.");
        return false;
    }

    uint32 i;
    for (i = 0; i < comp_data->func_count; i++) {
        /*
         * This value is a placeholder, which will be replaced
         * after the corresponding functions are compiled.
         *
         * Don't use zeros because LLVM can optimize them to
         * zeroinitializer.
         */
        values[i] = I32_NEG_ONE;
    }

    LLVMValueRef array =
        LLVMConstArray(I32_TYPE, values, comp_data->func_count);
    /* the temporary value list is no longer needed once the constant
       array has been built (or has failed to build) */
    wasm_runtime_free(values);
    if (!array) {
        aot_set_last_error("failed to create stack_sizes initializer.");
        return false;
    }
    LLVMSetInitializer(stack_sizes, array);

    /*
     * create an alias so that aot_resolve_stack_sizes can find it.
     */
    LLVMValueRef alias = LLVMAddAlias2(comp_ctx->module, stack_sizes_type, 0,
                                       stack_sizes, aot_stack_sizes_name);
    if (!alias) {
        aot_set_last_error("failed to create stack_sizes alias.");
        return false;
    }

    /*
     * make the original symbol internal. we mainly use this version to
     * avoid creating extra relocations in the precheck functions.
     */
    LLVMSetLinkage(stack_sizes, LLVMInternalLinkage);

    comp_ctx->stack_sizes_type = stack_sizes_type;
    comp_ctx->stack_sizes = stack_sizes;
    return true;
}
/**
* Create function compiler context
*/
@ -917,7 +1445,6 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx,
WASMFunction *wasm_func = module->functions[func_index];
AOTBlock *aot_block;
LLVMTypeRef int8_ptr_type;
LLVMValueRef aot_inst_offset = I32_TWO, aot_inst_addr;
uint64 size;
/* Allocate memory for the function context */
@ -935,9 +1462,9 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx,
func_ctx->module = comp_ctx->module;
/* Add LLVM function */
if (!(func_ctx->func =
aot_add_llvm_func(comp_ctx, func_ctx->module, aot_func_type,
func_index, &func_ctx->func_type))) {
if (!(func_ctx->func = aot_add_llvm_func(
comp_ctx, func_ctx->module, aot_func_type, func_index,
&func_ctx->func_type, &func_ctx->precheck_func))) {
goto fail;
}
@ -956,22 +1483,7 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx,
/* Add local variables */
LLVMPositionBuilderAtEnd(comp_ctx->builder, aot_block->llvm_entry_block);
/* Save the parameters for fast access */
func_ctx->exec_env = LLVMGetParam(func_ctx->func, 0);
/* Get aot inst address, the layout of exec_env is:
exec_env->next, exec_env->prev, exec_env->module_inst, and argv_buf */
if (!(aot_inst_addr = LLVMBuildInBoundsGEP2(
comp_ctx->builder, OPQ_PTR_TYPE, func_ctx->exec_env,
&aot_inst_offset, 1, "aot_inst_addr"))) {
aot_set_last_error("llvm build in bounds gep failed");
goto fail;
}
/* Load aot inst */
if (!(func_ctx->aot_inst = LLVMBuildLoad2(comp_ctx->builder, OPQ_PTR_TYPE,
aot_inst_addr, "aot_inst"))) {
aot_set_last_error("llvm build load failed");
if (!create_basic_func_context(comp_ctx, func_ctx)) {
goto fail;
}
@ -980,28 +1492,12 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx,
goto fail;
}
/* Get native stack boundary address */
if (comp_ctx->enable_stack_bound_check
&& !create_native_stack_bound(comp_ctx, func_ctx)) {
goto fail;
}
if (comp_ctx->enable_stack_estimation
&& !create_native_stack_top_min(comp_ctx, func_ctx)) {
goto fail;
}
/* Get auxiliary stack info */
if (wasm_func->has_op_set_global_aux_stack
&& !create_aux_stack_info(comp_ctx, func_ctx)) {
goto fail;
}
/* Get native symbol list */
if (comp_ctx->is_indirect_mode
&& !create_native_symbol(comp_ctx, func_ctx)) {
goto fail;
}
/* Create local variables */
if (!create_local_variables(comp_data, comp_ctx, func_ctx, func)) {
goto fail;
@ -1070,6 +1566,11 @@ aot_create_func_contexts(const AOTCompData *comp_data, AOTCompContext *comp_ctx)
uint64 size;
uint32 i;
if ((comp_ctx->enable_stack_bound_check
|| comp_ctx->enable_stack_estimation)
&& !aot_create_stack_sizes(comp_data, comp_ctx))
return NULL;
/* Allocate memory */
size = sizeof(AOTFuncContext *) * (uint64)comp_data->func_count;
if (size >= UINT32_MAX
@ -1483,6 +1984,55 @@ fail:
return ret;
}
/**
 * Stack-size callback registered with the JIT compiler (see
 * orc_jit_create). It receives the name of a compiled function together
 * with its stack size and, for wrapped functions named
 * AOT_FUNC_INTERNAL_PREFIX "<index>", records the estimated stack
 * consumption in comp_ctx->jit_stack_sizes[index].
 *
 * Functions with any other name are ignored.
 *
 * @param user_data the AOTCompContext passed at registration time
 * @param name the function name (not necessarily NUL-terminated)
 * @param namelen the length of name
 * @param stack_size the stack size reported for the function
 */
static void
jit_stack_size_callback(void *user_data, const char *name, size_t namelen,
                        size_t stack_size)
{
    AOTCompContext *comp_ctx = user_data;
    /*
     * Note: the longest name we care is
     * something like "aot_func_internal#4294967295".
     */
    char buf[64];
    uint32 func_idx;
    const AOTFuncContext *func_ctx;
    bool musttail;
    unsigned int stack_consumption_to_call_wrapped_func;
    unsigned int call_size;
    int ret;

    bh_assert(comp_ctx != NULL);
    bh_assert(comp_ctx->jit_stack_sizes != NULL);

    if (namelen >= sizeof(buf)) {
        LOG_DEBUG("too long name: %.*s", (int)namelen, name);
        return;
    }
    /* ensure NUL termination */
    bh_memcpy_s(buf, sizeof(buf), name, namelen);
    buf[namelen] = 0;

    /* only the wrapped ("internal") functions carry an entry */
    ret = sscanf(buf, AOT_FUNC_INTERNAL_PREFIX "%" SCNu32, &func_idx);
    if (ret != 1) {
        return;
    }

    bh_assert(func_idx < comp_ctx->func_ctx_count);
    func_ctx = comp_ctx->func_ctxes[func_idx];
    call_size = func_ctx->stack_consumption_for_func_call;
    musttail = aot_target_precheck_can_use_musttail(comp_ctx);
    /*
     * When the precheck wrapper cannot tail-call the wrapped function,
     * the call from the wrapper itself consumes stack as well.
     */
    stack_consumption_to_call_wrapped_func =
        musttail ? 0
                 : aot_estimate_stack_usage_for_function_call(
                     comp_ctx, func_ctx->aot_func->func_type);
    LOG_VERBOSE("func %.*s stack %u + %zu + %u", (int)namelen, name,
                stack_consumption_to_call_wrapped_func, stack_size, call_size);

    /* Note: -1 == I32_NEG_ONE from aot_create_stack_sizes */
    bh_assert(comp_ctx->jit_stack_sizes[func_idx] == (uint32)-1);
    /*
     * Store the sum of all three components that are logged above.
     * Leaving out the wrapper-call cost would under-estimate the stack
     * usage on targets where musttail is not available.
     */
    comp_ctx->jit_stack_sizes[func_idx] =
        (uint32)(stack_consumption_to_call_wrapped_func + stack_size
                 + call_size);
}
static bool
orc_jit_create(AOTCompContext *comp_ctx)
{
@ -1498,6 +2048,10 @@ orc_jit_create(AOTCompContext *comp_ctx)
goto fail;
}
if (comp_ctx->enable_stack_bound_check || comp_ctx->enable_stack_estimation)
LLVMOrcLLJITBuilderSetCompileFuncitonCreatorWithStackSizesCallback(
builder, jit_stack_size_callback, comp_ctx);
err = LLVMOrcJITTargetMachineBuilderDetectHost(&jtmb);
if (err != LLVMErrorSuccess) {
aot_handle_llvm_errmsg(
@ -1688,14 +2242,6 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
if (option->is_jit_mode) {
comp_ctx->is_jit_mode = true;
/* Create TargetMachine */
if (!create_target_machine_detect_host(comp_ctx))
goto fail;
/* Create LLJIT Instance */
if (!orc_jit_create(comp_ctx))
goto fail;
#ifndef OS_ENABLE_HW_BOUND_CHECK
comp_ctx->enable_bound_check = true;
/* Always enable stack boundary check if `bounds-checks`
@ -1715,6 +2261,14 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
comp_ctx->enable_stack_bound_check = false;
#endif
#endif
/* Create TargetMachine */
if (!create_target_machine_detect_host(comp_ctx))
goto fail;
/* Create LLJIT Instance */
if (!orc_jit_create(comp_ctx))
goto fail;
}
else {
/* Create LLVM target machine */
@ -2037,6 +2591,19 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
(option->stack_bounds_checks == 1) ? true : false;
}
if ((comp_ctx->enable_stack_bound_check
|| comp_ctx->enable_stack_estimation)
&& option->stack_usage_file == NULL) {
if (!aot_generate_tempfile_name(
"wamrc-su", "su", comp_ctx->stack_usage_temp_file,
sizeof(comp_ctx->stack_usage_temp_file)))
goto fail;
comp_ctx->stack_usage_file = comp_ctx->stack_usage_temp_file;
}
else {
comp_ctx->stack_usage_file = option->stack_usage_file;
}
os_printf("Create AoT compiler with:\n");
os_printf(" target: %s\n", comp_ctx->target_arch);
os_printf(" target cpu: %s\n", cpu);
@ -2095,7 +2662,7 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
if (!(comp_ctx->target_machine = LLVMCreateTargetMachineWithOpts(
target, triple_norm, cpu, features, opt_level,
LLVMRelocStatic, code_model, false,
option->stack_usage_file))) {
comp_ctx->stack_usage_file))) {
aot_set_last_error("create LLVM target machine failed.");
goto fail;
}
@ -2239,6 +2806,10 @@ aot_destroy_comp_context(AOTCompContext *comp_ctx)
if (!comp_ctx)
return;
if (comp_ctx->stack_usage_file == comp_ctx->stack_usage_temp_file) {
(void)unlink(comp_ctx->stack_usage_temp_file);
}
if (comp_ctx->target_machine)
LLVMDisposeTargetMachine(comp_ctx->target_machine);
@ -2534,8 +3105,8 @@ aot_checked_addr_list_destroy(AOTFuncContext *func_ctx)
}
bool
aot_build_zero_function_ret(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
AOTFuncType *func_type)
aot_build_zero_function_ret(const AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx, AOTFuncType *func_type)
{
LLVMValueRef ret = NULL;
@ -2574,9 +3145,12 @@ aot_build_zero_function_ret(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return false;
}
#if WASM_ENABLE_DEBUG_AOT != 0
LLVMMetadataRef return_location =
dwarf_gen_func_ret_location(comp_ctx, func_ctx);
LLVMInstructionSetDebugLoc(ret, return_location);
/* debug_func is NULL for precheck function */
if (func_ctx->debug_func != NULL) {
LLVMMetadataRef return_location =
dwarf_gen_func_ret_location(comp_ctx, func_ctx);
LLVMInstructionSetDebugLoc(ret, return_location);
}
#endif
return true;
}

View File

@ -153,6 +153,7 @@ typedef struct AOTMemInfo {
typedef struct AOTFuncContext {
AOTFunc *aot_func;
LLVMValueRef func;
LLVMValueRef precheck_func;
LLVMTypeRef func_type;
LLVMModuleRef module;
AOTBlockStack block_stack;
@ -165,7 +166,6 @@ typedef struct AOTFuncContext {
LLVMValueRef aux_stack_bound;
LLVMValueRef aux_stack_bottom;
LLVMValueRef native_symbol;
LLVMValueRef last_alloca;
LLVMValueRef func_ptrs;
AOTMemInfo *mem_info;
@ -182,6 +182,9 @@ typedef struct AOTFuncContext {
#if WASM_ENABLE_DEBUG_AOT != 0
LLVMMetadataRef debug_func;
#endif
unsigned int stack_consumption_for_func_call;
LLVMValueRef locals[1];
} AOTFuncContext;
@ -378,6 +381,11 @@ typedef struct AOTCompContext {
/* LLVM floating-point exception behavior metadata */
LLVMValueRef fp_exception_behavior;
/* a global array to store stack sizes */
LLVMTypeRef stack_sizes_type;
LLVMValueRef stack_sizes;
uint32 *jit_stack_sizes; /* for JIT */
/* LLVM data types */
AOTLLVMTypes basic_types;
LLVMTypeRef exec_env_type;
@ -406,6 +414,9 @@ typedef struct AOTCompContext {
* file for some architecture (such as arc) */
const char *external_asm_compiler;
const char *asm_compiler_flags;
const char *stack_usage_file;
char stack_usage_temp_file[64];
} AOTCompContext;
enum {
@ -509,8 +520,8 @@ void
aot_checked_addr_list_destroy(AOTFuncContext *func_ctx);
bool
aot_build_zero_function_ret(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
AOTFuncType *func_type);
aot_build_zero_function_ret(const AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx, AOTFuncType *func_type);
LLVMValueRef
aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx,
@ -554,6 +565,13 @@ bool
aot_set_cond_br_weights(AOTCompContext *comp_ctx, LLVMValueRef cond_br,
int32 weights_true, int32 weights_false);
bool
aot_target_precheck_can_use_musttail(const AOTCompContext *comp_ctx);
unsigned int
aot_estimate_stack_usage_for_function_call(const AOTCompContext *comp_ctx,
const AOTFuncType *callee_func_type);
#ifdef __cplusplus
} /* end of extern "C" */
#endif

View File

@ -6,6 +6,7 @@
#include <llvm-c/TargetMachine.h>
#include <llvm/ADT/None.h>
#include <llvm/ADT/Optional.h>
#include <llvm/IR/Instructions.h>
#if LLVM_VERSION_MAJOR >= 14
#include <llvm/MC/TargetRegistry.h>
#else
@ -112,3 +113,20 @@ LLVMCreateTargetMachineWithOpts(LLVMTargetRef ctarget, const char *triple,
opts, rm, cm, ol, jit);
return reinterpret_cast<LLVMTargetMachineRef>(targetmachine);
}
/* https://reviews.llvm.org/D153107 */
#if LLVM_VERSION_MAJOR < 17
using namespace llvm;
/* Return the tail-call kind of the given call instruction, converted to
   the C enum. */
LLVMTailCallKind
LLVMGetTailCallKind(LLVMValueRef Call)
{
    const CallInst *call_inst = unwrap<CallInst>(Call);
    return static_cast<LLVMTailCallKind>(call_inst->getTailCallKind());
}
/* Set the tail-call kind of the given call instruction from the C enum
   value. */
void
LLVMSetTailCallKind(LLVMValueRef Call, LLVMTailCallKind kind)
{
    CallInst *call_inst = unwrap<CallInst>(Call);
    const auto cxx_kind = static_cast<CallInst::TailCallKind>(kind);
    call_inst->setTailCallKind(cxx_kind);
}
#endif

View File

@ -3,6 +3,7 @@
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include <llvm/Config/llvm-config.h>
#include <llvm-c/TargetMachine.h>
LLVM_C_EXTERN_C_BEGIN
@ -14,4 +15,20 @@ LLVMCreateTargetMachineWithOpts(LLVMTargetRef ctarget, const char *triple,
LLVMCodeModel code_model,
bool EmitStackSizeSection,
const char *StackUsageOutput);
/* https://reviews.llvm.org/D153107 */
#if LLVM_VERSION_MAJOR < 17
/*
 * Tail-call kind of a call instruction, for LLVM versions that lack this
 * C API (see the review link above).
 *
 * NOTE(review): LLVMGetTailCallKind/LLVMSetTailCallKind cast these values
 * directly from/to llvm::CallInst::TailCallKind, so the numeric values
 * presumably have to stay in sync with that enum - confirm against the
 * LLVM version in use before changing them.
 */
typedef enum {
LLVMTailCallKindNone = 0,
LLVMTailCallKindTail = 1,
LLVMTailCallKindMustTail = 2,
LLVMTailCallKindNoTail = 3,
} LLVMTailCallKind;
LLVMTailCallKind
LLVMGetTailCallKind(LLVMValueRef CallInst);
void
LLVMSetTailCallKind(LLVMValueRef CallInst, LLVMTailCallKind kind);
#endif
LLVM_C_EXTERN_C_END

View File

@ -157,13 +157,29 @@ PartitionFunction(GlobalValueSet Requested)
const char *wrapper;
uint32 prefix_len = strlen(AOT_FUNC_PREFIX);
LOG_DEBUG("requested func %s", gvname);
/* Convert "aot_func#n_wrapper" to "aot_func#n" */
if (strstr(gvname, AOT_FUNC_PREFIX)
&& (wrapper = strstr(gvname + prefix_len, "_wrapper"))) {
if (strstr(gvname, AOT_FUNC_PREFIX)) {
char buf[16] = { 0 };
char func_name[64];
int group_stride, i, j;
int num;
/*
* if the jit wrapper (which has "_wrapper" suffix in
* the name) is requested, compile others in the group too.
* otherwise, only compile the requested one.
* (and possibly the corresponding wrapped function,
* which has AOT_FUNC_INTERNAL_PREFIX.)
*/
wrapper = strstr(gvname + prefix_len, "_wrapper");
if (wrapper != NULL) {
num = WASM_ORC_JIT_COMPILE_THREAD_NUM;
}
else {
num = 1;
wrapper = strchr(gvname + prefix_len, 0);
}
bh_assert(wrapper - (gvname + prefix_len) > 0);
/* Get AOT function index */
bh_memcpy_s(buf, (uint32)sizeof(buf), gvname + prefix_len,
@ -173,10 +189,18 @@ PartitionFunction(GlobalValueSet Requested)
group_stride = WASM_ORC_JIT_BACKEND_THREAD_NUM;
/* Compile some functions each time */
for (j = 0; j < WASM_ORC_JIT_COMPILE_THREAD_NUM; j++) {
for (j = 0; j < num; j++) {
Function *F1;
snprintf(func_name, sizeof(func_name), "%s%d",
AOT_FUNC_PREFIX, i + j * group_stride);
Function *F1 = M->getFunction(func_name);
F1 = M->getFunction(func_name);
if (F1) {
LOG_DEBUG("compile func %s", func_name);
GVsToAdd.push_back(cast<GlobalValue>(F1));
}
snprintf(func_name, sizeof(func_name), "%s%d",
AOT_FUNC_INTERNAL_PREFIX, i + j * group_stride);
F1 = M->getFunction(func_name);
if (F1) {
LOG_DEBUG("compile func %s", func_name);
GVsToAdd.push_back(cast<GlobalValue>(F1));

View File

@ -71,5 +71,10 @@ LLVMOrcLLLazyJITGetIRTransformLayer(LLVMOrcLLLazyJITRef J);
LLVMOrcObjectTransformLayerRef
LLVMOrcLLLazyJITGetObjTransformLayer(LLVMOrcLLLazyJITRef J);
void
LLVMOrcLLJITBuilderSetCompileFuncitonCreatorWithStackSizesCallback(
LLVMOrcLLLazyJITBuilderRef Builder,
void (*cb)(void *, const char *, size_t, size_t), void *cb_data);
LLVM_C_EXTERN_C_END
#endif

View File

@ -0,0 +1,139 @@
/*
* Copyright (C) 2023 Midokura Japan KK. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "aot_orc_extra.h"
#include "bh_log.h"
/*
 * Stack size callback. As invoked by PrintStackSizes in this file, the
 * arguments are: the user data pointer, the function name (pointer and
 * length; not necessarily NUL-terminated) and the stack size.
 */
typedef void (*cb_t)(void *, const char *, size_t, size_t);
/*
 * An IRCompiler which compiles a module into an in-memory object buffer
 * and, while doing so, reports the stack size of each machine function
 * through the given callback.
 */
class MyCompiler : public llvm::orc::IRCompileLayer::IRCompiler
{
public:
/* JTMB is used to create the target machine for each compilation;
   cb/cb_data are forwarded to the stack size reporting pass. */
MyCompiler(llvm::orc::JITTargetMachineBuilder JTMB, cb_t cb, void *cb_data);
/* Compile the module M and return the resulting object buffer. */
llvm::Expected<llvm::orc::SimpleCompiler::CompileResult> operator()(
llvm::Module &M) override;
private:
/* builder used to create a target machine in operator() */
llvm::orc::JITTargetMachineBuilder JTMB;
/* stack size callback and its opaque user data */
cb_t cb;
void *cb_data;
};
MyCompiler::MyCompiler(llvm::orc::JITTargetMachineBuilder JTMB, cb_t cb,
void *cb_data)
: IRCompiler(llvm::orc::irManglingOptionsFromTargetOptions(JTMB.getOptions()))
, JTMB(std::move(JTMB))
, cb(cb)
, cb_data(cb_data)
{}
/*
 * A machine function pass which reports the name and the stack size of
 * each machine function it is run on through the given callback.
 */
class PrintStackSizes : public llvm::MachineFunctionPass
{
public:
PrintStackSizes(cb_t cb, void *cb_data);
/* Invoke the callback for MF. */
bool runOnMachineFunction(llvm::MachineFunction &MF) override;
/* pass identification; passed to the MachineFunctionPass constructor */
static char ID;
private:
/* stack size callback and its opaque user data */
cb_t cb;
void *cb_data;
};
PrintStackSizes::PrintStackSizes(cb_t cb, void *cb_data)
: MachineFunctionPass(ID)
, cb(cb)
, cb_data(cb_data)
{}
char PrintStackSizes::ID = 0;
/*
 * Report the name and the stack size of MF to the callback.
 * The machine function is only read; this always returns false.
 */
bool
PrintStackSizes::runOnMachineFunction(llvm::MachineFunction &MF)
{
    const llvm::StringRef func_name = MF.getName();
    const llvm::MachineFrameInfo &frame_info = MF.getFrameInfo();
    const size_t frame_size = frame_info.getStackSize();

    /* the name is passed as pointer + length */
    cb(cb_data, func_name.data(), func_name.size(), frame_size);
    return false;
}
/*
 * A pass manager which filters the passes being added.
 * See MyPassManager::add for the filtering rule.
 */
class MyPassManager : public llvm::legacy::PassManager
{
public:
void add(llvm::Pass *P) override;
};
/*
 * Add the pass P to the manager unless it is the "Free MachineFunction"
 * pass, in which case it is discarded.
 */
void
MyPassManager::add(llvm::Pass *P)
{
    // a hack to avoid having a copy of the whole addPassesToEmitMC.
    // we want to add PrintStackSizes before FreeMachineFunctionPass.
    if (P->getPassName() == "Free MachineFunction") {
        // the pass is not handed over to the pass manager, so nobody
        // else would release it. delete it here to avoid a leak.
        delete P;
        return;
    }
    llvm::legacy::PassManager::add(P);
}
// a modified copy from llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
//
// Compile the module M into an in-memory object buffer. In addition to
// the ordinary MC emission passes, a PrintStackSizes pass is run so that
// the stack size of each machine function is reported to the callback.
//
// Returns the object buffer, or an error if the target machine can not
// be created or the target does not support MC emission.
llvm::Expected<llvm::orc::SimpleCompiler::CompileResult>
MyCompiler::operator()(llvm::Module &M)
{
    // propagate the failure to the caller instead of aborting the
    // process; this function returns llvm::Expected anyway.
    auto TMOrErr = JTMB.createTargetMachine();
    if (!TMOrErr)
        return TMOrErr.takeError();
    auto TM = std::move(*TMOrErr);

    llvm::SmallVector<char, 0> ObjBufferSV;

    {
        llvm::raw_svector_ostream ObjStream(ObjBufferSV);

        // MyPassManager drops the "Free MachineFunction" pass which
        // addPassesToEmitMC wants to add, so that PrintStackSizes can
        // run while the machine functions are still alive.
        MyPassManager PM;
        llvm::MCContext *Ctx;
        if (TM->addPassesToEmitMC(PM, Ctx, ObjStream))
            return llvm::make_error<llvm::StringError>(
                "Target does not support MC emission",
                llvm::inconvertibleErrorCode());
        PM.add(new PrintStackSizes(cb, cb_data));
        // Re-add the pass dropped above. The call has to be qualified
        // with the base class name: add() is virtual, so a call through
        // a base class pointer would still be dispatched to
        // MyPassManager::add, which filters this very pass out.
        PM.llvm::legacy::PassManager::add(
            llvm::createFreeMachineFunctionPass());
        PM.run(M);
    }

    auto ObjBuffer = std::make_unique<llvm::SmallVectorMemoryBuffer>(
        std::move(ObjBufferSV),
        M.getModuleIdentifier() + "-jitted-objectbuffer",
        /*RequiresNullTerminator=*/false);

    return std::move(ObjBuffer);
}
/* provides the unwrap() used below to get the C++ builder from the C
   handle */
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::orc::LLLazyJITBuilder,
                                   LLVMOrcLLLazyJITBuilderRef)

/*
 * Make the given builder use MyCompiler as its IR compiler, so that the
 * callback cb is invoked with cb_data, the function name (pointer and
 * length) and the stack size for each compiled machine function.
 */
void
LLVMOrcLLJITBuilderSetCompileFuncitonCreatorWithStackSizesCallback(
    LLVMOrcLLLazyJITBuilderRef Builder,
    void (*cb)(void *, const char *, size_t, size_t), void *cb_data)
{
    auto b = unwrap(Builder);
    b->setCompileFunctionCreator(
        [cb, cb_data](llvm::orc::JITTargetMachineBuilder JTMB)
            -> llvm::Expected<
                std::unique_ptr<llvm::orc::IRCompileLayer::IRCompiler>> {
            /* construct the compiler in place rather than building a
               temporary and copying/moving it */
            return std::make_unique<MyCompiler>(std::move(JTMB), cb,
                                                cb_data);
        });
}