diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index 3a5b6fc5c..0abafd9dd 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -597,6 +597,11 @@ load_target_info_section(const uint8 *buf, const uint8 *buf_end, return false; } +#if WASM_ENABLE_AOT_STACK_FRAME != 0 + /* saved for the runtime, which picks the stack frame layout from it */ + module->feature_flags = target_info.feature_flags; +#endif + /* Finally, check feature flags */ return check_feature_flags(error_buf, error_buf_size, target_info.feature_flags); diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c index bdb4ca911..013c761a0 100644 --- a/core/iwasm/aot/aot_runtime.c +++ b/core/iwasm/aot/aot_runtime.c @@ -4,6 +4,7 @@ */ #include "aot_runtime.h" +#include "../compilation/aot_stack_frame.h" #include "bh_log.h" #include "mem_alloc.h" #include "../common/wasm_runtime_common.h" @@ -72,6 +73,10 @@ bh_static_assert(offsetof(AOTFrame, sp) == sizeof(uintptr_t) * 5); bh_static_assert(offsetof(AOTFrame, frame_ref) == sizeof(uintptr_t) * 6); bh_static_assert(offsetof(AOTFrame, lp) == sizeof(uintptr_t) * 7); +bh_static_assert(offsetof(AOTTinyFrame, func_index) == sizeof(uint32) * 0); +bh_static_assert(offsetof(AOTTinyFrame, ip_offset) == sizeof(uint32) * 1); +bh_static_assert(sizeof(AOTTinyFrame) == sizeof(uint32) * 2); + static void set_error_buf(char *error_buf, uint32 error_buf_size, const char *string) { @@ -110,6 +115,55 @@ runtime_malloc(uint64 size, char *error_buf, uint32 error_buf_size) return mem; } +#if WASM_ENABLE_AOT_STACK_FRAME != 0 +static bool +is_tiny_frame(WASMExecEnv *exec_env) +{ + AOTModule *module = + (AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module; + + return module->feature_flags & WASM_FEATURE_TINY_STACK_FRAME; +} + +static bool +is_frame_per_function(WASMExecEnv *exec_env) +{ + AOTModule *module = + (AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module; + + return module->feature_flags & WASM_FEATURE_FRAME_PER_FUNCTION; +} + +static void * 
+get_top_frame(WASMExecEnv *exec_env) +{ + if (is_tiny_frame(exec_env)) { + return exec_env->wasm_stack.top > exec_env->wasm_stack.bottom + ? exec_env->wasm_stack.top - sizeof(AOTTinyFrame) + : NULL; + } + else { + return exec_env->cur_frame; + } +} + +static void * +get_prev_frame(WASMExecEnv *exec_env, void *cur_frame) +{ + bh_assert(cur_frame); + + if (is_tiny_frame(exec_env)) { + if ((uint8 *)cur_frame == exec_env->wasm_stack.bottom) { + return NULL; + } + return ((AOTTinyFrame *)cur_frame) - 1; + } + else { + return ((AOTFrame *)cur_frame)->prev_frame; + } +} +#endif + static bool check_global_init_expr(const AOTModule *module, uint32 global_index, char *error_buf, uint32 error_buf_size) @@ -2265,7 +2319,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function, uint32 ext_ret_cell = wasm_get_cell_num(ext_ret_types, ext_ret_count); uint64 size; #if WASM_ENABLE_AOT_STACK_FRAME != 0 - struct WASMInterpFrame *prev_frame = exec_env->cur_frame; + void *prev_frame = get_top_frame(exec_env); #endif /* Allocate memory all arguments */ @@ -2296,7 +2350,8 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function, } #if WASM_ENABLE_AOT_STACK_FRAME != 0 - if (!aot_alloc_frame(exec_env, function->func_index)) { + if (!is_frame_per_function(exec_env) + && !aot_alloc_frame(exec_env, function->func_index)) { if (argv1 != argv1_buf) wasm_runtime_free(argv1); return false; @@ -2324,7 +2379,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function, /* Free all frames allocated, note that some frames may be allocated in AOT code and haven't been freed if exception occurred */ - while (exec_env->cur_frame != prev_frame) + while (get_top_frame(exec_env) != prev_frame) aot_free_frame(exec_env); #endif if (!ret) { @@ -2367,9 +2422,12 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function, } else { #if WASM_ENABLE_AOT_STACK_FRAME != 0 - struct WASMInterpFrame *prev_frame = exec_env->cur_frame; - - if 
(!aot_alloc_frame(exec_env, function->func_index)) { + void *prev_frame = get_top_frame(exec_env); + /* Only allocate frame for frame-per-call mode; in the + frame-per-function mode the frame is allocated at the + beginning of the function. */ + if (!is_frame_per_function(exec_env) + && !aot_alloc_frame(exec_env, function->func_index)) { return false; } #endif @@ -2394,7 +2452,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function, /* Free all frames allocated, note that some frames may be allocated in AOT code and haven't been freed if exception occurred */ - while (exec_env->cur_frame != prev_frame) + while (get_top_frame(exec_env) != prev_frame) aot_free_frame(exec_env); #endif @@ -2880,7 +2938,7 @@ aot_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc, goto fail; } #if WASM_ENABLE_AOT_STACK_FRAME != 0 - struct WASMInterpFrame *prev_frame = exec_env->cur_frame; + void *prev_frame = get_top_frame(exec_env); if (!aot_alloc_frame(exec_env, func_idx)) { goto fail; @@ -2894,7 +2952,7 @@ aot_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc, /* Free all frames allocated, note that some frames may be allocated in AOT code and haven't been freed if exception occurred */ - while (exec_env->cur_frame != prev_frame) + while (get_top_frame(exec_env) != prev_frame) aot_free_frame(exec_env); #endif } @@ -3622,8 +3680,8 @@ get_func_name_from_index(const AOTModuleInstance *module_inst, WASM_ENABLE_PERF_PROFILING != 0 */ #if WASM_ENABLE_GC == 0 -bool -aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) +static bool +aot_alloc_standard_frame(WASMExecEnv *exec_env, uint32 func_index) { AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst; #if WASM_ENABLE_PERF_PROFILING != 0 @@ -3670,8 +3728,8 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) #else /* else of WASM_ENABLE_GC == 0 */ -bool -aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) +static bool 
+aot_alloc_standard_frame(WASMExecEnv *exec_env, uint32 func_index) { AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst; AOTModule *module = (AOTModule *)module_inst->module; @@ -3727,11 +3785,48 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) } #endif /* end of WASM_ENABLE_GC == 0 */ +static bool +aot_alloc_tiny_frame(WASMExecEnv *exec_env, uint32 func_index) +{ + AOTTinyFrame *new_frame = (AOTTinyFrame *)exec_env->wasm_stack.top; + /* the whole frame, not just its start, must fit below the boundary */ + if ((uint8 *)(new_frame + 1) > exec_env->wasm_stack.top_boundary) { + aot_set_exception((WASMModuleInstance *)exec_env->module_inst, + "wasm operand stack overflow"); + return false; + } + + new_frame->func_index = func_index; + exec_env->wasm_stack.top += sizeof(AOTTinyFrame); + return true; +} + +bool +aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) +{ + AOTModule *module = + (AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module; + + if (is_frame_per_function(exec_env) + && func_index >= module->import_func_count) { + /* in frame per function mode the frame is allocated at + the beginning of each function, so we only need to allocate + the frame for imported functions */ + return true; + } + if (is_tiny_frame(exec_env)) { + return aot_alloc_tiny_frame(exec_env, func_index); + } + else { + return aot_alloc_standard_frame(exec_env, func_index); + } +} + static inline void -aot_free_frame_internal(WASMExecEnv *exec_env) +aot_free_standard_frame(WASMExecEnv *exec_env) { AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame; - AOTFrame *prev_frame = cur_frame->prev_frame; + AOTFrame *prev_frame = (AOTFrame *)cur_frame->prev_frame; #if WASM_ENABLE_PERF_PROFILING != 0 uint64 time_elapsed = @@ -3751,13 +3846,24 @@ aot_free_frame_internal(WASMExecEnv *exec_env) exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame; } +static inline void +aot_free_tiny_frame(WASMExecEnv *exec_env) +{ + exec_env->wasm_stack.top = + get_prev_frame(exec_env, exec_env->wasm_stack.top); +} + void 
aot_free_frame(WASMExecEnv *exec_env) { - aot_free_frame_internal(exec_env); + if (is_tiny_frame(exec_env)) { + aot_free_tiny_frame(exec_env); + } + else { + aot_free_standard_frame(exec_env); + } } - void aot_frame_update_profile_info(WASMExecEnv *exec_env, bool alloc_frame) { @@ -3806,14 +3912,13 @@ aot_frame_update_profile_info(WASMExecEnv *exec_env, bool alloc_frame) bool aot_create_call_stack(struct WASMExecEnv *exec_env) { - AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame, - *first_frame = cur_frame; AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst; AOTModule *module = (AOTModule *)module_inst->module; uint32 n = 0; - while (cur_frame) { - cur_frame = cur_frame->prev_frame; + void *top_frame = get_top_frame(exec_env); + while (top_frame) { + top_frame = get_prev_frame(exec_env, top_frame); n++; } @@ -3823,28 +3928,46 @@ aot_create_call_stack(struct WASMExecEnv *exec_env) return false; } - cur_frame = first_frame; - while (cur_frame) { + top_frame = get_top_frame(exec_env); + while (n-- > 0) { + uint32 func_index, ip_offset; + uint32 *lp = NULL; +#if WASM_ENABLE_GC != 0 + uint32 *sp = NULL; + uint8 *frame_ref = NULL; +#endif + if (is_tiny_frame(exec_env)) { + AOTTinyFrame *frame = (AOTTinyFrame *)top_frame; + func_index = (uint32)frame->func_index; + ip_offset = (uint32)frame->ip_offset; + } + else { + AOTFrame *frame = (AOTFrame *)top_frame; + func_index = (uint32)frame->func_index; + ip_offset = (uint32)frame->ip_offset; + lp = frame->lp; +#if WASM_ENABLE_GC != 0 + sp = frame->sp; + frame_ref = frame->frame_ref; +#endif + } WASMCApiFrame frame = { 0 }; uint32 max_local_cell_num, max_stack_cell_num; uint32 all_cell_num, lp_size; frame.instance = module_inst; frame.module_offset = 0; - frame.func_index = (uint32)cur_frame->func_index; - frame.func_offset = (uint32)cur_frame->ip_offset; - frame.func_name_wp = get_func_name_from_index( - module_inst, (uint32)cur_frame->func_index); + frame.func_index = func_index; + 
frame.func_offset = ip_offset; + frame.func_name_wp = get_func_name_from_index(module_inst, func_index); - if (cur_frame->func_index >= module->import_func_count) { - uint32 aot_func_idx = - (uint32)(cur_frame->func_index - module->import_func_count); + if (func_index >= module->import_func_count) { + uint32 aot_func_idx = func_index - module->import_func_count; max_local_cell_num = module->max_local_cell_nums[aot_func_idx]; max_stack_cell_num = module->max_stack_cell_nums[aot_func_idx]; } else { - AOTFuncType *func_type = - module->import_funcs[cur_frame->func_index].func_type; + AOTFuncType *func_type = module->import_funcs[func_index].func_type; max_local_cell_num = func_type->param_cell_num > 2 ? func_type->param_cell_num : 2; max_stack_cell_num = 0; @@ -3856,12 +3979,12 @@ aot_create_call_stack(struct WASMExecEnv *exec_env) #else lp_size = align_uint(all_cell_num * 5, 4); #endif - if (lp_size > 0) { + if (lp_size > 0 && !is_tiny_frame(exec_env)) { if (!(frame.lp = wasm_runtime_malloc(lp_size))) { destroy_c_api_frames(module_inst->frames); return false; } - bh_memcpy_s(frame.lp, lp_size, cur_frame->lp, lp_size); + bh_memcpy_s(frame.lp, lp_size, lp, lp_size); #if WASM_ENABLE_GC != 0 uint32 local_ref_flags_cell_num = @@ -3869,9 +3992,8 @@ aot_create_call_stack(struct WASMExecEnv *exec_env) .local_ref_flag_cell_num; uint8 *local_ref_flags = module->func_local_ref_flags[frame.func_index].local_ref_flags; - frame.sp = frame.lp + (cur_frame->sp - cur_frame->lp); - frame.frame_ref = (uint8 *)frame.lp - + (cur_frame->frame_ref - (uint8 *)cur_frame->lp); + frame.sp = frame.lp + (sp - lp); + frame.frame_ref = (uint8 *)frame.lp + (frame_ref - (uint8 *)lp); /* copy local ref flags from AOT module */ bh_memcpy_s(frame.frame_ref, local_ref_flags_cell_num, local_ref_flags, lp_size); @@ -3885,7 +4007,7 @@ aot_create_call_stack(struct WASMExecEnv *exec_env) return false; } - cur_frame = cur_frame->prev_frame; + top_frame = get_prev_frame(exec_env, top_frame); } return true; 
diff --git a/core/iwasm/aot/aot_runtime.h b/core/iwasm/aot/aot_runtime.h index 76c784512..f6bff00bf 100644 --- a/core/iwasm/aot/aot_runtime.h +++ b/core/iwasm/aot/aot_runtime.h @@ -25,12 +25,15 @@ extern "C" { #define WASM_FEATURE_REF_TYPES (1 << 3) #define WASM_FEATURE_GARBAGE_COLLECTION (1 << 4) #define WASM_FEATURE_EXCEPTION_HANDLING (1 << 5) -#define WASM_FEATURE_MEMORY64 (1 << 6) +#define WASM_FEATURE_TINY_STACK_FRAME (1 << 6) #define WASM_FEATURE_MULTI_MEMORY (1 << 7) #define WASM_FEATURE_DYNAMIC_LINKING (1 << 8) #define WASM_FEATURE_COMPONENT_MODEL (1 << 9) #define WASM_FEATURE_RELAXED_SIMD (1 << 10) #define WASM_FEATURE_FLEXIBLE_VECTORS (1 << 11) +/* Stack frame is created at the beginning of the function, + * and not at the beginning of each function call */ +#define WASM_FEATURE_FRAME_PER_FUNCTION (1 << 12) typedef enum AOTSectionType { AOT_SECTION_TYPE_TARGET_INFO = 0, @@ -326,6 +329,10 @@ typedef struct AOTModule { /* `.data` and `.text` sections merged into one large mmaped section */ uint8 *merged_data_text_sections; uint32 merged_data_text_sections_size; + +#if WASM_ENABLE_AOT_STACK_FRAME != 0 + uint32 feature_flags; +#endif } AOTModule; #define AOTMemoryInstance WASMMemoryInstance diff --git a/core/iwasm/compilation/aot_compiler.c b/core/iwasm/compilation/aot_compiler.c index 78b7da88d..e56004972 100644 --- a/core/iwasm/compilation/aot_compiler.c +++ b/core/iwasm/compilation/aot_compiler.c @@ -16,6 +16,7 @@ #include "aot_emit_parametric.h" #include "aot_emit_table.h" #include "aot_emit_gc.h" +#include "aot_stack_frame_comp.h" #include "simd/simd_access_lanes.h" #include "simd/simd_bitmask_extracts.h" #include "simd/simd_bit_shifts.h" @@ -253,6 +254,13 @@ store_value(AOTCompContext *comp_ctx, LLVMValueRef value, uint8 value_type, return true; } +void +aot_call_stack_features_init_default(AOTCallStackFeatures *features) +{ + memset(features, 1, sizeof(AOTCallStackFeatures)); + features->frame_per_function = false; +} + bool 
aot_frame_store_value(AOTCompContext *comp_ctx, LLVMValueRef value, uint8 value_type, LLVMValueRef cur_frame, uint32 offset) @@ -573,9 +581,10 @@ aot_gen_commit_values(AOTCompFrame *frame) return true; } -bool -aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - LLVMValueRef ip_value, bool is_64bit) +static bool +aot_standard_frame_gen_commit_ip(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMValueRef ip_value, bool is_64bit) { LLVMValueRef cur_frame = func_ctx->cur_frame; LLVMValueRef value_offset, value_addr, value_ptr; @@ -613,6 +622,23 @@ aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return true; } +bool +aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + LLVMValueRef ip_value, bool is_64bit) +{ + switch (comp_ctx->aux_stack_frame_type) { + case AOT_STACK_FRAME_TYPE_STANDARD: + return aot_standard_frame_gen_commit_ip(comp_ctx, func_ctx, + ip_value, is_64bit); + case AOT_STACK_FRAME_TYPE_TINY: + return aot_tiny_frame_gen_commit_ip(comp_ctx, func_ctx, ip_value); + default: + aot_set_last_error( + "unsupported mode when generating commit_ip code"); + return false; + } +} + bool aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip) { @@ -962,6 +988,7 @@ static bool aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) { AOTFuncContext *func_ctx = comp_ctx->func_ctxes[func_index]; + LLVMValueRef func_index_ref; uint8 *frame_ip = func_ctx->aot_func->code, opcode, *p_f32, *p_f64; uint8 *frame_ip_end = frame_ip + func_ctx->aot_func->code_size; uint8 *param_types = NULL; @@ -984,16 +1011,27 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) LLVMMetadataRef location; #endif - if (comp_ctx->enable_aux_stack_frame) { + /* Start to translate the opcodes */ + LLVMPositionBuilderAtEnd( + comp_ctx->builder, + func_ctx->block_stack.block_list_head->llvm_entry_block); + + if (comp_ctx->aux_stack_frame_type + && comp_ctx->call_stack_features.frame_per_function) { 
+ INT_CONST(func_index_ref, + func_index + comp_ctx->comp_data->import_func_count, I32_TYPE, + true); + if (!aot_alloc_frame_per_function_frame_for_aot_func(comp_ctx, func_ctx, + func_index_ref)) { + return false; + } + } + if (comp_ctx->aux_stack_frame_type) { if (!init_comp_frame(comp_ctx, func_ctx, func_index)) { return false; } } - /* Start to translate the opcodes */ - LLVMPositionBuilderAtEnd( - comp_ctx->builder, - func_ctx->block_stack.block_list_head->llvm_entry_block); while (frame_ip < frame_ip_end) { opcode = *frame_ip++; diff --git a/core/iwasm/compilation/aot_compiler.h b/core/iwasm/compilation/aot_compiler.h index d3d55b02b..895d2416b 100644 --- a/core/iwasm/compilation/aot_compiler.h +++ b/core/iwasm/compilation/aot_compiler.h @@ -661,6 +661,15 @@ set_local_gc_ref(AOTCompFrame *frame, int n, LLVMValueRef value, uint8 ref_type) #define F64_CONST(v) LLVMConstReal(F64_TYPE, v) #define I8_CONST(v) LLVMConstInt(INT8_TYPE, v, true) +#define INT_CONST(variable, value, type, is_signed) \ + do { \ + variable = LLVMConstInt(type, value, is_signed); \ + if (!variable) { \ + aot_set_last_error("llvm build const failed"); \ + return false; \ + } \ + } while (0) + #define LLVM_CONST(name) (comp_ctx->llvm_consts.name) #define I1_ZERO LLVM_CONST(i1_zero) #define I1_ONE LLVM_CONST(i1_one) diff --git a/core/iwasm/compilation/aot_emit_aot_file.c b/core/iwasm/compilation/aot_emit_aot_file.c index e05f83b09..20f29057c 100644 --- a/core/iwasm/compilation/aot_emit_aot_file.c +++ b/core/iwasm/compilation/aot_emit_aot_file.c @@ -4433,6 +4433,12 @@ aot_obj_data_create(AOTCompContext *comp_ctx) if (comp_ctx->enable_gc) { obj_data->target_info.feature_flags |= WASM_FEATURE_GARBAGE_COLLECTION; } + if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_TINY) { + obj_data->target_info.feature_flags |= WASM_FEATURE_TINY_STACK_FRAME; + } + if (comp_ctx->call_stack_features.frame_per_function) { + obj_data->target_info.feature_flags |= WASM_FEATURE_FRAME_PER_FUNCTION; + } 
bh_print_time("Begin to resolve object file info"); diff --git a/core/iwasm/compilation/aot_emit_control.c b/core/iwasm/compilation/aot_emit_control.c index 7d73d8d90..945f63952 100644 --- a/core/iwasm/compilation/aot_emit_control.c +++ b/core/iwasm/compilation/aot_emit_control.c @@ -6,6 +6,7 @@ #include "aot_emit_control.h" #include "aot_compiler.h" #include "aot_emit_exception.h" +#include "aot_stack_frame_comp.h" #if WASM_ENABLE_GC != 0 #include "aot_emit_gc.h" #endif @@ -38,13 +39,24 @@ format_block_name(char *name, uint32 name_size, uint32 block_index, snprintf(name, name_size, "%s", "func_end"); } -#define CREATE_BLOCK(new_llvm_block, name) \ - do { \ - if (!(new_llvm_block = LLVMAppendBasicBlockInContext( \ - comp_ctx->context, func_ctx->func, name))) { \ - aot_set_last_error("add LLVM basic block failed."); \ - goto fail; \ - } \ +#define CREATE_BLOCK(new_llvm_block, name) \ + do { \ + if (!(new_llvm_block = LLVMAppendBasicBlockInContext( \ + comp_ctx->context, func_ctx->func, name))) { \ + aot_set_last_error("add LLVM basic block failed."); \ + goto fail; \ + } \ + if (!strcmp(name, "func_end") && comp_ctx->aux_stack_frame_type \ + && comp_ctx->call_stack_features.frame_per_function) { \ + LLVMBasicBlockRef cur_block = \ + LLVMGetInsertBlock(comp_ctx->builder); \ + SET_BUILDER_POS(new_llvm_block); \ + if (!aot_free_frame_per_function_frame_for_aot_func(comp_ctx, \ + func_ctx)) { \ + goto fail; \ + } \ + SET_BUILDER_POS(cur_block); \ + } \ } while (0) #define CURR_BLOCK() LLVMGetInsertBlock(comp_ctx->builder) @@ -93,6 +105,11 @@ format_block_name(char *name, uint32 name_size, uint32 block_index, goto fail; \ } \ SET_BUILDER_POS(block->llvm_end_block); \ + LLVMValueRef first_instr = \ + get_first_non_phi(block->llvm_end_block); \ + if (first_instr) { \ + LLVMPositionBuilderBefore(comp_ctx->builder, first_instr); \ + } \ for (_i = 0; _i < block->result_count; _i++) { \ if (!(block->result_phis[_i] = LLVMBuildPhi( \ comp_ctx->builder, \ @@ -158,6 +175,18 @@ 
get_target_block(AOTFuncContext *func_ctx, uint32 br_depth) return block; } +LLVMValueRef +get_first_non_phi(LLVMBasicBlockRef block) +{ + LLVMValueRef instr = LLVMGetFirstInstruction(block); + + while (instr && LLVMIsAPHINode(instr)) { + instr = LLVMGetNextInstruction(instr); + } + + return instr; +} + static void clear_frame_locals(AOTCompFrame *aot_frame) { @@ -1361,6 +1390,13 @@ aot_compile_op_return(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, (*p_frame_ip - 1) - comp_ctx->comp_data->wasm_module->buf_code); #endif + if (comp_ctx->aux_stack_frame_type + && comp_ctx->call_stack_features.frame_per_function + && !aot_free_frame_per_function_frame_for_aot_func(comp_ctx, + func_ctx)) { + return false; + } + if (block_func->result_count) { /* Store extra result values to function parameters */ for (i = 0; i < block_func->result_count - 1; i++) { diff --git a/core/iwasm/compilation/aot_emit_function.c b/core/iwasm/compilation/aot_emit_function.c index 1d565b6c0..fbef02e20 100644 --- a/core/iwasm/compilation/aot_emit_function.c +++ b/core/iwasm/compilation/aot_emit_function.c @@ -7,6 +7,7 @@ #include "aot_emit_exception.h" #include "aot_emit_control.h" #include "aot_emit_table.h" +#include "aot_stack_frame_comp.h" #include "../aot/aot_runtime.h" #if WASM_ENABLE_GC != 0 #include "aot_emit_gc.h" @@ -1403,6 +1404,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMValueRef *param_values = NULL, value_ret = NULL, func; LLVMValueRef import_func_idx, res; LLVMValueRef ext_ret, ext_ret_ptr, ext_ret_idx; + LLVMValueRef func_idx_ref; int32 i, j = 0, param_count, result_count, ext_ret_count; uint64 total_size; uint8 wasm_ret_type; @@ -1447,12 +1449,28 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return false; } - if (comp_ctx->enable_aux_stack_frame) { #if WASM_ENABLE_AOT_STACK_FRAME != 0 - if (!alloc_frame_for_aot_func(comp_ctx, func_ctx, func_idx)) - return false; -#endif + if (comp_ctx->aux_stack_frame_type) { 
+ if (func_idx < import_func_count + && comp_ctx->call_stack_features.frame_per_function) { + INT_CONST(func_idx_ref, func_idx, I32_TYPE, true); + if (!aot_alloc_frame_per_function_frame_for_aot_func( + comp_ctx, func_ctx, func_idx_ref)) { + return false; + } + } + else if (!comp_ctx->call_stack_features.frame_per_function) { + if (comp_ctx->aux_stack_frame_type + != AOT_STACK_FRAME_TYPE_STANDARD) { + aot_set_last_error("unsupported mode"); + return false; + } + if (!alloc_frame_for_aot_func(comp_ctx, func_ctx, func_idx)) { + return false; + } + } } +#endif /* Get param cell number */ param_cell_num = func_type->param_cell_num; @@ -1522,7 +1540,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } if (func_idx < import_func_count) { - if (comp_ctx->enable_aux_stack_frame + if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD && !commit_params_to_frame_of_import_func( comp_ctx, func_ctx, func_type, param_values + 1)) { goto fail; @@ -1813,12 +1831,28 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } } - if (comp_ctx->enable_aux_stack_frame) { #if WASM_ENABLE_AOT_STACK_FRAME != 0 - if (!free_frame_for_aot_func(comp_ctx, func_ctx)) - goto fail; -#endif + if (comp_ctx->aux_stack_frame_type) { + if (func_idx < import_func_count + && comp_ctx->call_stack_features.frame_per_function) { + if (!aot_free_frame_per_function_frame_for_aot_func(comp_ctx, + func_ctx)) { + goto fail; + } + } + else if (!comp_ctx->call_stack_features.frame_per_function) { + if (comp_ctx->aux_stack_frame_type + != AOT_STACK_FRAME_TYPE_STANDARD) { + aot_set_last_error("unsupported mode"); + /* stop here, as the frame allocation path above does */ + goto fail; + } + if (!free_frame_for_aot_func(comp_ctx, func_ctx)) { + goto fail; + } + } } +#endif /* Insert suspend check point */ if (comp_ctx->enable_thread_mgr) { @@ -2439,7 +2473,8 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, goto fail; } - if (comp_ctx->enable_aux_stack_frame) { + if (comp_ctx->aux_stack_frame_type 
+ && !comp_ctx->call_stack_features.frame_per_function) { #if WASM_ENABLE_AOT_STACK_FRAME != 0 /* TODO: use current frame instead of allocating new frame for WASM_OP_RETURN_CALL_INDIRECT */ @@ -2508,7 +2541,13 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* Translate call import block */ LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import); - if (comp_ctx->enable_aux_stack_frame + if (comp_ctx->aot_frame && comp_ctx->call_stack_features.frame_per_function + && !aot_alloc_frame_per_function_frame_for_aot_func(comp_ctx, func_ctx, + func_idx)) { + goto fail; + } + + if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD && !commit_params_to_frame_of_import_func(comp_ctx, func_ctx, func_type, param_values + 1)) { goto fail; @@ -2545,6 +2584,12 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, && !check_call_return(comp_ctx, func_ctx, res)) goto fail; + if (comp_ctx->aot_frame && comp_ctx->call_stack_features.frame_per_function + && !aot_free_frame_per_function_frame_for_aot_func(comp_ctx, + func_ctx)) { + goto fail; + } + block_curr = LLVMGetInsertBlock(comp_ctx->builder); for (i = 0; i < func_result_count; i++) { LLVMAddIncoming(result_phis[i], &value_rets[i], &block_curr, 1); @@ -2629,7 +2674,8 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, PUSH(result_phis[i], func_type->types[func_param_count + i]); } - if (comp_ctx->enable_aux_stack_frame) { + if (comp_ctx->aux_stack_frame_type + && !comp_ctx->call_stack_features.frame_per_function) { #if WASM_ENABLE_AOT_STACK_FRAME != 0 if (!free_frame_for_aot_func(comp_ctx, func_ctx)) goto fail; @@ -2936,7 +2982,8 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, goto fail; } - if (comp_ctx->enable_aux_stack_frame) { + if (comp_ctx->aux_stack_frame_type + && !comp_ctx->call_stack_features.frame_per_function) { #if WASM_ENABLE_AOT_STACK_FRAME != 0 /* TODO: use current 
frame instead of allocating new frame for WASM_OP_RETURN_CALL_REF */ @@ -3005,7 +3052,7 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* Translate call import block */ LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import); - if (comp_ctx->enable_aux_stack_frame + if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD && !commit_params_to_frame_of_import_func(comp_ctx, func_ctx, func_type, param_values + 1)) { goto fail; @@ -3133,7 +3180,8 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, PUSH(result_phis[i], func_type->types[func_param_count + i]); } - if (comp_ctx->enable_aux_stack_frame) { + if (comp_ctx->aux_stack_frame_type + && !comp_ctx->call_stack_features.frame_per_function) { #if WASM_ENABLE_AOT_STACK_FRAME != 0 if (!free_frame_for_aot_func(comp_ctx, func_ctx)) goto fail; diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index 3346086a9..820a55e96 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -1771,7 +1771,7 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx, goto fail; } - if (comp_ctx->enable_aux_stack_frame + if (comp_ctx->aux_stack_frame_type && !create_aux_stack_frame(comp_ctx, func_ctx)) { goto fail; } @@ -2577,9 +2577,7 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) if (option->enable_ref_types) comp_ctx->enable_ref_types = true; - if (option->enable_aux_stack_frame) - comp_ctx->enable_aux_stack_frame = true; - + comp_ctx->aux_stack_frame_type = option->aux_stack_frame_type; comp_ctx->call_stack_features = option->call_stack_features; if (option->enable_perf_profiling) diff --git a/core/iwasm/compilation/aot_llvm.h b/core/iwasm/compilation/aot_llvm.h index 65debbaa3..43212e502 100644 --- a/core/iwasm/compilation/aot_llvm.h +++ b/core/iwasm/compilation/aot_llvm.h @@ -410,7 +410,7 @@ typedef struct AOTCompContext { bool 
enable_aux_stack_check; /* Generate auxiliary stack frame */ - bool enable_aux_stack_frame; + AOTStackFrameType aux_stack_frame_type; /* Auxiliary call stack features */ AOTCallStackFeatures call_stack_features; diff --git a/core/iwasm/compilation/aot_stack_frame.h b/core/iwasm/compilation/aot_stack_frame.h new file mode 100644 index 000000000..6155ee6e9 --- /dev/null +++ b/core/iwasm/compilation/aot_stack_frame.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2024 Amazon Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#ifndef _AOT_STACK_FRAME_H_ +#define _AOT_STACK_FRAME_H_ + +#include "platform_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + /* Index of current function (imported and non-imported) */ + uint32 func_index; + + /* Instruction pointer: offset to the bytecode array */ + uint32 ip_offset; +} AOTTinyFrame; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/core/iwasm/compilation/aot_stack_frame_comp.c b/core/iwasm/compilation/aot_stack_frame_comp.c new file mode 100644 index 000000000..342dfe806 --- /dev/null +++ b/core/iwasm/compilation/aot_stack_frame_comp.c @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2024 Amazon Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ +#include "aot_stack_frame_comp.h" +#include "aot_emit_exception.h" + +#define ADD_IN_BOUNDS_GEP(variable, type, pointer, indices, num_indices) \ + do { \ + if (!(variable = \ + LLVMBuildInBoundsGEP2(comp_ctx->builder, type, pointer, \ + indices, num_indices, #variable))) { \ + aot_set_last_error("llvm build in bounds gep failed"); \ + return false; \ + } \ + } while (0) + +#define ADD_STORE(value, pointer) \ + do { \ + if (!LLVMBuildStore(comp_ctx->builder, value, pointer)) { \ + aot_set_last_error("llvm build store failed"); \ + return false; \ + } \ + } while (0) + +#define ADD_LOAD(value, type, pointer) \ + do { \ + if (!(value = \ + LLVMBuildLoad2(comp_ctx->builder, type, pointer, #value))) { \ + aot_set_last_error("llvm build load failed"); \ + return false; \ + } \ + } while (0) + +static bool +aot_alloc_tiny_frame_for_aot_func(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMValueRef func_index) +{ + LLVMValueRef wasm_stack_top_ptr = func_ctx->wasm_stack_top_ptr, + wasm_stack_top_bound = func_ctx->wasm_stack_top_bound, + wasm_stack_top, cmp; + LLVMBasicBlockRef check_wasm_stack_succ; + LLVMValueRef offset; + + ADD_LOAD(wasm_stack_top, INT8_PTR_TYPE, wasm_stack_top_ptr); + + if (comp_ctx->call_stack_features.bounds_checks) { + if (!(check_wasm_stack_succ = LLVMAppendBasicBlockInContext( + comp_ctx->context, func_ctx->func, + "check_wasm_stack_succ"))) { + aot_set_last_error("llvm add basic block failed."); + return false; + } + + LLVMMoveBasicBlockAfter(check_wasm_stack_succ, + LLVMGetInsertBlock(comp_ctx->builder)); + + if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, wasm_stack_top, + wasm_stack_top_bound, "cmp"))) { + aot_set_last_error("llvm build icmp failed"); + return false; + } + + if (!(aot_emit_exception(comp_ctx, func_ctx, + EXCE_OPERAND_STACK_OVERFLOW, true, cmp, + check_wasm_stack_succ))) { + return false; + } + } + + /* Save the func_idx on the top of the stack 
*/ + ADD_STORE(func_index, wasm_stack_top); + + /* increment the stack pointer */ + INT_CONST(offset, sizeof(AOTTinyFrame), I32_TYPE, true); + ADD_IN_BOUNDS_GEP(wasm_stack_top, INT8_TYPE, wasm_stack_top, &offset, 1); + ADD_STORE(wasm_stack_top, wasm_stack_top_ptr); + + return true; +} + +static bool +aot_free_tiny_frame_for_aot_func(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + LLVMValueRef wasm_stack_top_ptr = func_ctx->wasm_stack_top_ptr, + wasm_stack_top; + LLVMValueRef offset; + + ADD_LOAD(wasm_stack_top, INT8_PTR_TYPE, wasm_stack_top_ptr); + /* negate as signed: -sizeof() is a large unsigned value, not -8 */ + INT_CONST(offset, -(int32)sizeof(AOTTinyFrame), + comp_ctx->pointer_size == 8 ? I64_TYPE : I32_TYPE, true); + ADD_IN_BOUNDS_GEP(wasm_stack_top, INT8_TYPE, wasm_stack_top, &offset, 1); + ADD_STORE(wasm_stack_top, wasm_stack_top_ptr); + + return true; +} + +bool +aot_tiny_frame_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + LLVMValueRef ip_value) +{ + LLVMValueRef wasm_stack_top_ptr = func_ctx->wasm_stack_top_ptr, + wasm_stack_top; + LLVMValueRef offset, ip_addr; + + bh_assert(ip_value); + + ADD_LOAD(wasm_stack_top, INT8_PTR_TYPE, wasm_stack_top_ptr); + + INT_CONST(offset, -4, comp_ctx->pointer_size == 8 ? 
I64_TYPE : I32_TYPE, + true); + ADD_IN_BOUNDS_GEP(ip_addr, INT8_TYPE, wasm_stack_top, &offset, 1); + + ADD_STORE(ip_value, ip_addr); + + return true; +} + +bool +aot_alloc_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMValueRef func_index) +{ + switch (comp_ctx->aux_stack_frame_type) { + case AOT_STACK_FRAME_TYPE_TINY: + return aot_alloc_tiny_frame_for_aot_func(comp_ctx, func_ctx, + func_index); + default: + aot_set_last_error("unsupported mode"); + return false; + } +} + +bool +aot_free_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + switch (comp_ctx->aux_stack_frame_type) { + case AOT_STACK_FRAME_TYPE_TINY: + return aot_free_tiny_frame_for_aot_func(comp_ctx, func_ctx); + default: + aot_set_last_error("unsupported mode"); + return false; + } +} diff --git a/core/iwasm/compilation/aot_stack_frame_comp.h b/core/iwasm/compilation/aot_stack_frame_comp.h new file mode 100644 index 000000000..7980b8c08 --- /dev/null +++ b/core/iwasm/compilation/aot_stack_frame_comp.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2024 Amazon Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _AOT_STACK_FRAME_COMP_H_
+#define _AOT_STACK_FRAME_COMP_H_
+
+#include "aot_stack_frame.h"
+#include "aot_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Emit a frame push; fails with unsupported mode unless frame type is TINY. */
+bool
+aot_alloc_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx,
+                                                AOTFuncContext *func_ctx,
+                                                LLVMValueRef func_index);
+/* Emit the matching frame pop; same frame type restriction as the push. */
+bool
+aot_free_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx,
+                                               AOTFuncContext *func_ctx);
+/* Emit a store of ip_value to the address 4 bytes below the wasm stack top. */
+bool
+aot_tiny_frame_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
+                             LLVMValueRef ip_value);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/core/iwasm/include/aot_comp_option.h b/core/iwasm/include/aot_comp_option.h
index 4ab2e6ab6..67ec81cd3 100644
--- a/core/iwasm/include/aot_comp_option.h
+++ b/core/iwasm/include/aot_comp_option.h
@@ -21,8 +21,24 @@ typedef struct {
 
     /* Enables or disables parameters, locals and stack operands. */
     bool values;
+
+    /* If enabled, stack frame is generated at the beginning of each
+     * function (frame-per-function mode). Otherwise, stack frame is
+     * generated before each call of a function (frame-per-call mode). */
+    bool frame_per_function;
 } AOTCallStackFeatures;
 
+void /* NOTE(review): body not in this chunk; callers used memset(.., 1, ..) */
+aot_call_stack_features_init_default(AOTCallStackFeatures *features);
+/* Stack frame layout to generate; see AOTCompOption.aux_stack_frame_type. */
+typedef enum {
+    AOT_STACK_FRAME_OFF = 0, /* NOTE(review): presumably no frames - confirm */
+    /* Use a small stack frame data structure (AOTTinyFrame) */
+    AOT_STACK_FRAME_TYPE_TINY,
+    /* Use a regular stack frame data structure (AOTFrame) */
+    AOT_STACK_FRAME_TYPE_STANDARD,
+} AOTStackFrameType;
+
 typedef struct AOTCompOption {
     bool is_jit_mode;
     bool is_indirect_mode;
@@ -38,7 +54,7 @@ typedef struct AOTCompOption {
     bool enable_ref_types;
     bool enable_gc;
     bool enable_aux_stack_check;
-    bool enable_aux_stack_frame;
+    AOTStackFrameType aux_stack_frame_type;
     AOTCallStackFeatures call_stack_features;
     bool enable_perf_profiling;
     bool enable_memory_profiling;
diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c
index 092e0d152..ed85bb789 100644
--- a/core/iwasm/interpreter/wasm_loader.c
+++ b/core/iwasm/interpreter/wasm_loader.c
@@ -5406,8 +5406,8 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
     option.enable_aux_stack_check = true;
 #if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0 \
     || WASM_ENABLE_AOT_STACK_FRAME != 0
-    option.enable_aux_stack_frame = true;
-    memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures));
+    option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
+    aot_call_stack_features_init_default(&option.call_stack_features);
 #endif
 #if WASM_ENABLE_PERF_PROFILING != 0
     option.enable_perf_profiling = true;
diff --git a/core/iwasm/interpreter/wasm_mini_loader.c b/core/iwasm/interpreter/wasm_mini_loader.c
index a21f4490f..34f4a1831 100644
--- a/core/iwasm/interpreter/wasm_mini_loader.c
+++ b/core/iwasm/interpreter/wasm_mini_loader.c
@@ -2148,8 +2148,8 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
     option.enable_aux_stack_check = true;
 #if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0 \
     || WASM_ENABLE_AOT_STACK_FRAME != 0
-    option.enable_aux_stack_frame = true;
-    memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures));
+    option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
+    aot_call_stack_features_init_default(&option.call_stack_features);
 #endif
 #if WASM_ENABLE_PERF_PROFILING != 0
     option.enable_perf_profiling = true;
diff --git a/wamr-compiler/main.c b/wamr-compiler/main.c
index 3c7ef1f4d..53c75c84e 100644
--- a/wamr-compiler/main.c
+++ b/wamr-compiler/main.c
@@ -307,6 +307,13 @@ finish:
     return ret;
 }
 
+static bool
+can_enable_tiny_frame(const AOTCompOption *opt)
+{ /* true only if the values feature, GC and perf profiling are all off */
+    return !opt->call_stack_features.values && !opt->enable_gc
+           && !opt->enable_perf_profiling;
+}
+
 static uint32
 resolve_segue_flags(char *str_flags)
 {
@@ -403,9 +410,7 @@ main(int argc, char *argv[])
     option.enable_bulk_memory = true;
     option.enable_ref_types = true;
     option.enable_gc = false;
-
-    /* Set all the features to true by default */
-    memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures));
+    aot_call_stack_features_init_default(&option.call_stack_features);
 
     /* Process options */
     for (argc--, argv++; argc > 0 && argv[0][0] == '-'; argc--, argv++) {
@@ -519,7 +524,7 @@ main(int argc, char *argv[])
             option.enable_aux_stack_check = false;
         }
         else if (!strcmp(argv[0], "--enable-dump-call-stack")) {
-            option.enable_aux_stack_frame = true;
+            option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
         }
         else if (!strncmp(argv[0], "--call-stack-features=", 22)) {
             /* Reset all the features, only enable the user-defined ones */
@@ -535,7 +540,7 @@ main(int argc, char *argv[])
             }
         }
         else if (!strcmp(argv[0], "--enable-perf-profiling")) {
-            option.enable_aux_stack_frame = true;
+            option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
             option.enable_perf_profiling = true;
         }
         else if (!strcmp(argv[0], "--enable-memory-profiling")) {
@@ -550,7 +555,7 @@ main(int argc, char *argv[])
             option.is_indirect_mode = true;
         }
         else if (!strcmp(argv[0], "--enable-gc")) {
-            option.enable_aux_stack_frame = true;
+            option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
             option.enable_gc = true;
         }
         else if (!strcmp(argv[0], "--disable-llvm-intrinsics")) {
@@ -652,6 +657,14 @@ main(int argc, char *argv[])
     if (!use_dummy_wasm && (argc == 0 || !out_file_name))
         PRINT_HELP_AND_EXIT();
 
+    if (option.aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
+        && can_enable_tiny_frame(&option)) { /* switch STANDARD to TINY */
+        LOG_VERBOSE("Use tiny frame mode for stack frames");
+        option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_TINY;
+        /* for now we only enable frame per function for a TINY frame mode */
+        option.call_stack_features.frame_per_function = true;
+    }
+
     if (!size_level_set) {
         /**
          * Set opt level to 1 by default for Windows and MacOS as