AOT call stack optimizations (#3773)

- Implement TINY / STANDARD frame modes - TINY mode is only able to keep track of the IP
  and func idx, while STANDARD mode provides more capabilities (parameters, stack pointer etc.).
- Implement FRAME_PER_FUNCTION / FRAME_PER_CALL modes - frame per function adds
  code at the beginning and at the end of each function for allocating / deallocating stack frame,
  whereas in per-call mode the frame is allocated before each call. The exception is call to
  the imported function, where frame-per-function mode also allocates the stack before the
  `call` instruction (as it can't instrument the imported function).

At the moment TINY + FRAME_PER_FUNCTION is automatically enabled in case GC and perf
profiling are disabled and `values` call stack feature is not requested. In all the other cases
STANDARD + FRAME_PER_CALL is used.

STANDARD + FRAME_PER_FUNCTION and TINY + FRAME_PER_CALL are currently not
implemented but possible, and might be enabled in the future.

ps. https://github.com/bytecodealliance/wasm-micro-runtime/issues/3758
This commit is contained in:
Marcin Kolny 2024-09-10 02:05:23 +01:00 committed by GitHub
parent 0599351262
commit cbc2078898
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 590 additions and 85 deletions

View File

@ -597,6 +597,10 @@ load_target_info_section(const uint8 *buf, const uint8 *buf_end,
return false;
}
#if WASM_ENABLE_DUMP_CALL_STACK != 0
module->feature_flags = target_info.feature_flags;
#endif
/* Finally, check feature flags */
return check_feature_flags(error_buf, error_buf_size,
target_info.feature_flags);

View File

@ -4,6 +4,7 @@
*/
#include "aot_runtime.h"
#include "../compilation/aot_stack_frame.h"
#include "bh_log.h"
#include "mem_alloc.h"
#include "../common/wasm_runtime_common.h"
@ -72,6 +73,10 @@ bh_static_assert(offsetof(AOTFrame, sp) == sizeof(uintptr_t) * 5);
bh_static_assert(offsetof(AOTFrame, frame_ref) == sizeof(uintptr_t) * 6);
bh_static_assert(offsetof(AOTFrame, lp) == sizeof(uintptr_t) * 7);
bh_static_assert(offsetof(AOTTinyFrame, func_index) == sizeof(uint32) * 0);
bh_static_assert(offsetof(AOTTinyFrame, ip_offset) == sizeof(uint32) * 1);
bh_static_assert(sizeof(AOTTinyFrame) == sizeof(uint32) * 2);
static void
set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
{
@ -110,6 +115,55 @@ runtime_malloc(uint64 size, char *error_buf, uint32 error_buf_size)
return mem;
}
#if WASM_ENABLE_AOT_STACK_FRAME != 0
/* Report whether the AOT module behind exec_env carries the
   WASM_FEATURE_TINY_STACK_FRAME bit in its feature flags. */
static bool
is_tiny_frame(WASMExecEnv *exec_env)
{
    AOTModuleInstance *inst = (AOTModuleInstance *)exec_env->module_inst;
    AOTModule *aot_module = (AOTModule *)inst->module;

    return (aot_module->feature_flags & WASM_FEATURE_TINY_STACK_FRAME) != 0;
}
/* Report whether the AOT module behind exec_env carries the
   WASM_FEATURE_FRAME_PER_FUNCTION bit in its feature flags. */
static bool
is_frame_per_function(WASMExecEnv *exec_env)
{
    AOTModuleInstance *inst = (AOTModuleInstance *)exec_env->module_inst;
    AOTModule *aot_module = (AOTModule *)inst->module;

    return (aot_module->feature_flags & WASM_FEATURE_FRAME_PER_FUNCTION) != 0;
}
/* Return the innermost frame of exec_env, or NULL when there is none.
   Standard mode: the frame recorded in exec_env->cur_frame.
   Tiny mode: the AOTTinyFrame that ends at wasm_stack.top, provided the
   stack top lies above the stack bottom. */
static void *
get_top_frame(WASMExecEnv *exec_env)
{
    if (!is_tiny_frame(exec_env))
        return exec_env->cur_frame;

    if (exec_env->wasm_stack.top <= exec_env->wasm_stack.bottom)
        return NULL;

    return exec_env->wasm_stack.top - sizeof(AOTTinyFrame);
}
/* Return the caller's frame of cur_frame (which must not be NULL), or
   NULL when cur_frame is the outermost one.
   Standard mode: follow the prev_frame link of the AOTFrame.
   Tiny mode: step back by one AOTTinyFrame, unless cur_frame already
   sits at the stack bottom. */
static void *
get_prev_frame(WASMExecEnv *exec_env, void *cur_frame)
{
    bh_assert(cur_frame);

    if (!is_tiny_frame(exec_env))
        return ((AOTFrame *)cur_frame)->prev_frame;

    if ((uint8 *)cur_frame == exec_env->wasm_stack.bottom)
        return NULL;

    return (AOTTinyFrame *)cur_frame - 1;
}
#endif
static bool
check_global_init_expr(const AOTModule *module, uint32 global_index,
char *error_buf, uint32 error_buf_size)
@ -2265,7 +2319,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
uint32 ext_ret_cell = wasm_get_cell_num(ext_ret_types, ext_ret_count);
uint64 size;
#if WASM_ENABLE_AOT_STACK_FRAME != 0
struct WASMInterpFrame *prev_frame = exec_env->cur_frame;
void *prev_frame = get_top_frame(exec_env);
#endif
/* Allocate memory all arguments */
@ -2296,7 +2350,8 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
}
#if WASM_ENABLE_AOT_STACK_FRAME != 0
if (!aot_alloc_frame(exec_env, function->func_index)) {
if (!is_frame_per_function(exec_env)
&& !aot_alloc_frame(exec_env, function->func_index)) {
if (argv1 != argv1_buf)
wasm_runtime_free(argv1);
return false;
@ -2324,7 +2379,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
/* Free all frames allocated, note that some frames
may be allocated in AOT code and haven't been
freed if exception occurred */
while (exec_env->cur_frame != prev_frame)
while (get_top_frame(exec_env) != prev_frame)
aot_free_frame(exec_env);
#endif
if (!ret) {
@ -2367,9 +2422,12 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
}
else {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
struct WASMInterpFrame *prev_frame = exec_env->cur_frame;
if (!aot_alloc_frame(exec_env, function->func_index)) {
void *prev_frame = get_top_frame(exec_env);
/* Only allocate frame for frame-per-call mode; in the
frame-per-function mode the frame is allocated at the
beginning of the function. */
if (!is_frame_per_function(exec_env)
&& !aot_alloc_frame(exec_env, function->func_index)) {
return false;
}
#endif
@ -2394,7 +2452,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
/* Free all frames allocated, note that some frames
may be allocated in AOT code and haven't been
freed if exception occurred */
while (exec_env->cur_frame != prev_frame)
while (get_top_frame(exec_env) != prev_frame)
aot_free_frame(exec_env);
#endif
@ -2880,7 +2938,7 @@ aot_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc,
goto fail;
}
#if WASM_ENABLE_AOT_STACK_FRAME != 0
struct WASMInterpFrame *prev_frame = exec_env->cur_frame;
void *prev_frame = get_top_frame(exec_env);
if (!aot_alloc_frame(exec_env, func_idx)) {
goto fail;
@ -2894,7 +2952,7 @@ aot_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc,
/* Free all frames allocated, note that some frames
may be allocated in AOT code and haven't been
freed if exception occurred */
while (exec_env->cur_frame != prev_frame)
while (get_top_frame(exec_env) != prev_frame)
aot_free_frame(exec_env);
#endif
}
@ -3622,8 +3680,8 @@ get_func_name_from_index(const AOTModuleInstance *module_inst,
WASM_ENABLE_PERF_PROFILING != 0 */
#if WASM_ENABLE_GC == 0
bool
aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
static bool
aot_alloc_standard_frame(WASMExecEnv *exec_env, uint32 func_index)
{
AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
#if WASM_ENABLE_PERF_PROFILING != 0
@ -3670,8 +3728,8 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
#else /* else of WASM_ENABLE_GC == 0 */
bool
aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
static bool
aot_alloc_standard_frame(WASMExecEnv *exec_env, uint32 func_index)
{
AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
AOTModule *module = (AOTModule *)module_inst->module;
@ -3727,11 +3785,48 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
}
#endif /* end of WASM_ENABLE_GC == 0 */
/*
 * Push one AOTTinyFrame for func_index on the wasm stack of exec_env.
 *
 * The frame is placed at wasm_stack.top and the top is advanced by
 * sizeof(AOTTinyFrame). Returns false, with the
 * "wasm operand stack overflow" exception set on the module instance,
 * when the frame does not fit below wasm_stack.top_boundary.
 */
static bool
aot_alloc_tiny_frame(WASMExecEnv *exec_env, uint32 func_index)
{
    uint8 *stack_top = (uint8 *)exec_env->wasm_stack.top;
    uint8 *stack_limit = (uint8 *)exec_env->wasm_stack.top_boundary;
    AOTTinyFrame *new_frame = (AOTTinyFrame *)stack_top;

    /* The whole frame must fit below the boundary, not only its first
       byte: rejecting just `top > boundary` would still let a frame be
       written at (or straddling) the boundary. This also matches the
       check emitted by the AOT compiler, which raises the overflow
       exception as soon as top >= boundary. */
    if (stack_top > stack_limit
        || (uintptr_t)(stack_limit - stack_top) < sizeof(AOTTinyFrame)) {
        aot_set_exception((WASMModuleInstance *)exec_env->module_inst,
                          "wasm operand stack overflow");
        return false;
    }

    new_frame->func_index = func_index;
    /* No instruction pointer has been committed for this frame yet; start
       from 0 so that a call stack dump never reads stale stack memory. */
    new_frame->ip_offset = 0;

    exec_env->wasm_stack.top += sizeof(AOTTinyFrame);
    return true;
}
/*
 * Allocate a call-stack frame for func_index from the runtime side.
 *
 * In frame-per-function mode nothing is allocated here for non-imported
 * functions and true is returned right away. Otherwise the request is
 * forwarded to the tiny or the standard frame allocator, depending on
 * the module's frame type, and that allocator's result is returned.
 */
bool
aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
{
    AOTModuleInstance *inst = (AOTModuleInstance *)exec_env->module_inst;
    AOTModule *aot_module = (AOTModule *)inst->module;

    /* in frame-per-function mode the frame is allocated at the
       beginning of each function, so the runtime only needs to
       allocate the frame for imported functions */
    if (is_frame_per_function(exec_env)
        && func_index >= aot_module->import_func_count)
        return true;

    return is_tiny_frame(exec_env)
               ? aot_alloc_tiny_frame(exec_env, func_index)
               : aot_alloc_standard_frame(exec_env, func_index);
}
static inline void
aot_free_frame_internal(WASMExecEnv *exec_env)
aot_free_standard_frame(WASMExecEnv *exec_env)
{
AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame;
AOTFrame *prev_frame = cur_frame->prev_frame;
AOTFrame *prev_frame = (AOTFrame *)cur_frame->prev_frame;
#if WASM_ENABLE_PERF_PROFILING != 0
uint64 time_elapsed =
@ -3751,13 +3846,24 @@ aot_free_frame_internal(WASMExecEnv *exec_env)
exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame;
}
/* Pop the topmost tiny frame: the new stack top is whatever
   get_prev_frame() reports for the current stack top. */
static inline void
aot_free_tiny_frame(WASMExecEnv *exec_env)
{
    void *stack_top = exec_env->wasm_stack.top;

    exec_env->wasm_stack.top = get_prev_frame(exec_env, stack_top);
}
void
aot_free_frame(WASMExecEnv *exec_env)
{
aot_free_frame_internal(exec_env);
if (is_tiny_frame(exec_env)) {
aot_free_tiny_frame(exec_env);
}
else {
aot_free_standard_frame(exec_env);
}
}
void
aot_frame_update_profile_info(WASMExecEnv *exec_env, bool alloc_frame)
{
@ -3806,14 +3912,13 @@ aot_frame_update_profile_info(WASMExecEnv *exec_env, bool alloc_frame)
bool
aot_create_call_stack(struct WASMExecEnv *exec_env)
{
AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame,
*first_frame = cur_frame;
AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
AOTModule *module = (AOTModule *)module_inst->module;
uint32 n = 0;
while (cur_frame) {
cur_frame = cur_frame->prev_frame;
void *top_frame = get_top_frame(exec_env);
while (top_frame) {
top_frame = get_prev_frame(exec_env, top_frame);
n++;
}
@ -3823,28 +3928,46 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
return false;
}
cur_frame = first_frame;
while (cur_frame) {
top_frame = get_top_frame(exec_env);
while (n-- > 0) {
uint32 func_index, ip_offset;
uint32 *lp = NULL;
#if WASM_ENABLE_GC != 0
uint32 *sp = NULL;
uint8 *frame_ref = NULL;
#endif
if (is_tiny_frame(exec_env)) {
AOTTinyFrame *frame = (AOTTinyFrame *)top_frame;
func_index = (uint32)frame->func_index;
ip_offset = (uint32)frame->ip_offset;
}
else {
AOTFrame *frame = (AOTFrame *)top_frame;
func_index = (uint32)frame->func_index;
ip_offset = (uint32)frame->ip_offset;
lp = frame->lp;
#if WASM_ENABLE_GC != 0
sp = frame->sp;
frame_ref = frame->frame_ref;
#endif
}
WASMCApiFrame frame = { 0 };
uint32 max_local_cell_num, max_stack_cell_num;
uint32 all_cell_num, lp_size;
frame.instance = module_inst;
frame.module_offset = 0;
frame.func_index = (uint32)cur_frame->func_index;
frame.func_offset = (uint32)cur_frame->ip_offset;
frame.func_name_wp = get_func_name_from_index(
module_inst, (uint32)cur_frame->func_index);
frame.func_index = func_index;
frame.func_offset = ip_offset;
frame.func_name_wp = get_func_name_from_index(module_inst, func_index);
if (cur_frame->func_index >= module->import_func_count) {
uint32 aot_func_idx =
(uint32)(cur_frame->func_index - module->import_func_count);
if (func_index >= module->import_func_count) {
uint32 aot_func_idx = func_index - module->import_func_count;
max_local_cell_num = module->max_local_cell_nums[aot_func_idx];
max_stack_cell_num = module->max_stack_cell_nums[aot_func_idx];
}
else {
AOTFuncType *func_type =
module->import_funcs[cur_frame->func_index].func_type;
AOTFuncType *func_type = module->import_funcs[func_index].func_type;
max_local_cell_num =
func_type->param_cell_num > 2 ? func_type->param_cell_num : 2;
max_stack_cell_num = 0;
@ -3856,12 +3979,12 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
#else
lp_size = align_uint(all_cell_num * 5, 4);
#endif
if (lp_size > 0) {
if (lp_size > 0 && !is_tiny_frame(exec_env)) {
if (!(frame.lp = wasm_runtime_malloc(lp_size))) {
destroy_c_api_frames(module_inst->frames);
return false;
}
bh_memcpy_s(frame.lp, lp_size, cur_frame->lp, lp_size);
bh_memcpy_s(frame.lp, lp_size, lp, lp_size);
#if WASM_ENABLE_GC != 0
uint32 local_ref_flags_cell_num =
@ -3869,9 +3992,8 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
.local_ref_flag_cell_num;
uint8 *local_ref_flags =
module->func_local_ref_flags[frame.func_index].local_ref_flags;
frame.sp = frame.lp + (cur_frame->sp - cur_frame->lp);
frame.frame_ref = (uint8 *)frame.lp
+ (cur_frame->frame_ref - (uint8 *)cur_frame->lp);
frame.sp = frame.lp + (sp - lp);
frame.frame_ref = (uint8 *)frame.lp + (frame_ref - (uint8 *)lp);
/* copy local ref flags from AOT module */
bh_memcpy_s(frame.frame_ref, local_ref_flags_cell_num,
local_ref_flags, lp_size);
@ -3885,7 +4007,7 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
return false;
}
cur_frame = cur_frame->prev_frame;
top_frame = get_prev_frame(exec_env, top_frame);
}
return true;

View File

@ -25,12 +25,15 @@ extern "C" {
#define WASM_FEATURE_REF_TYPES (1 << 3)
#define WASM_FEATURE_GARBAGE_COLLECTION (1 << 4)
#define WASM_FEATURE_EXCEPTION_HANDLING (1 << 5)
#define WASM_FEATURE_MEMORY64 (1 << 6)
#define WASM_FEATURE_TINY_STACK_FRAME (1 << 6)
#define WASM_FEATURE_MULTI_MEMORY (1 << 7)
#define WASM_FEATURE_DYNAMIC_LINKING (1 << 8)
#define WASM_FEATURE_COMPONENT_MODEL (1 << 9)
#define WASM_FEATURE_RELAXED_SIMD (1 << 10)
#define WASM_FEATURE_FLEXIBLE_VECTORS (1 << 11)
/* Stack frame is created at the beginning of the function,
* and not at the beginning of each function call */
#define WASM_FEATURE_FRAME_PER_FUNCTION (1 << 12)
typedef enum AOTSectionType {
AOT_SECTION_TYPE_TARGET_INFO = 0,
@ -326,6 +329,10 @@ typedef struct AOTModule {
/* `.data` and `.text` sections merged into one large mmaped section */
uint8 *merged_data_text_sections;
uint32 merged_data_text_sections_size;
#if WASM_ENABLE_AOT_STACK_FRAME != 0
uint32 feature_flags;
#endif
} AOTModule;
#define AOTMemoryInstance WASMMemoryInstance

View File

@ -16,6 +16,7 @@
#include "aot_emit_parametric.h"
#include "aot_emit_table.h"
#include "aot_emit_gc.h"
#include "aot_stack_frame_comp.h"
#include "simd/simd_access_lanes.h"
#include "simd/simd_bitmask_extracts.h"
#include "simd/simd_bit_shifts.h"
@ -253,6 +254,13 @@ store_value(AOTCompContext *comp_ctx, LLVMValueRef value, uint8 value_type,
return true;
}
/* Fill *features with the default call stack feature set: every byte of
   the structure is set to 1 (i.e. the flags are switched on), then
   frame_per_function is switched off again. */
void
aot_call_stack_features_init_default(AOTCallStackFeatures *features)
{
    memset(features, 1, sizeof(*features));

    features->frame_per_function = false;
}
bool
aot_frame_store_value(AOTCompContext *comp_ctx, LLVMValueRef value,
uint8 value_type, LLVMValueRef cur_frame, uint32 offset)
@ -573,8 +581,9 @@ aot_gen_commit_values(AOTCompFrame *frame)
return true;
}
bool
aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
static bool
aot_standard_frame_gen_commit_ip(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx,
LLVMValueRef ip_value, bool is_64bit)
{
LLVMValueRef cur_frame = func_ctx->cur_frame;
@ -613,6 +622,23 @@ aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return true;
}
/* Emit the code that commits ip_value to the current frame, picking the
   implementation that matches comp_ctx->aux_stack_frame_type. Any other
   frame type is an error: the last error is set and false is returned. */
bool
aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                  LLVMValueRef ip_value, bool is_64bit)
{
    if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD) {
        return aot_standard_frame_gen_commit_ip(comp_ctx, func_ctx, ip_value,
                                                is_64bit);
    }

    if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_TINY) {
        /* the tiny frame variant does not take is_64bit */
        return aot_tiny_frame_gen_commit_ip(comp_ctx, func_ctx, ip_value);
    }

    aot_set_last_error("unsupported mode when generating commit_ip code");
    return false;
}
bool
aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip)
{
@ -962,6 +988,7 @@ static bool
aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
{
AOTFuncContext *func_ctx = comp_ctx->func_ctxes[func_index];
LLVMValueRef func_index_ref;
uint8 *frame_ip = func_ctx->aot_func->code, opcode, *p_f32, *p_f64;
uint8 *frame_ip_end = frame_ip + func_ctx->aot_func->code_size;
uint8 *param_types = NULL;
@ -984,16 +1011,27 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
LLVMMetadataRef location;
#endif
if (comp_ctx->enable_aux_stack_frame) {
/* Start to translate the opcodes */
LLVMPositionBuilderAtEnd(
comp_ctx->builder,
func_ctx->block_stack.block_list_head->llvm_entry_block);
if (comp_ctx->aux_stack_frame_type
&& comp_ctx->call_stack_features.frame_per_function) {
INT_CONST(func_index_ref,
func_index + comp_ctx->comp_data->import_func_count, I32_TYPE,
true);
if (!aot_alloc_frame_per_function_frame_for_aot_func(comp_ctx, func_ctx,
func_index_ref)) {
return false;
}
}
if (comp_ctx->aux_stack_frame_type) {
if (!init_comp_frame(comp_ctx, func_ctx, func_index)) {
return false;
}
}
/* Start to translate the opcodes */
LLVMPositionBuilderAtEnd(
comp_ctx->builder,
func_ctx->block_stack.block_list_head->llvm_entry_block);
while (frame_ip < frame_ip_end) {
opcode = *frame_ip++;

View File

@ -661,6 +661,15 @@ set_local_gc_ref(AOTCompFrame *frame, int n, LLVMValueRef value, uint8 ref_type)
#define F64_CONST(v) LLVMConstReal(F64_TYPE, v)
#define I8_CONST(v) LLVMConstInt(INT8_TYPE, v, true)
/* Build an LLVM integer constant of `type` from `value` (passed straight
 * to LLVMConstInt together with `is_signed`) and assign it to `variable`.
 * If LLVMConstInt yields NULL, the last error is set and the ENCLOSING
 * function returns false, so this macro may only be used inside functions
 * returning bool. */
#define INT_CONST(variable, value, type, is_signed) \
do { \
variable = LLVMConstInt(type, value, is_signed); \
if (!variable) { \
aot_set_last_error("llvm build const failed"); \
return false; \
} \
} while (0)
#define LLVM_CONST(name) (comp_ctx->llvm_consts.name)
#define I1_ZERO LLVM_CONST(i1_zero)
#define I1_ONE LLVM_CONST(i1_one)

View File

@ -4433,6 +4433,12 @@ aot_obj_data_create(AOTCompContext *comp_ctx)
if (comp_ctx->enable_gc) {
obj_data->target_info.feature_flags |= WASM_FEATURE_GARBAGE_COLLECTION;
}
if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_TINY) {
obj_data->target_info.feature_flags |= WASM_FEATURE_TINY_STACK_FRAME;
}
if (comp_ctx->call_stack_features.frame_per_function) {
obj_data->target_info.feature_flags |= WASM_FEATURE_FRAME_PER_FUNCTION;
}
bh_print_time("Begin to resolve object file info");

View File

@ -6,6 +6,7 @@
#include "aot_emit_control.h"
#include "aot_compiler.h"
#include "aot_emit_exception.h"
#include "aot_stack_frame_comp.h"
#if WASM_ENABLE_GC != 0
#include "aot_emit_gc.h"
#endif
@ -45,6 +46,17 @@ format_block_name(char *name, uint32 name_size, uint32 block_index,
aot_set_last_error("add LLVM basic block failed."); \
goto fail; \
} \
if (!strcmp(name, "func_end") && comp_ctx->aux_stack_frame_type \
&& comp_ctx->call_stack_features.frame_per_function) { \
LLVMBasicBlockRef cur_block = \
LLVMGetInsertBlock(comp_ctx->builder); \
SET_BUILDER_POS(new_llvm_block); \
if (!aot_free_frame_per_function_frame_for_aot_func(comp_ctx, \
func_ctx)) { \
goto fail; \
} \
SET_BUILDER_POS(cur_block); \
} \
} while (0)
#define CURR_BLOCK() LLVMGetInsertBlock(comp_ctx->builder)
@ -93,6 +105,11 @@ format_block_name(char *name, uint32 name_size, uint32 block_index,
goto fail; \
} \
SET_BUILDER_POS(block->llvm_end_block); \
LLVMValueRef first_instr = \
get_first_non_phi(block->llvm_end_block); \
if (first_instr) { \
LLVMPositionBuilderBefore(comp_ctx->builder, first_instr); \
} \
for (_i = 0; _i < block->result_count; _i++) { \
if (!(block->result_phis[_i] = LLVMBuildPhi( \
comp_ctx->builder, \
@ -158,6 +175,18 @@ get_target_block(AOTFuncContext *func_ctx, uint32 br_depth)
return block;
}
/* Return the first instruction of block that is not a PHI node, or NULL
   when the block is empty or consists of PHI nodes only. */
LLVMValueRef
get_first_non_phi(LLVMBasicBlockRef block)
{
    LLVMValueRef cur;

    for (cur = LLVMGetFirstInstruction(block); cur;
         cur = LLVMGetNextInstruction(cur)) {
        if (!LLVMIsAPHINode(cur))
            break;
    }

    return cur;
}
static void
clear_frame_locals(AOTCompFrame *aot_frame)
{
@ -1361,6 +1390,13 @@ aot_compile_op_return(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
(*p_frame_ip - 1) - comp_ctx->comp_data->wasm_module->buf_code);
#endif
if (comp_ctx->aux_stack_frame_type
&& comp_ctx->call_stack_features.frame_per_function
&& !aot_free_frame_per_function_frame_for_aot_func(comp_ctx,
func_ctx)) {
return false;
}
if (block_func->result_count) {
/* Store extra result values to function parameters */
for (i = 0; i < block_func->result_count - 1; i++) {

View File

@ -7,6 +7,7 @@
#include "aot_emit_exception.h"
#include "aot_emit_control.h"
#include "aot_emit_table.h"
#include "aot_stack_frame_comp.h"
#include "../aot/aot_runtime.h"
#if WASM_ENABLE_GC != 0
#include "aot_emit_gc.h"
@ -1403,6 +1404,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMValueRef *param_values = NULL, value_ret = NULL, func;
LLVMValueRef import_func_idx, res;
LLVMValueRef ext_ret, ext_ret_ptr, ext_ret_idx;
LLVMValueRef func_idx_ref;
int32 i, j = 0, param_count, result_count, ext_ret_count;
uint64 total_size;
uint8 wasm_ret_type;
@ -1447,12 +1449,28 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return false;
}
if (comp_ctx->enable_aux_stack_frame) {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
if (!alloc_frame_for_aot_func(comp_ctx, func_ctx, func_idx))
if (comp_ctx->aux_stack_frame_type) {
if (func_idx < import_func_count
&& comp_ctx->call_stack_features.frame_per_function) {
INT_CONST(func_idx_ref, func_idx, I32_TYPE, true);
if (!aot_alloc_frame_per_function_frame_for_aot_func(
comp_ctx, func_ctx, func_idx_ref)) {
return false;
#endif
}
}
else if (!comp_ctx->call_stack_features.frame_per_function) {
if (comp_ctx->aux_stack_frame_type
!= AOT_STACK_FRAME_TYPE_STANDARD) {
aot_set_last_error("unsupported mode");
return false;
}
if (!alloc_frame_for_aot_func(comp_ctx, func_ctx, func_idx)) {
return false;
}
}
}
#endif
/* Get param cell number */
param_cell_num = func_type->param_cell_num;
@ -1522,7 +1540,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
}
if (func_idx < import_func_count) {
if (comp_ctx->enable_aux_stack_frame
if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
&& !commit_params_to_frame_of_import_func(
comp_ctx, func_ctx, func_type, param_values + 1)) {
goto fail;
@ -1813,12 +1831,26 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
}
}
if (comp_ctx->enable_aux_stack_frame) {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
if (!free_frame_for_aot_func(comp_ctx, func_ctx))
if (comp_ctx->aux_stack_frame_type) {
if (func_idx < import_func_count
&& comp_ctx->call_stack_features.frame_per_function) {
if (!aot_free_frame_per_function_frame_for_aot_func(comp_ctx,
func_ctx)) {
goto fail;
#endif
}
}
else if (!comp_ctx->call_stack_features.frame_per_function) {
if (comp_ctx->aux_stack_frame_type
!= AOT_STACK_FRAME_TYPE_STANDARD) {
aot_set_last_error("unsupported mode");
}
if (!free_frame_for_aot_func(comp_ctx, func_ctx)) {
goto fail;
}
}
}
#endif
/* Insert suspend check point */
if (comp_ctx->enable_thread_mgr) {
@ -2439,7 +2471,8 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
goto fail;
}
if (comp_ctx->enable_aux_stack_frame) {
if (comp_ctx->aux_stack_frame_type
&& !comp_ctx->call_stack_features.frame_per_function) {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
/* TODO: use current frame instead of allocating new frame
for WASM_OP_RETURN_CALL_INDIRECT */
@ -2508,7 +2541,13 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
/* Translate call import block */
LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import);
if (comp_ctx->enable_aux_stack_frame
if (comp_ctx->aot_frame && comp_ctx->call_stack_features.frame_per_function
&& !aot_alloc_frame_per_function_frame_for_aot_func(comp_ctx, func_ctx,
func_idx)) {
goto fail;
}
if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
&& !commit_params_to_frame_of_import_func(comp_ctx, func_ctx, func_type,
param_values + 1)) {
goto fail;
@ -2545,6 +2584,12 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
&& !check_call_return(comp_ctx, func_ctx, res))
goto fail;
if (comp_ctx->aot_frame && comp_ctx->call_stack_features.frame_per_function
&& !aot_free_frame_per_function_frame_for_aot_func(comp_ctx,
func_ctx)) {
goto fail;
}
block_curr = LLVMGetInsertBlock(comp_ctx->builder);
for (i = 0; i < func_result_count; i++) {
LLVMAddIncoming(result_phis[i], &value_rets[i], &block_curr, 1);
@ -2629,7 +2674,8 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
PUSH(result_phis[i], func_type->types[func_param_count + i]);
}
if (comp_ctx->enable_aux_stack_frame) {
if (comp_ctx->aux_stack_frame_type
&& !comp_ctx->call_stack_features.frame_per_function) {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
if (!free_frame_for_aot_func(comp_ctx, func_ctx))
goto fail;
@ -2936,7 +2982,8 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
goto fail;
}
if (comp_ctx->enable_aux_stack_frame) {
if (comp_ctx->aux_stack_frame_type
&& !comp_ctx->call_stack_features.frame_per_function) {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
/* TODO: use current frame instead of allocating new frame
for WASM_OP_RETURN_CALL_REF */
@ -3005,7 +3052,7 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
/* Translate call import block */
LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import);
if (comp_ctx->enable_aux_stack_frame
if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
&& !commit_params_to_frame_of_import_func(comp_ctx, func_ctx, func_type,
param_values + 1)) {
goto fail;
@ -3133,7 +3180,8 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
PUSH(result_phis[i], func_type->types[func_param_count + i]);
}
if (comp_ctx->enable_aux_stack_frame) {
if (comp_ctx->aux_stack_frame_type
&& !comp_ctx->call_stack_features.frame_per_function) {
#if WASM_ENABLE_AOT_STACK_FRAME != 0
if (!free_frame_for_aot_func(comp_ctx, func_ctx))
goto fail;

View File

@ -1771,7 +1771,7 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx,
goto fail;
}
if (comp_ctx->enable_aux_stack_frame
if (comp_ctx->aux_stack_frame_type
&& !create_aux_stack_frame(comp_ctx, func_ctx)) {
goto fail;
}
@ -2577,9 +2577,7 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
if (option->enable_ref_types)
comp_ctx->enable_ref_types = true;
if (option->enable_aux_stack_frame)
comp_ctx->enable_aux_stack_frame = true;
comp_ctx->aux_stack_frame_type = option->aux_stack_frame_type;
comp_ctx->call_stack_features = option->call_stack_features;
if (option->enable_perf_profiling)

View File

@ -410,7 +410,7 @@ typedef struct AOTCompContext {
bool enable_aux_stack_check;
/* Generate auxiliary stack frame */
bool enable_aux_stack_frame;
AOTStackFrameType aux_stack_frame_type;
/* Auxiliary call stack features */
AOTCallStackFeatures call_stack_features;

View File

@ -0,0 +1,27 @@
/*
* Copyright (C) 2024 Amazon Inc. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#ifndef _AOT_STACK_FRAME_H_
#define _AOT_STACK_FRAME_H_
#include "platform_common.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Minimal call-stack frame used in the TINY frame mode: it only records
 * which function is running and where in its bytecode it currently is. */
typedef struct {
/* Index of the current function.
 * NOTE(review): the code that fills and reads this field appears to use
 * the module-wide function index (imported functions counted first), not
 * an index relative to the non-imported functions - confirm. */
uint32 func_index;
/* Instruction pointer: offset to the bytecode array */
uint32 ip_offset;
} AOTTinyFrame;
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,148 @@
/*
* Copyright (C) 2024 Amazon Inc. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include "aot_stack_frame_comp.h"
#include "aot_emit_exception.h"
/* Emit an in-bounds GEP with LLVMBuildInBoundsGEP2 on comp_ctx->builder
 * and store the resulting value in `variable`; the IR value is named after
 * the `variable` token. Requires a `comp_ctx` in scope. On failure the
 * last error is set and the ENCLOSING function returns false. */
#define ADD_IN_BOUNDS_GEP(variable, type, pointer, indices, num_indices) \
do { \
if (!(variable = \
LLVMBuildInBoundsGEP2(comp_ctx->builder, type, pointer, \
indices, num_indices, #variable))) { \
aot_set_last_error("llvm build in bounds gep failed"); \
return false; \
} \
} while (0)
/* Emit a store of `value` to `pointer` on comp_ctx->builder. Requires a
 * `comp_ctx` in scope. On failure the last error is set and the ENCLOSING
 * function returns false. */
#define ADD_STORE(value, pointer) \
do { \
if (!LLVMBuildStore(comp_ctx->builder, value, pointer)) { \
aot_set_last_error("llvm build store failed"); \
return false; \
} \
} while (0)
/* Emit a load of `type` from `pointer` on comp_ctx->builder and store the
 * loaded value in `value`; the IR value is named after the `value` token.
 * Requires a `comp_ctx` in scope. On failure the last error is set and the
 * ENCLOSING function returns false. */
#define ADD_LOAD(value, type, pointer) \
do { \
if (!(value = \
LLVMBuildLoad2(comp_ctx->builder, type, pointer, #value))) { \
aot_set_last_error("llvm build load failed"); \
return false; \
} \
} while (0)
/*
 * Emit the IR that pushes one AOTTinyFrame on the wasm stack.
 *
 * The emitted code loads the current stack top through
 * func_ctx->wasm_stack_top_ptr, optionally checks it against
 * func_ctx->wasm_stack_top_bound, stores func_index at the old top and
 * writes back the top advanced by sizeof(AOTTinyFrame). Only the function
 * index is written here; the ip_offset part of the frame is left untouched.
 *
 * Returns false (with the last error set by the failing helper) if any IR
 * construction step fails.
 */
static bool
aot_alloc_tiny_frame_for_aot_func(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx,
LLVMValueRef func_index)
{
LLVMValueRef wasm_stack_top_ptr = func_ctx->wasm_stack_top_ptr,
wasm_stack_top_bound = func_ctx->wasm_stack_top_bound,
wasm_stack_top, cmp;
LLVMBasicBlockRef check_wasm_stack_succ;
LLVMValueRef offset;
/* Load the current stack top pointer */
ADD_LOAD(wasm_stack_top, INT8_PTR_TYPE, wasm_stack_top_ptr);
/* The overflow check is only emitted when the bounds_checks call stack
   feature is enabled */
if (comp_ctx->call_stack_features.bounds_checks) {
if (!(check_wasm_stack_succ = LLVMAppendBasicBlockInContext(
comp_ctx->context, func_ctx->func,
"check_wasm_stack_succ"))) {
aot_set_last_error("llvm add basic block failed.");
return false;
}
/* Keep the continuation block right after the block being built */
LLVMMoveBasicBlockAfter(check_wasm_stack_succ,
LLVMGetInsertBlock(comp_ctx->builder));
/* Overflow condition: top >= bound (unsigned compare) */
if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, wasm_stack_top,
wasm_stack_top_bound, "cmp"))) {
aot_set_last_error("llvm build icmp failed");
return false;
}
/* Conditional EXCE_OPERAND_STACK_OVERFLOW on cmp; presumably execution
   continues in check_wasm_stack_succ when cmp is false - see
   aot_emit_exception */
if (!(aot_emit_exception(comp_ctx, func_ctx,
EXCE_OPERAND_STACK_OVERFLOW, true, cmp,
check_wasm_stack_succ))) {
return false;
}
}
/* Save the func_idx on the top of the stack */
ADD_STORE(func_index, wasm_stack_top);
/* increment the stack pointer */
INT_CONST(offset, sizeof(AOTTinyFrame), I32_TYPE, true);
ADD_IN_BOUNDS_GEP(wasm_stack_top, INT8_TYPE, wasm_stack_top, &offset, 1);
ADD_STORE(wasm_stack_top, wasm_stack_top_ptr);
return true;
}
/*
 * Emit the IR that pops one AOTTinyFrame from the wasm stack: the stack
 * top is loaded through func_ctx->wasm_stack_top_ptr, moved back by
 * sizeof(AOTTinyFrame) bytes and written back.
 *
 * Returns false (with the last error set by the failing helper) if any IR
 * construction step fails.
 */
static bool
aot_free_tiny_frame_for_aot_func(AOTCompContext *comp_ctx,
                                 AOTFuncContext *func_ctx)
{
    LLVMValueRef wasm_stack_top_ptr = func_ctx->wasm_stack_top_ptr,
                 wasm_stack_top;
    LLVMValueRef offset;

    ADD_LOAD(wasm_stack_top, INT8_PTR_TYPE, wasm_stack_top_ptr);

    /* Negate in a signed 64-bit type: sizeof() yields an unsigned size_t,
       so a plain `-sizeof(...)` wraps around in size_t first. Where
       size_t is 32 bits wide that value is then zero-extended when passed
       on as a 64-bit constant, giving a huge positive I64 offset instead
       of -sizeof(AOTTinyFrame). */
    INT_CONST(offset, -(long long)sizeof(AOTTinyFrame),
              comp_ctx->pointer_size == 8 ? I64_TYPE : I32_TYPE, true);
    ADD_IN_BOUNDS_GEP(wasm_stack_top, INT8_TYPE, wasm_stack_top, &offset, 1);
    ADD_STORE(wasm_stack_top, wasm_stack_top_ptr);

    return true;
}
/*
 * Emit the IR that records ip_value in the topmost tiny frame.
 *
 * The emitted code loads the stack top through
 * func_ctx->wasm_stack_top_ptr and stores ip_value 4 bytes below it. With
 * the AOTTinyFrame layout (two uint32 fields, ip_offset second) this is
 * the ip_offset field of the frame that ends at the stack top.
 *
 * NOTE(review): the width of the store follows the LLVM type of ip_value;
 * presumably it has to be a 32-bit value to stay inside ip_offset -
 * confirm against the callers.
 *
 * Returns false (with the last error set by the failing helper) if any IR
 * construction step fails.
 */
bool
aot_tiny_frame_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMValueRef ip_value)
{
LLVMValueRef wasm_stack_top_ptr = func_ctx->wasm_stack_top_ptr,
wasm_stack_top;
LLVMValueRef offset, ip_addr;
bh_assert(ip_value);
ADD_LOAD(wasm_stack_top, INT8_PTR_TYPE, wasm_stack_top_ptr);
/* Byte offset of the slot relative to the stack top; the index type
   follows the target pointer size */
INT_CONST(offset, -4, comp_ctx->pointer_size == 8 ? I64_TYPE : I32_TYPE,
true);
ADD_IN_BOUNDS_GEP(ip_addr, INT8_TYPE, wasm_stack_top, &offset, 1);
ADD_STORE(ip_value, ip_addr);
return true;
}
/* Emit the frame allocation code used in frame-per-function mode for the
   function identified by func_index. Only the TINY frame type is handled;
   for any other frame type the last error is set to "unsupported mode"
   and false is returned. */
bool
aot_alloc_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx,
                                                AOTFuncContext *func_ctx,
                                                LLVMValueRef func_index)
{
    if (comp_ctx->aux_stack_frame_type != AOT_STACK_FRAME_TYPE_TINY) {
        aot_set_last_error("unsupported mode");
        return false;
    }

    return aot_alloc_tiny_frame_for_aot_func(comp_ctx, func_ctx, func_index);
}
/* Emit the frame release code used in frame-per-function mode. Only the
   TINY frame type is handled; for any other frame type the last error is
   set to "unsupported mode" and false is returned. */
bool
aot_free_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx,
                                               AOTFuncContext *func_ctx)
{
    if (comp_ctx->aux_stack_frame_type != AOT_STACK_FRAME_TYPE_TINY) {
        aot_set_last_error("unsupported mode");
        return false;
    }

    return aot_free_tiny_frame_for_aot_func(comp_ctx, func_ctx);
}

View File

@ -0,0 +1,33 @@
/*
* Copyright (C) 2024 Amazon Inc. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#ifndef _AOT_STACK_FRAME_COMP_H_
#define _AOT_STACK_FRAME_COMP_H_
#include "aot_stack_frame.h"
#include "aot_compiler.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Emit the code that allocates a stack frame in frame-per-function mode.
 * Dispatches on comp_ctx->aux_stack_frame_type; currently only the TINY
 * frame type is implemented.
 *
 * @param comp_ctx the compilation context
 * @param func_ctx the context of the function being compiled
 * @param func_index LLVM value holding the function index to store in
 *        the new frame
 *
 * @return true on success, false otherwise (the last error is set)
 */
bool
aot_alloc_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx,
LLVMValueRef func_index);
/**
 * Emit the code that releases the stack frame in frame-per-function mode.
 * Dispatches on comp_ctx->aux_stack_frame_type; currently only the TINY
 * frame type is implemented.
 *
 * @param comp_ctx the compilation context
 * @param func_ctx the context of the function being compiled
 *
 * @return true on success, false otherwise (the last error is set)
 */
bool
aot_free_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx);
/**
 * Emit the code that stores ip_value into the topmost tiny frame.
 *
 * @param comp_ctx the compilation context
 * @param func_ctx the context of the function being compiled
 * @param ip_value LLVM value of the instruction pointer offset to store;
 *        must not be NULL
 *
 * @return true on success, false otherwise (the last error is set)
 */
bool
aot_tiny_frame_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMValueRef ip_value);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -21,8 +21,24 @@ typedef struct {
/* Enables or disables parameters, locals and stack operands. */
bool values;
/* If enabled, stack frame is generated at the beginning of each
* function (frame-per-function mode). Otherwise, stack frame is
* generated before each call of a function (frame-per-call mode). */
bool frame_per_function;
} AOTCallStackFeatures;
/**
 * Initialize the call stack features with their defaults: all features
 * are enabled except frame_per_function, which is disabled.
 *
 * @param features the structure to initialize
 */
void
aot_call_stack_features_init_default(AOTCallStackFeatures *features);
/* Kind of auxiliary stack frame generated by the AOT compiler. The OFF
 * value is 0, so the type can be tested directly as a condition to tell
 * whether stack frames are generated at all. */
typedef enum {
/* Do not generate auxiliary stack frames */
AOT_STACK_FRAME_OFF = 0,
/* Use a small stack frame data structure (AOTTinyFrame) */
AOT_STACK_FRAME_TYPE_TINY,
/* Use a regular stack frame data structure (AOTFrame) */
AOT_STACK_FRAME_TYPE_STANDARD,
} AOTStackFrameType;
typedef struct AOTCompOption {
bool is_jit_mode;
bool is_indirect_mode;
@ -38,7 +54,7 @@ typedef struct AOTCompOption {
bool enable_ref_types;
bool enable_gc;
bool enable_aux_stack_check;
bool enable_aux_stack_frame;
AOTStackFrameType aux_stack_frame_type;
AOTCallStackFeatures call_stack_features;
bool enable_perf_profiling;
bool enable_memory_profiling;

View File

@ -5406,8 +5406,8 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
option.enable_aux_stack_check = true;
#if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0 \
|| WASM_ENABLE_AOT_STACK_FRAME != 0
option.enable_aux_stack_frame = true;
memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures));
option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
aot_call_stack_features_init_default(&option.call_stack_features);
#endif
#if WASM_ENABLE_PERF_PROFILING != 0
option.enable_perf_profiling = true;

View File

@ -2148,8 +2148,8 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
option.enable_aux_stack_check = true;
#if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0 \
|| WASM_ENABLE_AOT_STACK_FRAME != 0
option.enable_aux_stack_frame = true;
memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures));
option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
aot_call_stack_features_init_default(&option.call_stack_features);
#endif
#if WASM_ENABLE_PERF_PROFILING != 0
option.enable_perf_profiling = true;

View File

@ -307,6 +307,13 @@ finish:
return ret;
}
/* Tiny frames can be used only when none of the `values` call stack
   feature, GC and perf profiling is requested in opt. */
static bool
can_enable_tiny_frame(const AOTCompOption *opt)
{
    if (opt->call_stack_features.values || opt->enable_gc
        || opt->enable_perf_profiling) {
        return false;
    }

    return true;
}
static uint32
resolve_segue_flags(char *str_flags)
{
@ -403,9 +410,7 @@ main(int argc, char *argv[])
option.enable_bulk_memory = true;
option.enable_ref_types = true;
option.enable_gc = false;
/* Set all the features to true by default */
memset(&option.call_stack_features, 1, sizeof(AOTCallStackFeatures));
aot_call_stack_features_init_default(&option.call_stack_features);
/* Process options */
for (argc--, argv++; argc > 0 && argv[0][0] == '-'; argc--, argv++) {
@ -519,7 +524,7 @@ main(int argc, char *argv[])
option.enable_aux_stack_check = false;
}
else if (!strcmp(argv[0], "--enable-dump-call-stack")) {
option.enable_aux_stack_frame = true;
option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
}
else if (!strncmp(argv[0], "--call-stack-features=", 22)) {
/* Reset all the features, only enable the user-defined ones */
@ -535,7 +540,7 @@ main(int argc, char *argv[])
}
}
else if (!strcmp(argv[0], "--enable-perf-profiling")) {
option.enable_aux_stack_frame = true;
option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
option.enable_perf_profiling = true;
}
else if (!strcmp(argv[0], "--enable-memory-profiling")) {
@ -550,7 +555,7 @@ main(int argc, char *argv[])
option.is_indirect_mode = true;
}
else if (!strcmp(argv[0], "--enable-gc")) {
option.enable_aux_stack_frame = true;
option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
option.enable_gc = true;
}
else if (!strcmp(argv[0], "--disable-llvm-intrinsics")) {
@ -652,6 +657,14 @@ main(int argc, char *argv[])
if (!use_dummy_wasm && (argc == 0 || !out_file_name))
PRINT_HELP_AND_EXIT();
if (option.aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
&& can_enable_tiny_frame(&option)) {
LOG_VERBOSE("Use tiny frame mode for stack frames");
option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_TINY;
/* for now we only enable frame per function for a TINY frame mode */
option.call_stack_features.frame_per_function = true;
}
if (!size_level_set) {
/**
* Set opt level to 1 by default for Windows and MacOS as