mirror of
https://github.com/bytecodealliance/wasm-micro-runtime.git
synced 2025-02-06 06:55:07 +00:00
Generate jitdump to support linux perf for LLVM JIT (#2788)
This commit is contained in:
parent
d7608690c0
commit
8aa813f44a
|
@ -67,8 +67,8 @@ def build_llvm(llvm_dir, platform, backends, projects, use_clang=False, extra_fl
|
||||||
"-DLLVM_INCLUDE_EXAMPLES:BOOL=OFF",
|
"-DLLVM_INCLUDE_EXAMPLES:BOOL=OFF",
|
||||||
"-DLLVM_INCLUDE_UTILS:BOOL=OFF",
|
"-DLLVM_INCLUDE_UTILS:BOOL=OFF",
|
||||||
"-DLLVM_INCLUDE_TESTS:BOOL=OFF",
|
"-DLLVM_INCLUDE_TESTS:BOOL=OFF",
|
||||||
"-DLLVM_BUILD_TESTS:BOOL=OFF",
|
|
||||||
"-DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON",
|
"-DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON",
|
||||||
|
"-DLLVM_USE_PERF:BOOL=ON",
|
||||||
]
|
]
|
||||||
|
|
||||||
# use clang/clang++/lld. but macos doesn't support lld
|
# use clang/clang++/lld. but macos doesn't support lld
|
||||||
|
@ -255,7 +255,7 @@ def main():
|
||||||
"branch": "release/15.x",
|
"branch": "release/15.x",
|
||||||
},
|
},
|
||||||
"xtensa": {
|
"xtensa": {
|
||||||
"repo": "https://github.com/espressif/llvm-project.git",
|
"repo": "https://github.com/espressif/llvm-project.git",
|
||||||
"repo_ssh": "git@github.com:espressif/llvm-project.git",
|
"repo_ssh": "git@github.com:espressif/llvm-project.git",
|
||||||
"branch": "xtensa_release_15.x",
|
"branch": "xtensa_release_15.x",
|
||||||
},
|
},
|
||||||
|
@ -281,13 +281,13 @@ def main():
|
||||||
commit_hash = query_llvm_version(llvm_info)
|
commit_hash = query_llvm_version(llvm_info)
|
||||||
print(commit_hash)
|
print(commit_hash)
|
||||||
return commit_hash is not None
|
return commit_hash is not None
|
||||||
|
|
||||||
repo_addr = llvm_info["repo"]
|
repo_addr = llvm_info["repo"]
|
||||||
if os.environ.get('USE_GIT_SSH') == "true":
|
if os.environ.get('USE_GIT_SSH') == "true":
|
||||||
repo_addr = llvm_info["repo_ssh"]
|
repo_addr = llvm_info["repo_ssh"]
|
||||||
else:
|
else:
|
||||||
print("To use ssh for git clone, run: export USE_GIT_SSH=true")
|
print("To use ssh for git clone, run: export USE_GIT_SSH=true")
|
||||||
|
|
||||||
llvm_dir = clone_llvm(deps_dir, repo_addr, llvm_info["branch"])
|
llvm_dir = clone_llvm(deps_dir, repo_addr, llvm_info["branch"])
|
||||||
if (
|
if (
|
||||||
build_llvm(
|
build_llvm(
|
||||||
|
|
|
@ -158,7 +158,7 @@ static JitCompOptions jit_options = { 0 };
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if WASM_ENABLE_JIT != 0
|
#if WASM_ENABLE_JIT != 0
|
||||||
static LLVMJITOptions llvm_jit_options = { 3, 3, 0 };
|
static LLVMJITOptions llvm_jit_options = { 3, 3, 0, false };
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static RunningMode runtime_running_mode = Mode_Default;
|
static RunningMode runtime_running_mode = Mode_Default;
|
||||||
|
@ -662,9 +662,14 @@ wasm_runtime_full_init(RuntimeInitArgs *init_args)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if WASM_ENABLE_JIT != 0
|
#if WASM_ENABLE_JIT != 0
|
||||||
|
LOG_DEBUG("Start LLVM_JIT, opt_sz=%u, opt_lvl=%u, segue=%s, linux_perf=%s",
|
||||||
|
init_args->llvm_jit_size_level, init_args->llvm_jit_opt_level,
|
||||||
|
init_args->segue_flags ? "Yes" : "No",
|
||||||
|
init_args->linux_perf_support ? "Yes" : "No");
|
||||||
llvm_jit_options.size_level = init_args->llvm_jit_size_level;
|
llvm_jit_options.size_level = init_args->llvm_jit_size_level;
|
||||||
llvm_jit_options.opt_level = init_args->llvm_jit_opt_level;
|
llvm_jit_options.opt_level = init_args->llvm_jit_opt_level;
|
||||||
llvm_jit_options.segue_flags = init_args->segue_flags;
|
llvm_jit_options.segue_flags = init_args->segue_flags;
|
||||||
|
llvm_jit_options.linux_perf_support = init_args->linux_perf_support;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!wasm_runtime_env_init()) {
|
if (!wasm_runtime_env_init()) {
|
||||||
|
|
|
@ -430,6 +430,7 @@ typedef struct LLVMJITOptions {
|
||||||
uint32 opt_level;
|
uint32 opt_level;
|
||||||
uint32 size_level;
|
uint32 size_level;
|
||||||
uint32 segue_flags;
|
uint32 segue_flags;
|
||||||
|
bool linux_perf_support;
|
||||||
} LLVMJITOptions;
|
} LLVMJITOptions;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -653,6 +653,19 @@ aot_add_llvm_func(AOTCompContext *comp_ctx, LLVMModuleRef module,
|
||||||
attr_no_jump_tables);
|
attr_no_jump_tables);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* spread fp.all to every function */
|
||||||
|
if (comp_ctx->emit_frame_pointer) {
|
||||||
|
const char *key = "frame-pointer";
|
||||||
|
const char *val = "all";
|
||||||
|
LLVMAttributeRef no_omit_fp = LLVMCreateStringAttribute(
|
||||||
|
comp_ctx->context, key, strlen(key), val, strlen(val));
|
||||||
|
if (!no_omit_fp) {
|
||||||
|
aot_set_last_error("create LLVM attribute (frame-pointer) failed.");
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
LLVMAddAttributeAtIndex(func, LLVMAttributeFunctionIndex, no_omit_fp);
|
||||||
|
}
|
||||||
|
|
||||||
if (need_precheck) {
|
if (need_precheck) {
|
||||||
if (!comp_ctx->is_jit_mode)
|
if (!comp_ctx->is_jit_mode)
|
||||||
LLVMSetLinkage(func, LLVMInternalLinkage);
|
LLVMSetLinkage(func, LLVMInternalLinkage);
|
||||||
|
@ -2160,7 +2173,7 @@ jit_stack_size_callback(void *user_data, const char *name, size_t namelen,
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
orc_jit_create(AOTCompContext *comp_ctx)
|
orc_jit_create(AOTCompContext *comp_ctx, bool linux_perf_support)
|
||||||
{
|
{
|
||||||
LLVMErrorRef err;
|
LLVMErrorRef err;
|
||||||
LLVMOrcLLLazyJITRef orc_jit = NULL;
|
LLVMOrcLLLazyJITRef orc_jit = NULL;
|
||||||
|
@ -2200,6 +2213,14 @@ orc_jit_create(AOTCompContext *comp_ctx)
|
||||||
/* Ownership transfer: LLVMOrcLLJITBuilderRef -> LLVMOrcLLJITRef */
|
/* Ownership transfer: LLVMOrcLLJITBuilderRef -> LLVMOrcLLJITRef */
|
||||||
builder = NULL;
|
builder = NULL;
|
||||||
|
|
||||||
|
if (linux_perf_support) {
|
||||||
|
LOG_DEBUG("Enable linux perf support");
|
||||||
|
LLVMOrcObjectLayerRef obj_linking_layer =
|
||||||
|
(LLVMOrcObjectLayerRef)LLVMOrcLLLazyJITGetObjLinkingLayer(orc_jit);
|
||||||
|
LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener(
|
||||||
|
obj_linking_layer, LLVMCreatePerfJITEventListener());
|
||||||
|
}
|
||||||
|
|
||||||
/* Ownership transfer: local -> AOTCompContext */
|
/* Ownership transfer: local -> AOTCompContext */
|
||||||
comp_ctx->orc_jit = orc_jit;
|
comp_ctx->orc_jit = orc_jit;
|
||||||
orc_jit = NULL;
|
orc_jit = NULL;
|
||||||
|
@ -2298,6 +2319,17 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (option->linux_perf_support) {
|
||||||
|
/* FramePointerKind.All */
|
||||||
|
LLVMMetadataRef val =
|
||||||
|
LLVMValueAsMetadata(LLVMConstInt(LLVMInt32Type(), 2, false));
|
||||||
|
const char *key = "frame-pointer";
|
||||||
|
LLVMAddModuleFlag(comp_ctx->module, LLVMModuleFlagBehaviorWarning, key,
|
||||||
|
strlen(key), val);
|
||||||
|
|
||||||
|
comp_ctx->emit_frame_pointer = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (BH_LIST_ERROR == bh_list_init(&comp_ctx->native_symbols)) {
|
if (BH_LIST_ERROR == bh_list_init(&comp_ctx->native_symbols)) {
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
@ -2401,7 +2433,7 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
/* Create LLJIT Instance */
|
/* Create LLJIT Instance */
|
||||||
if (!orc_jit_create(comp_ctx))
|
if (!orc_jit_create(comp_ctx, option->linux_perf_support))
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include "llvm-c/Target.h"
|
#include "llvm-c/Target.h"
|
||||||
#include "llvm-c/Core.h"
|
#include "llvm-c/Core.h"
|
||||||
#include "llvm-c/Object.h"
|
#include "llvm-c/Object.h"
|
||||||
|
#include "llvm-c/OrcEE.h"
|
||||||
#include "llvm-c/ExecutionEngine.h"
|
#include "llvm-c/ExecutionEngine.h"
|
||||||
#include "llvm-c/Analysis.h"
|
#include "llvm-c/Analysis.h"
|
||||||
#include "llvm-c/BitWriter.h"
|
#include "llvm-c/BitWriter.h"
|
||||||
|
@ -422,6 +423,8 @@ typedef struct AOTCompContext {
|
||||||
char stack_usage_temp_file[64];
|
char stack_usage_temp_file[64];
|
||||||
const char *llvm_passes;
|
const char *llvm_passes;
|
||||||
const char *builtin_intrinsics;
|
const char *builtin_intrinsics;
|
||||||
|
|
||||||
|
bool emit_frame_pointer;
|
||||||
} AOTCompContext;
|
} AOTCompContext;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
|
@ -431,6 +434,7 @@ enum {
|
||||||
AOT_LLVMIR_OPT_FILE,
|
AOT_LLVMIR_OPT_FILE,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* always sync it with AOTCompOption in aot_export.h */
|
||||||
typedef struct AOTCompOption {
|
typedef struct AOTCompOption {
|
||||||
bool is_jit_mode;
|
bool is_jit_mode;
|
||||||
bool is_indirect_mode;
|
bool is_indirect_mode;
|
||||||
|
@ -457,6 +461,7 @@ typedef struct AOTCompOption {
|
||||||
uint32 bounds_checks;
|
uint32 bounds_checks;
|
||||||
uint32 stack_bounds_checks;
|
uint32 stack_bounds_checks;
|
||||||
uint32 segue_flags;
|
uint32 segue_flags;
|
||||||
|
bool linux_perf_support;
|
||||||
char **custom_sections;
|
char **custom_sections;
|
||||||
uint32 custom_sections_count;
|
uint32 custom_sections_count;
|
||||||
const char *stack_usage_file;
|
const char *stack_usage_file;
|
||||||
|
|
|
@ -12,11 +12,13 @@
|
||||||
#include "llvm/ADT/None.h"
|
#include "llvm/ADT/None.h"
|
||||||
#include "llvm/ADT/Optional.h"
|
#include "llvm/ADT/Optional.h"
|
||||||
#endif
|
#endif
|
||||||
|
#include "llvm/ExecutionEngine/JITEventListener.h"
|
||||||
|
#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
|
||||||
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
|
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
|
||||||
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
|
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
|
||||||
|
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
|
||||||
#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
|
#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
|
||||||
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
|
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
|
||||||
#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
|
|
||||||
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
|
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
|
||||||
#include "llvm/Support/CBindingWrapping.h"
|
#include "llvm/Support/CBindingWrapping.h"
|
||||||
|
|
||||||
|
@ -108,6 +110,7 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ObjectTransformLayer,
|
||||||
LLVMOrcObjectTransformLayerRef)
|
LLVMOrcObjectTransformLayerRef)
|
||||||
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcV2CAPIHelper::PoolEntry,
|
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcV2CAPIHelper::PoolEntry,
|
||||||
LLVMOrcSymbolStringPoolEntryRef)
|
LLVMOrcSymbolStringPoolEntryRef)
|
||||||
|
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ObjectLayer, LLVMOrcObjectLayerRef)
|
||||||
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(SymbolStringPool, LLVMOrcSymbolStringPoolRef)
|
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(SymbolStringPool, LLVMOrcSymbolStringPoolRef)
|
||||||
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ThreadSafeModule, LLVMOrcThreadSafeModuleRef)
|
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ThreadSafeModule, LLVMOrcThreadSafeModuleRef)
|
||||||
|
|
||||||
|
@ -322,3 +325,9 @@ LLVMOrcLLLazyJITGetObjTransformLayer(LLVMOrcLLLazyJITRef J)
|
||||||
{
|
{
|
||||||
return wrap(&unwrap(J)->getObjTransformLayer());
|
return wrap(&unwrap(J)->getObjTransformLayer());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LLVMOrcObjectLayerRef
|
||||||
|
LLVMOrcLLLazyJITGetObjLinkingLayer(LLVMOrcLLLazyJITRef J)
|
||||||
|
{
|
||||||
|
return wrap(&unwrap(J)->getObjLinkingLayer());
|
||||||
|
}
|
||||||
|
|
|
@ -76,5 +76,8 @@ LLVMOrcLLJITBuilderSetCompileFuncitonCreatorWithStackSizesCallback(
|
||||||
LLVMOrcLLLazyJITBuilderRef Builder,
|
LLVMOrcLLLazyJITBuilderRef Builder,
|
||||||
void (*cb)(void *, const char *, size_t, size_t), void *cb_data);
|
void (*cb)(void *, const char *, size_t, size_t), void *cb_data);
|
||||||
|
|
||||||
|
LLVMOrcObjectLayerRef
|
||||||
|
LLVMOrcLLLazyJITGetObjLinkingLayer(LLVMOrcLLLazyJITRef J);
|
||||||
|
|
||||||
LLVM_C_EXTERN_C_END
|
LLVM_C_EXTERN_C_END
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -70,6 +70,7 @@ typedef struct JitInterpSwitchInfo {
|
||||||
typedef struct JitCompOptions {
|
typedef struct JitCompOptions {
|
||||||
uint32 code_cache_size;
|
uint32 code_cache_size;
|
||||||
uint32 opt_level;
|
uint32 opt_level;
|
||||||
|
bool linux_perf_support;
|
||||||
} JitCompOptions;
|
} JitCompOptions;
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
|
|
@ -38,6 +38,7 @@ enum {
|
||||||
AOT_LLVMIR_OPT_FILE,
|
AOT_LLVMIR_OPT_FILE,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* always sync it with AOTCompOption in compilation/aot_llvm.h */
|
||||||
typedef struct AOTCompOption {
|
typedef struct AOTCompOption {
|
||||||
bool is_jit_mode;
|
bool is_jit_mode;
|
||||||
bool is_indirect_mode;
|
bool is_indirect_mode;
|
||||||
|
@ -64,6 +65,7 @@ typedef struct AOTCompOption {
|
||||||
uint32_t bounds_checks;
|
uint32_t bounds_checks;
|
||||||
uint32_t stack_bounds_checks;
|
uint32_t stack_bounds_checks;
|
||||||
uint32_t segue_flags;
|
uint32_t segue_flags;
|
||||||
|
bool linux_perf_support;
|
||||||
char **custom_sections;
|
char **custom_sections;
|
||||||
uint32_t custom_sections_count;
|
uint32_t custom_sections_count;
|
||||||
const char *stack_usage_file;
|
const char *stack_usage_file;
|
||||||
|
|
|
@ -169,6 +169,15 @@ typedef struct RuntimeInitArgs {
|
||||||
uint32_t llvm_jit_size_level;
|
uint32_t llvm_jit_size_level;
|
||||||
/* Segue optimization flags for LLVM JIT */
|
/* Segue optimization flags for LLVM JIT */
|
||||||
uint32_t segue_flags;
|
uint32_t segue_flags;
|
||||||
|
/**
|
||||||
|
* If enabled
|
||||||
|
* - llvm-jit will output a jitdump file for `perf inject`
|
||||||
|
* - aot. TBD
|
||||||
|
* - fast-jit. TBD
|
||||||
|
* - multi-tier-jit. TBD
|
||||||
|
* - interpreter. TBD
|
||||||
|
*/
|
||||||
|
bool linux_perf_support;
|
||||||
} RuntimeInitArgs;
|
} RuntimeInitArgs;
|
||||||
|
|
||||||
#ifndef WASM_VALKIND_T_DEFINED
|
#ifndef WASM_VALKIND_T_DEFINED
|
||||||
|
@ -945,7 +954,7 @@ wasm_runtime_get_custom_data(wasm_module_inst_t module_inst);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the memory bounds checks flag of a WASM module instance.
|
* Set the memory bounds checks flag of a WASM module instance.
|
||||||
*
|
*
|
||||||
* @param module_inst the WASM module instance
|
* @param module_inst the WASM module instance
|
||||||
* @param enable the flag to enable/disable the memory bounds checks
|
* @param enable the flag to enable/disable the memory bounds checks
|
||||||
*/
|
*/
|
||||||
|
@ -954,9 +963,8 @@ wasm_runtime_set_bounds_checks(wasm_module_inst_t module_inst,
|
||||||
bool enable);
|
bool enable);
|
||||||
/**
|
/**
|
||||||
* Check if the memory bounds checks flag is enabled for a WASM module instance.
|
* Check if the memory bounds checks flag is enabled for a WASM module instance.
|
||||||
*
|
|
||||||
* @param module_inst the WASM module instance
|
|
||||||
*
|
*
|
||||||
|
* @param module_inst the WASM module instance
|
||||||
* @return true if the memory bounds checks flag is enabled, false otherwise
|
* @return true if the memory bounds checks flag is enabled, false otherwise
|
||||||
*/
|
*/
|
||||||
WASM_RUNTIME_API_EXTERN bool
|
WASM_RUNTIME_API_EXTERN bool
|
||||||
|
|
|
@ -2878,6 +2878,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
|
||||||
option.opt_level = llvm_jit_options.opt_level;
|
option.opt_level = llvm_jit_options.opt_level;
|
||||||
option.size_level = llvm_jit_options.size_level;
|
option.size_level = llvm_jit_options.size_level;
|
||||||
option.segue_flags = llvm_jit_options.segue_flags;
|
option.segue_flags = llvm_jit_options.segue_flags;
|
||||||
|
option.linux_perf_support = llvm_jit_options.linux_perf_support;
|
||||||
|
|
||||||
#if WASM_ENABLE_BULK_MEMORY != 0
|
#if WASM_ENABLE_BULK_MEMORY != 0
|
||||||
option.enable_bulk_memory = true;
|
option.enable_bulk_memory = true;
|
||||||
|
|
|
@ -1877,6 +1877,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
|
||||||
option.opt_level = llvm_jit_options.opt_level;
|
option.opt_level = llvm_jit_options.opt_level;
|
||||||
option.size_level = llvm_jit_options.size_level;
|
option.size_level = llvm_jit_options.size_level;
|
||||||
option.segue_flags = llvm_jit_options.segue_flags;
|
option.segue_flags = llvm_jit_options.segue_flags;
|
||||||
|
option.linux_perf_support = llvm_jit_options.linux_perf_support;
|
||||||
|
|
||||||
#if WASM_ENABLE_BULK_MEMORY != 0
|
#if WASM_ENABLE_BULK_MEMORY != 0
|
||||||
option.enable_bulk_memory = true;
|
option.enable_bulk_memory = true;
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
Normally there are some methods to tune the performance:
|
Normally there are some methods to tune the performance:
|
||||||
|
|
||||||
## 1. Use `wasm-opt` tool
|
## 1. Use `wasm-opt` tool
|
||||||
|
|
||||||
Download the [binaryen release](https://github.com/WebAssembly/binaryen/releases), and use the `wasm-opt` tool in it to optimize the wasm file, for example:
|
Download the [binaryen release](https://github.com/WebAssembly/binaryen/releases), and use the `wasm-opt` tool in it to optimize the wasm file, for example:
|
||||||
|
|
||||||
|
@ -23,16 +23,19 @@ emcc -msimd128 -O3 -o <wasm_file> <c/c++ source files>
|
||||||
## 3. Enable segue optimization for wamrc when generating the aot file
|
## 3. Enable segue optimization for wamrc when generating the aot file
|
||||||
|
|
||||||
[Segue](https://plas2022.github.io/files/pdf/SegueColorGuard.pdf) is an optimization technology which uses x86 segment register to store the WebAssembly linear memory base address, so as to remove most of the cost of SFI (Software-based Fault Isolation) base addition and free up a general purpose register, by this way it may:
|
[Segue](https://plas2022.github.io/files/pdf/SegueColorGuard.pdf) is an optimization technology which uses x86 segment register to store the WebAssembly linear memory base address, so as to remove most of the cost of SFI (Software-based Fault Isolation) base addition and free up a general purpose register, by this way it may:
|
||||||
|
|
||||||
- Improve the performance of JIT/AOT
|
- Improve the performance of JIT/AOT
|
||||||
- Reduce the footprint of JIT/AOT, the JIT/AOT code generated is smaller
|
- Reduce the footprint of JIT/AOT, the JIT/AOT code generated is smaller
|
||||||
- Reduce the compilation time of JIT/AOT
|
- Reduce the compilation time of JIT/AOT
|
||||||
|
|
||||||
Currently it is supported on linux x86-64, developer can use `--enable-segue=[<flags>]` for wamrc:
|
Currently it is supported on linux x86-64, developer can use `--enable-segue=[<flags>]` for wamrc:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
wamrc --enable-segue -o aot_file wasm_file
|
wamrc --enable-segue -o aot_file wasm_file
|
||||||
# or
|
# or
|
||||||
wamrc --enable-segue=[<flags>] -o aot_file wasm_file
|
wamrc --enable-segue=[<flags>] -o aot_file wasm_file
|
||||||
```
|
```
|
||||||
|
|
||||||
`flags` can be: i32.load, i64.load, f32.load, f64.load, v128.load, i32.store, i64.store, f32.store, f64.store and v128.store, use comma to separate them, e.g. `--enable-segue=i32.load,i64.store`, and `--enable-segue` means all flags are added.
|
`flags` can be: i32.load, i64.load, f32.load, f64.load, v128.load, i32.store, i64.store, f32.store, f64.store and v128.store, use comma to separate them, e.g. `--enable-segue=i32.load,i64.store`, and `--enable-segue` means all flags are added.
|
||||||
|
|
||||||
> Note: Normally for most cases, using `--enable-segue` is enough, but for some cases, using `--enable-segue=<flags>` may be better, for example for CoreMark benchmark, `--enable-segue=i32.store` may lead to better performance than `--enable-segue`.
|
> Note: Normally for most cases, using `--enable-segue` is enough, but for some cases, using `--enable-segue=<flags>` may be better, for example for CoreMark benchmark, `--enable-segue=i32.store` may lead to better performance than `--enable-segue`.
|
||||||
|
@ -40,7 +43,8 @@ wamrc --enable-segue=[<flags>] -o aot_file wasm_file
|
||||||
## 4. Enable segue optimization for iwasm when running wasm file
|
## 4. Enable segue optimization for iwasm when running wasm file
|
||||||
|
|
||||||
Similar to segue optimization for wamrc, run:
|
Similar to segue optimization for wamrc, run:
|
||||||
``` bash
|
|
||||||
|
```bash
|
||||||
iwasm --enable-segue wasm_file (iwasm is built with llvm-jit enabled)
|
iwasm --enable-segue wasm_file (iwasm is built with llvm-jit enabled)
|
||||||
# or
|
# or
|
||||||
iwasm --enable-segue=[<flags>] wasm_file
|
iwasm --enable-segue=[<flags>] wasm_file
|
||||||
|
@ -55,6 +59,7 @@ LLVM PGO (Profile-Guided Optimization) allows the compiler to better optimize co
|
||||||
2. Compile iwasm with `cmake -DWAMR_BUILD_STATIC_PGO=1` and run `iwasm --gen-prof-file=<raw_profile_file> <aot_file_of_pgo>` to generate the raw profile file.
|
2. Compile iwasm with `cmake -DWAMR_BUILD_STATIC_PGO=1` and run `iwasm --gen-prof-file=<raw_profile_file> <aot_file_of_pgo>` to generate the raw profile file.
|
||||||
|
|
||||||
> Note: Directly dumping raw profile data to file system may be unsupported in some environments, developer can dump the profile data into memory buffer instead and try outputting it through network (e.g. uart or socket):
|
> Note: Directly dumping raw profile data to file system may be unsupported in some environments, developer can dump the profile data into memory buffer instead and try outputting it through network (e.g. uart or socket):
|
||||||
|
|
||||||
```C
|
```C
|
||||||
uint32_t
|
uint32_t
|
||||||
wasm_runtime_get_pgo_prof_data_size(wasm_module_inst_t module_inst);
|
wasm_runtime_get_pgo_prof_data_size(wasm_module_inst_t module_inst);
|
||||||
|
@ -84,6 +89,78 @@ Please notice that this method is not a general solution since it may lead to se
|
||||||
3. Run the AOT module by iwasm with `--disable-bounds-checks` option.
|
3. Run the AOT module by iwasm with `--disable-bounds-checks` option.
|
||||||
|
|
||||||
> Note: The size of AOT file will be much smaller than the default, and some tricks are possible such as let the wasm application access the memory of host os directly.
|
> Note: The size of AOT file will be much smaller than the default, and some tricks are possible such as let the wasm application access the memory of host os directly.
|
||||||
Please notice that if this option is enabled, the wasm spec test will fail since it requires the memory boundary check. For example, the runtime will crash when accessing the memory out of the boundary in some cases instead of throwing an exception as the spec requires.
|
> Please notice that if this option is enabled, the wasm spec test will fail since it requires the memory boundary check. For example, the runtime will crash when accessing the memory out of the boundary in some cases instead of throwing an exception as the spec requires.
|
||||||
|
|
||||||
You should only use this method for well tested wasm applications and make sure the memory access is safe.
|
You should only use this method for well tested wasm applications and make sure the memory access is safe.
|
||||||
|
|
||||||
|
## 7. Use linux-perf
|
||||||
|
|
||||||
|
Linux perf is a powerful tool for analyzing the performance of a program; developers can use it to find hot functions and optimize them. It is one of the profilers supported by WAMR. To use it, add `--perf-profile` when running _iwasm_. It is disabled by default.
|
||||||
|
|
||||||
|
> [!CAUTION]
|
||||||
|
> For now, only llvm-jit mode supports linux-perf.
|
||||||
|
|
||||||
|
Here is a basic example. If there is a Wasm application _foo.wasm_, run:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ perf record --output=perf.data.raw -- iwasm --perf-profile foo.wasm
|
||||||
|
```
|
||||||
|
|
||||||
|
This will create a _perf.data.raw_ file and a _jit-xxx.dump_ file under the _~/.debug/jit/_ folder. The latter is generated by WAMR at runtime and contains the mapping between the JIT code and the original Wasm function names.
|
||||||
|
|
||||||
|
The next step is to merge the _jit-xxx.dump_ file into _perf.data.raw_, which produces _perf.data_.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ perf inject --jit --input=perf.data.raw --output=perf.data
|
||||||
|
```
|
||||||
|
|
||||||
|
This step will create many _jitted-xxxx-N.so_ files, which are ELF images of the JIT functions created at runtime.
|
||||||
|
|
||||||
|
> [!TIP]
|
||||||
|
> Add `-v` and check whether the output contains lines like _write ELF image ..._. If it does, the merge above was successful.
|
||||||
|
|
||||||
|
Finally, you can use _perf report_ to analyze the performance.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ perf report --input=perf.data
|
||||||
|
```
|
||||||
|
|
||||||
|
> [!CAUTION]
|
||||||
|
> Using release build of llvm and iwasm will produce "[unknown]" functions in the call graph. It is not only because
|
||||||
|
> of the missing debug information, but also because of removing frame pointers. To get the complete result, please
|
||||||
|
> use debug build of llvm and iwasm.
|
||||||
|
>
|
||||||
|
> Wasm functions will not be affected.
|
||||||
|
|
||||||
|
### 7.1 Flamegraph
|
||||||
|
|
||||||
|
[Flamegraph](https://www.brendangregg.com/flamegraphs.html) is a powerful tool to visualize stack traces of profiled software so that the most frequent code-paths can be identified quickly and accurately. To use it, you need to record call graphs when running `perf record`:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ perf record -k mono --call-graph=fp --output=perf.data.raw -- iwasm --perf-profile foo.wasm
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Merge the _jit-xxx.dump_ file into _perf.data.raw_.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ perf inject --jit --input=perf.data.raw --output=perf.data
|
||||||
|
```
|
||||||
|
|
||||||
|
Generate the stack trace file.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ perf script > out.perf
|
||||||
|
```
|
||||||
|
|
||||||
|
[fold stacks](https://github.com/brendangregg/FlameGraph#2-fold-stacks).
|
||||||
|
|
||||||
|
```
|
||||||
|
$ ./FlameGraph/stackcollapse-perf.pl out.perf > out.folded
|
||||||
|
```
|
||||||
|
|
||||||
|
[render a flamegraph](https://github.com/brendangregg/FlameGraph#3-flamegraphpl)
|
||||||
|
|
||||||
|
```
|
||||||
|
$ ./FlameGraph/flamegraph.pl out.folded > perf.foo.wasm.svg
|
||||||
|
```
|
||||||
|
|
|
@ -58,6 +58,7 @@ print_help()
|
||||||
#if WASM_ENABLE_JIT != 0
|
#if WASM_ENABLE_JIT != 0
|
||||||
printf(" --llvm-jit-size-level=n Set LLVM JIT size level, default is 3\n");
|
printf(" --llvm-jit-size-level=n Set LLVM JIT size level, default is 3\n");
|
||||||
printf(" --llvm-jit-opt-level=n Set LLVM JIT optimization level, default is 3\n");
|
printf(" --llvm-jit-opt-level=n Set LLVM JIT optimization level, default is 3\n");
|
||||||
|
printf(" --perf-profile Enable linux perf support. For now, it only works in llvm-jit.\n");
|
||||||
#if defined(os_writegsbase)
|
#if defined(os_writegsbase)
|
||||||
printf(" --enable-segue[=<flags>] Enable using segment register GS as the base address of\n");
|
printf(" --enable-segue[=<flags>] Enable using segment register GS as the base address of\n");
|
||||||
printf(" linear memory, which may improve performance, flags can be:\n");
|
printf(" linear memory, which may improve performance, flags can be:\n");
|
||||||
|
@ -560,6 +561,7 @@ main(int argc, char *argv[])
|
||||||
uint32 llvm_jit_size_level = 3;
|
uint32 llvm_jit_size_level = 3;
|
||||||
uint32 llvm_jit_opt_level = 3;
|
uint32 llvm_jit_opt_level = 3;
|
||||||
uint32 segue_flags = 0;
|
uint32 segue_flags = 0;
|
||||||
|
bool enable_linux_perf_support = false;
|
||||||
#endif
|
#endif
|
||||||
wasm_module_t wasm_module = NULL;
|
wasm_module_t wasm_module = NULL;
|
||||||
wasm_module_inst_t wasm_module_inst = NULL;
|
wasm_module_inst_t wasm_module_inst = NULL;
|
||||||
|
@ -700,6 +702,9 @@ main(int argc, char *argv[])
|
||||||
if (segue_flags == (uint32)-1)
|
if (segue_flags == (uint32)-1)
|
||||||
return print_help();
|
return print_help();
|
||||||
}
|
}
|
||||||
|
else if (!strncmp(argv[0], "--perf-profile", 14)) {
|
||||||
|
enable_linux_perf_support = true;
|
||||||
|
}
|
||||||
#endif /* end of WASM_ENABLE_JIT != 0 */
|
#endif /* end of WASM_ENABLE_JIT != 0 */
|
||||||
#if BH_HAS_DLFCN
|
#if BH_HAS_DLFCN
|
||||||
else if (!strncmp(argv[0], "--native-lib=", 13)) {
|
else if (!strncmp(argv[0], "--native-lib=", 13)) {
|
||||||
|
@ -814,6 +819,7 @@ main(int argc, char *argv[])
|
||||||
init_args.llvm_jit_size_level = llvm_jit_size_level;
|
init_args.llvm_jit_size_level = llvm_jit_size_level;
|
||||||
init_args.llvm_jit_opt_level = llvm_jit_opt_level;
|
init_args.llvm_jit_opt_level = llvm_jit_opt_level;
|
||||||
init_args.segue_flags = segue_flags;
|
init_args.segue_flags = segue_flags;
|
||||||
|
init_args.linux_perf_support = enable_linux_perf_support;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if WASM_ENABLE_DEBUG_INTERP != 0
|
#if WASM_ENABLE_DEBUG_INTERP != 0
|
||||||
|
|
Loading…
Reference in New Issue
Block a user