Enhancements on wasm function execution time statistic (#2985)

Enhance the statistic of wasm function execution time, or the performance
profiling feature:
- Add os_time_thread_cputime_us() to get the cputime of a thread,
  and use it to calculate the execution time of a wasm function
- Support the statistic of the children execution time of a function,
  and dump it in wasm_runtime_dump_perf_profiling
- Expose two APIs:
  wasm_runtime_sum_wasm_exec_time
  wasm_runtime_get_wasm_func_exec_time

And rename os_time_get_boot_microsecond to os_time_get_boot_us.
This commit is contained in:
liang.he 2024-01-17 09:51:54 +08:00 committed by GitHub
parent 25ccc9f2d5
commit 5c8b8a17a6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
24 changed files with 336 additions and 86 deletions

View File

@ -2807,7 +2807,7 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
}
#if WASM_ENABLE_PERF_PROFILING != 0
frame->time_started = os_time_get_boot_microsecond();
frame->time_started = os_time_thread_cputime_us();
frame->func_perf_prof_info = func_perf_prof;
#endif
@ -2826,8 +2826,13 @@ aot_free_frame(WASMExecEnv *exec_env)
#if WASM_ENABLE_PERF_PROFILING != 0
cur_frame->func_perf_prof_info->total_exec_time +=
os_time_get_boot_microsecond() - cur_frame->time_started;
os_time_thread_cputime_us() - cur_frame->time_started;
cur_frame->func_perf_prof_info->total_exec_cnt++;
/* parent function */
if (prev_frame)
prev_frame->func_perf_prof_info->children_exec_time =
cur_frame->func_perf_prof_info->total_exec_time;
#endif
wasm_exec_env_free_wasm_frame(exec_env, cur_frame);
@ -2964,22 +2969,65 @@ aot_dump_perf_profiling(const AOTModuleInstance *module_inst)
os_printf("Performance profiler data:\n");
for (i = 0; i < total_func_count; i++, perf_prof++) {
if (perf_prof->total_exec_cnt == 0)
continue;
func_name = get_func_name_from_index(module_inst, i);
if (func_name)
os_printf(
" func %s, execution time: %.3f ms, execution count: %" PRIu32
" times\n",
" times, children execution time: %.3f ms\n",
func_name, perf_prof->total_exec_time / 1000.0f,
perf_prof->total_exec_cnt);
perf_prof->total_exec_cnt,
perf_prof->children_exec_time / 1000.0f);
else
os_printf(" func %" PRIu32
", execution time: %.3f ms, execution count: %" PRIu32
" times\n",
" times, children execution time: %.3f ms\n",
i, perf_prof->total_exec_time / 1000.0f,
perf_prof->total_exec_cnt);
perf_prof->total_exec_cnt,
perf_prof->children_exec_time / 1000.0f);
}
}
double
aot_summarize_wasm_execute_time(const AOTModuleInstance *inst)
{
double ret = 0;
AOTModule *module = (AOTModule *)inst->module;
uint32 total_func_count = module->import_func_count + module->func_count, i;
for (i = 0; i < total_func_count; i++) {
AOTFuncPerfProfInfo *perf_prof =
(AOTFuncPerfProfInfo *)inst->func_perf_profilings + i;
ret += (perf_prof->total_exec_time - perf_prof->children_exec_time)
/ 1000.0f;
}
return ret;
}
double
aot_get_wasm_func_exec_time(const AOTModuleInstance *inst,
const char *func_name)
{
AOTModule *module = (AOTModule *)inst->module;
uint32 total_func_count = module->import_func_count + module->func_count, i;
for (i = 0; i < total_func_count; i++) {
const char *name_in_wasm = get_func_name_from_index(inst, i);
if (name_in_wasm && strcmp(func_name, name_in_wasm) == 0) {
AOTFuncPerfProfInfo *perf_prof =
(AOTFuncPerfProfInfo *)inst->func_perf_profilings + i;
return (perf_prof->total_exec_time - perf_prof->children_exec_time)
/ 1000.0f;
}
}
return -1.0;
}
#endif /* end of WASM_ENABLE_PERF_PROFILING */
#if WASM_ENABLE_STATIC_PGO != 0

View File

@ -286,6 +286,8 @@ typedef struct AOTFuncPerfProfInfo {
uint64 total_exec_time;
/* total execution count */
uint32 total_exec_cnt;
/* children execution time */
uint64 children_exec_time;
} AOTFuncPerfProfInfo;
/* AOT auxiliary call stack */
@ -613,6 +615,13 @@ aot_dump_call_stack(WASMExecEnv *exec_env, bool print, char *buf, uint32 len);
void
aot_dump_perf_profiling(const AOTModuleInstance *module_inst);
double
aot_summarize_wasm_execute_time(const AOTModuleInstance *inst);
double
aot_get_wasm_func_exec_time(const AOTModuleInstance *inst,
const char *func_name);
const uint8 *
aot_get_custom_section(const AOTModule *module, const char *name, uint32 *len);

View File

@ -1690,8 +1690,43 @@ wasm_runtime_dump_perf_profiling(WASMModuleInstanceCommon *module_inst)
}
#endif
}
double
wasm_runtime_sum_wasm_exec_time(WASMModuleInstanceCommon *inst)
{
#if WASM_ENABLE_INTERP != 0
if (inst->module_type == Wasm_Module_Bytecode)
return wasm_summarize_wasm_execute_time((WASMModuleInstance *)inst);
#endif
#if WASM_ENABLE_AOT != 0
if (inst->module_type == Wasm_Module_AoT)
return aot_summarize_wasm_execute_time((AOTModuleInstance *)inst);
#endif
return 0.0;
}
double
wasm_runtime_get_wasm_func_exec_time(WASMModuleInstanceCommon *inst,
const char *func_name)
{
#if WASM_ENABLE_INTERP != 0
if (inst->module_type == Wasm_Module_Bytecode)
return wasm_get_wasm_func_exec_time((WASMModuleInstance *)inst,
func_name);
#endif
#if WASM_ENABLE_AOT != 0
if (inst->module_type == Wasm_Module_AoT)
return aot_get_wasm_func_exec_time((AOTModuleInstance *)inst,
func_name);
#endif
return 0.0;
}
#endif /* WASM_ENABLE_PERF_PROFILING != 0 */
WASMModuleInstanceCommon *
wasm_runtime_get_module_inst(WASMExecEnv *exec_env)
{

View File

@ -396,9 +396,8 @@ handle_func_return(JitCompContext *cc, JitBlock *block)
#endif
#if WASM_ENABLE_PERF_PROFILING != 0
/* time_end = os_time_get_boot_microsecond() */
if (!jit_emit_callnative(cc, os_time_get_boot_microsecond, time_end, NULL,
0)) {
/* time_end = os_time_get_boot_us() */
if (!jit_emit_callnative(cc, os_time_get_boot_us, time_end, NULL, 0)) {
return false;
}
/* time_start = cur_frame->time_started */

View File

@ -1157,11 +1157,10 @@ init_func_translation(JitCompContext *cc)
func_inst = jit_cc_new_reg_ptr(cc);
#if WASM_ENABLE_PERF_PROFILING != 0
time_started = jit_cc_new_reg_I64(cc);
/* Call os_time_get_boot_microsecond() to get time_started firstly
/* Call os_time_get_boot_us() to get time_started firstly
as there is stack frame switching below, calling native in them
may cause register spilling work inproperly */
if (!jit_emit_callnative(cc, os_time_get_boot_microsecond, time_started,
NULL, 0)) {
if (!jit_emit_callnative(cc, os_time_get_boot_us, time_started, NULL, 0)) {
return NULL;
}
#endif

View File

@ -1274,6 +1274,26 @@ wasm_runtime_dump_mem_consumption(wasm_exec_env_t exec_env);
WASM_RUNTIME_API_EXTERN void
wasm_runtime_dump_perf_profiling(wasm_module_inst_t module_inst);
/**
* Return total wasm functions' execution time in ms
*
* @param module_inst the WASM module instance to profile
*/
WASM_RUNTIME_API_EXTERN double
wasm_runtime_sum_wasm_exec_time(wasm_module_inst_t module_inst);
/**
* Return execution time in ms of a given wasm funciton with
* func_name. If the function is not found, return 0.
*
* @param module_inst the WASM module instance to profile
* @param func_name could be an export name or a name in the
* name section
*/
WASM_RUNTIME_API_EXTERN double
wasm_runtime_get_wasm_func_exec_time(wasm_module_inst_t inst,
const char *func_name);
/* wasm thread callback function type */
typedef void *(*wasm_thread_callback_t)(wasm_exec_env_t, void *);
/* wasm thread type */

View File

@ -850,7 +850,7 @@ ALLOC_FRAME(WASMExecEnv *exec_env, uint32 size, WASMInterpFrame *prev_frame)
if (frame) {
frame->prev_frame = prev_frame;
#if WASM_ENABLE_PERF_PROFILING != 0
frame->time_started = os_time_get_boot_microsecond();
frame->time_started = os_time_thread_cputime_us();
#endif
}
else {
@ -866,9 +866,14 @@ FREE_FRAME(WASMExecEnv *exec_env, WASMInterpFrame *frame)
{
#if WASM_ENABLE_PERF_PROFILING != 0
if (frame->function) {
WASMInterpFrame *prev_frame = frame->prev_frame;
frame->function->total_exec_time +=
os_time_get_boot_microsecond() - frame->time_started;
os_time_thread_cputime_us() - frame->time_started;
frame->function->total_exec_cnt++;
if (prev_frame && prev_frame->function)
prev_frame->function->children_exec_time +=
frame->function->total_exec_time;
}
#endif
wasm_exec_env_free_wasm_frame(exec_env, frame);

View File

@ -884,7 +884,7 @@ ALLOC_FRAME(WASMExecEnv *exec_env, uint32 size, WASMInterpFrame *prev_frame)
if (frame) {
frame->prev_frame = prev_frame;
#if WASM_ENABLE_PERF_PROFILING != 0
frame->time_started = os_time_get_boot_microsecond();
frame->time_started = os_time_thread_cputime_us();
#endif
}
else {
@ -900,9 +900,15 @@ FREE_FRAME(WASMExecEnv *exec_env, WASMInterpFrame *frame)
{
#if WASM_ENABLE_PERF_PROFILING != 0
if (frame->function) {
WASMInterpFrame *prev_frame = frame->prev_frame;
frame->function->total_exec_time +=
os_time_get_boot_microsecond() - frame->time_started;
os_time_thread_cputime_us() - frame->time_started;
frame->function->total_exec_cnt++;
if (prev_frame && prev_frame->function)
prev_frame->function->children_exec_time +=
frame->function->total_exec_time;
}
#endif
wasm_exec_env_free_wasm_frame(exec_env, frame);

View File

@ -2392,53 +2392,103 @@ wasm_call_function(WASMExecEnv *exec_env, WASMFunctionInstance *function,
return !wasm_copy_exception(module_inst, NULL);
}
#if WASM_ENABLE_PERF_PROFILING != 0
void
wasm_dump_perf_profiling(const WASMModuleInstance *module_inst)
#if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0
/* look for the function name */
static char *
get_func_name_from_index(const WASMModuleInstance *inst, uint32 func_index)
{
WASMExportFuncInstance *export_func;
WASMFunctionInstance *func_inst;
char *func_name;
uint32 i, j;
char *func_name = NULL;
WASMFunctionInstance *func_inst = inst->e->functions + func_index;
os_printf("Performance profiler data:\n");
for (i = 0; i < module_inst->e->function_count; i++) {
func_inst = module_inst->e->functions + i;
if (func_inst->is_import_func) {
func_name = func_inst->u.func_import->field_name;
}
#if WASM_ENABLE_CUSTOM_NAME_SECTION != 0
else if (func_inst->u.func->field_name) {
func_name = func_inst->u.func->field_name;
}
#endif
else {
func_name = NULL;
for (j = 0; j < module_inst->export_func_count; j++) {
export_func = module_inst->export_functions + j;
#if WASM_ENABLE_CUSTOM_NAME_SECTION != 0
func_name = func_inst->u.func->field_name;
#endif
/* if custom name section is not generated,
search symbols from export table */
if (!func_name) {
unsigned j;
for (j = 0; j < inst->export_func_count; j++) {
WASMExportFuncInstance *export_func =
inst->export_functions + j;
if (export_func->function == func_inst) {
func_name = export_func->name;
break;
}
}
}
}
return func_name;
}
#endif /*WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0*/
#if WASM_ENABLE_PERF_PROFILING != 0
void
wasm_dump_perf_profiling(const WASMModuleInstance *module_inst)
{
WASMFunctionInstance *func_inst;
char *func_name;
uint32 i;
os_printf("Performance profiler data:\n");
for (i = 0; i < module_inst->e->function_count; i++) {
func_inst = module_inst->e->functions + i;
if (func_inst->total_exec_cnt == 0)
continue;
func_name = get_func_name_from_index(module_inst, i);
if (func_name)
os_printf(
" func %s, execution time: %.3f ms, execution count: %" PRIu32
" times\n",
func_name,
module_inst->e->functions[i].total_exec_time / 1000.0f,
module_inst->e->functions[i].total_exec_cnt);
" times, children execution time: %.3f ms\n",
func_name, func_inst->total_exec_time / 1000.0f,
func_inst->total_exec_cnt,
func_inst->children_exec_time / 1000.0f);
else
os_printf(" func %" PRIu32
", execution time: %.3f ms, execution count: %" PRIu32
" times\n",
i, module_inst->e->functions[i].total_exec_time / 1000.0f,
module_inst->e->functions[i].total_exec_cnt);
" times, children execution time: %.3f ms\n",
i, func_inst->total_exec_time / 1000.0f,
func_inst->total_exec_cnt,
func_inst->children_exec_time / 1000.0f);
}
}
#endif
double
wasm_summarize_wasm_execute_time(const WASMModuleInstance *inst)
{
double ret = 0;
unsigned i;
for (i = 0; i < inst->e->function_count; i++) {
WASMFunctionInstance *func = inst->e->functions + i;
ret += (func->total_exec_time - func->children_exec_time) / 1000.0f;
}
return ret;
}
double
wasm_get_wasm_func_exec_time(const WASMModuleInstance *inst,
const char *func_name)
{
unsigned i;
for (i = 0; i < inst->e->function_count; i++) {
char *name_in_wasm = get_func_name_from_index(inst, i);
if (name_in_wasm && strcmp(name_in_wasm, func_name) == 0) {
WASMFunctionInstance *func = inst->e->functions + i;
return (func->total_exec_time - func->children_exec_time) / 1000.0f;
}
}
return -1.0;
}
#endif /*WASM_ENABLE_PERF_PROFILING != 0*/
uint32
wasm_module_malloc_internal(WASMModuleInstance *module_inst,
@ -2933,29 +2983,7 @@ wasm_interp_create_call_stack(struct WASMExecEnv *exec_env)
frame.func_offset = (uint32)(cur_frame->ip - func_code_base);
}
/* look for the function name */
if (func_inst->is_import_func) {
func_name = func_inst->u.func_import->field_name;
}
else {
#if WASM_ENABLE_CUSTOM_NAME_SECTION != 0
func_name = func_inst->u.func->field_name;
#endif
/* if custom name section is not generated,
search symbols from export table */
if (!func_name) {
uint32 i;
for (i = 0; i < module_inst->export_func_count; i++) {
WASMExportFuncInstance *export_func =
module_inst->export_functions + i;
if (export_func->function == func_inst) {
func_name = export_func->name;
break;
}
}
}
}
func_name = get_func_name_from_index(module_inst, frame.func_index);
frame.func_name_wp = func_name;
if (!bh_vector_append(module_inst->frames, &frame)) {
@ -3402,7 +3430,7 @@ llvm_jit_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
frame->ip = NULL;
frame->sp = frame->lp;
#if WASM_ENABLE_PERF_PROFILING != 0
frame->time_started = os_time_get_boot_microsecond();
frame->time_started = os_time_thread_cputime_us();
#endif
frame->prev_frame = wasm_exec_env_get_cur_frame(exec_env);
wasm_exec_env_set_cur_frame(exec_env, frame);
@ -3424,8 +3452,13 @@ llvm_jit_free_frame(WASMExecEnv *exec_env)
#if WASM_ENABLE_PERF_PROFILING != 0
if (frame->function) {
frame->function->total_exec_time +=
os_time_get_boot_microsecond() - frame->time_started;
os_time_thread_cputime_us() - frame->time_started;
frame->function->total_exec_cnt++;
/* parent function */
if (prev_frame)
prev_frame->function->children_exec_time =
frame->function->total_exec_time;
}
#endif
wasm_exec_env_free_wasm_frame(exec_env, frame);

View File

@ -186,6 +186,8 @@ struct WASMFunctionInstance {
uint64 total_exec_time;
/* total execution count */
uint32 total_exec_cnt;
/* children execution time */
uint64 children_exec_time;
#endif
};
@ -432,6 +434,13 @@ wasm_instantiate(WASMModule *module, WASMModuleInstance *parent,
void
wasm_dump_perf_profiling(const WASMModuleInstance *module_inst);
double
wasm_summarize_wasm_execute_time(const WASMModuleInstance *inst);
double
wasm_get_wasm_func_exec_time(const WASMModuleInstance *inst,
const char *func_name);
void
wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst);

View File

@ -927,7 +927,7 @@ clock_gettime_wrapper(wasm_exec_env_t exec_env, uint32 clk_id,
if (!validate_native_addr(ts_app, sizeof(struct timespec_app)))
return (uint32)-1;
time = os_time_get_boot_microsecond();
time = os_time_get_boot_us();
ts_app->tv_sec = time / 1000000;
ts_app->tv_nsec = (time % 1000000) * 1000;
@ -939,7 +939,7 @@ clock_wrapper(wasm_exec_env_t exec_env)
{
/* Convert to nano seconds as CLOCKS_PER_SEC in wasi-sdk */
return os_time_get_boot_microsecond() * 1000;
return os_time_get_boot_us() * 1000;
}
#if WASM_ENABLE_SPEC_TEST != 0

View File

@ -667,6 +667,12 @@ thread_manager_start_routine(void *arg)
since we will exit soon */
}
#if WASM_ENABLE_PERF_PROFILING != 0
os_printf("============= Spawned thread ===========\n");
wasm_runtime_dump_perf_profiling(module_inst);
os_printf("========================================\n");
#endif
/* Free aux stack space */
free_aux_stack(exec_env, exec_env->aux_stack_bottom.bottom);
/* Remove exec_env */

View File

@ -6,7 +6,14 @@
#include "platform_api_vmcore.h"
uint64
os_time_get_boot_microsecond()
os_time_get_boot_us()
{
return (uint64)aos_now_ms() * 1000;
}
uint64
os_time_thread_cputime_us(void)
{
/* FIXME if u know the right api */
return os_time_get_boot_us();
}

View File

@ -6,8 +6,15 @@
#include "platform_api_vmcore.h"
uint64
os_time_get_boot_microsecond()
os_time_get_boot_us()
{
TickType_t ticks = xTaskGetTickCount();
return (uint64)1000 * 1000 / configTICK_RATE_HZ * ticks;
}
uint64
os_time_thread_cputime_us(void)
{
/* FIXME if u know the right api */
return os_time_get_boot_us();
}

View File

@ -6,7 +6,7 @@
#include "platform_api_vmcore.h"
uint64
os_time_get_boot_microsecond()
os_time_get_boot_us()
{
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
@ -15,3 +15,14 @@ os_time_get_boot_microsecond()
return ((uint64)ts.tv_sec) * 1000 * 1000 + ((uint64)ts.tv_nsec) / 1000;
}
uint64
os_time_thread_cputime_us()
{
struct timespec ts;
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
return 0;
}
return ((uint64)ts.tv_sec) * 1000 * 1000 + ((uint64)ts.tv_nsec) / 1000;
}

View File

@ -36,11 +36,18 @@ os_vprintf(const char *format, va_list ap)
}
uint64
os_time_get_boot_microsecond(void)
os_time_get_boot_us(void)
{
return (uint64)esp_timer_get_time();
}
uint64
os_time_thread_cputime_us(void)
{
/* FIXME if u know the right api */
return os_time_get_boot_us();
}
uint8 *
os_thread_get_stack_boundary(void)
{

View File

@ -64,7 +64,13 @@ os_vprintf(const char *format, va_list ap);
* Get microseconds after boot.
*/
uint64
os_time_get_boot_microsecond(void);
os_time_get_boot_us(void);
/**
* Get thread-specific CPU-time clock in microseconds
*/
uint64
os_time_thread_cputime_us(void);
/**
* Get current thread id.

View File

@ -26,7 +26,7 @@ ocall_clock_nanosleep(int *p_ret, unsigned clock_id, int flags,
const void *rem_buf, unsigned int rem_buf_size);
uint64
os_time_get_boot_microsecond()
os_time_get_boot_us()
{
#ifndef SGX_DISABLE_WASI
struct timespec ts;
@ -40,6 +40,21 @@ os_time_get_boot_microsecond()
#endif
}
uint64
os_time_thread_cputime_us(void)
{
#ifndef SGX_DISABLE_WASI
struct timespec ts;
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
return 0;
}
return ((uint64)ts.tv_sec) * 1000 * 1000 + ((uint64)ts.tv_nsec) / 1000;
#else
return 0;
#endif
}
#ifndef SGX_DISABLE_WASI
int

View File

@ -10,25 +10,32 @@
#if IS_USED(MODULE_ZTIMER64_USEC)
uint64
os_time_get_boot_microsecond()
os_time_get_boot_us()
{
return ztimer64_now(ZTIMER64_USEC);
}
#elif IS_USED(MODULE_ZTIMER64_MSEC)
uint64
os_time_get_boot_microsecond()
os_time_get_boot_us()
{
return ztimer64_now(ZTIMER64_MSEC) * 1000;
}
#else
#ifdef __GNUC__
__attribute__((weak)) uint64
os_time_get_boot_microsecond();
os_time_get_boot_us();
#endif
uint64
os_time_get_boot_microsecond()
os_time_get_boot_us()
{
static uint64_t times;
return ++times;
}
#endif
uint64
os_time_thread_cputime_us(void)
{
/* FIXME if u know the right api */
return os_time_get_boot_us();
}

View File

@ -120,13 +120,20 @@ os_vprintf(const char *format, va_list ap)
}
uint64
os_time_get_boot_microsecond(void)
os_time_get_boot_us(void)
{
uint64 ret = rt_tick_get() * 1000;
ret /= RT_TICK_PER_SECOND;
return ret;
}
uint64
os_time_thread_cputime_us(void)
{
/* FIXME if u know the right api */
return os_time_get_boot_us();
}
korp_tid
os_self_thread(void)
{

View File

@ -6,7 +6,7 @@
#include "platform_api_vmcore.h"
uint64
os_time_get_boot_microsecond()
os_time_get_boot_us()
{
struct timespec ts;
#if defined(__MINGW32__)
@ -18,3 +18,10 @@ os_time_get_boot_microsecond()
return ((uint64)ts.tv_sec) * 1000 * 1000 + ((uint64)ts.tv_nsec) / 1000;
}
uint64
os_time_thread_cputime_us(void)
{
/* FIXME if u know the right api */
return os_time_get_boot_us();
}

View File

@ -6,7 +6,14 @@
#include "platform_api_vmcore.h"
uint64
os_time_get_boot_microsecond()
os_time_get_boot_us()
{
return k_uptime_get() * 1000;
}
uint64
os_time_thread_cputime_us(void)
{
/* FIXME if u know the right api */
return os_time_get_boot_us();
}

View File

@ -31,7 +31,7 @@ bh_log(LogLevel log_level, const char *file, int line, const char *fmt, ...)
self = os_self_thread();
usec = os_time_get_boot_microsecond();
usec = os_time_get_boot_us();
t = (uint32)(usec / 1000000) % (24 * 60 * 60);
h = t / (60 * 60);
t = t % (60 * 60);

View File

@ -38,7 +38,7 @@ struct _timer_ctx {
uint64
bh_get_tick_ms()
{
return os_time_get_boot_microsecond() / 1000;
return os_time_get_boot_us() / 1000;
}
uint32