Implement the segue optimization for LLVM AOT/JIT (#2230)

Segue is an optimization technique that uses an x86 segment register to store
the WebAssembly linear memory base address. This removes most of the cost of
the SFI (Software-based Fault Isolation) base addition and frees up a
general-purpose register. As a result, it may:
- Improve the performance of JIT/AOT
- Reduce the footprint of JIT/AOT, the JIT/AOT code generated is smaller
- Reduce the compilation time of JIT/AOT

This PR uses the x86-64 GS segment register to apply the optimization. Currently
it supports the linux and linux-sgx platforms on the x86-64 target. It is disabled
by default; developers can use the options below to enable it for wamrc and iwasm
(with LLVM JIT enabled):
```bash
wamrc --enable-segue=[<flags>] -o output_file wasm_file
iwasm --enable-segue=[<flags>] wasm_file [args...]
```
`flags` can be:
    i32.load, i64.load, f32.load, f64.load, v128.load,
    i32.store, i64.store, f32.store, f64.store, v128.store
Use commas to separate them, e.g. `--enable-segue=i32.load,i64.store`;
a bare `--enable-segue` means all flags are added.

Acknowledgement:
Many thanks to the Intel Labs, UC San Diego and UT Austin teams for introducing
this technology and for their great support and guidance!

Signed-off-by: Wenyong Huang <wenyong.huang@intel.com>
Co-authored-by: Vahldiek-oberwagner, Anjo Lucas <anjo.lucas.vahldiek-oberwagner@intel.com>
This commit is contained in:
Wenyong Huang 2023-05-26 10:13:33 +08:00 committed by GitHub
parent 27239723a9
commit 76be848ec3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
42 changed files with 1864 additions and 123 deletions

View File

@ -16,6 +16,7 @@ WAMR project reused some components from other open source project:
- **asmjit**: for the Fast JIT x86-64 codegen implementation
- **zydis**: for the Fast JIT x86-64 codegen implementation
- **NuttX ELF headers**: used in core/iwasm/aot/debug/elf_parser.c
- **Dhrystone**: for the test benchmark dhrystone
The WAMR fast interpreter is a clean room development. We would like to acknowledge the inspiration from the [WASM3](https://github.com/wasm3/wasm3) open source project for the approach of pre-calculated operand stack location.
@ -35,6 +36,7 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the
| asmjit | unspecified | unspecified | https://github.com/asmjit/asmjit | |
| zydis | unspecified | e14a07895136182a5b53e181eec3b1c6e0b434de | https://github.com/zyantific/zydis | |
| NuttX ELF headers | 72313301e23f9c2de969fb64b9a0f67bb4c284df | 10.3.0 | https://github.com/apache/incubator-nuttx | |
| Dhrystone | 2.1 | 2.1 | https://fossies.org/linux/privat/old/ | |
## Licenses
@ -81,15 +83,19 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the
[LICENSE](./tests/wamr-test-suites/spec-test-script/LICENSE)
### libuv
[LICENSE](./core/iwasm/libraries/libc-uvwasi/LICENSE_LIBUV)
### uvwasi
[LICENSE](./core/iwasm/libraries/libc-uvwasi/LICENSE_UVWASI)
### asmjit
[LICENSE](./core/iwasm/fast-jit/cg/LICENSE_ASMJIT)
### zydis
[LICENSE](./core/iwasm/fast-jit/cg/LICENSE_ZYDIS)
### NuttX ELF headers
@ -97,3 +103,7 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the
[LICENSE](./core/iwasm/aot/debug/LICENSE_NUTTX)
[NOTICE](./core/iwasm/aot/debug/NOTICE_NUTTX)
### Dhrystone
[LICENSE](./tests/benchmarks/dhrystone/LICENSE)

View File

@ -2889,6 +2889,16 @@ load(const uint8 *buf, uint32 size, AOTModule *module, char *error_buf,
module->code and will be destroyed in aot_unload() */
destroy_sections(section_list, false);
}
#if 0
{
uint32 i;
for (i = 0; i < module->func_count; i++) {
os_printf("AOT func %u, addr: %p\n", i, module->func_ptrs[i]);
}
}
#endif
return ret;
fail:
return false;

View File

@ -1015,6 +1015,15 @@ execute_post_instantiate_functions(AOTModuleInstance *module_inst,
}
}
#if defined(os_writegsbase)
{
AOTMemoryInstance *memory_inst = aot_get_default_memory(module_inst);
if (memory_inst)
/* write base addr of linear memory to GS segment register */
os_writegsbase(memory_inst->memory_data);
}
#endif
/* Execute start function for both main instance and sub instance */
if (module->start_function) {
AOTFunctionInstance start_func = { 0 };
@ -1453,6 +1462,15 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
}
argc = func_type->param_cell_num;
#if defined(os_writegsbase)
{
AOTMemoryInstance *memory_inst = aot_get_default_memory(module_inst);
if (memory_inst)
/* write base addr of linear memory to GS segment register */
os_writegsbase(memory_inst->memory_data);
}
#endif
/* func pointer was looked up previously */
bh_assert(function->u.func.func_ptr != NULL);

View File

@ -624,6 +624,11 @@ wasm_enlarge_memory_internal(WASMModuleInstance *module, uint32 inc_page_count)
#endif
#endif
#if defined(os_writegsbase)
/* write base addr of linear memory to GS segment register */
os_writegsbase(memory_data_new);
#endif
return ret;
}
#else
@ -756,4 +761,4 @@ wasm_get_linear_memory_size(WASMMemoryInstance *memory, void *node)
#endif
return linear_mem_size;
}
#endif
#endif

View File

@ -130,7 +130,7 @@ static JitCompOptions jit_options = { 0 };
#endif
#if WASM_ENABLE_JIT != 0
static LLVMJITOptions llvm_jit_options = { 3, 3 };
static LLVMJITOptions llvm_jit_options = { 3, 3, 0 };
#endif
static RunningMode runtime_running_mode = Mode_Default;
@ -554,6 +554,7 @@ wasm_runtime_full_init(RuntimeInitArgs *init_args)
#if WASM_ENABLE_JIT != 0
llvm_jit_options.size_level = init_args->llvm_jit_size_level;
llvm_jit_options.opt_level = init_args->llvm_jit_opt_level;
llvm_jit_options.segue_flags = init_args->segue_flags;
#endif
if (!wasm_runtime_env_init()) {

View File

@ -420,6 +420,7 @@ typedef struct wasm_frame_t {
/* Options passed from the runtime to the LLVM JIT compiler. */
typedef struct LLVMJITOptions {
/* LLVM JIT optimization level */
uint32 opt_level;
/* LLVM JIT size level */
uint32 size_level;
/* Segue optimization flags for LLVM JIT: a bit mask selecting which
   load/store kinds use the GS segment register as the linear memory
   base. The compiler context tests bits 0..4 for i32/i64/f32/f64/v128
   loads and bits 8..12 for the corresponding stores.
   NOTE(review): 0 presumably leaves segue fully disabled - confirm */
uint32 segue_flags;
} LLVMJITOptions;
#endif

View File

@ -239,6 +239,13 @@ check_type_compatible(uint8 src_type, uint8 dst_type)
#define FUNC_REF_TYPE comp_ctx->basic_types.funcref_type
#define EXTERN_REF_TYPE comp_ctx->basic_types.externref_type
#define INT8_PTR_TYPE_GS comp_ctx->basic_types.int8_ptr_type_gs
#define INT16_PTR_TYPE_GS comp_ctx->basic_types.int16_ptr_type_gs
#define INT32_PTR_TYPE_GS comp_ctx->basic_types.int32_ptr_type_gs
#define INT64_PTR_TYPE_GS comp_ctx->basic_types.int64_ptr_type_gs
#define F32_PTR_TYPE_GS comp_ctx->basic_types.float32_ptr_type_gs
#define F64_PTR_TYPE_GS comp_ctx->basic_types.float64_ptr_type_gs
#define I32_CONST(v) LLVMConstInt(I32_TYPE, v, true)
#define I64_CONST(v) LLVMConstInt(I64_TYPE, v, true)
#define F32_CONST(v) LLVMConstReal(F32_TYPE, v)
@ -272,6 +279,7 @@ check_type_compatible(uint8 src_type, uint8 dst_type)
#define V128_TYPE comp_ctx->basic_types.v128_type
#define V128_PTR_TYPE comp_ctx->basic_types.v128_ptr_type
#define V128_PTR_TYPE_GS comp_ctx->basic_types.v128_ptr_type_gs
#define V128_i8x16_TYPE comp_ctx->basic_types.i8x16_vec_type
#define V128_i16x8_TYPE comp_ctx->basic_types.i16x8_vec_type
#define V128_i32x4_TYPE comp_ctx->basic_types.i32x4_vec_type

View File

@ -81,7 +81,7 @@ get_memory_curr_page_count(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
LLVMValueRef
aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 offset, uint32 bytes)
uint32 offset, uint32 bytes, bool enable_segue)
{
LLVMValueRef offset_const = I32_CONST(offset);
LLVMValueRef addr, maddr, offset1, cmp1, cmp2, cmp;
@ -162,11 +162,20 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
/* inside memory space */
offset1 = I32_CONST((uint32)mem_offset);
CHECK_LLVM_CONST(offset1);
if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE,
mem_base_addr, &offset1, 1,
"maddr"))) {
aot_set_last_error("llvm build add failed.");
goto fail;
if (!enable_segue) {
if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder,
INT8_TYPE, mem_base_addr,
&offset1, 1, "maddr"))) {
aot_set_last_error("llvm build add failed.");
goto fail;
}
}
else {
if (!(maddr = LLVMBuildIntToPtr(comp_ctx->builder, offset1,
INT8_PTR_TYPE_GS, "maddr"))) {
aot_set_last_error("llvm build IntToPtr failed.");
goto fail;
}
}
return maddr;
}
@ -244,11 +253,29 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
}
}
/* maddr = mem_base_addr + offset1 */
if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE,
if (!enable_segue) {
/* maddr = mem_base_addr + offset1 */
if (!(maddr =
LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE,
mem_base_addr, &offset1, 1, "maddr"))) {
aot_set_last_error("llvm build add failed.");
goto fail;
aot_set_last_error("llvm build add failed.");
goto fail;
}
}
else {
LLVMValueRef maddr_base;
if (!(maddr_base = LLVMBuildIntToPtr(comp_ctx->builder, addr,
INT8_PTR_TYPE_GS, "maddr_base"))) {
aot_set_last_error("llvm build int to ptr failed.");
goto fail;
}
if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE,
maddr_base, &offset_const, 1,
"maddr"))) {
aot_set_last_error("llvm build inboundgep failed.");
goto fail;
}
}
return maddr;
fail:
@ -388,13 +415,18 @@ aot_compile_op_i32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
{
LLVMValueRef maddr, value = NULL;
LLVMTypeRef data_type;
bool enable_segue = comp_ctx->enable_segue_i32_load;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
enable_segue)))
return false;
switch (bytes) {
case 4:
BUILD_PTR_CAST(INT32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT32_PTR_TYPE);
else
BUILD_PTR_CAST(INT32_PTR_TYPE_GS);
#if WASM_ENABLE_SHARED_MEMORY != 0
if (atomic)
BUILD_ATOMIC_LOAD(align, I32_TYPE);
@ -405,11 +437,17 @@ aot_compile_op_i32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
case 2:
case 1:
if (bytes == 2) {
BUILD_PTR_CAST(INT16_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT16_PTR_TYPE);
else
BUILD_PTR_CAST(INT16_PTR_TYPE_GS);
data_type = INT16_TYPE;
}
else {
BUILD_PTR_CAST(INT8_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT8_PTR_TYPE);
else
BUILD_PTR_CAST(INT8_PTR_TYPE_GS);
data_type = INT8_TYPE;
}
@ -447,13 +485,18 @@ aot_compile_op_i64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
{
LLVMValueRef maddr, value = NULL;
LLVMTypeRef data_type;
bool enable_segue = comp_ctx->enable_segue_i64_load;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
enable_segue)))
return false;
switch (bytes) {
case 8:
BUILD_PTR_CAST(INT64_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT64_PTR_TYPE);
else
BUILD_PTR_CAST(INT64_PTR_TYPE_GS);
#if WASM_ENABLE_SHARED_MEMORY != 0
if (atomic)
BUILD_ATOMIC_LOAD(align, I64_TYPE);
@ -465,15 +508,24 @@ aot_compile_op_i64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
case 2:
case 1:
if (bytes == 4) {
BUILD_PTR_CAST(INT32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT32_PTR_TYPE);
else
BUILD_PTR_CAST(INT32_PTR_TYPE_GS);
data_type = I32_TYPE;
}
else if (bytes == 2) {
BUILD_PTR_CAST(INT16_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT16_PTR_TYPE);
else
BUILD_PTR_CAST(INT16_PTR_TYPE_GS);
data_type = INT16_TYPE;
}
else {
BUILD_PTR_CAST(INT8_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT8_PTR_TYPE);
else
BUILD_PTR_CAST(INT8_PTR_TYPE_GS);
data_type = INT8_TYPE;
}
@ -509,12 +561,18 @@ aot_compile_op_f32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
LLVMValueRef maddr, value;
bool enable_segue = comp_ctx->enable_segue_f32_load;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4,
enable_segue)))
return false;
BUILD_PTR_CAST(F32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(F32_PTR_TYPE);
else
BUILD_PTR_CAST(F32_PTR_TYPE_GS);
BUILD_LOAD(F32_TYPE);
PUSH_F32(value);
return true;
fail:
@ -526,12 +584,18 @@ aot_compile_op_f64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
LLVMValueRef maddr, value;
bool enable_segue = comp_ctx->enable_segue_f64_load;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8,
enable_segue)))
return false;
BUILD_PTR_CAST(F64_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(F64_PTR_TYPE);
else
BUILD_PTR_CAST(F64_PTR_TYPE_GS);
BUILD_LOAD(F64_TYPE);
PUSH_F64(value);
return true;
fail:
@ -543,22 +607,33 @@ aot_compile_op_i32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset, uint32 bytes, bool atomic)
{
LLVMValueRef maddr, value;
bool enable_segue = comp_ctx->enable_segue_i32_store;
POP_I32(value);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
enable_segue)))
return false;
switch (bytes) {
case 4:
BUILD_PTR_CAST(INT32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT32_PTR_TYPE);
else
BUILD_PTR_CAST(INT32_PTR_TYPE_GS);
break;
case 2:
BUILD_PTR_CAST(INT16_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT16_PTR_TYPE);
else
BUILD_PTR_CAST(INT16_PTR_TYPE_GS);
BUILD_TRUNC(value, INT16_TYPE);
break;
case 1:
BUILD_PTR_CAST(INT8_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT8_PTR_TYPE);
else
BUILD_PTR_CAST(INT8_PTR_TYPE_GS);
BUILD_TRUNC(value, INT8_TYPE);
break;
default:
@ -582,26 +657,40 @@ aot_compile_op_i64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset, uint32 bytes, bool atomic)
{
LLVMValueRef maddr, value;
bool enable_segue = comp_ctx->enable_segue_i64_store;
POP_I64(value);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
enable_segue)))
return false;
switch (bytes) {
case 8:
BUILD_PTR_CAST(INT64_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT64_PTR_TYPE);
else
BUILD_PTR_CAST(INT64_PTR_TYPE_GS);
break;
case 4:
BUILD_PTR_CAST(INT32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT32_PTR_TYPE);
else
BUILD_PTR_CAST(INT32_PTR_TYPE_GS);
BUILD_TRUNC(value, I32_TYPE);
break;
case 2:
BUILD_PTR_CAST(INT16_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT16_PTR_TYPE);
else
BUILD_PTR_CAST(INT16_PTR_TYPE_GS);
BUILD_TRUNC(value, INT16_TYPE);
break;
case 1:
BUILD_PTR_CAST(INT8_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT8_PTR_TYPE);
else
BUILD_PTR_CAST(INT8_PTR_TYPE_GS);
BUILD_TRUNC(value, INT8_TYPE);
break;
default:
@ -625,13 +714,18 @@ aot_compile_op_f32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
LLVMValueRef maddr, value;
bool enable_segue = comp_ctx->enable_segue_f32_store;
POP_F32(value);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4,
enable_segue)))
return false;
BUILD_PTR_CAST(F32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(F32_PTR_TYPE);
else
BUILD_PTR_CAST(F32_PTR_TYPE_GS);
BUILD_STORE();
return true;
fail:
@ -643,13 +737,18 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
LLVMValueRef maddr, value;
bool enable_segue = comp_ctx->enable_segue_f64_store;
POP_F64(value);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8,
enable_segue)))
return false;
BUILD_PTR_CAST(F64_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(F64_PTR_TYPE);
else
BUILD_PTR_CAST(F64_PTR_TYPE_GS);
BUILD_STORE();
return true;
fail:
@ -1140,13 +1239,19 @@ aot_compile_op_atomic_rmw(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 offset, uint32 bytes)
{
LLVMValueRef maddr, value, result;
bool enable_segue = (op_type == VALUE_TYPE_I32)
? comp_ctx->enable_segue_i32_load
&& comp_ctx->enable_segue_i32_store
: comp_ctx->enable_segue_i64_load
&& comp_ctx->enable_segue_i64_store;
if (op_type == VALUE_TYPE_I32)
POP_I32(value);
else
POP_I64(value);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
enable_segue)))
return false;
if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@ -1154,19 +1259,31 @@ aot_compile_op_atomic_rmw(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
switch (bytes) {
case 8:
BUILD_PTR_CAST(INT64_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT64_PTR_TYPE);
else
BUILD_PTR_CAST(INT64_PTR_TYPE_GS);
break;
case 4:
BUILD_PTR_CAST(INT32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT32_PTR_TYPE);
else
BUILD_PTR_CAST(INT32_PTR_TYPE_GS);
if (op_type == VALUE_TYPE_I64)
BUILD_TRUNC(value, I32_TYPE);
break;
case 2:
BUILD_PTR_CAST(INT16_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT16_PTR_TYPE);
else
BUILD_PTR_CAST(INT16_PTR_TYPE_GS);
BUILD_TRUNC(value, INT16_TYPE);
break;
case 1:
BUILD_PTR_CAST(INT8_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT8_PTR_TYPE);
else
BUILD_PTR_CAST(INT8_PTR_TYPE_GS);
BUILD_TRUNC(value, INT8_TYPE);
break;
default:
@ -1208,6 +1325,11 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx,
uint32 align, uint32 offset, uint32 bytes)
{
LLVMValueRef maddr, value, expect, result;
bool enable_segue = (op_type == VALUE_TYPE_I32)
? comp_ctx->enable_segue_i32_load
&& comp_ctx->enable_segue_i32_store
: comp_ctx->enable_segue_i64_load
&& comp_ctx->enable_segue_i64_store;
if (op_type == VALUE_TYPE_I32) {
POP_I32(value);
@ -1218,7 +1340,8 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx,
POP_I64(expect);
}
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
enable_segue)))
return false;
if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@ -1226,22 +1349,34 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx,
switch (bytes) {
case 8:
BUILD_PTR_CAST(INT64_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT64_PTR_TYPE);
else
BUILD_PTR_CAST(INT64_PTR_TYPE_GS);
break;
case 4:
BUILD_PTR_CAST(INT32_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT32_PTR_TYPE);
else
BUILD_PTR_CAST(INT32_PTR_TYPE_GS);
if (op_type == VALUE_TYPE_I64) {
BUILD_TRUNC(value, I32_TYPE);
BUILD_TRUNC(expect, I32_TYPE);
}
break;
case 2:
BUILD_PTR_CAST(INT16_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT16_PTR_TYPE);
else
BUILD_PTR_CAST(INT16_PTR_TYPE_GS);
BUILD_TRUNC(value, INT16_TYPE);
BUILD_TRUNC(expect, INT16_TYPE);
break;
case 1:
BUILD_PTR_CAST(INT8_PTR_TYPE);
if (!enable_segue)
BUILD_PTR_CAST(INT8_PTR_TYPE);
else
BUILD_PTR_CAST(INT8_PTR_TYPE_GS);
BUILD_TRUNC(value, INT8_TYPE);
BUILD_TRUNC(expect, INT8_TYPE);
break;
@ -1318,7 +1453,8 @@ aot_compile_op_atomic_wait(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
CHECK_LLVM_CONST(is_wait64);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
false)))
return false;
if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@ -1393,7 +1529,8 @@ aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx,
POP_I32(count);
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes)))
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
false)))
return false;
if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))

View File

@ -53,7 +53,7 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMValueRef
aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 offset, uint32 bytes);
uint32 offset, uint32 bytes, bool enable_segue);
bool
aot_compile_op_memory_size(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);

View File

@ -1132,6 +1132,28 @@ aot_set_llvm_basic_types(AOTLLVMTypes *basic_types, LLVMContextRef context)
basic_types->v128_type = basic_types->i64x2_vec_type;
basic_types->v128_ptr_type = LLVMPointerType(basic_types->v128_type, 0);
basic_types->int8_ptr_type_gs =
LLVMPointerType(basic_types->int8_type, 256);
basic_types->int16_ptr_type_gs =
LLVMPointerType(basic_types->int16_type, 256);
basic_types->int32_ptr_type_gs =
LLVMPointerType(basic_types->int32_type, 256);
basic_types->int64_ptr_type_gs =
LLVMPointerType(basic_types->int64_type, 256);
basic_types->float32_ptr_type_gs =
LLVMPointerType(basic_types->float32_type, 256);
basic_types->float64_ptr_type_gs =
LLVMPointerType(basic_types->float64_type, 256);
basic_types->v128_ptr_type_gs =
LLVMPointerType(basic_types->v128_type, 256);
if (!basic_types->int8_ptr_type_gs || !basic_types->int16_ptr_type_gs
|| !basic_types->int32_ptr_type_gs || !basic_types->int64_ptr_type_gs
|| !basic_types->float32_ptr_type_gs
|| !basic_types->float64_ptr_type_gs
|| !basic_types->v128_ptr_type_gs) {
return false;
}
basic_types->i1x2_vec_type = LLVMVectorType(basic_types->int1_type, 2);
basic_types->funcref_type = LLVMInt32TypeInContext(context);
@ -2073,6 +2095,37 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
}
}
triple = LLVMGetTargetMachineTriple(comp_ctx->target_machine);
if (!triple) {
aot_set_last_error("get target machine triple failed.");
goto fail;
}
if (strstr(triple, "linux") && !strcmp(comp_ctx->target_arch, "x86_64")) {
if (option->segue_flags) {
if (option->segue_flags & (1 << 0))
comp_ctx->enable_segue_i32_load = true;
if (option->segue_flags & (1 << 1))
comp_ctx->enable_segue_i64_load = true;
if (option->segue_flags & (1 << 2))
comp_ctx->enable_segue_f32_load = true;
if (option->segue_flags & (1 << 3))
comp_ctx->enable_segue_f64_load = true;
if (option->segue_flags & (1 << 4))
comp_ctx->enable_segue_v128_load = true;
if (option->segue_flags & (1 << 8))
comp_ctx->enable_segue_i32_store = true;
if (option->segue_flags & (1 << 9))
comp_ctx->enable_segue_i64_store = true;
if (option->segue_flags & (1 << 10))
comp_ctx->enable_segue_f32_store = true;
if (option->segue_flags & (1 << 11))
comp_ctx->enable_segue_f64_store = true;
if (option->segue_flags & (1 << 12))
comp_ctx->enable_segue_v128_store = true;
}
}
LLVMDisposeMessage(triple);
if (option->enable_simd && strcmp(comp_ctx->target_arch, "x86_64") != 0
&& strncmp(comp_ctx->target_arch, "aarch64", 7) != 0) {
/* Disable simd if it isn't supported by target arch */

View File

@ -214,6 +214,14 @@ typedef struct AOTLLVMTypes {
LLVMTypeRef f32x4_vec_type;
LLVMTypeRef f64x2_vec_type;
LLVMTypeRef int8_ptr_type_gs;
LLVMTypeRef int16_ptr_type_gs;
LLVMTypeRef int32_ptr_type_gs;
LLVMTypeRef int64_ptr_type_gs;
LLVMTypeRef float32_ptr_type_gs;
LLVMTypeRef float64_ptr_type_gs;
LLVMTypeRef v128_ptr_type_gs;
LLVMTypeRef i1x2_vec_type;
LLVMTypeRef meta_data_type;
@ -341,6 +349,19 @@ typedef struct AOTCompContext {
/* Disable LLVM link time optimization */
bool disable_llvm_lto;
/* Enable using the segment register as the base addr
of linear memory for load/store operations */
bool enable_segue_i32_load;
bool enable_segue_i64_load;
bool enable_segue_f32_load;
bool enable_segue_f64_load;
bool enable_segue_v128_load;
bool enable_segue_i32_store;
bool enable_segue_i64_store;
bool enable_segue_f32_store;
bool enable_segue_f64_store;
bool enable_segue_v128_store;
/* Whether optimize the JITed code */
bool optimize;
@ -413,6 +434,7 @@ typedef struct AOTCompOption {
uint32 output_format;
uint32 bounds_checks;
uint32 stack_bounds_checks;
uint32 segue_flags;
char **custom_sections;
uint32 custom_sections_count;
const char *stack_usage_file;

View File

@ -14,12 +14,12 @@
static LLVMValueRef
simd_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align,
uint32 offset, uint32 data_length, LLVMTypeRef ptr_type,
LLVMTypeRef data_type)
LLVMTypeRef data_type, bool enable_segue)
{
LLVMValueRef maddr, data;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset,
data_length))) {
data_length, enable_segue))) {
HANDLE_FAILURE("aot_check_memory_overflow");
return NULL;
}
@ -44,10 +44,12 @@ bool
aot_compile_simd_v128_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
bool enable_segue = comp_ctx->enable_segue_v128_load;
LLVMTypeRef v128_ptr_type = enable_segue ? V128_PTR_TYPE_GS : V128_PTR_TYPE;
LLVMValueRef result;
if (!(result = simd_load(comp_ctx, func_ctx, align, offset, 16,
V128_PTR_TYPE, V128_TYPE))) {
v128_ptr_type, V128_TYPE, enable_segue))) {
return false;
}
@ -75,6 +77,7 @@ aot_compile_simd_load_extend(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMVectorType(I32_TYPE, 2), LLVMVectorType(I32_TYPE, 2),
};
LLVMTypeRef sub_vector_type, sub_vector_ptr_type;
bool enable_segue = comp_ctx->enable_segue_v128_load;
bh_assert(opcode_index < 6);
@ -82,13 +85,15 @@ aot_compile_simd_load_extend(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
/* to vector ptr type */
if (!sub_vector_type
|| !(sub_vector_ptr_type = LLVMPointerType(sub_vector_type, 0))) {
|| !(sub_vector_ptr_type =
LLVMPointerType(sub_vector_type, enable_segue ? 256 : 0))) {
HANDLE_FAILURE("LLVMPointerType");
return false;
}
if (!(sub_vector = simd_load(comp_ctx, func_ctx, align, offset, 8,
sub_vector_ptr_type, sub_vector_type))) {
if (!(sub_vector =
simd_load(comp_ctx, func_ctx, align, offset, 8,
sub_vector_ptr_type, sub_vector_type, enable_segue))) {
return false;
}
@ -118,6 +123,9 @@ aot_compile_simd_load_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVMValueRef element, result;
LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE,
INT32_PTR_TYPE, INT64_PTR_TYPE };
LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS,
INT32_PTR_TYPE_GS,
INT64_PTR_TYPE_GS };
LLVMTypeRef element_data_types[] = { INT8_TYPE, INT16_TYPE, I32_TYPE,
I64_TYPE };
uint32 data_lengths[] = { 1, 2, 4, 8 };
@ -133,13 +141,16 @@ aot_compile_simd_load_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVM_CONST(i32x4_zero),
LLVM_CONST(i32x2_zero),
};
bool enable_segue = comp_ctx->enable_segue_v128_load;
bh_assert(opcode_index < 4);
if (!(element = simd_load(comp_ctx, func_ctx, align, offset,
data_lengths[opcode_index],
element_ptr_types[opcode_index],
element_data_types[opcode_index]))) {
if (!(element = simd_load(
comp_ctx, func_ctx, align, offset, data_lengths[opcode_index],
comp_ctx->enable_segue_v128_load
? element_ptr_types_gs[opcode_index]
: element_ptr_types[opcode_index],
element_data_types[opcode_index], enable_segue))) {
return false;
}
@ -170,11 +181,15 @@ aot_compile_simd_load_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 data_lengths[] = { 1, 2, 4, 8 };
LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE,
INT32_PTR_TYPE, INT64_PTR_TYPE };
LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS,
INT32_PTR_TYPE_GS,
INT64_PTR_TYPE_GS };
LLVMTypeRef element_data_types[] = { INT8_TYPE, INT16_TYPE, I32_TYPE,
I64_TYPE };
LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
V128_i32x4_TYPE, V128_i64x2_TYPE };
LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id);
bool enable_segue = comp_ctx->enable_segue_v128_load;
bh_assert(opcode_index < 4);
@ -183,10 +198,12 @@ aot_compile_simd_load_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return false;
}
if (!(element = simd_load(comp_ctx, func_ctx, align, offset,
data_lengths[opcode_index],
element_ptr_types[opcode_index],
element_data_types[opcode_index]))) {
if (!(element = simd_load(
comp_ctx, func_ctx, align, offset, data_lengths[opcode_index],
comp_ctx->enable_segue_v128_load
? element_ptr_types_gs[opcode_index]
: element_ptr_types[opcode_index],
element_data_types[opcode_index], enable_segue))) {
return false;
}
@ -207,6 +224,8 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 opcode_index = opcode - SIMD_v128_load32_zero;
uint32 data_lengths[] = { 4, 8 };
LLVMTypeRef element_ptr_types[] = { INT32_PTR_TYPE, INT64_PTR_TYPE };
LLVMTypeRef element_ptr_types_gs[] = { INT32_PTR_TYPE_GS,
INT64_PTR_TYPE_GS };
LLVMTypeRef element_data_types[] = { I32_TYPE, I64_TYPE };
LLVMValueRef zero[] = {
LLVM_CONST(i32x4_vec_zero),
@ -222,13 +241,16 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
LLVM_CONST(i32_six) },
{ LLVM_CONST(i32_zero), LLVM_CONST(i32_two) },
};
bool enable_segue = comp_ctx->enable_segue_v128_load;
bh_assert(opcode_index < 2);
if (!(element = simd_load(comp_ctx, func_ctx, align, offset,
data_lengths[opcode_index],
element_ptr_types[opcode_index],
element_data_types[opcode_index]))) {
if (!(element = simd_load(
comp_ctx, func_ctx, align, offset, data_lengths[opcode_index],
comp_ctx->enable_segue_v128_load
? element_ptr_types_gs[opcode_index]
: element_ptr_types[opcode_index],
element_data_types[opcode_index], enable_segue))) {
return false;
}
@ -260,12 +282,12 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
static bool
simd_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align,
uint32 offset, uint32 data_length, LLVMValueRef value,
LLVMTypeRef value_ptr_type)
LLVMTypeRef value_ptr_type, bool enable_segue)
{
LLVMValueRef maddr, result;
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset,
data_length)))
data_length, enable_segue)))
return false;
if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, value_ptr_type,
@ -288,12 +310,14 @@ bool
aot_compile_simd_v128_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 align, uint32 offset)
{
bool enable_segue = comp_ctx->enable_segue_v128_store;
LLVMTypeRef v128_ptr_type = enable_segue ? V128_PTR_TYPE_GS : V128_PTR_TYPE;
LLVMValueRef value;
POP_V128(value);
return simd_store(comp_ctx, func_ctx, align, offset, 16, value,
V128_PTR_TYPE);
v128_ptr_type, enable_segue);
fail:
return false;
}
@ -307,10 +331,14 @@ aot_compile_simd_store_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 data_lengths[] = { 1, 2, 4, 8 };
LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE,
INT32_PTR_TYPE, INT64_PTR_TYPE };
LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS,
INT32_PTR_TYPE_GS,
INT64_PTR_TYPE_GS };
uint32 opcode_index = opcode - SIMD_v128_store8_lane;
LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
V128_i32x4_TYPE, V128_i64x2_TYPE };
LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id);
bool enable_segue = comp_ctx->enable_segue_v128_store;
bh_assert(opcode_index < 4);
@ -327,5 +355,7 @@ aot_compile_simd_store_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return simd_store(comp_ctx, func_ctx, align, offset,
data_lengths[opcode_index], element,
element_ptr_types[opcode_index]);
enable_segue ? element_ptr_types_gs[opcode_index]
: element_ptr_types[opcode_index],
enable_segue);
}

View File

@ -61,6 +61,7 @@ typedef struct AOTCompOption {
uint32_t output_format;
uint32_t bounds_checks;
uint32_t stack_bounds_checks;
uint32_t segue_flags;
char **custom_sections;
uint32_t custom_sections_count;
const char *stack_usage_file;

View File

@ -167,6 +167,8 @@ typedef struct RuntimeInitArgs {
/* LLVM JIT opt and size level */
uint32_t llvm_jit_opt_level;
uint32_t llvm_jit_size_level;
/* Segue optimization flags for LLVM JIT */
uint32_t segue_flags;
} RuntimeInitArgs;
#ifndef WASM_VALKIND_T_DEFINED
@ -1351,20 +1353,21 @@ WASM_RUNTIME_API_EXTERN void
wasm_runtime_get_version(uint32_t *major, uint32_t *minor, uint32_t *patch);
/**
* Check whether an import func `(import <module_name> <func_name> (func ...))` is linked or not
* with runtime registered natvie functions
* Check whether an import func `(import <module_name> <func_name> (func ...))`
* is linked or not with runtime registered native functions
*/
WASM_RUNTIME_API_EXTERN bool
wasm_runtime_is_import_func_linked(const char *module_name,
const char *func_name);
/**
* Check whether an import global `(import <module_name> <global_name> (global ...))` is linked or not
* with runtime registered natvie globals
* Check whether an import global `(import <module_name> <global_name> (global ...))`
* is linked or not with runtime registered native globals
*/
WASM_RUNTIME_API_EXTERN bool
wasm_runtime_is_import_global_linked(const char *module_name,
const char *global_name);
/* clang-format on */
#ifdef __cplusplus

View File

@ -4231,6 +4231,15 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
wasm_exec_env_set_cur_frame(exec_env, frame);
#if defined(os_writegsbase)
{
WASMMemoryInstance *memory_inst = wasm_get_default_memory(module_inst);
if (memory_inst)
/* write base addr of linear memory to GS segment register */
os_writegsbase(memory_inst->memory_data);
}
#endif
if (function->is_import_func) {
#if WASM_ENABLE_MULTI_MODULE != 0
if (function->import_module_inst) {

View File

@ -3979,6 +3979,15 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
wasm_exec_env_set_cur_frame(exec_env, frame);
#if defined(os_writegsbase)
{
WASMMemoryInstance *memory_inst = wasm_get_default_memory(module_inst);
if (memory_inst)
/* write base addr of linear memory to GS segment register */
os_writegsbase(memory_inst->memory_data);
}
#endif
if (function->is_import_func) {
#if WASM_ENABLE_MULTI_MODULE != 0
if (function->import_module_inst) {

View File

@ -3000,7 +3000,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
if (module->function_count == 0)
return true;
#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LLVM_JIT != 0
#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LAZY_JIT != 0
if (os_mutex_init(&module->tierup_wait_lock) != 0) {
set_error_buf(error_buf, error_buf_size, "init jit tierup lock failed");
return false;
@ -3035,6 +3035,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
llvm_jit_options = wasm_runtime_get_llvm_jit_options();
option.opt_level = llvm_jit_options.opt_level;
option.size_level = llvm_jit_options.size_level;
option.segue_flags = llvm_jit_options.segue_flags;
#if WASM_ENABLE_BULK_MEMORY != 0
option.enable_bulk_memory = true;

View File

@ -1843,7 +1843,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
if (module->function_count == 0)
return true;
#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LLVM_JIT != 0
#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LAZY_JIT != 0
if (os_mutex_init(&module->tierup_wait_lock) != 0) {
set_error_buf(error_buf, error_buf_size, "init jit tierup lock failed");
return false;
@ -1876,6 +1876,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
option.is_jit_mode = true;
option.opt_level = llvm_jit_options.opt_level;
option.size_level = llvm_jit_options.size_level;
option.segue_flags = llvm_jit_options.segue_flags;
#if WASM_ENABLE_BULK_MEMORY != 0
option.enable_bulk_memory = true;

View File

@ -56,6 +56,20 @@ typedef unsigned int korp_sem;
#define OS_THREAD_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
#endif
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
/*
 * Write base_addr into the base of the GS segment register with the
 * x86-64 `wrgsbase` instruction.
 *
 * base_addr may be any pointer or integer expression: it is parenthesized
 * before the casts so that an argument such as `p + off` is converted as a
 * whole. The temporary avoids the implementation-reserved `__` prefix, and
 * `__asm__ __volatile__` is used so the macro also expands under strict
 * ISO C modes of GCC/Clang.
 */
#define os_writegsbase(base_addr)                                      \
    do {                                                               \
        uint64 os_gs_base_value_ = (uint64)(uintptr_t)(base_addr);     \
        __asm__ __volatile__("wrgsbase %0"                             \
                             :                                         \
                             : "r"(os_gs_base_value_)                  \
                             : "memory");                              \
    } while (0)
#if 0
/* _writegsbase_u64 also works, but need to add -mfsgsbase flag for gcc */
#include <immintrin.h>
#define os_writegsbase(base_addr) \
_writegsbase_u64(((uint64)(uintptr_t)base_addr))
#endif
#endif
typedef int (*os_print_function_t)(const char *message);
void
os_set_print_function(os_print_function_t pf);

View File

@ -63,6 +63,20 @@ typedef sem_t korp_sem;
#define bh_socket_t int
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
/*
 * Write base_addr into the base of the GS segment register with the
 * x86-64 `wrgsbase` instruction.
 *
 * base_addr may be any pointer or integer expression: it is parenthesized
 * before the casts so that an argument such as `p + off` is converted as a
 * whole. The temporary avoids the implementation-reserved `__` prefix, and
 * `__asm__ __volatile__` is used so the macro also expands under strict
 * ISO C modes of GCC/Clang.
 */
#define os_writegsbase(base_addr)                                      \
    do {                                                               \
        uint64 os_gs_base_value_ = (uint64)(uintptr_t)(base_addr);     \
        __asm__ __volatile__("wrgsbase %0"                             \
                             :                                         \
                             : "r"(os_gs_base_value_)                  \
                             : "memory");                              \
    } while (0)
#if 0
/* _writegsbase_u64 also works, but need to add -mfsgsbase flag for gcc */
#include <immintrin.h>
#define os_writegsbase(base_addr) \
_writegsbase_u64(((uint64)(uintptr_t)base_addr))
#endif
#endif
#if WASM_DISABLE_HW_BOUND_CHECK == 0
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
|| defined(BUILD_TARGET_AARCH64) || defined(BUILD_TARGET_RISCV64_LP64D) \

View File

@ -54,6 +54,14 @@ print_help()
#if WASM_ENABLE_JIT != 0
printf(" --llvm-jit-size-level=n Set LLVM JIT size level, default is 3\n");
printf(" --llvm-jit-opt-level=n Set LLVM JIT optimization level, default is 3\n");
#if defined(os_writegsbase)
printf(" --enable-segue[=<flags>] Enable using segment register GS as the base address of\n");
printf(" linear memory, which may improve performance, flags can be:\n");
printf(" i32.load, i64.load, f32.load, f64.load, v128.load,\n");
printf(" i32.store, i64.store, f32.store, f64.store, v128.store\n");
printf(" Use comma to separate, e.g. --enable-segue=i32.load,i64.store\n");
printf(" and --enable-segue means all flags are added.\n");
#endif
#endif
printf(" --repl Start a very simple REPL (read-eval-print-loop) mode\n"
" that runs commands in the form of \"FUNC ARG...\"\n");
@ -117,13 +125,13 @@ app_instance_func(wasm_module_inst_t module_inst, const char *func_name)
}
/**
* Split a space separated strings into an array of strings
* Split a string into an array of strings
* Returns NULL on failure
* Memory must be freed by caller
* Based on: http://stackoverflow.com/a/11198630/471795
*/
static char **
split_string(char *str, int *count)
split_string(char *str, int *count, const char *delimer)
{
char **res = NULL, **res1;
char *p;
@ -131,7 +139,7 @@ split_string(char *str, int *count)
/* split string and append tokens to 'res' */
do {
p = strtok(str, " ");
p = strtok(str, delimer);
str = NULL;
res1 = res;
res = (char **)realloc(res1, sizeof(char *) * (uint32)(idx + 1));
@ -180,7 +188,7 @@ app_instance_repl(wasm_module_inst_t module_inst)
printf("exit repl mode\n");
break;
}
app_argv = split_string(cmd, &app_argc);
app_argv = split_string(cmd, &app_argc, " ");
if (app_argv == NULL) {
LOG_ERROR("Wasm prepare param failed: split string failed.\n");
break;
@ -195,6 +203,59 @@ app_instance_repl(wasm_module_inst_t module_inst)
return NULL;
}
#if WASM_ENABLE_JIT != 0
static uint32
resolve_segue_flags(char *str_flags)
{
uint32 segue_flags = 0;
int32 flag_count, i;
char **flag_list;
flag_list = split_string(str_flags, &flag_count, ",");
if (flag_list) {
for (i = 0; i < flag_count; i++) {
if (!strcmp(flag_list[i], "i32.load")) {
segue_flags |= 1 << 0;
}
else if (!strcmp(flag_list[i], "i64.load")) {
segue_flags |= 1 << 1;
}
else if (!strcmp(flag_list[i], "f32.load")) {
segue_flags |= 1 << 2;
}
else if (!strcmp(flag_list[i], "f64.load")) {
segue_flags |= 1 << 3;
}
else if (!strcmp(flag_list[i], "v128.load")) {
segue_flags |= 1 << 4;
}
else if (!strcmp(flag_list[i], "i32.store")) {
segue_flags |= 1 << 8;
}
else if (!strcmp(flag_list[i], "i64.store")) {
segue_flags |= 1 << 9;
}
else if (!strcmp(flag_list[i], "f32.store")) {
segue_flags |= 1 << 10;
}
else if (!strcmp(flag_list[i], "f64.store")) {
segue_flags |= 1 << 11;
}
else if (!strcmp(flag_list[i], "v128.store")) {
segue_flags |= 1 << 12;
}
else {
/* invalid flag */
segue_flags = (uint32)-1;
break;
}
}
free(flag_list);
}
return segue_flags;
}
#endif /* end of WASM_ENABLE_JIT != 0 */
#if WASM_ENABLE_LIBC_WASI != 0
static bool
validate_env_str(char *env)
@ -367,6 +428,7 @@ main(int argc, char *argv[])
#if WASM_ENABLE_JIT != 0
uint32 llvm_jit_size_level = 3;
uint32 llvm_jit_opt_level = 3;
uint32 segue_flags = 0;
#endif
wasm_module_t wasm_module = NULL;
wasm_module_inst_t wasm_module_inst = NULL;
@ -487,7 +549,16 @@ main(int argc, char *argv[])
llvm_jit_opt_level = 3;
}
}
#endif
else if (!strcmp(argv[0], "--enable-segue")) {
/* all flags are enabled */
segue_flags = 0x1F1F;
}
else if (!strncmp(argv[0], "--enable-segue=", 15)) {
segue_flags = resolve_segue_flags(argv[0] + 15);
if (segue_flags == (uint32)-1)
return print_help();
}
#endif /* end of WASM_ENABLE_JIT != 0 */
#if WASM_ENABLE_LIBC_WASI != 0
else if (!strncmp(argv[0], "--dir=", 6)) {
if (argv[0][6] == '\0')
@ -632,6 +703,7 @@ main(int argc, char *argv[])
#if WASM_ENABLE_JIT != 0
init_args.llvm_jit_size_level = llvm_jit_size_level;
init_args.llvm_jit_opt_level = llvm_jit_opt_level;
init_args.segue_flags = segue_flags;
#endif
#if WASM_ENABLE_DEBUG_INTERP != 0

View File

@ -3,6 +3,8 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
PLATFORM=$(uname -s | tr A-Z a-z)
WAMRC="../../../wamr-compiler/build/wamrc"
if [ ! -d coremark ]; then
@ -32,4 +34,9 @@ cd ..
echo "Compile coremark.wasm to coremark.aot .."
${WAMRC} -o coremark.aot coremark.wasm
if [[ ${PLATFORM} == "linux" ]]; then
echo "Compile coremark.wasm to coremark_segue.aot .."
${WAMRC} --enable-segue -o coremark_segue.aot coremark.wasm
fi
echo "Done"

View File

@ -3,14 +3,21 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
IWASM="../../../product-mini/platforms/linux/build/iwasm"
PLATFORM=$(uname -s | tr A-Z a-z)
IWASM="../../../product-mini/platforms/${PLATFORM}/build/iwasm"
WAMRC="../../../wamr-compiler/build/wamrc"
echo "Run coremark with native .."
./coremark.exe
echo "Run coremark with iwasm mode .."
echo "Run coremark with iwasm aot mode .."
${IWASM} coremark.aot
echo "Run coremakr with iwasm interpreter .."
if [[ ${PLATFORM} == "linux" ]]; then
echo "Run coremark with iwasm aot-segue mode .."
${IWASM} coremark_segue.aot
fi
echo "Run coremark with iwasm interpreter mode .."
${IWASM} coremark.wasm

View File

@ -0,0 +1,7 @@
Dhrystone
------------------------------------------------------------------------------
There is no explicit license defined. The benchmark was originally
written in Ada by Reinhold P. Weicker and translated to C by Rick Richardson.
The source was obtained from the following site:
https://fossies.org/linux/privat/old/dhrystone-2.1.tar.gz

View File

@ -0,0 +1,24 @@
#!/bin/bash
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# Builds the Dhrystone benchmark as a native executable, as a wasm module
# and as AOT files (plus a segue-enabled AOT file on linux).

# Lower-cased kernel name, e.g. "linux"
PLATFORM=$(uname -s | tr A-Z a-z)

# Path of the AOT compiler. It is derived from $PWD, so every expansion
# below is quoted to keep working when the checkout path contains spaces.
WAMRC_CMD="$PWD/../../../wamr-compiler/build/wamrc"

echo "===> compile dhrystone src to dhrystone_native"
gcc -O3 -o dhrystone_native src/dhry_1.c src/dhry_2.c -I include

echo "===> compile dhrystone src to dhrystone.wasm"
/opt/wasi-sdk/bin/clang -O3 \
    -o dhrystone.wasm src/dhry_1.c src/dhry_2.c -I include \
    -Wl,--export=__heap_base -Wl,--export=__data_end

echo "===> compile dhrystone.wasm to dhrystone.aot"
"${WAMRC_CMD}" -o dhrystone.aot dhrystone.wasm

# The segue variant is only built on linux
if [[ ${PLATFORM} == "linux" ]]; then
    echo "===> compile dhrystone.wasm to dhrystone_segue.aot"
    "${WAMRC_CMD}" --enable-segue -o dhrystone_segue.aot dhrystone.wasm
fi

View File

@ -0,0 +1,306 @@
/*
**************************************************************************
* DHRYSTONE 2.1 BENCHMARK PC VERSION
**************************************************************************
*
* "DHRYSTONE" Benchmark Program
* -----------------------------
*
* Version: C, Version 2.1
*
* File: dhry.h (part 1 of 3)
*
* Date: May 25, 1988
*
* Author: Reinhold P. Weicker
* Siemens AG, AUT E 51
* Postfach 3220
* 8520 Erlangen
* Germany (West)
* Phone: [+49]-9131-7-20330
* (8-17 Central European Time)
* Usenet: ..!mcsun!unido!estevax!weicker
*
* Original Version (in Ada) published in
* "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
* pp. 1013 - 1030, together with the statistics
* on which the distribution of statements etc. is based.
*
* In this C version, the following C library functions are used:
* - strcpy, strcmp (inside the measurement loop)
* - printf, scanf (outside the measurement loop)
* In addition, Berkeley UNIX system calls "times ()" or "time ()"
* are used for execution time measurement. For measurements
* on other systems, these calls have to be changed.
*
* Collection of Results:
* Reinhold Weicker (address see above) and
*
* Rick Richardson
* PC Research. Inc.
* 94 Apple Orchard Drive
* Tinton Falls, NJ 07724
* Phone: (201) 389-8963 (9-17 EST)
* Usenet: ...!uunet!pcrat!rick
*
* Please send results to Rick Richardson and/or Reinhold Weicker.
* Complete information should be given on hardware and software used.
* Hardware information includes: Machine type, CPU, type and size
* of caches; for microprocessors: clock frequency, memory speed
* (number of wait states).
* Software information includes: Compiler (and runtime library)
* manufacturer and version, compilation switches, OS version.
* The Operating System version may give an indication about the
* compiler; Dhrystone itself performs no OS calls in the measurement
* loop.
*
* The complete output generated by the program should be mailed
* such that at least some checks for correctness can be made.
*
**************************************************************************
*
* This version has changes made by Roy Longbottom to conform to a common
* format for a series of standard benchmarks for PCs:
*
* Running time greater than 5 seconds due to inaccuracy of the PC clock.
*
* Automatic adjustment of run time, no manually inserted parameters.
*
* Initial display of calibration times to confirm linearity.
*
* Display of results within one screen (or at a slow speed as the test
* progresses) so that it can be seen to have run successfully.
*
* Facilities to type in details of system used etc.
*
* All results and details appended to a results file.
*
*
* Roy Longbottom
* 101323.2241@compuserve.com
*
**************************************************************************
*
* For details of history, changes, other defines, benchmark construction
* statistics see official versions from ftp.nosc.mil/pub/aburto where
* the latest table of results (dhry.tbl) are available. See also
* netlib@ornl.gov
*
**************************************************************************
*
* Defines: The following "Defines" are possible:
* -DREG=register (default: Not defined)
* As an approximation to what an average C programmer
* might do, the "register" storage class is applied
* (if enabled by -DREG=register)
* - for local variables, if they are used (dynamically)
* five or more times
* - for parameters if they are used (dynamically)
* six or more times
* Note that an optimal "register" strategy is
* compiler-dependent, and that "register" declarations
* do not necessarily lead to faster execution.
* -DNOSTRUCTASSIGN (default: Not defined)
* Define if the C compiler does not support
* assignment of structures.
* -DNOENUMS (default: Not defined)
* Define if the C compiler does not support
* enumeration types.
***************************************************************************
*
* Compilation model and measurement (IMPORTANT):
*
* This C version of Dhrystone consists of three files:
* - dhry.h (this file, containing global definitions and comments)
* - dhry_1.c (containing the code corresponding to Ada package Pack_1)
* - dhry_2.c (containing the code corresponding to Ada package Pack_2)
*
* The following "ground rules" apply for measurements:
* - Separate compilation
* - No procedure merging
* - Otherwise, compiler optimizations are allowed but should be indicated
* - Default results are those without register declarations
* See the companion paper "Rationale for Dhrystone Version 2" for a more
* detailed discussion of these ground rules.
*
* For 16-Bit processors (e.g. 80186, 80286), times for all compilation
* models ("small", "medium", "large" etc.) should be given if possible,
* together with a definition of these models for the compiler system used.
*
**************************************************************************
* Examples of Pentium Results
*
* Dhrystone Benchmark Version 2.1 (Language: C)
*
* Month run 4/1996
* PC model Escom
* CPU Pentium
* Clock MHz 100
* Cache 256K
* Options Neptune chipset
* OS/DOS Windows 95
* Compiler Watcom C/ C++ 10.5 Win386
* OptLevel -otexan -zp8 -fp5 -5r
* Run by Roy Longbottom
* From UK
* Mail 101323.2241@compuserve.com
*
* Final values (* implementation-dependent):
*
* Int_Glob: O.K. 5
* Bool_Glob: O.K. 1
* Ch_1_Glob: O.K. A
* Ch_2_Glob: O.K. B
* Arr_1_Glob[8]: O.K. 7
* Arr_2_Glob8/7: O.K. 1600010
* Ptr_Glob->
* Ptr_Comp: * 98008
* Discr: O.K. 0
* Enum_Comp: O.K. 2
* Int_Comp: O.K. 17
* Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
* Next_Ptr_Glob->
* Ptr_Comp: * 98008 same as above
* Discr: O.K. 0
* Enum_Comp: O.K. 1
* Int_Comp: O.K. 18
* Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
* Int_1_Loc: O.K. 5
* Int_2_Loc: O.K. 13
* Int_3_Loc: O.K. 7
* Enum_Loc: O.K. 1
* Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
* Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
*
* Register option Selected.
*
* Microseconds 1 loop: 4.53
* Dhrystones / second: 220690
* VAX MIPS rating: 125.61
*
*
* Dhrystone Benchmark Version 2.1 (Language: C)
*
* Month run 4/1996
* PC model Escom
* CPU Pentium
* Clock MHz 100
* Cache 256K
* Options Neptune chipset
* OS/DOS Windows 95
* Compiler Watcom C/ C++ 10.5 Win386
* OptLevel No optimisation
* Run by Roy Longbottom
* From UK
* Mail 101323.2241@compuserve.com
*
* Final values (* implementation-dependent):
*
* Int_Glob: O.K. 5
* Bool_Glob: O.K. 1
* Ch_1_Glob: O.K. A
* Ch_2_Glob: O.K. B
* Arr_1_Glob[8]: O.K. 7
* Arr_2_Glob8/7: O.K. 320010
* Ptr_Glob->
* Ptr_Comp: * 98004
* Discr: O.K. 0
* Enum_Comp: O.K. 2
* Int_Comp: O.K. 17
* Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
* Next_Ptr_Glob->
* Ptr_Comp: * 98004 same as above
* Discr: O.K. 0
* Enum_Comp: O.K. 1
* Int_Comp: O.K. 18
* Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
* Int_1_Loc: O.K. 5
* Int_2_Loc: O.K. 13
* Int_3_Loc: O.K. 7
* Enum_Loc: O.K. 1
* Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
* Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
*
* Register option Not selected.
*
* Microseconds 1 loop: 20.06
* Dhrystones / second: 49844
* VAX MIPS rating: 28.37
*
**************************************************************************
*/
/* Compiler and system dependent definitions: */
#ifndef TIME
#define TIMES
#endif
/* Use times(2) time function unless */
/* explicitly defined otherwise */
#ifdef TIMES
/* #include <sys/types.h>
#include <sys/times.h> */
/* for "times" */
#endif
#define Mic_secs_Per_Second 1000000.0
/* Berkeley UNIX C returns process times in seconds/HZ */
#ifdef NOSTRUCTASSIGN
#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
#else
#define structassign(d, s) d = s
#endif
/*
 * Enumeration type with the five identifiers Ident_1 .. Ident_5
 * (values 0 .. 4).
 *
 * For compilers without enum support the identifiers become integer macros
 * and Enumeration is a plain int. Both spellings of the switch are
 * accepted: NOENUM, which this file has always tested, and NOENUMS, which
 * is the name given in the "Defines" section of the header comment.
 */
#if defined(NOENUM) || defined(NOENUMS)
#define Ident_1 0
#define Ident_2 1
#define Ident_3 2
#define Ident_4 3
#define Ident_5 4
typedef int Enumeration;
#else
typedef enum { Ident_1, Ident_2, Ident_3, Ident_4, Ident_5 } Enumeration;
#endif
/* for boolean and enumeration types in Ada, Pascal */
/* General definitions: */
#include <stdio.h>
#include <string.h>
/* for strcpy, strcmp */
#define Null 0
/* Value of a Null pointer */
#define true 1
#define false 0
typedef int One_Thirty;
typedef int One_Fifty;
typedef char Capital_Letter;
typedef int Boolean;
typedef char Str_30[31];
typedef int Arr_1_Dim[50];
typedef int Arr_2_Dim[50][50];
/*
 * Record type manipulated by the benchmark: a link to another record, a
 * discriminant and a union of three alternative layouts.
 * Rec_Type names the record itself, Rec_Pointer a pointer to it.
 */
typedef struct record {
/* link to another record of the same type */
struct record *Ptr_Comp;
/* discriminant of the record */
Enumeration Discr;
union {
/* layout 1: enumeration, integer and a 31-byte string buffer */
struct {
Enumeration Enum_Comp;
int Int_Comp;
char Str_Comp[31];
} var_1;
/* layout 2: enumeration and a 31-byte string buffer */
struct {
Enumeration E_Comp_2;
char Str_2_Comp[31];
} var_2;
/* layout 3: two characters */
struct {
char Ch_1_Comp;
char Ch_2_Comp;
} var_3;
} variant;
} Rec_Type, *Rec_Pointer;

View File

@ -0,0 +1,19 @@
#!/bin/bash
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# Runs the Dhrystone benchmark natively and with iwasm in AOT mode.

# Lower-cased kernel name; it selects the product-mini platform directory
host_os=$(uname -s | tr A-Z a-z)
readonly runner="../../../product-mini/platforms/${host_os}/build/iwasm"

echo "============> run dhrystone native"
./dhrystone_native

echo "============> run dhrystone.aot"
"${runner}" dhrystone.aot

# The segue variant is only run on linux
case "${host_os}" in
    linux)
        echo "============> run dhrystone_segue.aot"
        "${runner}" dhrystone_segue.aot
        ;;
esac

View File

@ -0,0 +1,485 @@
/*
*************************************************************************
*
* "DHRYSTONE" Benchmark Program
* -----------------------------
*
* Version: C, Version 2.1
*
* File: dhry_1.c (part 2 of 3)
*
* Date: May 25, 1988
*
* Author: Reinhold P. Weicker
*
*************************************************************************
*/
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include "dhry.h"
/* Global Variables: */
Rec_Pointer Ptr_Glob, Next_Ptr_Glob;
int Int_Glob;
Boolean Bool_Glob;
char Ch_1_Glob, Ch_2_Glob;
int Arr_1_Glob[50];
int Arr_2_Glob[50][50];
Enumeration
Func_1(Capital_Letter Ch_1_Par_Val, Capital_Letter Ch_2_Par_Val);
/*
forward declaration necessary since Enumeration may not simply be int
*/
#ifndef ROPT
#define REG
/* REG becomes defined as empty */
/* i.e. no register variables */
#else
#define REG register
#endif
void
Proc_1(REG Rec_Pointer Ptr_Val_Par);
void
Proc_2(One_Fifty *Int_Par_Ref);
void
Proc_3(Rec_Pointer *Ptr_Ref_Par);
void
Proc_4();
void
Proc_5();
void
Proc_6(Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par);
void
Proc_7(One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val,
One_Fifty *Int_Par_Ref);
void
Proc_8(Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, int Int_1_Par_Val,
int Int_2_Par_Val);
Boolean
Func_2(Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref);
/* variables for time measurement: */
#define Too_Small_Time 2
/* Measurements should last at least 2 seconds */
#define BILLION 1000000000L
#define MILLION 1000000
struct timespec Begin_Time, End_Time;
double User_Time;
double Microseconds, Dhrystones_Per_Second, Vax_Mips;
/* end of variables for time measurement */
/*
 * Benchmark driver; corresponds to procedures Main and Proc_0 in the Ada
 * version.
 *
 * Allocates the two global records, then repeatedly times the Dhrystone
 * loop with clock_gettime(CLOCK_MONOTONIC). Every pass doubles
 * Number_Of_Runs (and multiplies it by 5 afterwards when the pass took
 * less than 0.1 s); the calibration stops once a pass exceeds 5 s or after
 * 10 passes. Afterwards the final values of the globals and locals are
 * printed with an O.K./WRONG verdict, followed by the timing figures when
 * the last pass lasted at least Too_Small_Time seconds.
 *
 * argc and argv are not used.
 *
 * Returns 0 on completion, 1 when the records cannot be allocated.
 */
int
main(int argc, char *argv[])
{
    One_Fifty Int_1_Loc;
    REG One_Fifty Int_2_Loc;
    One_Fifty Int_3_Loc;
    REG char Ch_Index;
    Enumeration Enum_Loc;
    Str_30 Str_1_Loc;
    Str_30 Str_2_Loc;
    REG int Run_Index;
    REG int Number_Of_Runs;
    int count = 10; /* remaining calibration passes */

    Next_Ptr_Glob = (Rec_Pointer)malloc(sizeof(Rec_Type));
    Ptr_Glob = (Rec_Pointer)malloc(sizeof(Rec_Type));
    if (!Next_Ptr_Glob || !Ptr_Glob) {
        /* both records are dereferenced below, so give up early */
        fprintf(stderr, "Failed to allocate the benchmark records\n");
        free(Next_Ptr_Glob);
        free(Ptr_Glob);
        return 1;
    }

    Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
    Ptr_Glob->Discr = Ident_1;
    Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
    Ptr_Glob->variant.var_1.Int_Comp = 40;
    strcpy(Ptr_Glob->variant.var_1.Str_Comp, "DHRYSTONE PROGRAM, SOME STRING");
    strcpy(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");

    Arr_2_Glob[8][7] = 10;
    /* Was missing in published program. Without this statement, */
    /* Arr_2_Glob [8][7] would have an undefined value. */
    /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
    /* overflow may occur for this array element. */

    printf("\n");
    printf("Dhrystone Benchmark, Version 2.1 (Language: C or C++)\n");
    printf("\n");

    Number_Of_Runs = 5000;

    do {
        Number_Of_Runs = Number_Of_Runs * 2;
        count = count - 1;
        Arr_2_Glob[8][7] = 10;

        /***************/
        /* Start timer */
        /***************/
        clock_gettime(CLOCK_MONOTONIC, &Begin_Time);

        for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index) {
            Proc_5();
            Proc_4();
            /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
            Int_1_Loc = 2;
            Int_2_Loc = 3;
            strcpy(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
            Enum_Loc = Ident_2;
            Bool_Glob = !Func_2(Str_1_Loc, Str_2_Loc);
            /* Bool_Glob == 1 */
            while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
            {
                Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
                /* Int_3_Loc == 7 */
                Proc_7(Int_1_Loc, Int_2_Loc, &Int_3_Loc);
                /* Int_3_Loc == 7 */
                Int_1_Loc += 1;
            } /* while */
            /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
            Proc_8(Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
            /* Int_Glob == 5 */
            Proc_1(Ptr_Glob);
            for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
            /* loop body executed twice */
            {
                if (Enum_Loc == Func_1(Ch_Index, 'C'))
                /* then, not executed */
                {
                    Proc_6(Ident_1, &Enum_Loc);
                    strcpy(Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
                    Int_2_Loc = Run_Index;
                    Int_Glob = Run_Index;
                }
            }
            /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
            Int_2_Loc = Int_2_Loc * Int_1_Loc;
            Int_1_Loc = Int_2_Loc / Int_3_Loc;
            Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
            /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
            Proc_2(&Int_1_Loc);
            /* Int_1_Loc == 5 */
        } /* loop "for Run_Index" */

        /**************/
        /* Stop timer */
        /**************/
        clock_gettime(CLOCK_MONOTONIC, &End_Time);

        /* elapsed time in whole microseconds ... */
        User_Time = (End_Time.tv_sec - Begin_Time.tv_sec) * MILLION
                    + (End_Time.tv_nsec - Begin_Time.tv_nsec) / 1000;
        User_Time = User_Time / MILLION; /* convert to seconds */

        printf("%ld runs %lf seconds \n", (long)Number_Of_Runs, User_Time);
        if (User_Time > 5.0) {
            /* long enough: this was the final pass */
            count = 0;
        }
        else {
            if (User_Time < 0.1) {
                /* far too short: grow faster than the plain doubling */
                Number_Of_Runs = Number_Of_Runs * 5;
            }
        }
    } /* calibrate/run do while */
    while (count > 0);

    printf("\n");
    printf("Final values (* implementation-dependent):\n");
    printf("\n");
    printf("Int_Glob: ");
    if (Int_Glob == 5)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d ", Int_Glob);

    printf("Bool_Glob: ");
    if (Bool_Glob == 1)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d\n", Bool_Glob);

    printf("Ch_1_Glob: ");
    if (Ch_1_Glob == 'A')
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%c ", Ch_1_Glob);

    printf("Ch_2_Glob: ");
    if (Ch_2_Glob == 'B')
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%c\n", Ch_2_Glob);

    printf("Arr_1_Glob[8]: ");
    if (Arr_1_Glob[8] == 7)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d ", Arr_1_Glob[8]);

    printf("Arr_2_Glob8/7: ");
    if (Arr_2_Glob[8][7] == Number_Of_Runs + 10)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%10d\n", Arr_2_Glob[8][7]);

    printf("Ptr_Glob-> ");
    /* %p requires a pointer to void */
    printf(" Ptr_Comp: * %p\n", (void *)Ptr_Glob->Ptr_Comp);

    printf(" Discr: ");
    if (Ptr_Glob->Discr == 0)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d ", Ptr_Glob->Discr);

    printf("Enum_Comp: ");
    if (Ptr_Glob->variant.var_1.Enum_Comp == 2)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d\n", Ptr_Glob->variant.var_1.Enum_Comp);

    printf(" Int_Comp: ");
    if (Ptr_Glob->variant.var_1.Int_Comp == 17)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d ", Ptr_Glob->variant.var_1.Int_Comp);

    printf("Str_Comp: ");
    if (strcmp(Ptr_Glob->variant.var_1.Str_Comp,
               "DHRYSTONE PROGRAM, SOME STRING")
        == 0)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%s\n", Ptr_Glob->variant.var_1.Str_Comp);

    printf("Next_Ptr_Glob-> ");
    /* %p requires a pointer to void */
    printf(" Ptr_Comp: * %p", (void *)Next_Ptr_Glob->Ptr_Comp);
    printf(" same as above\n");

    printf(" Discr: ");
    if (Next_Ptr_Glob->Discr == 0)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d ", Next_Ptr_Glob->Discr);

    printf("Enum_Comp: ");
    if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);

    printf(" Int_Comp: ");
    if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d ", Next_Ptr_Glob->variant.var_1.Int_Comp);

    printf("Str_Comp: ");
    if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp,
               "DHRYSTONE PROGRAM, SOME STRING")
        == 0)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);

    printf("Int_1_Loc: ");
    if (Int_1_Loc == 5)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d ", Int_1_Loc);

    printf("Int_2_Loc: ");
    if (Int_2_Loc == 13)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d\n", Int_2_Loc);

    printf("Int_3_Loc: ");
    if (Int_3_Loc == 7)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d ", Int_3_Loc);

    printf("Enum_Loc: ");
    if (Enum_Loc == 1)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%d\n", Enum_Loc);

    printf("Str_1_Loc: ");
    if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%s\n", Str_1_Loc);

    printf("Str_2_Loc: ");
    if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)
        printf("O.K. ");
    else
        printf("WRONG ");
    printf("%s\n", Str_2_Loc);

    printf("\n");

    if (User_Time < Too_Small_Time) {
        printf("Measured time too small to obtain meaningful results\n");
        printf("Please increase number of runs\n");
        printf("\n");
    }
    else {
        Microseconds = User_Time * Mic_secs_Per_Second / (double)Number_Of_Runs;
        Dhrystones_Per_Second = (double)Number_Of_Runs / User_Time;
        Vax_Mips = Dhrystones_Per_Second / 1757.0;

        printf("Microseconds for one run through Dhrystone: ");
        printf("%lf \n", Microseconds);
        printf("Dhrystones per Second: ");
        printf("%lf \n", Dhrystones_Per_Second);
        printf("VAX MIPS rating = ");
        printf("%lf \n", Vax_Mips);
        printf("\n");
    }

    free(Next_Ptr_Glob);
    free(Ptr_Glob);

    /* the benchmark ran to completion: report success to the caller */
    return 0;
}
/*
 * Proc_1: record manipulation through pointers.
 *
 * Ptr_Val_Par points to a record whose Ptr_Comp link designates the "next"
 * record. The procedure copies *Ptr_Glob into the next record, sets
 * Int_Comp to 5 in both records, lets Proc_3 rewrite the next record's
 * link and then, when the next record's discriminant is Ident_1, updates
 * its fields through Proc_6 and Proc_7. Otherwise the next record is
 * copied back over *Ptr_Val_Par.
 */
void
Proc_1(REG Rec_Pointer Ptr_Val_Par)
/******************/
/* executed once */
{
REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
/* == Ptr_Glob_Next */
/* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
/* corresponds to "rename" in Ada, "with" in Pascal */
/* structassign is a plain assignment, or memcpy under NOSTRUCTASSIGN */
structassign(*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob);
Ptr_Val_Par->variant.var_1.Int_Comp = 5;
Next_Record->variant.var_1.Int_Comp = Ptr_Val_Par->variant.var_1.Int_Comp;
Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
Proc_3(&Next_Record->Ptr_Comp);
/* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
== Ptr_Glob->Ptr_Comp */
if (Next_Record->Discr == Ident_1)
/* then, executed */
{
Next_Record->variant.var_1.Int_Comp = 6;
Proc_6(Ptr_Val_Par->variant.var_1.Enum_Comp,
&Next_Record->variant.var_1.Enum_Comp);
Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
/* Int_Comp is both an input and the output location here */
Proc_7(Next_Record->variant.var_1.Int_Comp, 10,
&Next_Record->variant.var_1.Int_Comp);
}
else { /* not executed */
structassign(*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
}
} /* Proc_1 */
/*
 * Proc_2: integer update through a reference parameter.
 *
 * Adds 10 to *Int_Par_Ref in a local copy; when Ch_1_Glob is 'A' the copy
 * is decremented and (copy - Int_Glob) is stored back to *Int_Par_Ref.
 */
void
Proc_2(One_Fifty *Int_Par_Ref)
/******************/
/* executed once */
/* *Int_Par_Ref == 1, becomes 5 (1 + 10 - 1 - Int_Glob, with Int_Glob == 5) */
{
One_Fifty Int_Loc;
Enumeration Enum_Loc;
Int_Loc = *Int_Par_Ref + 10;
do /* executed once */
if (Ch_1_Glob == 'A')
/* then, executed */
{
Int_Loc -= 1;
*Int_Par_Ref = Int_Loc - Int_Glob;
Enum_Loc = Ident_1;
} /* if */
/* NOTE(review): Enum_Loc is assigned only inside the `if` above, so this */
/* condition reads an uninitialized value whenever Ch_1_Glob != 'A' */
while (Enum_Loc != Ident_1); /* true */
} /* Proc_2 */
/*
 * Proc_3: pointer assignment through a reference parameter.
 *
 * When Ptr_Glob is not Null, stores Ptr_Glob->Ptr_Comp into *Ptr_Ref_Par.
 * Then calls Proc_7(10, Int_Glob, ...) to write Ptr_Glob's Int_Comp field.
 */
void
Proc_3(Rec_Pointer *Ptr_Ref_Par)
/******************/
/* executed once */
/* Ptr_Ref_Par becomes Ptr_Glob */
{
if (Ptr_Glob != Null)
/* then, executed */
*Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
/* NOTE(review): this call is outside the `if` (no braces), so Ptr_Glob */
/* is dereferenced here even when the test above fails */
Proc_7(10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
} /* Proc_3 */
/*
 * Proc_4: boolean and character assignments on globals.
 *
 * ORs (Ch_1_Glob == 'A') into Bool_Glob and sets Ch_2_Glob to 'B'.
 */
void
Proc_4() /* without parameters */
/*******/
/* executed once */
{
Boolean Bool_Loc;
Bool_Loc = Ch_1_Glob == 'A';
/* bitwise OR on the int-typed Boolean values */
Bool_Glob = Bool_Loc | Bool_Glob;
Ch_2_Glob = 'B';
} /* Proc_4 */
/*
 * Proc_5: resets two globals: Ch_1_Glob becomes 'A' and Bool_Glob becomes
 * false.
 */
void
Proc_5() /* without parameters */
/*******/
/* executed once */
{
Ch_1_Glob = 'A';
Bool_Glob = false;
} /* Proc_5 */
/* Procedure for the assignment of structures, */
/* if the C compiler doesn't support this feature */
#ifdef NOSTRUCTASSIGN
/*
 * Byte-wise copy of l bytes from s to d, written as a K&R-style definition
 * without a declared return type. Compiled only when NOSTRUCTASSIGN is
 * defined, in which case structassign() expands to a memcpy call.
 *
 * NOTE(review): the name is the same as the standard memcpy declared by
 * <string.h>, which dhry.h includes; this definition presumably conflicts
 * with that declaration on current compilers - confirm before enabling
 * NOSTRUCTASSIGN.
 */
memcpy(d, s, l) register char *d;
register char *s;
register int l;
{
while (l--)
*d++ = *s++;
}
#endif

View File

@ -0,0 +1,187 @@
/*
*************************************************************************
*
* "DHRYSTONE" Benchmark Program
* -----------------------------
*
* Version: C, Version 2.1
*
* File: dhry_2.c (part 3 of 3)
*
* Date: May 25, 1988
*
* Author: Reinhold P. Weicker
*
*************************************************************************
*/
#include "dhry.h"
/*
 * REG is the storage class applied to selected locals and parameters.
 *
 * A definition supplied on the command line (e.g. -DREG=register) is kept
 * untouched. Otherwise REG becomes `register` when ROPT is defined - the
 * same switch dhry_1.c tests - and is empty (no register variables) by
 * default.
 */
#ifndef REG
#ifdef ROPT
#define REG register
#else
#define REG
/* REG becomes defined as empty */
/* i.e. no register variables */
#endif
#endif
extern int Int_Glob;
extern char Ch_1_Glob;
Boolean
Func_3(Enumeration Enum_Par_Val);
/*
 * Proc_6: enumeration mapping.
 *
 * Writes a value derived from Enum_Val_Par to *Enum_Ref_Par. The output is
 * first set to Enum_Val_Par, replaced by Ident_4 when Func_3(Enum_Val_Par)
 * returns zero, and finally overridden by the switch:
 *   Ident_1 -> Ident_1
 *   Ident_2 -> Ident_1 if Int_Glob > 100, else Ident_4
 *   Ident_3 -> Ident_2
 *   Ident_4 -> left as set before the switch
 *   Ident_5 -> Ident_3
 */
void
Proc_6(Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par)
/*********************************/
/* executed once */
/* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
{
*Enum_Ref_Par = Enum_Val_Par;
if (!Func_3(Enum_Val_Par))
/* then, not executed */
*Enum_Ref_Par = Ident_4;
switch (Enum_Val_Par) {
case Ident_1:
*Enum_Ref_Par = Ident_1;
break;
case Ident_2:
if (Int_Glob > 100)
/* then */
*Enum_Ref_Par = Ident_1;
else
*Enum_Ref_Par = Ident_4;
break;
case Ident_3: /* executed */
*Enum_Ref_Par = Ident_2;
break;
case Ident_4:
break;
case Ident_5:
*Enum_Ref_Par = Ident_3;
break;
} /* switch */
} /* Proc_6 */
/*
 * Proc_7: integer addition with a result parameter.
 *
 * Stores Int_1_Par_Val + 2 + Int_2_Par_Val into *Int_Par_Ref.
 */
void
Proc_7(One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, One_Fifty *Int_Par_Ref)
/**********************************************/
/* executed three times */
/* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */
/* Int_Par_Ref becomes 7 */
/* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
/* Int_Par_Ref becomes 17 */
/* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
/* Int_Par_Ref becomes 18 */
{
One_Fifty Int_Loc;
Int_Loc = Int_1_Par_Val + 2;
*Int_Par_Ref = Int_2_Par_Val + Int_Loc;
} /* Proc_7 */
/*
 * Proc_8: array element accesses.
 *
 * With Int_Loc = Int_1_Par_Val + 5 the procedure writes
 *   Arr_1_Par_Ref[Int_Loc] and [Int_Loc + 1] = Int_2_Par_Val,
 *   Arr_1_Par_Ref[Int_Loc + 30] = Int_Loc,
 *   Arr_2_Par_Ref[Int_Loc][Int_Loc] and [Int_Loc][Int_Loc + 1] = Int_Loc,
 * increments Arr_2_Par_Ref[Int_Loc][Int_Loc - 1], copies
 * Arr_1_Par_Ref[Int_Loc] to Arr_2_Par_Ref[Int_Loc + 20][Int_Loc] and sets
 * Int_Glob to 5.
 */
void
Proc_8(Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, int Int_1_Par_Val,
int Int_2_Par_Val)
/*********************************************************************/
/* executed once */
/* Int_Par_Val_1 == 3 */
/* Int_Par_Val_2 == 7 */
{
REG One_Fifty Int_Index;
REG One_Fifty Int_Loc;
Int_Loc = Int_1_Par_Val + 5;
Arr_1_Par_Ref[Int_Loc] = Int_2_Par_Val;
Arr_1_Par_Ref[Int_Loc + 1] = Arr_1_Par_Ref[Int_Loc];
Arr_1_Par_Ref[Int_Loc + 30] = Int_Loc;
/* two iterations: Int_Index == Int_Loc and Int_Loc + 1 */
for (Int_Index = Int_Loc; Int_Index <= Int_Loc + 1; ++Int_Index)
Arr_2_Par_Ref[Int_Loc][Int_Index] = Int_Loc;
Arr_2_Par_Ref[Int_Loc][Int_Loc - 1] += 1;
Arr_2_Par_Ref[Int_Loc + 20][Int_Loc] = Arr_1_Par_Ref[Int_Loc];
Int_Glob = 5;
} /* Proc_8 */
/*
 * Func_1 - compare two characters.
 *
 * Returns Ident_1 when the characters differ. When they are equal, records
 * the character in Ch_1_Glob and returns Ident_2.
 *
 * Original author's run notes: executed three times,
 *   1st call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R'
 *   2nd call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C'
 *   3rd call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C'
 * so the "equal" path is not taken in the standard run.
 */
Enumeration
Func_1(Capital_Letter Ch_1_Par_Val, Capital_Letter Ch_2_Par_Val)
{
    Capital_Letter Ch_1_Loc = Ch_1_Par_Val;
    Capital_Letter Ch_2_Loc = Ch_1_Loc;

    if (Ch_2_Loc == Ch_2_Par_Val) {
        /* equal characters: remember the character globally */
        Ch_1_Glob = Ch_1_Loc;
        return (Ident_2);
    }

    /* characters differ */
    return (Ident_1);
} /* Func_1 */
/*
 * Func_2 - compare two strings, after a one-character probe through Func_1.
 *
 *   Str_1_Par_Ref, Str_2_Par_Ref  the strings to compare
 *
 * Returns true when strcmp(Str_1_Par_Ref, Str_2_Par_Ref) > 0 (Int_Glob is
 * then set to 10), or when Ch_Loc == 'R'; otherwise returns false.
 *
 * The while loop only ends once Func_1 has returned Ident_1 for
 * Str_1_Par_Ref[2] and Str_2_Par_Ref[3]; nothing else changes Int_Loc, so
 * any other return value keeps the loop running.
 */
Boolean
Func_2(Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref)
/*************************************************/
/* executed once */
/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
{
REG One_Thirty Int_Loc;
Capital_Letter Ch_Loc;
Int_Loc = 2;
/* the if statement below is the entire body of this while loop */
while (Int_Loc <= 2) /* loop body executed once */
if (Func_1(Str_1_Par_Ref[Int_Loc], Str_2_Par_Ref[Int_Loc + 1])
== Ident_1)
/* then, executed */
{
/* the only assignment to Ch_Loc; it always happens before the loop can
   exit, because the loop exits only after Int_Loc is incremented here */
Ch_Loc = 'A';
Int_Loc += 1;
} /* if, while */
/* Ch_Loc holds 'A' and Int_Loc holds 3 at this point */
if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
/* then, not executed */
Int_Loc = 7;
if (Ch_Loc == 'R')
/* then, not executed */
return (true);
else /* executed */
{
if (strcmp(Str_1_Par_Ref, Str_2_Par_Ref) > 0)
/* then, not executed */
{
/* 3 + 7: Int_Glob receives 10 on this path */
Int_Loc += 7;
Int_Glob = Int_Loc;
return (true);
}
else /* executed */
return (false);
} /* if Ch_Loc */
} /* Func_2 */
/*
 * Func_3 - report whether Enum_Par_Val equals Ident_3.
 *
 * Returns true for Ident_3 and false for every other value.
 *
 * Original author's run notes: executed once, with
 * Enum_Par_Val == Ident_3, so the true path is the one taken.
 */
Boolean
Func_3(Enumeration Enum_Par_Val)
{
    Enumeration Enum_Loc = Enum_Par_Val;

    return (Enum_Loc == Ident_3) ? (true) : (false);
} /* Func_3 */

View File

@ -3,27 +3,45 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
source /opt/emsdk/emsdk_env.sh
PLATFORM=$(uname -s | tr A-Z a-z)
OUT_DIR=$PWD/out
WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc
mkdir -p jetstream
mkdir -p tsf-src
mkdir -p ${OUT_DIR}
if [[ $1 != "--no-simd" ]];then
NATIVE_SIMD_FLAGS="-msse2 -msse3 -msse4"
WASM_SIMD_FLAGS="-msimd128 -msse2 -msse3 -msse4"
else
NATIVE_SIMD_FLAGS=""
WASM_SIMD_FLAGS=""
fi
cd jetstream
echo "Download source files .."
wget https://browserbench.org/JetStream/wasm/gcc-loops.cpp
wget https://browserbench.org/JetStream/wasm/quicksort.c
wget https://browserbench.org/JetStream/wasm/HashSet.cpp
wget https://browserbench.org/JetStream/simple/float-mm.c
wget -N https://browserbench.org/JetStream/wasm/gcc-loops.cpp
wget -N https://browserbench.org/JetStream/wasm/quicksort.c
wget -N https://browserbench.org/JetStream/wasm/HashSet.cpp
wget -N https://browserbench.org/JetStream/simple/float-mm.c
patch -p1 < ../jetstream.patch
if [[ $? != 0 ]]; then
exit
fi
echo "Patch source files .."
patch -p1 -N < ../jetstream.patch
echo "Build gcc-loops with g++ .."
g++ -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/gcc-loops_native gcc-loops.cpp
g++ -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/gcc-loops_native gcc-loops.cpp
echo "Build gcc-loops with em++ .."
em++ -O3 -s STANDALONE_WASM=1 -msimd128 \
em++ -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \
-s INITIAL_MEMORY=1048576 \
-s TOTAL_STACK=32768 \
-s "EXPORTED_FUNCTIONS=['_main']" \
@ -33,11 +51,16 @@ em++ -O3 -s STANDALONE_WASM=1 -msimd128 \
echo "Compile gcc-loops.wasm to gcc-loops.aot"
${WAMRC_CMD} -o ${OUT_DIR}/gcc-loops.aot ${OUT_DIR}/gcc-loops.wasm
if [[ ${PLATFORM} == "linux" ]]; then
echo "Compile gcc-loops.wasm to gcc-loops_segue.aot"
${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/gcc-loops_segue.aot ${OUT_DIR}/gcc-loops.wasm
fi
echo "Build quicksort with gcc .."
gcc -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/quicksort_native quicksort.c
gcc -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/quicksort_native quicksort.c
echo "Build quicksort with emcc .."
emcc -O3 -s STANDALONE_WASM=1 -msimd128 \
emcc -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \
-s INITIAL_MEMORY=1048576 \
-s TOTAL_STACK=32768 \
-s "EXPORTED_FUNCTIONS=['_main']" \
@ -46,12 +69,17 @@ emcc -O3 -s STANDALONE_WASM=1 -msimd128 \
echo "Compile quicksort.wasm to quicksort.aot"
${WAMRC_CMD} -o ${OUT_DIR}/quicksort.aot ${OUT_DIR}/quicksort.wasm
if [[ ${PLATFORM} == "linux" ]]; then
echo "Compile quicksort.wasm to quicksort_segue.aot"
${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/quicksort_segue.aot ${OUT_DIR}/quicksort.wasm
fi
echo "Build HashSet with g++ .."
g++ -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/HashSet_native HashSet.cpp \
g++ -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/HashSet_native HashSet.cpp \
-lstdc++
echo "Build HashSet with em++ .."
em++ -O3 -s STANDALONE_WASM=1 -msimd128 \
em++ -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \
-s INITIAL_MEMORY=1048576 \
-s TOTAL_STACK=32768 \
-s "EXPORTED_FUNCTIONS=['_main']" \
@ -60,11 +88,16 @@ em++ -O3 -s STANDALONE_WASM=1 -msimd128 \
echo "Compile HashSet.wasm to HashSet.aot"
${WAMRC_CMD} -o ${OUT_DIR}/HashSet.aot ${OUT_DIR}/HashSet.wasm
if [[ ${PLATFORM} == "linux" ]]; then
echo "Compile HashSet.wasm to HashSet_segue.aot"
${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/HashSet_segue.aot ${OUT_DIR}/HashSet.wasm
fi
echo "Build float-mm with gcc .."
gcc -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/float-mm_native float-mm.c
gcc -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/float-mm_native float-mm.c
echo "Build float-mm with emcc .."
emcc -O3 -s STANDALONE_WASM=1 -msimd128 \
emcc -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \
-s INITIAL_MEMORY=1048576 \
-s TOTAL_STACK=32768 \
-s "EXPORTED_FUNCTIONS=['_main']" \
@ -72,3 +105,70 @@ emcc -O3 -s STANDALONE_WASM=1 -msimd128 \
echo "Compile float-mm.wasm to float-mm.aot"
${WAMRC_CMD} -o ${OUT_DIR}/float-mm.aot ${OUT_DIR}/float-mm.wasm
if [[ ${PLATFORM} == "linux" ]]; then
echo "Compile float-mm.wasm to float-mm_segue.aot"
${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/float-mm_segue.aot ${OUT_DIR}/float-mm.wasm
fi
cd ../tsf-src
tsf_srcs="tsf_asprintf.c tsf_buffer.c tsf_error.c tsf_reflect.c tsf_st.c \
tsf_type.c tsf_io.c tsf_native.c tsf_generator.c tsf_st_typetable.c \
tsf_parser.c tsf_buf_writer.c tsf_buf_reader.c tsf_primitive.c \
tsf_type_table.c tsf_copier.c tsf_destructor.c tsf_gpc_code_gen.c \
gpc_code_gen_util.c gpc_threaded.c gpc_intable.c gpc_instruction.c \
gpc_program.c gpc_proto.c gpc_stack_height.c tsf_serial_in_man.c \
tsf_serial_out_man.c tsf_type_in_map.c tsf_type_out_map.c \
tsf_stream_file_input.c tsf_stream_file_output.c tsf_sort.c \
tsf_version.c tsf_named_type.c tsf_io_utils.c tsf_zip_attr.c \
tsf_zip_reader.c tsf_zip_writer.c tsf_zip_abstract.c tsf_limits.c \
tsf_ra_type_man.c tsf_adaptive_reader.c tsf_sha1.c tsf_sha1_writer.c \
tsf_fsdb.c tsf_fsdb_protocol.c tsf_define_helpers.c tsf_ir.c \
tsf_ir_different.c tsf_ir_speed.c"
tsf_files="${tsf_srcs} config.h gpc_worklist.h \
tsf_config_stub.h tsf.h tsf_internal.h tsf_region.h tsf_types.h \
gpc.h tsf_atomics.h tsf_define_helpers.h tsf_indent.h tsf_inttypes.h \
tsf_serial_protocol.h tsf_util.h gpc_int_common.h tsf_build_defines.h \
tsf_format.h tsf_internal_config.h tsf_ir_different.h tsf_sha1.h \
tsf_zip_abstract.h gpc_internal.h tsf_config.h tsf_fsdb_protocol.h \
tsf_internal_config_stub.h tsf_ir.h tsf_st.h \
gpc_instruction_dispatch.gen gpc_instruction_stack_effects.gen \
gpc_instruction_to_string.gen gpc_instruction_size.gen \
gpc_instruction_static_size.gen gpc_interpreter.gen"
echo "Download tsf source files .."
for t in ${tsf_files}
do
wget -N "https://browserbench.org/JetStream/wasm/TSF/${t}"
if [[ $? != 0 ]]; then
exit
fi
done
patch -p1 -N < ../tsf.patch
echo "Build tsf with gcc .."
gcc \
-o ${OUT_DIR}/tsf_native -O3 ${NATIVE_SIMD_FLAGS} \
-I. -DTSF_BUILD_SYSTEM=1 \
${tsf_srcs} -lm
echo "Build tsf standalone with wasi-sdk .."
/opt/wasi-sdk/bin/clang -O3 ${WASM_SIMD_FLAGS} -z stack-size=1048576 \
-Wl,--initial-memory=52428800 \
-Wl,--export=main \
-Wl,--export=__heap_base,--export=__data_end \
-I. -DTSF_BUILD_SYSTEM=1 \
-Wl,--allow-undefined \
-o ${OUT_DIR}/tsf.wasm \
${tsf_srcs}
echo "Compile tsf.wasm to tsf.aot"
${WAMRC_CMD} -o ${OUT_DIR}/tsf.aot ${OUT_DIR}/tsf.wasm
if [[ ${PLATFORM} == "linux" ]]; then
echo "Compile tsf.wasm to tsf_segue.aot"
${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/tsf_segue.aot ${OUT_DIR}/tsf.wasm
fi

View File

@ -1,15 +1,18 @@
diff -urN jetstream-org/HashSet.cpp jetstream/HashSet.cpp
--- jetstream-org/HashSet.cpp 2020-10-30 04:12:42.000000000 +0800
+++ jetstream/HashSet.cpp 2022-01-24 17:11:08.619831711 +0800
@@ -24,6 +24,7 @@
--- jetstream-org/HashSet.cpp 2020-10-30 04:12:42.000000000 +0800
+++ jetstream/HashSet.cpp 2022-01-24 17:11:08.619831711 +0800
@@ -22,8 +22,10 @@
#include <algorithm>
#include <memory>
+#include <limits>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <sys/time.h>
// Compile with: xcrun clang++ -o HashSet HashSet.cpp -O2 -W -framework Foundation -licucore -std=c++11 -fvisibility=hidden -DNDEBUG=1
@@ -76,7 +77,7 @@
@@ -76,7 +78,7 @@
inline ToType bitwise_cast(FromType from)
{
typename std::remove_const<ToType>::type to { };
@ -17,4 +20,4 @@ diff -urN jetstream-org/HashSet.cpp jetstream/HashSet.cpp
+ memcpy(&to, &from, sizeof(to));
return to;
}

View File

@ -3,6 +3,8 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
PLATFORM=$(uname -s | tr A-Z a-z)
CUR_DIR=$PWD
OUT_DIR=$CUR_DIR/out
REPORT=$CUR_DIR/report.txt
@ -13,7 +15,7 @@ IWASM_CMD=$CUR_DIR/../../../product-mini/platforms/${PLATFORM}/build/iwasm
BENCH_NAME_MAX_LEN=20
JETSTREAM_CASES="gcc-loops quicksort HashSet float-mm"
JETSTREAM_CASES="gcc-loops HashSet tsf float-mm quicksort"
rm -f $REPORT
touch $REPORT
@ -34,7 +36,11 @@ echo "Start to run cases, the result is written to report.txt"
#run benchmarks
cd $OUT_DIR
echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT
if [[ ${PLATFORM} == "linux" ]]; then
echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT
else
echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT
fi
for t in $JETSTREAM_CASES
do
@ -46,7 +52,13 @@ do
echo "run $t with iwasm aot .."
echo -en "\t" >> $REPORT
$TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
$TIME -f "real-%e-time" $IWASM_CMD --dir=. ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
if [[ ${PLATFORM} == "linux" ]]; then
echo "run $t with iwasm aot segue .."
echo -en "\t" >> $REPORT
$TIME -f "real-%e-time" $IWASM_CMD --dir=. ${t}_segue.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
fi
echo -en "\n" >> $REPORT
done

View File

@ -0,0 +1,24 @@
diff -urN tsf-src-org/tsf_internal.h tsf-src/tsf_internal.h
--- tsf-src-org/tsf_internal.h 2023-03-31 10:49:45.000000000 +0800
+++ tsf-src/tsf_internal.h 2023-05-11 08:18:35.000000000 +0800
@@ -429,6 +429,7 @@
#endif
tsf_fsdb_connection_t *connection;
#endif
+ uint32_t __padding;
} remote;
} u;
tsf_limits_t *limits;
diff -urN tsf-src-org/tsf_ir_speed.c tsf-src/tsf_ir_speed.c
--- tsf-src-org/tsf_ir_speed.c 2023-03-31 10:49:45.000000000 +0800
+++ tsf-src/tsf_ir_speed.c 2023-05-11 08:18:35.000000000 +0800
@@ -63,6 +63,9 @@
Program_t *program;
unsigned elementIndex;
+ if (!(programIndex % 100))
+ printf("##programIndex: %u\n", programIndex);
+
CS(program = tsf_region_create(sizeof(Program_t)));
program->globals.len = numDecls + numDefns;

View File

@ -16,6 +16,8 @@ libsodium_CASES="aead_aes256gcm2 aead_aes256gcm aead_chacha20poly13052 aead_chac
sodium_utils3 sodium_utils sodium_version stream2 stream3 stream4 stream verify1 \
xchacha20"
PLATFORM=$(uname -s | tr A-Z a-z)
readonly WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc
readonly OUT_DIR=$PWD/libsodium/zig-out/bin
@ -34,9 +36,16 @@ zig build -Drelease-fast -Denable_benchmarks=true -Dtarget=wasm32-wasi
for case in ${libsodium_CASES}
do
${WAMRC_CMD} -o ${OUT_DIR}/${case}.aot ${OUT_DIR}/${case}.wasm
if [ "$?" != 0 ]; then
echo -e "Error while compiling ${case}.wasm to ${case}.aot"
exit
fi
if [[ ${PLATFORM} == "linux" ]]; then
${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/${case}_segue.aot ${OUT_DIR}/${case}.wasm
if [ "$?" != 0 ]; then
echo -e "Error while compiling ${case}.wasm to ${case}_segue.aot"
exit
fi
fi
done

View File

@ -13,12 +13,14 @@ libsodium_CASES="aead_aes256gcm2 aead_aes256gcm aead_chacha20poly13052 aead_chac
scalarmult6 scalarmult7 scalarmult8 scalarmult_ed25519 scalarmult_ristretto255 \
scalarmult secretbox2 secretbox7 secretbox8 secretbox_easy2 secretbox_easy \
secretbox secretstream shorthash sign siphashx24 sodium_core sodium_utils2 \
sodium_utils3 sodium_utils sodium_version stream2 stream3 stream4 stream verify1 \
xchacha20"
sodium_utils stream2 stream3 stream4 stream verify1 xchacha20"
PLATFORM=$(uname -s | tr A-Z a-z)
readonly OUT_DIR=$PWD/libsodium/zig-out/bin
readonly REPORT=$PWD/report.txt
readonly IWASM_CMD=$PWD/../../../product-mini/platforms/linux/build/iwasm
readonly IWASM_CMD=$PWD/../../../product-mini/platforms/${PLATFORM}/build/iwasm
readonly TIME=/usr/bin/time
BENCH_NAME_MAX_LEN=20
@ -40,7 +42,11 @@ function print_bench_name()
# run benchmarks
cd $OUT_DIR
echo -en "\t\t\t\t\t\tnative\tiwasm-aot\n" >> $REPORT
if [[ ${PLATFORM} == "linux" ]]; then
echo -en "\t\t\t\t\t\tnative\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT
else
echo -en "\t\t\t\t\t\tnative\tiwasm-aot\n" >> $REPORT
fi
for t in $libsodium_CASES
do
@ -48,11 +54,38 @@ do
echo "run $t with native..."
echo -en "\t" >> $REPORT
./${t} | awk -F '-' 'BEGIN{FIELDWIDTHS="10"}{ORS=""; print $1 / 1000000.0}' >> $REPORT
if [[ $t != "sodium_utils2" ]]; then
./${t} | awk '{printf "%-10.2f", $0/1000000.0}' >> $REPORT
else
# sodium_utils2 doesn't print the result,
# use time command to get result instead
$TIME -f "real-%e-time" ./${t} 2>&1 | grep "real-.*-time" |
awk -F '-' '{printf "%-10.2f", $2}' >> $REPORT
fi
echo "run $t with iwasm aot..."
echo -en "\t \t" >> $REPORT
$IWASM_CMD ${t}.aot | awk -F '-' 'BEGIN{FIELDWIDTHS="10"}{ORS=""; print $1 / 1000000.0}' >> $REPORT
if [[ $t != "sodium_utils2" ]]; then
$IWASM_CMD ${t}.aot | awk '{printf "%-10.2f", $0/1000000.0}' >> $REPORT
else
# sodium_utils2 doesn't print the result,
# use time command to get result instead
$TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" |
awk -F '-' '{printf "%-10.2f", $2}' >> $REPORT
fi
if [[ ${PLATFORM} == "linux" ]]; then
echo "run $t with iwasm aot segue..."
echo -en "\t \t" >> $REPORT
if [[ $t != "sodium_utils2" ]]; then
$IWASM_CMD ${t}_segue.aot | awk '{printf "%.2f", $0/1000000.0}' >> $REPORT
else
# sodium_utils2 doesn't print the result,
# use time command to get result instead
$TIME -f "real-%e-time" $IWASM_CMD ${t}_segue.aot 2>&1 | grep "real-.*-time" |
awk -F '-' '{printf "%.2f", $2}' >> $REPORT
fi
fi
echo -en "\n" >> $REPORT
done

View File

@ -3,6 +3,8 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
PLATFORM=$(uname -s | tr A-Z a-z)
OUT_DIR=$PWD/out
WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc
POLYBENCH_CASES="datamining linear-algebra medley stencils"
@ -40,6 +42,12 @@ do
echo "Compile ${file_name%.*}.wasm into ${file_name%.*}.aot"
${WAMRC_CMD} -o ${OUT_DIR}/${file_name%.*}.aot \
${OUT_DIR}/${file_name%.*}.wasm
if [[ ${PLATFORM} == "linux" ]]; then
echo "Compile ${file_name%.*}.wasm into ${file_name%.*}_segue.aot"
${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/${file_name%.*}_segue.aot \
${OUT_DIR}/${file_name%.*}.wasm
fi
done
done

View File

@ -37,7 +37,11 @@ echo "Start to run cases, the result is written to report.txt"
#run benchmarks
cd $OUT_DIR
echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT
if [[ ${PLATFORM} == "linux" ]]; then
echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT
else
echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT
fi
for t in $POLYBENCH_CASES
do
@ -51,5 +55,11 @@ do
echo -en "\t" >> $REPORT
$TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
if [[ ${PLATFORM} == "linux" ]]; then
echo "run $t with iwasm aot segue .."
echo -en "\t" >> $REPORT
$TIME -f "real-%e-time" $IWASM_CMD ${t}_segue.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
fi
echo -en "\n" >> $REPORT
done

View File

@ -37,7 +37,7 @@ echo "Start to run cases, the result is written to report.txt"
#run benchmarks
cd $OUT_DIR
echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT
echo -en "\t\t\t\t\t native\tiwasm-interp\n" >> $REPORT
for t in $POLYBENCH_CASES
do

View File

@ -3,6 +3,8 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
PLATFORM=$(uname -s | tr A-Z a-z)
OUT_DIR=$PWD/out
WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc
SHOOTOUT_CASES="base64 fib2 gimli heapsort matrix memmove nestedloop \
@ -34,9 +36,12 @@ do
-Wl,--export=app_main -Wl,--export=_start \
${bench}.c main/main_${bench}.c main/my_libc.c
echo "Compile ${bench}.wasm into ${bench}.aot"
${WAMRC_CMD} -o ${OUT_DIR}/${bench}.aot ${OUT_DIR}/${bench}.wasm
if [[ ${PLATFORM} == "linux" ]]; then
echo "Compile ${bench}.wasm into ${bench}_segue.aot"
${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/${bench}_segue.aot ${OUT_DIR}/${bench}.wasm
fi
done
cd ..

View File

@ -36,7 +36,11 @@ echo "Start to run cases, the result is written to report.txt"
#run benchmarks
cd $OUT_DIR
echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT
if [[ ${PLATFORM} == "linux" ]]; then
echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT
else
echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT
fi
for t in $SHOOTOUT_CASES
do
@ -50,5 +54,11 @@ do
echo -en "\t" >> $REPORT
$TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
if [[ ${PLATFORM} == "linux" ]]; then
echo "run $t with iwasm aot segue .."
echo -en "\t" >> $REPORT
$TIME -f "real-%e-time" $IWASM_CMD ${t}_segue.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
fi
echo -en "\n" >> $REPORT
done

View File

@ -46,9 +46,9 @@ do
echo -en "\t" >> $REPORT
$TIME -f "real-%e-time" ./${t}_native 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
echo "run $t with iwasm aot .."
echo "run $t with iwasm interp .."
echo -en "\t" >> $REPORT
$TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
$TIME -f "real-%e-time" $IWASM_CMD ${t}.wasm 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT
echo -en "\n" >> $REPORT
done

View File

@ -65,6 +65,12 @@ print_help()
printf(" --enable-indirect-mode Enalbe call function through symbol table but not direct call\n");
printf(" --disable-llvm-intrinsics Disable the LLVM built-in intrinsics\n");
printf(" --disable-llvm-lto Disable the LLVM link time optimization\n");
printf(" --enable-segue[=<flags>] Enable using segment register GS as the base address of linear memory,\n");
printf(" only available on linux/linux-sgx x86-64, which may improve performance,\n");
printf(" flags can be: i32.load, i64.load, f32.load, f64.load, v128.load,\n");
printf(" i32.store, i64.store, f32.store, f64.store, v128.store\n");
printf(" Use comma to separate, e.g. --enable-segue=i32.load,i64.store\n");
printf(" and --enable-segue means all flags are added.\n");
printf(" --emit-custom-sections=<section names>\n");
printf(" Emit the specified custom sections to AoT file, using comma to separate\n");
printf(" multiple names, e.g.\n");
@ -84,7 +90,7 @@ print_help()
} while (0)
/**
* Split a strings into an array of strings
* Split a string into an array of strings
* Returns NULL on failure
* Memory must be freed by caller
* Based on: http://stackoverflow.com/a/11198630/471795
@ -126,6 +132,57 @@ split_string(char *str, int *count, const char *delimer)
return res;
}
static uint32
resolve_segue_flags(char *str_flags)
{
uint32 segue_flags = 0;
int32 flag_count, i;
char **flag_list;
flag_list = split_string(str_flags, &flag_count, ",");
if (flag_list) {
for (i = 0; i < flag_count; i++) {
if (!strcmp(flag_list[i], "i32.load")) {
segue_flags |= 1 << 0;
}
else if (!strcmp(flag_list[i], "i64.load")) {
segue_flags |= 1 << 1;
}
else if (!strcmp(flag_list[i], "f32.load")) {
segue_flags |= 1 << 2;
}
else if (!strcmp(flag_list[i], "f64.load")) {
segue_flags |= 1 << 3;
}
else if (!strcmp(flag_list[i], "v128.load")) {
segue_flags |= 1 << 4;
}
else if (!strcmp(flag_list[i], "i32.store")) {
segue_flags |= 1 << 8;
}
else if (!strcmp(flag_list[i], "i64.store")) {
segue_flags |= 1 << 9;
}
else if (!strcmp(flag_list[i], "f32.store")) {
segue_flags |= 1 << 10;
}
else if (!strcmp(flag_list[i], "f64.store")) {
segue_flags |= 1 << 11;
}
else if (!strcmp(flag_list[i], "v128.store")) {
segue_flags |= 1 << 12;
}
else {
/* invalid flag */
segue_flags = (uint32)-1;
break;
}
}
free(flag_list);
}
return segue_flags;
}
int
main(int argc, char *argv[])
{
@ -272,6 +329,15 @@ main(int argc, char *argv[])
else if (!strcmp(argv[0], "--disable-llvm-lto")) {
option.disable_llvm_lto = true;
}
else if (!strcmp(argv[0], "--enable-segue")) {
/* all flags are enabled */
option.segue_flags = 0x1F1F;
}
else if (!strncmp(argv[0], "--enable-segue=", 15)) {
option.segue_flags = resolve_segue_flags(argv[0] + 15);
if (option.segue_flags == (uint32)-1)
PRINT_HELP_AND_EXIT();
}
else if (!strncmp(argv[0], "--emit-custom-sections=", 23)) {
int len = 0;
if (option.custom_sections) {