diff --git a/.github/workflows/compilation_on_android_ubuntu.yml b/.github/workflows/compilation_on_android_ubuntu.yml index 2a57f6219..4c290475a 100644 --- a/.github/workflows/compilation_on_android_ubuntu.yml +++ b/.github/workflows/compilation_on_android_ubuntu.yml @@ -76,7 +76,7 @@ jobs: with: os: "ubuntu-22.04" arch: "X86" - + build_wamrc: needs: [build_llvm_libraries_on_ubuntu_2004, build_llvm_libraries_on_ubuntu_2204] @@ -117,84 +117,6 @@ jobs: cmake --build . --config Release --parallel 4 working-directory: wamr-compiler - build_iwasm_linux_gcc4_8: - runs-on: ubuntu-latest - container: - image: ubuntu:14.04 - strategy: - matrix: - make_options_run_mode: [ - # Running mode - $CLASSIC_INTERP_BUILD_OPTIONS, - $FAST_INTERP_BUILD_OPTIONS, - $FAST_JIT_BUILD_OPTIONS, - ] - make_options_feature: [ - # Features - "-DWAMR_BUILD_CUSTOM_NAME_SECTION=1", - "-DWAMR_BUILD_DEBUG_AOT=1", - "-DWAMR_BUILD_DEBUG_INTERP=1", - "-DWAMR_BUILD_DUMP_CALL_STACK=1", - "-DWAMR_BUILD_LIB_PTHREAD=1", - "-DWAMR_BUILD_LIB_WASI_THREADS=1", - "-DWAMR_BUILD_LOAD_CUSTOM_SECTION=1", - "-DWAMR_BUILD_MINI_LOADER=1", - "-DWAMR_BUILD_MEMORY_PROFILING=1", - "-DWAMR_BUILD_MULTI_MODULE=1", - "-DWAMR_BUILD_PERF_PROFILING=1", - "-DWAMR_BUILD_REF_TYPES=1", - "-DWAMR_BUILD_SIMD=1", - "-DWAMR_BUILD_TAIL_CALL=1", - "-DWAMR_DISABLE_HW_BOUND_CHECK=1", - ] - exclude: - # uncompatiable feature and platform - # uncompatiable mode and feature - # MULTI_MODULE only on INTERP mode - - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS - make_options_feature: "-DWAMR_BUILD_MULTI_MODULE=1" - # SIMD only on JIT/AOT mode - - make_options_run_mode: $CLASSIC_INTERP_BUILD_OPTIONS - make_options_feature: "-DWAMR_BUILD_SIMD=1" - - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS - make_options_feature: "-DWAMR_BUILD_SIMD=1" - # DEBUG_INTERP only on CLASSIC INTERP mode - - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS - make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1" - - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS - 
make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1" - # DEBUG_AOT only on JIT/AOT mode - - make_options_run_mode: $CLASSIC_INTERP_BUILD_OPTIONS - make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1" - - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS - make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1" - # TODO: DEBUG_AOT on JIT - - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS - make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1" - # MINI_LOADER only on INTERP mode - - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS - make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1" - steps: - - name: checkout - uses: actions/checkout@v3 - - - name: Install dependencies - run: apt update && apt install -y make g++-4.8 gcc-4.8 wget git - - - name: Install cmake - run: | - wget https://github.com/Kitware/CMake/releases/download/v3.26.1/cmake-3.26.1-linux-x86_64.tar.gz -O cmake.tar.gz - tar xzf cmake.tar.gz - cp cmake-3.26.1-linux-x86_64/bin/cmake /usr/local/bin - cp -r cmake-3.26.1-linux-x86_64/share/cmake-3.26/ /usr/local/share/ - - - name: Build iwasm - run: | - mkdir build && cd build - cmake .. ${{ matrix.make_options_run_mode }} ${{ matrix.make_options_feature }} -DCMAKE_C_COMPILER=gcc-4.8 -DCMAKE_CXX_COMPILER=g++-4.8 - cmake --build . --config Release --parallel 4 - working-directory: product-mini/platforms/linux - build_iwasm: needs: [build_llvm_libraries_on_ubuntu_2004, build_llvm_libraries_on_ubuntu_2204] @@ -342,7 +264,6 @@ jobs: strategy: fail-fast: false matrix: - sanitizer: ["", "ubsan"] make_options: [ # Running mode $AOT_BUILD_OPTIONS, @@ -400,14 +321,14 @@ jobs: if: (!endsWith(matrix.make_options, '_INTERP_BUILD_OPTIONS')) run: | mkdir build && cd build - cmake -DSANITIZER="${{matrix.sanitizer}}" .. + cmake .. cmake --build . --config Release --parallel 4 working-directory: wamr-compiler - name: Build Sample [wasm-c-api] run: | VERBOSE=1 - cmake -S . -B build ${{ matrix.make_options }} -DSANITIZER="${{matrix.sanitizer}}" + cmake -S . 
-B build ${{ matrix.make_options }} cmake --build build --config Release --parallel 4 ctest --test-dir build --output-on-failure working-directory: samples/wasm-c-api @@ -515,6 +436,7 @@ jobs: ] runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: os: [ubuntu-20.04, ubuntu-22.04] running_mode: diff --git a/.github/workflows/compilation_on_nuttx.yml b/.github/workflows/compilation_on_nuttx.yml index f338c8dea..2f4e3aca3 100644 --- a/.github/workflows/compilation_on_nuttx.yml +++ b/.github/workflows/compilation_on_nuttx.yml @@ -94,7 +94,7 @@ jobs: - name: Install RISC-V Compilers if: contains(matrix.nuttx_board_config, 'risc-v') run: | - curl -L https://static.dev.sifive.com/dev-tools/freedom-tools/v2020.12/riscv64-unknown-elf-toolchain-10.2.0-2020.12.8-x86_64-linux-ubuntu14.tar.gz > riscv.tar.gz + curl -L -k https://static.dev.sifive.com/dev-tools/freedom-tools/v2020.12/riscv64-unknown-elf-toolchain-10.2.0-2020.12.8-x86_64-linux-ubuntu14.tar.gz > riscv.tar.gz tar xvf riscv.tar.gz echo "$PWD/riscv64-unknown-elf-toolchain-10.2.0-2020.12.8-x86_64-linux-ubuntu14/bin" >> $GITHUB_PATH diff --git a/.github/workflows/compilation_on_sgx.yml b/.github/workflows/compilation_on_sgx.yml index f17261118..dd6317067 100644 --- a/.github/workflows/compilation_on_sgx.yml +++ b/.github/workflows/compilation_on_sgx.yml @@ -51,6 +51,7 @@ env: AOT_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=0" CLASSIC_INTERP_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=0" FAST_INTERP_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=1 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=0" + FAST_JIT_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_FAST_JIT=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=1" LLVM_LAZY_JIT_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=1 
-DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_LAZY_JIT=1" LLVM_EAGER_JIT_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_LAZY_JIT=0" @@ -70,6 +71,7 @@ jobs: $AOT_BUILD_OPTIONS, $CLASSIC_INTERP_BUILD_OPTIONS, $FAST_INTERP_BUILD_OPTIONS, + $FAST_JIT_BUILD_OPTIONS, # Running modes unsupported #$LLVM_LAZY_JIT_BUILD_OPTIONS, #$LLVM_EAGER_JIT_BUILD_OPTIONS, @@ -127,124 +129,43 @@ jobs: mkdir build && cd build cmake .. ${{ matrix.make_options_run_mode }} ${{ matrix.make_options_feature }} cmake --build . --config Release --parallel 4 + cd ../enclave-sample + make working-directory: product-mini/platforms/${{ matrix.platform }} - build_wamrc: - needs: [build_llvm_libraries] + run_samples_file: + needs: [build_iwasm, build_llvm_libraries] runs-on: ${{ matrix.os }} strategy: matrix: + iwasm_make_options_run_mode: [ + # Running modes supported + $AOT_BUILD_OPTIONS, + $CLASSIC_INTERP_BUILD_OPTIONS, + $FAST_INTERP_BUILD_OPTIONS, + $FAST_JIT_BUILD_OPTIONS, + # Running modes unsupported + #$LLVM_LAZY_JIT_BUILD_OPTIONS, + #$LLVM_EAGER_JIT_BUILD_OPTIONS, + ] + os: [ubuntu-20.04] + wasi_sdk_release: + [ + "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-19/wasi-sdk-19.0-linux.tar.gz", + ] + wabt_release: + [ + "https://github.com/WebAssembly/wabt/releases/download/1.0.31/wabt-1.0.31-ubuntu.tar.gz", + ] + iwasm_make_options_feature: [ + # Features to be tested: IPFS + "-DWAMR_BUILD_SGX_IPFS=1", + ] + platform: [linux-sgx] include: - os: ubuntu-20.04 llvm_cache_key: ${{ needs.build_llvm_libraries.outputs.cache_key }} - steps: - - name: install SGX SDK and necessary libraries - run: | - mkdir -p /opt/intel - cd /opt/intel - wget https://download.01.org/intel-sgx/sgx-linux/2.15/distro/ubuntu20.04-server/sgx_linux_x64_sdk_2.15.100.3.bin - chmod +x sgx_linux_x64_sdk_2.15.100.3.bin - echo 'yes' | ./sgx_linux_x64_sdk_2.15.100.3.bin - echo 'deb [arch=amd64] 
https://download.01.org/intel-sgx/sgx_repo/ubuntu focal main' | sudo tee /etc/apt/sources.list.d/intel-sgx.list - wget -qO - https://download.01.org/intel-sgx/sgx_repo/ubuntu/intel-sgx-deb.key | sudo apt-key add - - sudo apt update - sudo apt install -y libsgx-launch libsgx-urts - source /opt/intel/sgxsdk/environment - - name: checkout - uses: actions/checkout@v3 - - - name: Get LLVM libraries - id: retrieve_llvm_libs - uses: actions/cache@v3 - with: - path: | - ./core/deps/llvm/build/bin - ./core/deps/llvm/build/include - ./core/deps/llvm/build/lib - ./core/deps/llvm/build/libexec - ./core/deps/llvm/build/share - key: ${{ matrix.llvm_cache_key }} - - - name: Quit if cache miss - if: steps.retrieve_llvm_libs.outputs.cache-hit != 'true' - run: echo "::error::can not get prebuilt llvm libraries" && exit 1 - - - name: Build wamrc - run: | - mkdir build && cd build - cmake .. - cmake --build . --config Release --parallel 4 - working-directory: wamr-compiler - - build_samples_wasm_c_api: - needs: [build_iwasm] - runs-on: ${{ matrix.os }} - strategy: - matrix: - make_options: [ - # Running modes supported - $CLASSIC_INTERP_BUILD_OPTIONS, - $FAST_INTERP_BUILD_OPTIONS, - # Running modes unsupported - #$LLVM_EAGER_JIT_BUILD_OPTIONS, - #$LLVM_LAZY_JIT_BUILD_OPTIONS, - #$AOT_BUILD_OPTIONS, - ] - os: [ubuntu-20.04] - wasi_sdk_release: - [ - "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-19/wasi-sdk-19.0-linux.tar.gz", - ] - wabt_release: - [ - "https://github.com/WebAssembly/wabt/releases/download/1.0.31/wabt-1.0.31-ubuntu.tar.gz", - ] - steps: - - name: checkout - uses: actions/checkout@v3 - - - name: download and install wabt - run: | - cd /opt - sudo wget ${{ matrix.wabt_release }} - sudo tar -xzf wabt-1.0.31-*.tar.gz - sudo mv wabt-1.0.31 wabt - - - name: install SGX SDK and necessary libraries - run: | - mkdir -p /opt/intel - cd /opt/intel - wget 
https://download.01.org/intel-sgx/sgx-linux/2.15/distro/ubuntu20.04-server/sgx_linux_x64_sdk_2.15.100.3.bin - chmod +x sgx_linux_x64_sdk_2.15.100.3.bin - echo 'yes' | ./sgx_linux_x64_sdk_2.15.100.3.bin - echo 'deb [arch=amd64] https://download.01.org/intel-sgx/sgx_repo/ubuntu focal main' | sudo tee /etc/apt/sources.list.d/intel-sgx.list - wget -qO - https://download.01.org/intel-sgx/sgx_repo/ubuntu/intel-sgx-deb.key | sudo apt-key add - - sudo apt update - sudo apt install -y libsgx-launch libsgx-urts - source /opt/intel/sgxsdk/environment - - - name: Build Sample [wasm-c-api] - run: | - cmake -S . -B build ${{ matrix.make_options }} - cmake --build build --config Release --parallel 4 - ctest --test-dir build - working-directory: samples/wasm-c-api - - build_samples_others: - needs: [build_iwasm] - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-20.04] - wasi_sdk_release: - [ - "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-19/wasi-sdk-19.0-linux.tar.gz", - ] - wabt_release: - [ - "https://github.com/WebAssembly/wabt/releases/download/1.0.31/wabt-1.0.31-ubuntu.tar.gz", - ] steps: - name: checkout uses: actions/checkout@v3 @@ -290,13 +211,38 @@ jobs: wget -qO - https://download.01.org/intel-sgx/sgx_repo/ubuntu/intel-sgx-deb.key | sudo apt-key add - sudo apt update sudo apt install -y libsgx-launch libsgx-urts - source /opt/intel/sgxsdk/environment - - name: Build Sample [basic] + - name: Build iwasm for testing samples run: | - cd samples/basic - ./build.sh - ./run.sh + mkdir build && cd build + cmake .. ${{ matrix.iwasm_make_options_run_mode }} ${{ matrix.iwasm_make_options_feature }} + cmake --build . 
--config Release --parallel 4 + cd ../enclave-sample + make + working-directory: product-mini/platforms/${{ matrix.platform }} + + - name: Get LLVM libraries + if: matrix.iwasm_make_options_run_mode == '$AOT_BUILD_OPTIONS' + id: retrieve_llvm_libs + uses: actions/cache@v3 + with: + path: | + ./core/deps/llvm/build/bin + ./core/deps/llvm/build/include + ./core/deps/llvm/build/lib + ./core/deps/llvm/build/libexec + ./core/deps/llvm/build/share + key: ${{ matrix.llvm_cache_key }} + fail-on-cache-miss: true + + - name: Build wamrc only for testing samples in aot mode + if: matrix.iwasm_make_options_run_mode == '$AOT_BUILD_OPTIONS' + run: | + mkdir build && cd build + cmake .. + cmake --build . --config Release --parallel 4 + cp wamrc `pwd`/../../product-mini/platforms/${{ matrix.platform }}/enclave-sample + working-directory: wamr-compiler - name: Build Sample [file] run: | @@ -304,62 +250,46 @@ jobs: mkdir build && cd build cmake .. cmake --build . --config Release --parallel 4 - ./src/iwasm -f wasm-app/file.wasm -d . + cp wasm-app/file.wasm `pwd`/../../../product-mini/platforms/${{ matrix.platform }}/enclave-sample - - name: Build Sample [multi-thread] + - name: Test Sample [file] in non-aot mode + if: matrix.iwasm_make_options_run_mode != '$AOT_BUILD_OPTIONS' run: | - cd samples/multi-thread - mkdir build && cd build - cmake .. - cmake --build . --config Release --parallel 4 - ./iwasm wasm-apps/test.wasm + source /opt/intel/sgxsdk/environment + ./iwasm --dir=. file.wasm + working-directory: product-mini/platforms/${{ matrix.platform }}/enclave-sample - - name: Build Sample [multi-module] + - name: Test Sample [file] in aot mode + if: matrix.iwasm_make_options_run_mode == '$AOT_BUILD_OPTIONS' run: | - cd samples/multi-module - mkdir build && cd build - cmake .. - cmake --build . --config Release --parallel 4 - ./multi_module - - - name: Build Sample [spawn-thread] - run: | - cd samples/spawn-thread - mkdir build && cd build - cmake .. - cmake --build . 
--config Release --parallel 4 - ./spawn_thread - - - name: Build Sample [ref-types] - run: | - cd samples/ref-types - mkdir build && cd build - cmake .. - cmake --build . --config Release --parallel 4 - ./hello - - - name: Build Sample [wasi-threads] - run: | - cd samples/wasi-threads - mkdir build && cd build - cmake -DWASI_SYSROOT=`pwd`/../../../core/deps/wasi-libc/sysroot .. - cmake --build . --config Release --parallel 4 - ./iwasm wasm-apps/no_pthread.wasm + source /opt/intel/sgxsdk/environment + ./wamrc -sgx -o file.aot file.wasm + ./iwasm --dir=. file.aot + working-directory: product-mini/platforms/${{ matrix.platform }}/enclave-sample spec_test_default: - needs: [build_iwasm, build_llvm_libraries, build_wamrc] + needs: [build_iwasm, build_llvm_libraries] runs-on: ubuntu-20.04 strategy: matrix: - running_mode: ["classic-interp", "fast-interp", "aot"] - test_option: ["-x -p -s spec -b -P", "-x -p -s spec -S -b -P"] + running_mode: ["classic-interp", "fast-interp", "aot", "fast-jit"] + test_option: ["-x -p -s spec -b -P", "-x -p -s spec -S -b -P", "-x -p -s spec -X -b -P"] llvm_cache_key: ["${{ needs.build_llvm_libraries.outputs.cache_key }}"] - # classic-interp and fast-interp don't support simd exclude: + # classic-interp, fast-interp and fast-jit don't support simd - running_mode: "classic-interp" test_option: "-x -p -s spec -S -b -P" - running_mode: "fast-interp" test_option: "-x -p -s spec -S -b -P" + - running_mode: "fast-jit" + test_option: "-x -p -s spec -S -b -P" + # classic-interp, fast-interp and fast jit don't support XIP + - running_mode: "classic-interp" + test_option: "-x -p -s spec -X -b -P" + - running_mode: "fast-interp" + test_option: "-x -p -s spec -X -b -P" + - running_mode: "fast-jit" + test_option: "-x -p -s spec -X -b -P" steps: - name: checkout diff --git a/.github/workflows/nightly_run.yml b/.github/workflows/nightly_run.yml new file mode 100644 index 000000000..2f91fdb4e --- /dev/null +++ b/.github/workflows/nightly_run.yml @@ -0,0 
+1,622 @@ +# Copyright (C) 2023 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +name: nightly_run + +on: + # midnight UTC + schedule: + - cron: "0 0 * * *" + # allow to be triggered manually + workflow_dispatch: + +# Cancel any in-flight jobs for the same PR/branch so there's only one active +# at a time +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + # For BUILD + AOT_BUILD_OPTIONS: " -DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_FAST_JIT=0 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=0" + CLASSIC_INTERP_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_FAST_JIT=0 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=0" + FAST_INTERP_BUILD_OPTIONS: " -DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=1 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_FAST_JIT=0 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=0" + FAST_JIT_BUILD_OPTIONS: " -DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_FAST_JIT=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=0" + LLVM_LAZY_JIT_BUILD_OPTIONS: " -DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_FAST_JIT=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_LAZY_JIT=1" + LLVM_EAGER_JIT_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_FAST_JIT=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_LAZY_JIT=0" + MULTI_TIER_JIT_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_FAST_JIT=1 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_LAZY_JIT=1" + # For Spec Test + DEFAULT_TEST_OPTIONS: "-s spec -b -P" + MULTI_MODULES_TEST_OPTIONS: "-s spec -b -M -P" + SIMD_TEST_OPTIONS: "-s spec -b -S -P" + THREADS_TEST_OPTIONS: "-s spec -b -p -P" + X86_32_TARGET_TEST_OPTIONS: "-m x86_32 -P" + WASI_TEST_OPTIONS: "-s wasi_certification -w" + +jobs: + build_llvm_libraries_on_ubuntu_2004: + uses: 
./.github/workflows/build_llvm_libraries.yml + with: + os: "ubuntu-20.04" + arch: "X86" + + build_llvm_libraries_on_ubuntu_2204: + uses: ./.github/workflows/build_llvm_libraries.yml + with: + os: "ubuntu-22.04" + arch: "X86" + + build_wamrc: + needs: + [build_llvm_libraries_on_ubuntu_2004, build_llvm_libraries_on_ubuntu_2204] + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + - os: ubuntu-20.04 + llvm_cache_key: ${{ needs.build_llvm_libraries_on_ubuntu_2004.outputs.cache_key }} + - os: ubuntu-22.04 + llvm_cache_key: ${{ needs.build_llvm_libraries_on_ubuntu_2204.outputs.cache_key }} + steps: + - name: checkout + uses: actions/checkout@v3 + + # since jobs.id can't contain the dot character + # it is hard to use `format` to assemble the cache key + - name: Get LLVM libraries + id: retrieve_llvm_libs + uses: actions/cache@v3 + with: + path: | + ./core/deps/llvm/build/bin + ./core/deps/llvm/build/include + ./core/deps/llvm/build/lib + ./core/deps/llvm/build/libexec + ./core/deps/llvm/build/share + key: ${{ matrix.llvm_cache_key }} + + - name: Quit if cache miss + if: steps.retrieve_llvm_libs.outputs.cache-hit != 'true' + run: echo "::error::can not get prebuilt llvm libraries" && exit 1 + + - name: Build wamrc + run: | + mkdir build && cd build + cmake .. + cmake --build . 
--config Release --parallel 4 + working-directory: wamr-compiler + + build_iwasm: + needs: + [build_llvm_libraries_on_ubuntu_2004, build_llvm_libraries_on_ubuntu_2204] + runs-on: ${{ matrix.os }} + strategy: + matrix: + make_options_run_mode: [ + # Running mode + $AOT_BUILD_OPTIONS, + $CLASSIC_INTERP_BUILD_OPTIONS, + $FAST_INTERP_BUILD_OPTIONS, + $FAST_JIT_BUILD_OPTIONS, + $LLVM_LAZY_JIT_BUILD_OPTIONS, + $LLVM_EAGER_JIT_BUILD_OPTIONS, + $MULTI_TIER_JIT_BUILD_OPTIONS, + ] + make_options_feature: [ + # Features + "-DWAMR_BUILD_CUSTOM_NAME_SECTION=1", + "-DWAMR_BUILD_DEBUG_AOT=1", + "-DWAMR_BUILD_DEBUG_INTERP=1", + "-DWAMR_BUILD_DUMP_CALL_STACK=1", + "-DWAMR_BUILD_LIB_PTHREAD=1", + "-DWAMR_BUILD_LIB_WASI_THREADS=1", + "-DWAMR_BUILD_LOAD_CUSTOM_SECTION=1", + "-DWAMR_BUILD_MINI_LOADER=1", + "-DWAMR_BUILD_MEMORY_PROFILING=1", + "-DWAMR_BUILD_MULTI_MODULE=1", + "-DWAMR_BUILD_PERF_PROFILING=1", + "-DWAMR_BUILD_REF_TYPES=1", + "-DWAMR_BUILD_SIMD=1", + "-DWAMR_BUILD_TAIL_CALL=1", + "-DWAMR_DISABLE_HW_BOUND_CHECK=1", + ] + os: [ubuntu-20.04, ubuntu-22.04] + platform: [android, linux] + exclude: + # incompatible feature and platform + # incompatible mode and feature + # MULTI_MODULE only on INTERP mode + - make_options_run_mode: $AOT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_MULTI_MODULE=1" + - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_MULTI_MODULE=1" + - make_options_run_mode: $LLVM_LAZY_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_MULTI_MODULE=1" + - make_options_run_mode: $LLVM_EAGER_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_MULTI_MODULE=1" + - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_MULTI_MODULE=1" + # SIMD only on JIT/AOT mode + - make_options_run_mode: $CLASSIC_INTERP_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_SIMD=1" + - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_SIMD=1" + # DEBUG_INTERP 
only on CLASSIC INTERP mode + - make_options_run_mode: $AOT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1" + - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1" + - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1" + - make_options_run_mode: $LLVM_LAZY_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1" + - make_options_run_mode: $LLVM_EAGER_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1" + - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1" + # DEBUG_AOT only on JIT/AOT mode + - make_options_run_mode: $CLASSIC_INTERP_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1" + - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1" + # TODO: DEBUG_AOT on JIT + - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1" + - make_options_run_mode: $LLVM_LAZY_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1" + - make_options_run_mode: $LLVM_EAGER_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1" + - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1" + # MINI_LOADER only on INTERP mode + - make_options_run_mode: $AOT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1" + - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1" + - make_options_run_mode: $LLVM_LAZY_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1" + - make_options_run_mode: $LLVM_EAGER_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1" + - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1" + # Fast-JIT and Multi-Tier-JIT mode don't support 
android(X86-32) + - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS + platform: android + - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS + platform: android + # only test android on ubuntu latest + - os: ubuntu-20.04 + platform: android + include: + - os: ubuntu-20.04 + llvm_cache_key: ${{ needs.build_llvm_libraries_on_ubuntu_2004.outputs.cache_key }} + - os: ubuntu-22.04 + llvm_cache_key: ${{ needs.build_llvm_libraries_on_ubuntu_2204.outputs.cache_key }} + steps: + - name: checkout + uses: actions/checkout@v3 + + # only download llvm cache when needed + - name: Get LLVM libraries + id: retrieve_llvm_libs + if: endsWith(matrix.make_options_run_mode, '_JIT_BUILD_OPTIONS') + uses: actions/cache@v3 + with: + path: | + ./core/deps/llvm/build/bin + ./core/deps/llvm/build/include + ./core/deps/llvm/build/lib + ./core/deps/llvm/build/libexec + ./core/deps/llvm/build/share + key: ${{ matrix.llvm_cache_key }} + + - name: Quit if cache miss + if: endsWith(matrix.make_options_run_mode, '_JIT_BUILD_OPTIONS') && (steps.retrieve_llvm_libs.outputs.cache-hit != 'true') + run: echo "::error::can not get prebuilt llvm libraries" && exit 1 + + - name: Build iwasm + run: | + mkdir build && cd build + cmake .. ${{ matrix.make_options_run_mode }} ${{ matrix.make_options_feature }} + cmake --build . 
--config Release --parallel 4 + working-directory: product-mini/platforms/${{ matrix.platform }} + + build_iwasm_linux_gcc4_8: + runs-on: ubuntu-latest + container: + image: ubuntu:14.04 + strategy: + matrix: + make_options_run_mode: [ + # Running mode + $CLASSIC_INTERP_BUILD_OPTIONS, + $FAST_INTERP_BUILD_OPTIONS, + $FAST_JIT_BUILD_OPTIONS, + ] + make_options_feature: [ + # Features + "-DWAMR_BUILD_CUSTOM_NAME_SECTION=1", + "-DWAMR_BUILD_DEBUG_AOT=1", + "-DWAMR_BUILD_DEBUG_INTERP=1", + "-DWAMR_BUILD_DUMP_CALL_STACK=1", + "-DWAMR_BUILD_LIB_PTHREAD=1", + "-DWAMR_BUILD_LIB_WASI_THREADS=1", + "-DWAMR_BUILD_LOAD_CUSTOM_SECTION=1", + "-DWAMR_BUILD_MINI_LOADER=1", + "-DWAMR_BUILD_MEMORY_PROFILING=1", + "-DWAMR_BUILD_MULTI_MODULE=1", + "-DWAMR_BUILD_PERF_PROFILING=1", + "-DWAMR_BUILD_REF_TYPES=1", + "-DWAMR_BUILD_SIMD=1", + "-DWAMR_BUILD_TAIL_CALL=1", + "-DWAMR_DISABLE_HW_BOUND_CHECK=1", + ] + exclude: + # incompatible feature and platform + # incompatible mode and feature + # MULTI_MODULE only on INTERP mode + - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_MULTI_MODULE=1" + # SIMD only on JIT/AOT mode + - make_options_run_mode: $CLASSIC_INTERP_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_SIMD=1" + - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_SIMD=1" + # DEBUG_INTERP only on CLASSIC INTERP mode + - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1" + - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1" + # DEBUG_AOT only on JIT/AOT mode + - make_options_run_mode: $CLASSIC_INTERP_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1" + - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1" + # TODO: DEBUG_AOT on JIT + - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1" + # 
MINI_LOADER only on INTERP mode + - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS + make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1" + steps: + - name: checkout + uses: actions/checkout@v3 + + - name: Install dependencies + run: apt update && apt install -y make g++-4.8 gcc-4.8 wget git + + - name: Install cmake + run: | + wget https://github.com/Kitware/CMake/releases/download/v3.26.1/cmake-3.26.1-linux-x86_64.tar.gz -O cmake.tar.gz + tar xzf cmake.tar.gz + cp cmake-3.26.1-linux-x86_64/bin/cmake /usr/local/bin + cp -r cmake-3.26.1-linux-x86_64/share/cmake-3.26/ /usr/local/share/ + - name: Build iwasm + run: | + mkdir build && cd build + cmake .. ${{ matrix.make_options_run_mode }} ${{ matrix.make_options_feature }} -DCMAKE_C_COMPILER=gcc-4.8 -DCMAKE_CXX_COMPILER=g++-4.8 + cmake --build . --config Release --parallel 4 + working-directory: product-mini/platforms/linux + + build_samples_wasm_c_api: + needs: + [ + build_iwasm, + build_llvm_libraries_on_ubuntu_2004, + build_llvm_libraries_on_ubuntu_2204, + build_wamrc, + ] + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + sanitizer: ["", "ubsan", "asan"] + make_options: [ + # Running mode + $AOT_BUILD_OPTIONS, + $CLASSIC_INTERP_BUILD_OPTIONS, + $FAST_INTERP_BUILD_OPTIONS, + $FAST_JIT_BUILD_OPTIONS, + $LLVM_LAZY_JIT_BUILD_OPTIONS, + $LLVM_EAGER_JIT_BUILD_OPTIONS, + $MULTI_TIER_JIT_BUILD_OPTIONS, + ] + os: [ubuntu-20.04, ubuntu-22.04] + wasi_sdk_release: + [ + "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-20/wasi-sdk-20.0-linux.tar.gz", + ] + wabt_release: + [ + "https://github.com/WebAssembly/wabt/releases/download/1.0.31/wabt-1.0.31-ubuntu.tar.gz", + ] + include: + - os: ubuntu-20.04 + llvm_cache_key: ${{ needs.build_llvm_libraries_on_ubuntu_2004.outputs.cache_key }} + - os: ubuntu-22.04 + llvm_cache_key: ${{ needs.build_llvm_libraries_on_ubuntu_2204.outputs.cache_key }} + exclude: + - make_options: $MULTI_TIER_JIT_BUILD_OPTIONS + sanitizer: asan + steps: + - name: checkout 
+ uses: actions/checkout@v3 + + - name: Get LLVM libraries + id: retrieve_llvm_libs + if: (!endsWith(matrix.make_options, '_INTERP_BUILD_OPTIONS')) + uses: actions/cache@v3 + with: + path: | + ./core/deps/llvm/build/bin + ./core/deps/llvm/build/include + ./core/deps/llvm/build/lib + ./core/deps/llvm/build/libexec + ./core/deps/llvm/build/share + key: ${{ matrix.llvm_cache_key }} + + - name: Quit if cache miss + if: (!endsWith(matrix.make_options, '_INTERP_BUILD_OPTIONS')) && (steps.retrieve_llvm_libs.outputs.cache-hit != 'true') + run: echo "::error::can not get prebuilt llvm libraries" && exit 1 + + - name: download and install wabt + run: | + cd /opt + sudo wget ${{ matrix.wabt_release }} + sudo tar -xzf wabt-1.0.31-*.tar.gz + sudo mv wabt-1.0.31 wabt + - name: Build wamrc + if: (!endsWith(matrix.make_options, '_INTERP_BUILD_OPTIONS')) + run: | + mkdir build && cd build + cmake -D WAMR_BUILD_SANITIZER="${{matrix.sanitizer}}" .. + cmake --build . --config Release --parallel 4 + working-directory: wamr-compiler + + - name: Build Sample [wasm-c-api] + run: | + VERBOSE=1 + cmake -S . 
-B build ${{ matrix.make_options }} -D WAMR_BUILD_SANITIZER="${{matrix.sanitizer}}" + cmake --build build --config Release --parallel 4 + ctest --test-dir build --output-on-failure + working-directory: samples/wasm-c-api + + build_samples_others: + needs: [build_iwasm] + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-20.04, ubuntu-22.04] + wasi_sdk_release: + [ + "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-20/wasi-sdk-20.0-linux.tar.gz", + ] + wabt_release: + [ + "https://github.com/WebAssembly/wabt/releases/download/1.0.31/wabt-1.0.31-ubuntu.tar.gz", + ] + steps: + - name: checkout + uses: actions/checkout@v3 + + - name: download and install wasi-sdk + run: | + cd /opt + sudo wget ${{ matrix.wasi_sdk_release }} + sudo tar -xzf wasi-sdk-*.tar.gz + sudo mv wasi-sdk-20.0 wasi-sdk + - name: download and install wabt + run: | + cd /opt + sudo wget ${{ matrix.wabt_release }} + sudo tar -xzf wabt-1.0.31-*.tar.gz + sudo mv wabt-1.0.31 wabt + - name: Build Sample [basic] + run: | + cd samples/basic + ./build.sh + ./run.sh + - name: Build Sample [file] + run: | + cd samples/file + mkdir build && cd build + cmake .. + cmake --build . --config Release --parallel 4 + ./src/iwasm -f wasm-app/file.wasm -d . + - name: Build Sample [multi-thread] + run: | + cd samples/multi-thread + mkdir build && cd build + cmake .. + cmake --build . --config Release --parallel 4 + ./iwasm wasm-apps/test.wasm + - name: Build Sample [multi-module] + run: | + cd samples/multi-module + mkdir build && cd build + cmake .. + cmake --build . --config Release --parallel 4 + ./multi_module + - name: Build Sample [spawn-thread] + run: | + cd samples/spawn-thread + mkdir build && cd build + cmake .. + cmake --build . --config Release --parallel 4 + ./spawn_thread + - name: Build Sample [ref-types] + run: | + cd samples/ref-types + mkdir build && cd build + cmake .. + cmake --build . 
--config Release --parallel 4 + ./hello + - name: Build Sample [simple] + run: | + ./build.sh -p host-interp + python3 ./sample_test_run.py $(pwd)/out + exit $? + working-directory: ./samples/simple + + - name: Build Sample [wasi-threads] + run: | + cd samples/wasi-threads + mkdir build && cd build + cmake .. + cmake --build . --config Release --parallel 4 + ./iwasm wasm-apps/no_pthread.wasm + test: + needs: + [ + build_iwasm, + build_llvm_libraries_on_ubuntu_2004, + build_llvm_libraries_on_ubuntu_2204, + build_wamrc, + ] + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-20.04, ubuntu-22.04] + sanitizer: ["", "ubsan", "asan"] + running_mode: + [ + "classic-interp", + "fast-interp", + "jit", + "aot", + "fast-jit", + "multi-tier-jit", + ] + test_option: + [ + $DEFAULT_TEST_OPTIONS, + $MULTI_MODULES_TEST_OPTIONS, + $SIMD_TEST_OPTIONS, + $THREADS_TEST_OPTIONS, + $WASI_TEST_OPTIONS, + ] + wasi_sdk_release: + [ + "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-20/wasi-sdk-20.0-linux.tar.gz", + ] + include: + - os: ubuntu-20.04 + llvm_cache_key: ${{ needs.build_llvm_libraries_on_ubuntu_2004.outputs.cache_key }} + ubuntu_version: "20.04" + - os: ubuntu-22.04 + llvm_cache_key: ${{ needs.build_llvm_libraries_on_ubuntu_2204.outputs.cache_key }} + ubuntu_version: "22.04" + exclude: + # incompatible modes and features + - os: ubuntu-20.04 + sanitizer: asan + # asan works only for aot now + - running_mode: "classic-interp" + sanitizer: asan + - running_mode: "fast-interp" + sanitizer: asan + - running_mode: "jit" + sanitizer: asan + - running_mode: "fast-jit" + sanitizer: asan + - running_mode: "multi-tier-jit" + sanitizer: asan + # classic-interp and fast-interp don't support simd + - running_mode: "classic-interp" + test_option: $SIMD_TEST_OPTIONS + - running_mode: "fast-interp" + test_option: $SIMD_TEST_OPTIONS + # aot and jit don't support multi module + - running_mode: "aot" + test_option: $MULTI_MODULES_TEST_OPTIONS + - 
running_mode: "jit" + test_option: $MULTI_MODULES_TEST_OPTIONS + # fast-jit doesn't support multi module, simd + - running_mode: "fast-jit" + test_option: $MULTI_MODULES_TEST_OPTIONS + - running_mode: "fast-jit" + test_option: $SIMD_TEST_OPTIONS + # multi-tier-jit doesn't support multi module, simd + - running_mode: "multi-tier-jit" + test_option: $MULTI_MODULES_TEST_OPTIONS + - running_mode: "multi-tier-jit" + test_option: $SIMD_TEST_OPTIONS + steps: + - name: checkout + uses: actions/checkout@v3 + + - name: download and install wasi-sdk + if: matrix.test_option == '$WASI_TEST_OPTIONS' + run: | + cd /opt + sudo wget ${{ matrix.wasi_sdk_release }} + sudo tar -xzf wasi-sdk-*.tar.gz + sudo mv wasi-sdk-20.0 wasi-sdk + - name: set env variable(if llvm are used) + if: matrix.running_mode == 'aot' || matrix.running_mode == 'jit' || matrix.running_mode == 'multi-tier-jit' + run: echo "USE_LLVM=true" >> $GITHUB_ENV + + - name: set env variable(if x86_32 test needed) + if: > + (matrix.test_option == '$DEFAULT_TEST_OPTIONS' || matrix.test_option == '$THREADS_TEST_OPTIONS' + || matrix.test_option == '$WASI_TEST_OPTIONS') + && matrix.running_mode != 'fast-jit' && matrix.running_mode != 'jit' && matrix.running_mode != 'multi-tier-jit' + run: echo "TEST_ON_X86_32=true" >> $GITHUB_ENV + + - name: set sanitizer + run: echo "WAMR_BUILD_SANITIZER=${{ matrix.sanitizer }}" >> $GITHUB_ENV + + #only download llvm libraries in jit and aot mode + - name: Get LLVM libraries + if: env.USE_LLVM == 'true' + id: retrieve_llvm_libs + uses: actions/cache@v3 + with: + path: | + ./core/deps/llvm/build/bin + ./core/deps/llvm/build/include + ./core/deps/llvm/build/lib + ./core/deps/llvm/build/libexec + ./core/deps/llvm/build/share + key: ${{ matrix.llvm_cache_key }} + + - name: Quit if cache miss + if: env.USE_LLVM == 'true' && steps.retrieve_llvm_libs.outputs.cache-hit != 'true' + run: echo "::error::can not get prebuilt llvm libraries" && exit 1 + + - name: install jq JSON processor + if: 
matrix.running_mode == 'aot' && matrix.test_option == '$WASI_TEST_OPTIONS' + run: sudo apt-get update && sudo apt install -y jq + + - name: Build WASI thread tests + if: matrix.test_option == '$WASI_TEST_OPTIONS' + run: bash build.sh + working-directory: ./core/iwasm/libraries/lib-wasi-threads/test/ + + - name: build socket api tests + if: matrix.test_option == '$WASI_TEST_OPTIONS' + run: bash build.sh + working-directory: ./core/iwasm/libraries/lib-socket/test/ + + - name: run tests + timeout-minutes: 10 + run: ./test_wamr.sh ${{ matrix.test_option }} -t ${{ matrix.running_mode }} + working-directory: ./tests/wamr-test-suites + + #only install x32 support libraries when to run x86_32 cases + - name: install x32 support libraries + if: env.TEST_ON_X86_32 == 'true' + run: + # Add another apt repository as some packages cannot + # be downloaded with the github default repository + sudo curl -sSL https://packages.microsoft.com/keys/microsoft.asc | sudo tee /etc/apt/trusted.gpg.d/microsoft.asc && + sudo apt-add-repository https://packages.microsoft.com/ubuntu/${{ matrix.ubuntu_version }}/prod && + sudo apt-get update && + sudo apt install -y g++-multilib lib32gcc-9-dev + + - name: run tests x86_32 + timeout-minutes: 10 + if: env.TEST_ON_X86_32 == 'true' + run: ./test_wamr.sh ${{ env.X86_32_TARGET_TEST_OPTIONS }} ${{ matrix.test_option }} -t ${{ matrix.running_mode }} + working-directory: ./tests/wamr-test-suites \ No newline at end of file diff --git a/.github/workflows/spec_test_on_nuttx.yml b/.github/workflows/spec_test_on_nuttx.yml index 7b8403777..4ffa943a9 100644 --- a/.github/workflows/spec_test_on_nuttx.yml +++ b/.github/workflows/spec_test_on_nuttx.yml @@ -52,7 +52,7 @@ jobs: - name: Install RISC-V Compilers if: contains(matrix.nuttx_board_config, 'risc-v') run: | - curl -L https://static.dev.sifive.com/dev-tools/freedom-tools/v2020.12/riscv64-unknown-elf-toolchain-10.2.0-2020.12.8-x86_64-linux-ubuntu14.tar.gz > riscv.tar.gz + curl -L -k 
https://static.dev.sifive.com/dev-tools/freedom-tools/v2020.12/riscv64-unknown-elf-toolchain-10.2.0-2020.12.8-x86_64-linux-ubuntu14.tar.gz > riscv.tar.gz tar xvf riscv.tar.gz echo "$PWD/riscv64-unknown-elf-toolchain-10.2.0-2020.12.8-x86_64-linux-ubuntu14/bin" >> $GITHUB_PATH diff --git a/.gitignore b/.gitignore index a4889fb7f..99f1a502e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .cache +.clangd .vs .vscode .venv diff --git a/ATTRIBUTIONS.md b/ATTRIBUTIONS.md index 0cf62f499..60b6bb1b7 100644 --- a/ATTRIBUTIONS.md +++ b/ATTRIBUTIONS.md @@ -16,6 +16,7 @@ WAMR project reused some components from other open source project: - **asmjit**: for the Fast JIT x86-64 codegen implementation - **zydis**: for the Fast JIT x86-64 codegen implementation - **NuttX ELF headers**: used in core/iwasm/aot/debug/elf_parser.c +- **Dhrystone**: for the test benchmark dhrystone The WAMR fast interpreter is a clean room development. We would acknowledge the inspirations by [WASM3](https://github.com/wasm3/wasm3) open source project for the approach of pre-calculated oprand stack location. @@ -35,6 +36,7 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the | asmjit | unspecified | unspecified | https://github.com/asmjit/asmjit | | | zydis | unspecified | e14a07895136182a5b53e181eec3b1c6e0b434de | https://github.com/zyantific/zydis | | | NuttX ELF headers | 72313301e23f9c2de969fb64b9a0f67bb4c284df | 10.3.0 | https://github.com/apache/incubator-nuttx | | +| Dhrystone | 2.1 | 2.1 | https://fossies.org/linux/privat/old/ | | ## Licenses @@ -81,15 +83,19 @@ The WAMR fast interpreter is a clean room development.
We would acknowledge the [LICENSE](./tests/wamr-test-suites/spec-test-script/LICENSE) ### libuv + [LICENSE](./core/iwasm/libraries/libc-uvwasi/LICENSE_LIBUV) ### uvwasi + [LICENSE](./core/iwasm/libraries/libc-uvwasi/LICENSE_UVWASI) ### asmjit + [LICENSE](./core/iwasm/fast-jit/cg/LICENSE_ASMJIT) ### zydis + [LICENSE](./core/iwasm/fast-jit/cg/LICENSE_ZYDIS) ### NuttX ELF headers @@ -97,3 +103,7 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the [LICENSE](./core/iwasm/aot/debug/LICENSE_NUTTX) [NOTICE](./core/iwasm/aot/debug/NOTICE_NUTTX) + +### Dhrystone + +[LICENSE](./tests/benchmarks/dhrystone/LICENSE) diff --git a/README.md b/README.md index 8cbdcf495..486ca0fd0 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,7 @@ The following platforms are supported, click each link below for how to build iw - [Blog: Introduction to WAMR running modes](https://bytecodealliance.github.io/wamr.dev/blog/introduction-to-wamr-running-modes/) - [Memory usage tunning](./doc/memory_tune.md): the memory model and how to tune the memory usage - [Memory usage profiling](./doc/build_wamr.md#enable-memory-profiling-experiment): how to profile the memory usage +- [Performance tuning](./doc/perf_tune.md): how to tune the performance - [Benchmarks](./tests/benchmarks): checkout these links for how to run the benchmarks: [PolyBench](./tests/benchmarks/polybench), [CoreMark](./tests/benchmarks/coremark), [Sightglass](./tests/benchmarks/sightglass), [JetStream2](./tests/benchmarks/jetstream) - [Performance and footprint data](https://github.com/bytecodealliance/wasm-micro-runtime/wiki/Performance): the performance and footprint data diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 0684d4805..d6308ce67 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,3 +1,42 @@ +## WAMR-1.2.2 + +### Breaking Changes + +### New Features +- Implement Fast JIT multi-threading feature (#2134) + +### Bug Fixes +- Update request.ts wasm_response_send signature (#2122)
+- Fix ems allocator unaligned memory access on riscv64 (#2140) +- libc_wasi_wrapper.c: Fix min func issue for size_t < 8 bytes on some platforms (#2152) +- Fix three multi-threading and wasm-c-api-imports issues (#2173) +- Fix build polybench benchmark error with wasi-sdk-19.0 (#2187) +- Fix wamr-ide debugger ignoring launch config (#2155) + +### Enhancements +- Add test for validating linear memory size updates (#2078) +- Update Zephyr docs to remove unsupported west subcommand (#2128) +- Update messages/comments to refer the new place of the version definition (#2133) +- build_wamr_lldb.yml: sync lldb build options between ubuntu and macos (#2132) +- build_wamr_vscode_ext.yml: vsce publish only on the official repo (#2130) +- VSCode-Extension: Download lldb built for ubuntu 20.04 (#2139) +- Avoid re-installing if Tensorflow is already installed for WASI-NN (#2148) +- wamrc: Add --stack-usage option (#2158) +- Fix URL in language-bindings/python/README.md (#2166) +- Fix URL in embed_wamr.md (#2165) +- Fix URL in README.md (#2168) +- Return error when exception was raised after main thread finishes (#2169) +- wasi-nn: Add external delegation to support several NPU/GPU (#2162) +- Update document for iwasm/wamrc dependent packages (#2183) +- Use a manual flag to disable clock_nanosleep on the unsupported platforms (#2176) +- Fix compile warnings on windows platform (#2208) + +### Others +- CI: Add ubsan checks to samples/wasm-c-api (#2147) +- CI: More precise trigger paths for github actions (#2157) + +--- + ## WAMR-1.2.1 ### Breaking Changes diff --git a/build-scripts/build_llvm.py b/build-scripts/build_llvm.py index 3957f4b89..d70915c3b 100755 --- a/build-scripts/build_llvm.py +++ b/build-scripts/build_llvm.py @@ -61,7 +61,7 @@ def build_llvm(llvm_dir, platform, backends, projects, use_clang=False, extra_fl "-DLLVM_ENABLE_IDE:BOOL=OFF", "-DLLVM_ENABLE_LIBEDIT=OFF", "-DLLVM_ENABLE_TERMINFO:BOOL=OFF", - "-DLLVM_ENABLE_ZLIB:BOOL=OFF", + "-DLLVM_ENABLE_ZLIB:BOOL=ON", 
"-DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF", "-DLLVM_INCLUDE_DOCS:BOOL=OFF", "-DLLVM_INCLUDE_EXAMPLES:BOOL=OFF", diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index bfdbe3aab..572384dd8 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -127,6 +127,28 @@ else () unset (LLVM_AVAILABLE_LIBS) endif () +# Sanitizers + +set(WAMR_BUILD_SANITIZER $ENV{WAMR_BUILD_SANITIZER}) + +if (NOT DEFINED WAMR_BUILD_SANITIZER) + set(WAMR_BUILD_SANITIZER "") +elseif (WAMR_BUILD_SANITIZER STREQUAL "ubsan") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=alignment" ) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined") +elseif (WAMR_BUILD_SANITIZER STREQUAL "asan") + if (NOT WAMR_BUILD_JIT EQUAL 1) + set (ASAN_OPTIONS "verbosity=2 debug=true ") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" ) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address") + endif() +elseif (WAMR_BUILD_SANITIZER STREQUAL "tsan") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" ) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread") +elseif (NOT (WAMR_BUILD_SANITIZER STREQUAL "") ) + message(SEND_ERROR "Unsupported sanitizer: ${WAMR_BUILD_SANITIZER}") +endif() + ######################################## message ("-- Build Configurations:") @@ -370,3 +392,11 @@ if ("$ENV{COLLECT_CODE_COVERAGE}" STREQUAL "1" OR COLLECT_CODE_COVERAGE EQUAL 1) add_definitions (-DCOLLECT_CODE_COVERAGE) message (" Collect code coverage enabled") endif () +if (WAMR_BUILD_STATIC_PGO EQUAL 1) + add_definitions (-DWASM_ENABLE_STATIC_PGO=1) + message (" AOT static PGO enabled") +endif () +if (WAMR_DISABLE_WRITE_GS_BASE EQUAL 1) + add_definitions (-DWASM_DISABLE_WRITE_GS_BASE=1) + message (" Write 
linear memory base addr to x86 GS register disabled") +endif () diff --git a/build-scripts/requirements.txt b/build-scripts/requirements.txt index bf0d9d411..077c95d8a 100644 --- a/build-scripts/requirements.txt +++ b/build-scripts/requirements.txt @@ -1 +1 @@ -requests==2.28.2 \ No newline at end of file +requests==2.31.0 \ No newline at end of file diff --git a/core/config.h b/core/config.h index a1db3d6bf..6701f53af 100644 --- a/core/config.h +++ b/core/config.h @@ -449,4 +449,15 @@ #define WASM_ENABLE_WASM_CACHE 0 #endif +#ifndef WASM_ENABLE_STATIC_PGO +#define WASM_ENABLE_STATIC_PGO 0 +#endif + +/* Disable writing linear memory base address to GS segment register, + by default only in linux x86-64, linear memory base addr is written + to GS segment register before calling wasm/aot function. */ +#ifndef WASM_DISABLE_WRITE_GS_BASE +#define WASM_DISABLE_WRITE_GS_BASE 0 +#endif + #endif /* end of _CONFIG_H_ */ diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index 5345fb2d7..480a00b91 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -1430,8 +1430,28 @@ destroy_object_data_sections(AOTObjectDataSection *data_sections, uint32 i; AOTObjectDataSection *data_section = data_sections; for (i = 0; i < data_section_count; i++, data_section++) - if (data_section->data) + if (data_section->data) { +#if WASM_ENABLE_STATIC_PGO != 0 + if (!strncmp(data_section->name, "__llvm_prf_data", 15)) { + LLVMProfileData *data = (LLVMProfileData *)data_section->data; + if (data->values) { + uint32 num_value_sites = + data->num_value_sites[0] + data->num_value_sites[1]; + uint32 j; + for (j = 0; j < num_value_sites; j++) { + ValueProfNode *node = data->values[j], *node_next; + while (node) { + node_next = node->next; + wasm_runtime_free(node); + node = node_next; + } + } + wasm_runtime_free(data->values); + } + } +#endif os_munmap(data_section->data, data_section->size); + } wasm_runtime_free(data_sections); } @@ -1900,6 +1920,8 @@ 
str2uint64(const char *buf, uint64 *p_res) return true; } +#define R_X86_64_GOTPCREL 9 /* 32 bit signed PC relative offset to GOT */ + static bool do_text_relocation(AOTModule *module, AOTRelocationGroup *group, char *error_buf, uint32 error_buf_size) @@ -1937,6 +1959,14 @@ do_text_relocation(AOTModule *module, AOTRelocationGroup *group, bh_memcpy_s(symbol, symbol_len, relocation->symbol_name, symbol_len); symbol[symbol_len] = '\0'; +#if WASM_ENABLE_STATIC_PGO != 0 + if (!strcmp(symbol, "__llvm_profile_runtime") + || !strcmp(symbol, "__llvm_profile_register_function") + || !strcmp(symbol, "__llvm_profile_register_names_function")) { + continue; + } +#endif + if (!strncmp(symbol, AOT_FUNC_PREFIX, strlen(AOT_FUNC_PREFIX))) { p = symbol + strlen(AOT_FUNC_PREFIX); if (*p == '\0' @@ -1945,7 +1975,26 @@ do_text_relocation(AOTModule *module, AOTRelocationGroup *group, "invalid import symbol %s", symbol); goto check_symbol_fail; } +#if (defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)) \ + && !defined(BH_PLATFORM_WINDOWS) + if (relocation->relocation_type == R_X86_64_GOTPCREL) { + GOTItem *got_item = module->got_item_list; + uint32 got_item_idx = 0; + + while (got_item) { + if (got_item->func_idx == func_index) + break; + got_item_idx++; + got_item = got_item->next; + } + /* Calculate `GOT + G` */ + symbol_addr = module->got_func_ptrs + got_item_idx; + } + else + symbol_addr = module->func_ptrs[func_index]; +#else symbol_addr = module->func_ptrs[func_index]; +#endif } else if (!strcmp(symbol, ".text")) { symbol_addr = module->code; @@ -1956,7 +2005,13 @@ do_text_relocation(AOTModule *module, AOTRelocationGroup *group, /* ".rodata.cst4/8/16/.." 
*/ || !strncmp(symbol, ".rodata.cst", strlen(".rodata.cst")) /* ".rodata.strn.m" */ - || !strncmp(symbol, ".rodata.str", strlen(".rodata.str"))) { + || !strncmp(symbol, ".rodata.str", strlen(".rodata.str")) +#if WASM_ENABLE_STATIC_PGO != 0 + || !strncmp(symbol, "__llvm_prf_cnts", 15) + || !strncmp(symbol, "__llvm_prf_data", 15) + || !strncmp(symbol, "__llvm_prf_names", 16) +#endif + ) { symbol_addr = get_data_section_addr(module, symbol, NULL); if (!symbol_addr) { set_error_buf_v(error_buf, error_buf_size, @@ -2088,6 +2143,14 @@ do_data_relocation(AOTModule *module, AOTRelocationGroup *group, else if (!strcmp(group->section_name, ".rdata")) { data_section_name = group->section_name; } +#if WASM_ENABLE_STATIC_PGO != 0 + else if (!strncmp(group->section_name, ".rel__llvm_prf_data", 19)) { + data_section_name = group->section_name + strlen(".rel"); + } + else if (!strncmp(group->section_name, ".rela__llvm_prf_data", 20)) { + data_section_name = group->section_name + strlen(".rela"); + } +#endif else { set_error_buf(error_buf, error_buf_size, "invalid data relocation section name"); @@ -2107,6 +2170,49 @@ do_data_relocation(AOTModule *module, AOTRelocationGroup *group, if (!strcmp(symbol, ".text")) { symbol_addr = module->code; } +#if WASM_ENABLE_STATIC_PGO != 0 + else if (!strncmp(symbol, AOT_FUNC_PREFIX, strlen(AOT_FUNC_PREFIX))) { + char *p = symbol + strlen(AOT_FUNC_PREFIX); + uint32 func_index; + if (*p == '\0' + || (func_index = (uint32)atoi(p)) > module->func_count) { + set_error_buf_v(error_buf, error_buf_size, + "invalid relocation symbol %s", symbol); + return false; + } + symbol_addr = module->func_ptrs[func_index]; + } + else if (!strcmp(symbol, "__llvm_prf_cnts")) { + uint32 j; + for (j = 0; j < module->data_section_count; j++) { + if (!strncmp(module->data_sections[j].name, symbol, 15)) { + bh_assert(relocation->relocation_addend + sizeof(uint64) + <= module->data_sections[j].size); + symbol_addr = module->data_sections[j].data; + break; + } + } + if (j 
== module->data_section_count) { + set_error_buf_v(error_buf, error_buf_size, + "invalid relocation symbol %s", symbol); + return false; + } + } + else if (!strncmp(symbol, "__llvm_prf_cnts", 15)) { + uint32 j; + for (j = 0; j < module->data_section_count; j++) { + if (!strcmp(module->data_sections[j].name, symbol)) { + symbol_addr = module->data_sections[j].data; + break; + } + } + if (j == module->data_section_count) { + set_error_buf_v(error_buf, error_buf_size, + "invalid relocation symbol %s", symbol); + return false; + } + } +#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */ else { set_error_buf_v(error_buf, error_buf_size, "invalid relocation symbol %s", symbol); @@ -2154,7 +2260,7 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end, { AOTRelocationGroup *groups = NULL, *group; uint32 symbol_count = 0; - uint32 group_count = 0, i, j; + uint32 group_count = 0, i, j, got_item_count = 0; uint64 size; uint32 *symbol_offsets, total_string_len; uint8 *symbol_buf, *symbol_buf_end; @@ -2216,6 +2322,8 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end, for (j = 0; j < relocation_count; j++) { AOTRelocation relocation = { 0 }; + char group_name_buf[128] = { 0 }; + char symbol_name_buf[128] = { 0 }; uint32 symbol_index, offset32; int32 addend32; uint16 symbol_name_len; @@ -2244,10 +2352,10 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end, symbol_name_len = *(uint16 *)symbol_name; symbol_name += sizeof(uint16); - char group_name_buf[128] = { 0 }; - char symbol_name_buf[128] = { 0 }; - memcpy(group_name_buf, group_name, group_name_len); - memcpy(symbol_name_buf, symbol_name, symbol_name_len); + bh_memcpy_s(group_name_buf, (uint32)sizeof(group_name_buf), + group_name, group_name_len); + bh_memcpy_s(symbol_name_buf, (uint32)sizeof(symbol_name_buf), + symbol_name, symbol_name_len); if ((group_name_len == strlen(".text") || (module->is_indirect_mode @@ -2309,6 +2417,139 @@ load_relocation_section(const uint8 *buf, const uint8 
*buf_end, } #endif /* end of defined(BH_PLATFORM_WINDOWS) */ +#if (defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)) \ + && !defined(BH_PLATFORM_WINDOWS) + buf = symbol_buf_end; + read_uint32(buf, buf_end, group_count); + + /* Resolve the relocations of type R_X86_64_GOTPCREL */ + for (i = 0; i < group_count; i++) { + uint32 name_index, relocation_count; + uint16 group_name_len; + uint8 *group_name; + + /* section name address is 4 bytes aligned. */ + buf = (uint8 *)align_ptr(buf, sizeof(uint32)); + read_uint32(buf, buf_end, name_index); + + if (name_index >= symbol_count) { + set_error_buf(error_buf, error_buf_size, + "symbol index out of range"); + goto fail; + } + + group_name = symbol_buf + symbol_offsets[name_index]; + group_name_len = *(uint16 *)group_name; + group_name += sizeof(uint16); + + read_uint32(buf, buf_end, relocation_count); + + for (j = 0; j < relocation_count; j++) { + AOTRelocation relocation = { 0 }; + char group_name_buf[128] = { 0 }; + char symbol_name_buf[128] = { 0 }; + uint32 symbol_index; + uint16 symbol_name_len; + uint8 *symbol_name; + + /* relocation offset and addend */ + buf += sizeof(void *) * 2; + + read_uint32(buf, buf_end, relocation.relocation_type); + read_uint32(buf, buf_end, symbol_index); + + if (symbol_index >= symbol_count) { + set_error_buf(error_buf, error_buf_size, + "symbol index out of range"); + goto fail; + } + + symbol_name = symbol_buf + symbol_offsets[symbol_index]; + symbol_name_len = *(uint16 *)symbol_name; + symbol_name += sizeof(uint16); + + bh_memcpy_s(group_name_buf, (uint32)sizeof(group_name_buf), + group_name, group_name_len); + bh_memcpy_s(symbol_name_buf, (uint32)sizeof(symbol_name_buf), + symbol_name, symbol_name_len); + + if (relocation.relocation_type == R_X86_64_GOTPCREL + && !strncmp(symbol_name_buf, AOT_FUNC_PREFIX, + strlen(AOT_FUNC_PREFIX))) { + uint32 func_idx = + atoi(symbol_name_buf + strlen(AOT_FUNC_PREFIX)); + GOTItem *got_item = module->got_item_list; + + if (func_idx >= 
module->func_count) { + set_error_buf(error_buf, error_buf_size, + "func index out of range"); + goto fail; + } + + while (got_item) { + if (got_item->func_idx == func_idx) + break; + got_item = got_item->next; + } + + if (!got_item) { + /* Create the got item and append to the list */ + got_item = wasm_runtime_malloc(sizeof(GOTItem)); + if (!got_item) { + set_error_buf(error_buf, error_buf_size, + "allocate memory failed"); + goto fail; + } + + got_item->func_idx = func_idx; + got_item->next = NULL; + if (!module->got_item_list) { + module->got_item_list = module->got_item_list_end = + got_item; + } + else { + module->got_item_list_end->next = got_item; + module->got_item_list_end = got_item; + } + + got_item_count++; + } + } + } + } + + if (got_item_count) { + GOTItem *got_item = module->got_item_list; + uint32 got_item_idx = 0; + + map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE; + /* aot code and data in x86_64 must be in range 0 to 2G due to + relocation for R_X86_64_32/32S/PC32 */ + map_flags = MMAP_MAP_32BIT; + + /* Create the GOT for func_ptrs, note that it is different from + the .got section of a dynamic object file */ + size = (uint64)sizeof(void *) * got_item_count; + if (size > UINT32_MAX + || !(module->got_func_ptrs = + os_mmap(NULL, (uint32)size, map_prot, map_flags))) { + set_error_buf(error_buf, error_buf_size, "mmap memory failed"); + goto fail; + } + + while (got_item) { + module->got_func_ptrs[got_item_idx++] = + module->func_ptrs[got_item->func_idx]; + got_item = got_item->next; + } + + module->got_item_count = got_item_count; + } +#else + (void)got_item_count; +#endif /* (defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)) && \ + !defined(BH_PLATFORM_WINDOWS) */ + buf = symbol_buf_end; read_uint32(buf, buf_end, group_count); @@ -2889,6 +3130,16 @@ load(const uint8 *buf, uint32 size, AOTModule *module, char *error_buf, module->code and will be destroyed in aot_unload() */ destroy_sections(section_list, false); } + +#if 0 + { + uint32 i; + 
for (i = 0; i < module->func_count; i++) { + os_printf("AOT func %u, addr: %p\n", i, module->func_ptrs[i]); + } + } +#endif + return ret; fail: return false; @@ -2984,9 +3235,27 @@ aot_unload(AOTModule *module) } #endif +#if (defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)) \ + && !defined(BH_PLATFORM_WINDOWS) + { + GOTItem *got_item = module->got_item_list, *got_item_next; + + if (module->got_func_ptrs) { + os_munmap(module->got_func_ptrs, + sizeof(void *) * module->got_item_count); + } + while (got_item) { + got_item_next = got_item->next; + wasm_runtime_free(got_item); + got_item = got_item_next; + } + } +#endif + if (module->data_sections) destroy_object_data_sections(module->data_sections, module->data_section_count); + #if WASM_ENABLE_DEBUG_AOT != 0 jit_code_entry_destroy(module->elf_hdr); #endif @@ -3033,3 +3302,23 @@ aot_get_custom_section(const AOTModule *module, const char *name, uint32 *len) return NULL; } #endif /* end of WASM_ENABLE_LOAD_CUSTOM_SECTION */ + +#if WASM_ENABLE_STATIC_PGO != 0 +void +aot_exchange_uint16(uint8 *p_data) +{ + return exchange_uint16(p_data); +} + +void +aot_exchange_uint32(uint8 *p_data) +{ + return exchange_uint32(p_data); +} + +void +aot_exchange_uint64(uint8 *p_data) +{ + return exchange_uint64(p_data); +} +#endif diff --git a/core/iwasm/aot/aot_reloc.h b/core/iwasm/aot/aot_reloc.h index 9f5c2d57f..98df09cb4 100644 --- a/core/iwasm/aot/aot_reloc.h +++ b/core/iwasm/aot/aot_reloc.h @@ -121,6 +121,14 @@ typedef struct { REG_SYM(aot_intrinsic_i32_rem_s), \ REG_SYM(aot_intrinsic_i32_rem_u), \ +#if WASM_ENABLE_STATIC_PGO != 0 +#define REG_LLVM_PGO_SYM() \ + { "__llvm_profile_instrument_target", llvm_profile_instrument_target }, \ + { "__llvm_profile_instrument_memop", llvm_profile_instrument_memop }, +#else +#define REG_LLVM_PGO_SYM() +#endif + #define REG_COMMON_SYMBOLS \ REG_SYM(aot_set_exception_with_id), \ REG_SYM(aot_invoke_native), \ @@ -150,6 +158,7 @@ typedef struct { REG_REF_TYPES_SYM() \ REG_AOT_TRACE_SYM() 
\ REG_INTRINSIC_SYM() \ + REG_LLVM_PGO_SYM() \ #define CHECK_RELOC_OFFSET(data_size) do { \ if (!check_reloc_offset(target_section_size, \ diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c index b5c406b96..0a9c6144d 100644 --- a/core/iwasm/aot/aot_runtime.c +++ b/core/iwasm/aot/aot_runtime.c @@ -1015,6 +1015,15 @@ execute_post_instantiate_functions(AOTModuleInstance *module_inst, } } +#if defined(os_writegsbase) + { + AOTMemoryInstance *memory_inst = aot_get_default_memory(module_inst); + if (memory_inst) + /* write base addr of linear memory to GS segment register */ + os_writegsbase(memory_inst->memory_data); + } +#endif + /* Execute start function for both main insance and sub instance */ if (module->start_function) { AOTFunctionInstance start_func = { 0 }; @@ -1453,6 +1462,15 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function, } argc = func_type->param_cell_num; +#if defined(os_writegsbase) + { + AOTMemoryInstance *memory_inst = aot_get_default_memory(module_inst); + if (memory_inst) + /* write base addr of linear memory to GS segment register */ + os_writegsbase(memory_inst->memory_data); + } +#endif + /* func pointer was looked up previously */ bh_assert(function->u.func.func_ptr != NULL); @@ -2779,12 +2797,14 @@ aot_dump_call_stack(WASMExecEnv *exec_env, bool print, char *buf, uint32 len) /* function name not exported, print number instead */ if (frame.func_name_wp == NULL) { - line_length = snprintf(line_buf, sizeof(line_buf), "#%02d $f%d\n", - n, frame.func_index); + line_length = + snprintf(line_buf, sizeof(line_buf), + "#%02" PRIu32 " $f%" PRIu32 "\n", n, frame.func_index); } else { - line_length = snprintf(line_buf, sizeof(line_buf), "#%02d %s\n", n, - frame.func_name_wp); + line_length = + snprintf(line_buf, sizeof(line_buf), "#%02" PRIu32 " %s\n", n, + frame.func_name_wp); } if (line_length >= sizeof(line_buf)) { @@ -2834,3 +2854,520 @@ aot_dump_perf_profiling(const AOTModuleInstance *module_inst) } } 
#endif /* end of WASM_ENABLE_PERF_PROFILING */ + +#if WASM_ENABLE_STATIC_PGO != 0 + +/* indirect call target */ +#define IPVK_IndirectCallTarget 0 +/* memory intrinsic functions size */ +#define IPVK_MemOPSize 1 +#define IPVK_First IPVK_IndirectCallTarget +#define IPVK_Last IPVK_MemOPSize + +#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24 +#define INSTR_PROF_MAX_NUM_VAL_PER_SITE 255 + +static int hasNonDefaultValsPerSite = 0; +static uint32 VPMaxNumValsPerSite = INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE; + +static bool +cmpxchg_ptr(void **ptr, void *old_val, void *new_val) +{ +#if defined(os_atomic_cmpxchg) + return os_atomic_cmpxchg(ptr, &old_val, new_val); +#else + /* TODO: add lock when thread-manager is enabled */ + void *read = *ptr; + if (read == old_val) { + *ptr = new_val; + return true; + } + return false; +#endif +} + +static int +allocateValueProfileCounters(LLVMProfileData *Data) +{ + ValueProfNode **Mem; + uint64 NumVSites = 0, total_size; + uint32 VKI; + + /* When dynamic allocation is enabled, allow tracking the max number of + values allowed. */ + if (!hasNonDefaultValsPerSite) + VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; + + for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI) + NumVSites += Data->num_value_sites[VKI]; + + /* If NumVSites = 0, calloc is allowed to return a non-null pointer. 
*/ + bh_assert(NumVSites > 0 && "NumVSites can't be zero"); + + total_size = (uint64)sizeof(ValueProfNode *) * NumVSites; + if (total_size > UINT32_MAX + || !(Mem = (ValueProfNode **)wasm_runtime_malloc((uint32)total_size))) { + return 0; + } + memset(Mem, 0, (uint32)total_size); + + if (!cmpxchg_ptr((void **)&Data->values, NULL, Mem)) { + wasm_runtime_free(Mem); + return 0; + } + return 1; +} + +static ValueProfNode * +allocateOneNode(void) +{ + ValueProfNode *Node; + + Node = wasm_runtime_malloc((uint32)sizeof(ValueProfNode)); + if (Node) + memset(Node, 0, sizeof(ValueProfNode)); + return Node; +} + +static void +instrumentTargetValueImpl(uint64 TargetValue, void *Data, uint32 CounterIndex, + uint64 CountValue) +{ + ValueProfNode **ValueCounters; + ValueProfNode *PrevVNode = NULL, *MinCountVNode = NULL, *CurVNode; + LLVMProfileData *PData = (LLVMProfileData *)Data; + uint64 MinCount = UINT64_MAX; + uint8 VDataCount = 0; + bool success = false; + + if (!PData) + return; + if (!CountValue) + return; + if (!PData->values) { + if (!allocateValueProfileCounters(PData)) + return; + } + + ValueCounters = (ValueProfNode **)PData->values; + CurVNode = ValueCounters[CounterIndex]; + + while (CurVNode) { + if (TargetValue == CurVNode->value) { + CurVNode->count += CountValue; + return; + } + if (CurVNode->count < MinCount) { + MinCount = CurVNode->count; + MinCountVNode = CurVNode; + } + PrevVNode = CurVNode; + CurVNode = CurVNode->next; + ++VDataCount; + } + + if (VDataCount >= VPMaxNumValsPerSite) { + if (MinCountVNode->count <= CountValue) { + CurVNode = MinCountVNode; + CurVNode->value = TargetValue; + CurVNode->count = CountValue; + } + else + MinCountVNode->count -= CountValue; + + return; + } + + CurVNode = allocateOneNode(); + if (!CurVNode) + return; + CurVNode->value = TargetValue; + CurVNode->count += CountValue; + + if (!ValueCounters[CounterIndex]) { + success = + cmpxchg_ptr((void **)&ValueCounters[CounterIndex], NULL, CurVNode); + } + else if (PrevVNode && 
!PrevVNode->next) { + success = cmpxchg_ptr((void **)&PrevVNode->next, 0, CurVNode); + } + + if (!success) { + wasm_runtime_free(CurVNode); + } +} + +void +llvm_profile_instrument_target(uint64 target_value, void *data, + uint32 counter_idx) +{ + instrumentTargetValueImpl(target_value, data, counter_idx, 1); +} + +static inline uint32 +popcount64(uint64 u) +{ + uint32 ret = 0; + while (u) { + u = (u & (u - 1)); + ret++; + } + return ret; +} + +static inline uint32 +clz64(uint64 type) +{ + uint32 num = 0; + if (type == 0) + return 64; + while (!(type & 0x8000000000000000LL)) { + num++; + type <<= 1; + } + return num; +} + +/* Map an (observed) memop size value to the representative value of its range. + For example, 5 -> 5, 22 -> 17, 99 -> 65, 256 -> 256, 1001 -> 513. */ +static uint64 +InstrProfGetRangeRepValue(uint64 Value) +{ + if (Value <= 8) + /* The first ranges are individually tracked. Use the value as is. */ + return Value; + else if (Value >= 513) + /* The last range is mapped to its lowest value. */ + return 513; + else if (popcount64(Value) == 1) + /* If it's a power of two, use it as is. */ + return Value; + else + /* Otherwise, take to the previous power of two + 1. 
*/ + return (((uint64)1) << (64 - clz64(Value) - 1)) + 1; +} + +void +llvm_profile_instrument_memop(uint64 target_value, void *data, + uint32 counter_idx) +{ + uint64 rep_value = InstrProfGetRangeRepValue(target_value); + instrumentTargetValueImpl(rep_value, data, counter_idx, 1); +} + +static uint32 +get_pgo_prof_data_size(AOTModuleInstance *module_inst, uint32 *p_num_prof_data, + uint32 *p_num_prof_counters, uint32 *p_padding_size, + uint32 *p_prof_counters_size, uint32 *p_prof_names_size, + uint32 *p_value_counters_size, uint8 **p_prof_names) +{ + AOTModule *module = (AOTModule *)module_inst->module; + LLVMProfileData *prof_data; + uint8 *prof_names = NULL; + uint32 num_prof_data = 0, num_prof_counters = 0, padding_size, i; + uint32 prof_counters_size = 0, prof_names_size = 0; + uint32 total_size, total_size_wo_value_counters; + + for (i = 0; i < module->data_section_count; i++) { + if (!strncmp(module->data_sections[i].name, "__llvm_prf_data", 15)) { + bh_assert(module->data_sections[i].size == sizeof(LLVMProfileData)); + num_prof_data++; + prof_data = (LLVMProfileData *)module->data_sections[i].data; + num_prof_counters += prof_data->num_counters; + } + else if (!strncmp(module->data_sections[i].name, "__llvm_prf_cnts", + 15)) { + prof_counters_size += module->data_sections[i].size; + } + else if (!strncmp(module->data_sections[i].name, "__llvm_prf_names", + 16)) { + prof_names_size = module->data_sections[i].size; + prof_names = module->data_sections[i].data; + } + } + + if (prof_counters_size != num_prof_counters * sizeof(uint64)) + return 0; + + total_size = sizeof(LLVMProfileRawHeader) + + num_prof_data * sizeof(LLVMProfileData_64) + + prof_counters_size + prof_names_size; + padding_size = sizeof(uint64) - (prof_names_size % sizeof(uint64)); + if (padding_size != sizeof(uint64)) + total_size += padding_size; + + /* Total size excluding value counters */ + total_size_wo_value_counters = total_size; + + for (i = 0; i < module->data_section_count; i++) { + if 
(!strncmp(module->data_sections[i].name, "__llvm_prf_data", 15)) { + uint32 j, k, num_value_sites, num_value_nodes; + ValueProfNode **values, *value_node; + + prof_data = (LLVMProfileData *)module->data_sections[i].data; + values = prof_data->values; + + if (prof_data->num_value_sites[0] > 0 + || prof_data->num_value_sites[1] > 0) { + /* TotalSize (uint32) and NumValueKinds (uint32) */ + total_size += 8; + for (j = 0; j < 2; j++) { + if ((num_value_sites = prof_data->num_value_sites[j]) > 0) { + /* ValueKind (uint32) and NumValueSites (uint32) */ + total_size += 8; + /* (Value + Counter) group counts of each value site, + each count is one byte */ + total_size += align_uint(num_value_sites, 8); + + if (values) { + for (k = 0; k < num_value_sites; k++) { + num_value_nodes = 0; + value_node = *values; + while (value_node) { + num_value_nodes++; + value_node = value_node->next; + } + if (num_value_nodes) { + /* (Value + Counter) groups */ + total_size += num_value_nodes * 8 * 2; + } + values++; + } + } + } + } + } + } + } + + if (p_num_prof_data) + *p_num_prof_data = num_prof_data; + if (p_num_prof_counters) + *p_num_prof_counters = num_prof_counters; + if (p_padding_size) + *p_padding_size = padding_size; + if (p_prof_counters_size) + *p_prof_counters_size = prof_counters_size; + if (p_prof_names_size) + *p_prof_names_size = prof_names_size; + if (p_value_counters_size) + *p_value_counters_size = total_size - total_size_wo_value_counters; + if (p_prof_names) + *p_prof_names = prof_names; + + return total_size; +} + +uint32 +aot_get_pgo_prof_data_size(AOTModuleInstance *module_inst) +{ + return get_pgo_prof_data_size(module_inst, NULL, NULL, NULL, NULL, NULL, + NULL, NULL); +} + +static union { + int a; + char b; +} __ue = { .a = 1 }; + +#define is_little_endian() (__ue.b == 1) + +uint32 +aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf, + uint32 len) +{ + AOTModule *module = (AOTModule *)module_inst->module; + LLVMProfileRawHeader prof_header = 
{ 0 }; + LLVMProfileData *prof_data; + uint8 *prof_names = NULL; + uint32 num_prof_data = 0, num_prof_counters = 0, padding_size, i; + uint32 prof_counters_size = 0, prof_names_size = 0; + uint32 value_counters_size = 0, value_counters_size_backup = 0; + uint32 total_size, size; + int64 counters_delta, offset_counters; + + total_size = get_pgo_prof_data_size(module_inst, &num_prof_data, + &num_prof_counters, &padding_size, + &prof_counters_size, &prof_names_size, + &value_counters_size, &prof_names); + if (len < total_size) + return 0; + + value_counters_size_backup = value_counters_size; + value_counters_size = 0; + + prof_header.counters_delta = counters_delta = + sizeof(LLVMProfileData_64) * num_prof_data; + offset_counters = 0; + for (i = 0; i < module->data_section_count; i++) { + if (!strncmp(module->data_sections[i].name, "__llvm_prf_data", 15)) { + prof_data = (LLVMProfileData *)module->data_sections[i].data; + prof_data->offset_counters = counters_delta + offset_counters; + offset_counters += prof_data->num_counters * sizeof(uint64); + counters_delta -= sizeof(LLVMProfileData_64); + } + } + + prof_header.magic = 0xFF6C70726F667281LL; + /* Version 8 */ + prof_header.version = 0x0000000000000008LL; + /* with VARIANT_MASK_IR_PROF (IR Instrumentation) */ + prof_header.version |= 0x1ULL << 56; + /* with VARIANT_MASK_MEMPROF (Memory Profile) */ + prof_header.version |= 0x1ULL << 62; + prof_header.num_prof_data = num_prof_data; + prof_header.num_prof_counters = num_prof_counters; + prof_header.names_size = prof_names_size; + prof_header.value_kind_last = 1; + + if (!is_little_endian()) { + aot_exchange_uint64((uint8 *)&prof_header.magic); + aot_exchange_uint64((uint8 *)&prof_header.version); + aot_exchange_uint64((uint8 *)&prof_header.num_prof_data); + aot_exchange_uint64((uint8 *)&prof_header.num_prof_counters); + aot_exchange_uint64((uint8 *)&prof_header.names_size); + aot_exchange_uint64((uint8 *)&prof_header.counters_delta); + aot_exchange_uint64((uint8 
*)&prof_header.value_kind_last); + } + + size = sizeof(LLVMProfileRawHeader); + bh_memcpy_s(buf, size, &prof_header, size); + buf += size; + + for (i = 0; i < module->data_section_count; i++) { + if (!strncmp(module->data_sections[i].name, "__llvm_prf_data", 15)) { + LLVMProfileData_64 *prof_data_64 = (LLVMProfileData_64 *)buf; + + /* Convert LLVMProfileData to LLVMProfileData_64, the pointer width + in the output file is always 8 bytes */ + prof_data = (LLVMProfileData *)module->data_sections[i].data; + prof_data_64->func_md5 = prof_data->func_md5; + prof_data_64->func_hash = prof_data->func_hash; + prof_data_64->offset_counters = prof_data->offset_counters; + prof_data_64->func_ptr = prof_data->func_ptr; + prof_data_64->values = (uint64)(uintptr_t)prof_data->values; + prof_data_64->num_counters = prof_data->num_counters; + prof_data_64->num_value_sites[0] = prof_data->num_value_sites[0]; + prof_data_64->num_value_sites[1] = prof_data->num_value_sites[1]; + + if (!is_little_endian()) { + aot_exchange_uint64((uint8 *)&prof_data_64->func_md5); + aot_exchange_uint64((uint8 *)&prof_data_64->func_hash); + aot_exchange_uint64((uint8 *)&prof_data_64->offset_counters); + aot_exchange_uint64((uint8 *)&prof_data_64->func_ptr); + aot_exchange_uint64((uint8 *)&prof_data_64->values); + aot_exchange_uint32((uint8 *)&prof_data_64->num_counters); + aot_exchange_uint16((uint8 *)&prof_data_64->num_value_sites[0]); + aot_exchange_uint16((uint8 *)&prof_data_64->num_value_sites[1]); + } + buf += sizeof(LLVMProfileData_64); + } + } + + for (i = 0; i < module->data_section_count; i++) { + if (!strncmp(module->data_sections[i].name, "__llvm_prf_cnts", 15)) { + size = module->data_sections[i].size; + bh_memcpy_s(buf, size, module->data_sections[i].data, size); + buf += size; + } + } + + if (prof_names && prof_names_size > 0) { + size = prof_names_size; + bh_memcpy_s(buf, size, prof_names, size); + buf += size; + padding_size = sizeof(uint64) - (prof_names_size % sizeof(uint64)); +
if (padding_size != sizeof(uint64)) { + char padding_buf[8] = { 0 }; + bh_memcpy_s(buf, padding_size, padding_buf, padding_size); + buf += padding_size; + } + } + + for (i = 0; i < module->data_section_count; i++) { + if (!strncmp(module->data_sections[i].name, "__llvm_prf_data", 15)) { + uint32 j, k, num_value_sites, num_value_nodes; + ValueProfNode **values, **values_tmp, *value_node; + + prof_data = (LLVMProfileData *)module->data_sections[i].data; + values = values_tmp = prof_data->values; + + if (prof_data->num_value_sites[0] > 0 + || prof_data->num_value_sites[1] > 0) { + uint32 *buf_total_size = (uint32 *)buf; + + buf += 4; /* emit TotalSize later */ + *(uint32 *)buf = (prof_data->num_value_sites[0] > 0 + && prof_data->num_value_sites[1] > 0) + ? 2 + : 1; + if (!is_little_endian()) + aot_exchange_uint32((uint8 *)buf); + buf += 4; + + for (j = 0; j < 2; j++) { + if ((num_value_sites = prof_data->num_value_sites[j]) > 0) { + /* ValueKind */ + *(uint32 *)buf = j; + if (!is_little_endian()) + aot_exchange_uint32((uint8 *)buf); + buf += 4; + /* NumValueSites */ + *(uint32 *)buf = num_value_sites; + if (!is_little_endian()) + aot_exchange_uint32((uint8 *)buf); + buf += 4; + + for (k = 0; k < num_value_sites; k++) { + num_value_nodes = 0; + if (values_tmp) { + value_node = *values_tmp; + while (value_node) { + num_value_nodes++; + value_node = value_node->next; + } + values_tmp++; + } + bh_assert(num_value_nodes < 255); + *(uint8 *)buf++ = (uint8)num_value_nodes; + } + if (num_value_sites % 8) { + buf += 8 - (num_value_sites % 8); + } + + for (k = 0; k < num_value_sites; k++) { + if (values) { + value_node = *values; + while (value_node) { + *(uint64 *)buf = value_node->value; + if (!is_little_endian()) + aot_exchange_uint64((uint8 *)buf); + buf += 8; + *(uint64 *)buf = value_node->count; + if (!is_little_endian()) + aot_exchange_uint64((uint8 *)buf); + buf += 8; + value_node = value_node->next; + } + values++; + } + } + } + } + + /* TotalSize */ + *(uint32 
*)buf_total_size = + (uint8 *)buf - (uint8 *)buf_total_size; + if (!is_little_endian()) + aot_exchange_uint32((uint8 *)buf_total_size); + value_counters_size += (uint8 *)buf - (uint8 *)buf_total_size; + } + } + } + + bh_assert(value_counters_size == value_counters_size_backup); + (void)value_counters_size_backup; + + return total_size; +} +#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */ diff --git a/core/iwasm/aot/aot_runtime.h b/core/iwasm/aot/aot_runtime.h index bcd06534e..2493d7c2c 100644 --- a/core/iwasm/aot/aot_runtime.h +++ b/core/iwasm/aot/aot_runtime.h @@ -41,6 +41,10 @@ typedef struct AOTObjectDataSection { char *name; uint8 *data; uint32 size; +#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0 + bool is_name_allocated; + bool is_data_allocated; +#endif } AOTObjectDataSection; /* Relocation info */ @@ -51,6 +55,9 @@ typedef struct AOTRelocation { char *symbol_name; /* index in the symbol offset field */ uint32 symbol_index; +#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0 + bool is_symbol_name_allocated; +#endif } AOTRelocation; /* Relocation Group */ @@ -60,6 +67,9 @@ typedef struct AOTRelocationGroup { uint32 name_index; uint32 relocation_count; AOTRelocation *relocations; +#if WASM_ENABLE_WAMR_COMPILER != 0 || WASM_ENABLE_JIT != 0 + bool is_section_name_allocated; +#endif } AOTRelocationGroup; /* AOT function instance */ @@ -108,6 +118,13 @@ typedef struct AOTUnwindInfo { #define PLT_ITEM_SIZE 12 #endif +#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) +typedef struct GOTItem { + uint32 func_idx; + struct GOTItem *next; +} GOTItem, *GOTItemList; +#endif + typedef struct AOTModule { uint32 module_type; @@ -204,6 +221,13 @@ typedef struct AOTModule { bool rtl_func_table_registered; #endif +#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) + uint32 got_item_count; + GOTItemList got_item_list; + GOTItemList got_item_list_end; + void **got_func_ptrs; +#endif + /* data sections in AOT object file, including
.data, .rodata and .rodata.cstN. */ AOTObjectDataSection *data_sections; @@ -294,6 +318,54 @@ typedef struct AOTFrame { #endif } AOTFrame; +#if WASM_ENABLE_STATIC_PGO != 0 +typedef struct LLVMProfileRawHeader { + uint64 magic; + uint64 version; + uint64 binary_ids_size; + uint64 num_prof_data; + uint64 padding_bytes_before_counters; + uint64 num_prof_counters; + uint64 padding_bytes_after_counters; + uint64 names_size; + uint64 counters_delta; + uint64 names_delta; + uint64 value_kind_last; +} LLVMProfileRawHeader; + +typedef struct ValueProfNode { + uint64 value; + uint64 count; + struct ValueProfNode *next; +} ValueProfNode; + +/* The profiling data of data sections created by aot compiler and + used when profiling, the width of pointer can be 8 bytes (64-bit) + or 4 bytes (32-bit) */ +typedef struct LLVMProfileData { + uint64 func_md5; + uint64 func_hash; + uint64 offset_counters; + uintptr_t func_ptr; + ValueProfNode **values; + uint32 num_counters; + uint16 num_value_sites[2]; +} LLVMProfileData; + +/* The profiling data for writing to the output file, the width of + pointer is 8 bytes, assuming we always use wamrc and llvm-profdata + in 64-bit mode */ +typedef struct LLVMProfileData_64 { + uint64 func_md5; + uint64 func_hash; + uint64 offset_counters; + uint64 func_ptr; + uint64 values; + uint32 num_counters; + uint16 num_value_sites[2]; +} LLVMProfileData_64; +#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */ + /** * Load a AOT module from aot file buffer * @param buf the byte buffer which contains the AOT file data @@ -564,6 +636,32 @@ aot_dump_perf_profiling(const AOTModuleInstance *module_inst); const uint8 * aot_get_custom_section(const AOTModule *module, const char *name, uint32 *len); +#if WASM_ENABLE_STATIC_PGO != 0 +void +llvm_profile_instrument_target(uint64 target_value, void *data, + uint32 counter_idx); + +void +llvm_profile_instrument_memop(uint64 target_value, void *data, + uint32 counter_idx); + +uint32
+aot_get_pgo_prof_data_size(AOTModuleInstance *module_inst); + +uint32 +aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf, + uint32 len); + +void +aot_exchange_uint16(uint8 *p_data); + +void +aot_exchange_uint32(uint8 *p_data); + +void +aot_exchange_uint64(uint8 *p_data); +#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */ + #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/aot/arch/aot_reloc_x86_32.c b/core/iwasm/aot/arch/aot_reloc_x86_32.c index af3e0bb8e..5a49c14af 100644 --- a/core/iwasm/aot/arch/aot_reloc_x86_32.c +++ b/core/iwasm/aot/arch/aot_reloc_x86_32.c @@ -8,6 +8,9 @@ #define R_386_32 1 /* Direct 32 bit */ #define R_386_PC32 2 /* PC relative 32 bit */ #define R_386_PLT32 4 /* 32-bit address ProcedureLinkageTable */ +#define R_386_TLS_GD_32 \ + 24 /* Direct 32 bit for general dynamic \ + thread local data */ #if !defined(_WIN32) && !defined(_WIN32_) /* clang-format off */ @@ -110,6 +113,9 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr, { switch (reloc_type) { case R_386_32: +#if WASM_ENABLE_STATIC_PGO != 0 + case R_386_TLS_GD_32: +#endif { intptr_t value; diff --git a/core/iwasm/aot/arch/aot_reloc_x86_64.c b/core/iwasm/aot/arch/aot_reloc_x86_64.c index f4d8eeabd..1221a6297 100644 --- a/core/iwasm/aot/arch/aot_reloc_x86_64.c +++ b/core/iwasm/aot/arch/aot_reloc_x86_64.c @@ -6,11 +6,13 @@ #include "aot_reloc.h" #if !defined(BH_PLATFORM_WINDOWS) -#define R_X86_64_64 1 /* Direct 64 bit */ -#define R_X86_64_PC32 2 /* PC relative 32 bit signed */ -#define R_X86_64_PLT32 4 /* 32 bit PLT address */ -#define R_X86_64_32 10 /* Direct 32 bit zero extended */ -#define R_X86_64_32S 11 /* Direct 32 bit sign extended */ +#define R_X86_64_64 1 /* Direct 64 bit */ +#define R_X86_64_PC32 2 /* PC relative 32 bit signed */ +#define R_X86_64_PLT32 4 /* 32 bit PLT address */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed PC relative offset to GOT */ +#define R_X86_64_32 10 /* Direct 32 bit zero extended */ +#define 
R_X86_64_32S 11 /* Direct 32 bit sign extended */ +#define R_X86_64_PC64 24 /* PC relative 64 bit */ #else #ifndef IMAGE_REL_AMD64_ADDR64 #define IMAGE_REL_AMD64_ADDR64 1 /* The 64-bit VA of the relocation target */ @@ -164,6 +166,7 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr, #endif #if !defined(BH_PLATFORM_WINDOWS) case R_X86_64_PC32: + case R_X86_64_GOTPCREL: /* GOT + G has been calculated as symbol_addr */ { intptr_t target_addr = (intptr_t) /* S + A - P */ ((uintptr_t)symbol_addr + reloc_addend @@ -182,6 +185,16 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr, *(int32 *)(target_section_addr + reloc_offset) = (int32)target_addr; break; } + case R_X86_64_PC64: + { + intptr_t target_addr = (intptr_t) /* S + A - P */ + ((uintptr_t)symbol_addr + reloc_addend + - (uintptr_t)(target_section_addr + reloc_offset)); + + CHECK_RELOC_OFFSET(sizeof(int64)); + *(int64 *)(target_section_addr + reloc_offset) = (int64)target_addr; + break; + } case R_X86_64_32: case R_X86_64_32S: { diff --git a/core/iwasm/common/wasm_memory.c b/core/iwasm/common/wasm_memory.c index 82676ae27..310dab6d2 100644 --- a/core/iwasm/common/wasm_memory.c +++ b/core/iwasm/common/wasm_memory.c @@ -624,6 +624,11 @@ wasm_enlarge_memory_internal(WASMModuleInstance *module, uint32 inc_page_count) #endif #endif +#if defined(os_writegsbase) + /* write base addr of linear memory to GS segment register */ + os_writegsbase(memory_data_new); +#endif + return ret; } #else @@ -756,4 +761,4 @@ wasm_get_linear_memory_size(WASMMemoryInstance *memory, void *node) #endif return linear_mem_size; } -#endif \ No newline at end of file +#endif diff --git a/core/iwasm/common/wasm_runtime_common.c b/core/iwasm/common/wasm_runtime_common.c index 452a2661b..b2923db33 100644 --- a/core/iwasm/common/wasm_runtime_common.c +++ b/core/iwasm/common/wasm_runtime_common.c @@ -130,7 +130,7 @@ static JitCompOptions jit_options = { 0 }; #endif #if WASM_ENABLE_JIT != 0 -static LLVMJITOptions 
llvm_jit_options = { 3, 3 }; +static LLVMJITOptions llvm_jit_options = { 3, 3, 0 }; #endif static RunningMode runtime_running_mode = Mode_Default; @@ -554,6 +554,7 @@ wasm_runtime_full_init(RuntimeInitArgs *init_args) #if WASM_ENABLE_JIT != 0 llvm_jit_options.size_level = init_args->llvm_jit_size_level; llvm_jit_options.opt_level = init_args->llvm_jit_opt_level; + llvm_jit_options.segue_flags = init_args->segue_flags; #endif if (!wasm_runtime_env_init()) { @@ -4212,6 +4213,12 @@ static V128FuncPtr invokeNative_V128 = (V128FuncPtr)(uintptr_t)invokeNative; || defined(BUILD_TARGET_RISCV64_LP64) */ #endif /* end of defined(_WIN32) || defined(_WIN32_) */ +/* ASAN is not designed to work with custom stack unwind or other low-level \ + things. > Ignore a function that does some low-level magic. (e.g. walking \ + through the thread's stack bypassing the frame boundaries) */ +#if defined(__GNUC__) +__attribute__((no_sanitize_address)) +#endif bool wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr, const WASMType *func_type, const char *signature, @@ -5026,6 +5033,33 @@ wasm_runtime_dump_call_stack_to_buf(wasm_exec_env_t exec_env, char *buf, } #endif /* end of WASM_ENABLE_DUMP_CALL_STACK */ +#if WASM_ENABLE_STATIC_PGO != 0 +uint32 +wasm_runtime_get_pgo_prof_data_size(WASMModuleInstanceCommon *module_inst) +{ +#if WASM_ENABLE_AOT != 0 + if (module_inst->module_type == Wasm_Module_AoT) { + AOTModuleInstance *aot_inst = (AOTModuleInstance *)module_inst; + return aot_get_pgo_prof_data_size(aot_inst); + } +#endif + return 0; +} + +uint32 +wasm_runtime_dump_pgo_prof_data_to_buf(WASMModuleInstanceCommon *module_inst, + char *buf, uint32 len) +{ +#if WASM_ENABLE_AOT != 0 + if (module_inst->module_type == Wasm_Module_AoT) { + AOTModuleInstance *aot_inst = (AOTModuleInstance *)module_inst; + return aot_dump_pgo_prof_data_to_buf(aot_inst, buf, len); + } +#endif + return 0; +} +#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */ + bool wasm_runtime_get_table_elem_type(const 
WASMModuleCommon *module_comm, uint32 table_idx, uint8 *out_elem_type, diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index 00d5ba237..283d2ed57 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -420,6 +420,7 @@ typedef struct wasm_frame_t { typedef struct LLVMJITOptions { uint32 opt_level; uint32 size_level; + uint32 segue_flags; } LLVMJITOptions; #endif diff --git a/core/iwasm/common/wasm_shared_memory.c b/core/iwasm/common/wasm_shared_memory.c index c5e78e43c..54fc8200f 100644 --- a/core/iwasm/common/wasm_shared_memory.c +++ b/core/iwasm/common/wasm_shared_memory.c @@ -384,7 +384,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address, /* unit of timeout is nsec, convert it to usec */ timeout_left = (uint64)timeout / 1000; - timeout_1sec = 1e6; + timeout_1sec = (uint64)1e6; while (1) { if (timeout < 0) { diff --git a/core/iwasm/compilation/aot_compiler.h b/core/iwasm/compilation/aot_compiler.h index e6031ab89..40d79cf86 100644 --- a/core/iwasm/compilation/aot_compiler.h +++ b/core/iwasm/compilation/aot_compiler.h @@ -239,6 +239,13 @@ check_type_compatible(uint8 src_type, uint8 dst_type) #define FUNC_REF_TYPE comp_ctx->basic_types.funcref_type #define EXTERN_REF_TYPE comp_ctx->basic_types.externref_type +#define INT8_PTR_TYPE_GS comp_ctx->basic_types.int8_ptr_type_gs +#define INT16_PTR_TYPE_GS comp_ctx->basic_types.int16_ptr_type_gs +#define INT32_PTR_TYPE_GS comp_ctx->basic_types.int32_ptr_type_gs +#define INT64_PTR_TYPE_GS comp_ctx->basic_types.int64_ptr_type_gs +#define F32_PTR_TYPE_GS comp_ctx->basic_types.float32_ptr_type_gs +#define F64_PTR_TYPE_GS comp_ctx->basic_types.float64_ptr_type_gs + #define I32_CONST(v) LLVMConstInt(I32_TYPE, v, true) #define I64_CONST(v) LLVMConstInt(I64_TYPE, v, true) #define F32_CONST(v) LLVMConstReal(F32_TYPE, v) @@ -272,6 +279,7 @@ check_type_compatible(uint8 src_type, uint8 dst_type) #define V128_TYPE 
comp_ctx->basic_types.v128_type #define V128_PTR_TYPE comp_ctx->basic_types.v128_ptr_type +#define V128_PTR_TYPE_GS comp_ctx->basic_types.v128_ptr_type_gs #define V128_i8x16_TYPE comp_ctx->basic_types.i8x16_vec_type #define V128_i16x8_TYPE comp_ctx->basic_types.i16x8_vec_type #define V128_i32x4_TYPE comp_ctx->basic_types.i32x4_vec_type diff --git a/core/iwasm/compilation/aot_emit_aot_file.c b/core/iwasm/compilation/aot_emit_aot_file.c index 62bb809da..893e39918 100644 --- a/core/iwasm/compilation/aot_emit_aot_file.c +++ b/core/iwasm/compilation/aot_emit_aot_file.c @@ -111,6 +111,8 @@ typedef struct AOTSymbolList { /* AOT object data */ typedef struct AOTObjectData { + AOTCompContext *comp_ctx; + LLVMMemoryBufferRef mem_buf; LLVMBinaryRef binary; @@ -119,6 +121,12 @@ typedef struct AOTObjectData { void *text; uint32 text_size; + void *text_unlikely; + uint32 text_unlikely_size; + + void *text_hot; + uint32 text_hot_size; + /* literal data and size */ void *literal; uint32 literal_size; @@ -558,8 +566,10 @@ get_init_data_section_size(AOTCompContext *comp_ctx, AOTCompData *comp_data, static uint32 get_text_section_size(AOTObjectData *obj_data) { - return (sizeof(uint32) + obj_data->literal_size + obj_data->text_size + 3) - & ~3; + return sizeof(uint32) + align_uint(obj_data->literal_size, 4) + + align_uint(obj_data->text_size, 4) + + align_uint(obj_data->text_unlikely_size, 4) + + align_uint(obj_data->text_hot_size, 4); } static uint32 @@ -1702,12 +1712,28 @@ aot_emit_text_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset, EMIT_U32(AOT_SECTION_TYPE_TEXT); EMIT_U32(section_size); EMIT_U32(obj_data->literal_size); - if (obj_data->literal_size > 0) - EMIT_BUF(obj_data->literal, obj_data->literal_size); - EMIT_BUF(obj_data->text, obj_data->text_size); - while (offset & 3) - EMIT_BUF(&placeholder, 1); + if (obj_data->literal_size > 0) { + EMIT_BUF(obj_data->literal, obj_data->literal_size); + while (offset & 3) + EMIT_BUF(&placeholder, 1); + } + + if 
(obj_data->text_size > 0) { + EMIT_BUF(obj_data->text, obj_data->text_size); + while (offset & 3) + EMIT_BUF(&placeholder, 1); + } + if (obj_data->text_unlikely_size > 0) { + EMIT_BUF(obj_data->text_unlikely, obj_data->text_unlikely_size); + while (offset & 3) + EMIT_BUF(&placeholder, 1); + } + if (obj_data->text_hot_size > 0) { + EMIT_BUF(obj_data->text_hot, obj_data->text_hot_size); + while (offset & 3) + EMIT_BUF(&placeholder, 1); + } if (offset - *p_offset != section_size + sizeof(uint32) * 2) { aot_set_last_error("emit text section failed."); @@ -2211,11 +2237,23 @@ aot_resolve_text(AOTObjectData *obj_data) } while ( !LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) { - if ((name = (char *)LLVMGetSectionName(sec_itr)) - && !strcmp(name, ".text")) { - obj_data->text = (char *)LLVMGetSectionContents(sec_itr); - obj_data->text_size = (uint32)LLVMGetSectionSize(sec_itr); - break; + if ((name = (char *)LLVMGetSectionName(sec_itr))) { + if (!strcmp(name, ".text")) { + obj_data->text = (char *)LLVMGetSectionContents(sec_itr); + obj_data->text_size = (uint32)LLVMGetSectionSize(sec_itr); + } + else if (!strcmp(name, ".text.unlikely.")) { + obj_data->text_unlikely = + (char *)LLVMGetSectionContents(sec_itr); + obj_data->text_unlikely_size = + (uint32)LLVMGetSectionSize(sec_itr); + } + else if (!strcmp(name, ".text.hot.")) { + obj_data->text_hot = + (char *)LLVMGetSectionContents(sec_itr); + obj_data->text_hot_size = + (uint32)LLVMGetSectionSize(sec_itr); + } } LLVMMoveToNextSection(sec_itr); } @@ -2253,7 +2291,8 @@ static bool get_relocations_count(LLVMSectionIteratorRef sec_itr, uint32 *p_count); static bool -is_data_section(LLVMSectionIteratorRef sec_itr, char *section_name) +is_data_section(AOTObjectData *obj_data, LLVMSectionIteratorRef sec_itr, + char *section_name) { uint32 relocation_count = 0; @@ -2265,7 +2304,11 @@ is_data_section(LLVMSectionIteratorRef sec_itr, char *section_name) || !strncmp(section_name, ".rodata.str", strlen(".rodata.str")) 
|| (!strcmp(section_name, ".rdata") && get_relocations_count(sec_itr, &relocation_count) - && relocation_count > 0)); + && relocation_count > 0) + || (obj_data->comp_ctx->enable_llvm_pgo + && (!strncmp(section_name, "__llvm_prf_cnts", 15) + || !strncmp(section_name, "__llvm_prf_data", 15) + || !strncmp(section_name, "__llvm_prf_names", 16)))); } static bool @@ -2281,7 +2324,7 @@ get_object_data_sections_count(AOTObjectData *obj_data, uint32 *p_count) } while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) { if ((name = (char *)LLVMGetSectionName(sec_itr)) - && (is_data_section(sec_itr, name))) { + && (is_data_section(obj_data, sec_itr, name))) { count++; } LLVMMoveToNextSection(sec_itr); @@ -2306,6 +2349,9 @@ aot_resolve_object_data_sections(AOTObjectData *obj_data) } if (sections_count > 0) { + uint32 llvm_prf_cnts_idx = 0, llvm_prf_data_idx = 0; + char buf[32]; + size = (uint32)sizeof(AOTObjectDataSection) * sections_count; if (!(data_section = obj_data->data_sections = wasm_runtime_malloc(size))) { @@ -2322,10 +2368,46 @@ aot_resolve_object_data_sections(AOTObjectData *obj_data) while ( !LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) { if ((name = (char *)LLVMGetSectionName(sec_itr)) - && (is_data_section(sec_itr, name))) { + && (is_data_section(obj_data, sec_itr, name))) { data_section->name = name; - data_section->data = (uint8 *)LLVMGetSectionContents(sec_itr); - data_section->size = (uint32)LLVMGetSectionSize(sec_itr); + if (obj_data->comp_ctx->enable_llvm_pgo + && !strcmp(name, "__llvm_prf_cnts")) { + snprintf(buf, sizeof(buf), "%s%u", name, + llvm_prf_cnts_idx++); + size = strlen(buf) + 1; + if (!(data_section->name = wasm_runtime_malloc(size))) { + aot_set_last_error( + "allocate memory for data section name failed."); + return false; + } + bh_memcpy_s(data_section->name, size, buf, size); + data_section->is_name_allocated = true; + } + else if (obj_data->comp_ctx->enable_llvm_pgo + && !strcmp(name, "__llvm_prf_data")) 
{ + snprintf(buf, sizeof(buf), "%s%u", name, + llvm_prf_data_idx++); + size = strlen(buf) + 1; + if (!(data_section->name = wasm_runtime_malloc(size))) { + aot_set_last_error( + "allocate memory for data section name failed."); + return false; + } + bh_memcpy_s(data_section->name, size, buf, size); + data_section->is_name_allocated = true; + } + + if (obj_data->comp_ctx->enable_llvm_pgo + && !strcmp(name, "__llvm_prf_names")) { + data_section->data = (uint8 *)aot_compress_aot_func_names( + obj_data->comp_ctx, &data_section->size); + data_section->is_data_allocated = true; + } + else { + data_section->data = + (uint8 *)LLVMGetSectionContents(sec_itr); + data_section->size = (uint32)LLVMGetSectionSize(sec_itr); + } data_section++; } LLVMMoveToNextSection(sec_itr); @@ -2365,9 +2447,36 @@ aot_resolve_functions(AOTCompContext *comp_ctx, AOTObjectData *obj_data) && str_starts_with(name, prefix)) { func_index = (uint32)atoi(name + strlen(prefix)); if (func_index < obj_data->func_count) { + LLVMSectionIteratorRef contain_section; + char *contain_section_name; + func = obj_data->funcs + func_index; func->func_name = name; - func->text_offset = LLVMGetSymbolAddress(sym_itr); + + if (!(contain_section = LLVMObjectFileCopySectionIterator( + obj_data->binary))) { + aot_set_last_error("llvm get section iterator failed."); + LLVMDisposeSymbolIterator(sym_itr); + return false; + } + LLVMMoveToContainingSection(contain_section, sym_itr); + contain_section_name = + (char *)LLVMGetSectionName(contain_section); + LLVMDisposeSectionIterator(contain_section); + + if (!strcmp(contain_section_name, ".text.unlikely.")) { + func->text_offset = align_uint(obj_data->text_size, 4) + + LLVMGetSymbolAddress(sym_itr); + } + else if (!strcmp(contain_section_name, ".text.hot.")) { + func->text_offset = + align_uint(obj_data->text_size, 4) + + align_uint(obj_data->text_unlikely_size, 4) + + LLVMGetSymbolAddress(sym_itr); + } + else { + func->text_offset = LLVMGetSymbolAddress(sym_itr); + } } } 
LLVMMoveToNextSymbol(sym_itr); @@ -2478,9 +2587,86 @@ aot_resolve_object_relocation_group(AOTObjectData *obj_data, } /* set relocation fields */ - relocation->relocation_offset = offset; relocation->relocation_type = (uint32)type; relocation->symbol_name = (char *)LLVMGetSymbolName(rel_sym); + relocation->relocation_offset = offset; + if (!strcmp(group->section_name, ".rela.text.unlikely.") + || !strcmp(group->section_name, ".rel.text.unlikely.")) { + relocation->relocation_offset += align_uint(obj_data->text_size, 4); + } + else if (!strcmp(group->section_name, ".rela.text.hot.") + || !strcmp(group->section_name, ".rel.text.hot.")) { + relocation->relocation_offset += + align_uint(obj_data->text_size, 4) + + align_uint(obj_data->text_unlikely_size, 4); + } + if (!strcmp(relocation->symbol_name, ".text.unlikely.")) { + relocation->symbol_name = ".text"; + relocation->relocation_addend += align_uint(obj_data->text_size, 4); + } + if (!strcmp(relocation->symbol_name, ".text.hot.")) { + relocation->symbol_name = ".text"; + relocation->relocation_addend += + align_uint(obj_data->text_size, 4) + + align_uint(obj_data->text_unlikely_size, 4); + } + + if (obj_data->comp_ctx->enable_llvm_pgo + && (!strcmp(relocation->symbol_name, "__llvm_prf_cnts") + || !strcmp(relocation->symbol_name, "__llvm_prf_data"))) { + LLVMSectionIteratorRef sec_itr; + char buf[32], *section_name; + uint32 prof_section_idx = 0; + + if (!(sec_itr = + LLVMObjectFileCopySectionIterator(obj_data->binary))) { + aot_set_last_error("llvm get section iterator failed."); + LLVMDisposeSymbolIterator(rel_sym); + goto fail; + } + while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, + sec_itr)) { + section_name = (char *)LLVMGetSectionName(sec_itr); + if (section_name + && !strcmp(section_name, relocation->symbol_name)) { + if (LLVMGetSectionContainsSymbol(sec_itr, rel_sym)) + break; + prof_section_idx++; + } + LLVMMoveToNextSection(sec_itr); + } + LLVMDisposeSectionIterator(sec_itr); + + if 
(!strcmp(group->section_name, ".rela.text") + || !strcmp(group->section_name, ".rel.text")) { + snprintf(buf, sizeof(buf), "%s%u", relocation->symbol_name, + prof_section_idx); + size = strlen(buf) + 1; + if (!(relocation->symbol_name = wasm_runtime_malloc(size))) { + aot_set_last_error( + "allocate memory for relocation symbol name failed."); + LLVMDisposeSymbolIterator(rel_sym); + goto fail; + } + bh_memcpy_s(relocation->symbol_name, size, buf, size); + relocation->is_symbol_name_allocated = true; + } + else if (!strncmp(group->section_name, ".rela__llvm_prf_data", 20) + || !strncmp(group->section_name, ".rel__llvm_prf_data", + 19)) { + snprintf(buf, sizeof(buf), "%s%u", relocation->symbol_name, + prof_section_idx); + size = strlen(buf) + 1; + if (!(relocation->symbol_name = wasm_runtime_malloc(size))) { + aot_set_last_error( + "allocate memory for relocation symbol name failed."); + LLVMDisposeSymbolIterator(rel_sym); + goto fail; + } + bh_memcpy_s(relocation->symbol_name, size, buf, size); + relocation->is_symbol_name_allocated = true; + } + } /* for ".LCPIxxx", ".LJTIxxx", ".LBBxxx" and switch lookup table * relocation, transform the symbol name to real section name and set @@ -2525,10 +2711,14 @@ fail: } static bool -is_relocation_section_name(char *section_name) +is_relocation_section_name(AOTObjectData *obj_data, char *section_name) { return (!strcmp(section_name, ".rela.text") || !strcmp(section_name, ".rel.text") + || !strcmp(section_name, ".rela.text.unlikely.") + || !strcmp(section_name, ".rel.text.unlikely.") + || !strcmp(section_name, ".rela.text.hot.") + || !strcmp(section_name, ".rel.text.hot.") || !strcmp(section_name, ".rela.literal") || !strcmp(section_name, ".rela.data") || !strcmp(section_name, ".rel.data") @@ -2536,6 +2726,9 @@ is_relocation_section_name(char *section_name) || !strcmp(section_name, ".rel.sdata") || !strcmp(section_name, ".rela.rodata") || !strcmp(section_name, ".rel.rodata") + || (obj_data->comp_ctx->enable_llvm_pgo + && 
(!strcmp(section_name, ".rela__llvm_prf_data") + || !strcmp(section_name, ".rel__llvm_prf_data"))) /* ".rela.rodata.cst4/8/16/.." */ || !strncmp(section_name, ".rela.rodata.cst", strlen(".rela.rodata.cst")) @@ -2545,14 +2738,15 @@ is_relocation_section_name(char *section_name) } static bool -is_relocation_section(LLVMSectionIteratorRef sec_itr) +is_relocation_section(AOTObjectData *obj_data, LLVMSectionIteratorRef sec_itr) { uint32 count = 0; char *name = (char *)LLVMGetSectionName(sec_itr); if (name) { - if (is_relocation_section_name(name)) + if (is_relocation_section_name(obj_data, name)) return true; - else if ((!strcmp(name, ".text") || !strcmp(name, ".rdata")) + else if ((!strcmp(name, ".text") || !strcmp(name, ".text.unlikely.") + || !strcmp(name, ".text.hot.") || !strcmp(name, ".rdata")) && get_relocations_count(sec_itr, &count) && count > 0) return true; } @@ -2570,7 +2764,7 @@ get_relocation_groups_count(AOTObjectData *obj_data, uint32 *p_count) return false; } while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) { - if (is_relocation_section(sec_itr)) { + if (is_relocation_section(obj_data, sec_itr)) { count++; } LLVMMoveToNextSection(sec_itr); @@ -2586,7 +2780,7 @@ aot_resolve_object_relocation_groups(AOTObjectData *obj_data) { LLVMSectionIteratorRef sec_itr; AOTRelocationGroup *relocation_group; - uint32 group_count; + uint32 group_count, llvm_prf_data_idx = 0; char *name; uint32 size; @@ -2612,14 +2806,50 @@ aot_resolve_object_relocation_groups(AOTObjectData *obj_data) return false; } while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) { - if (is_relocation_section(sec_itr)) { + if (is_relocation_section(obj_data, sec_itr)) { name = (char *)LLVMGetSectionName(sec_itr); relocation_group->section_name = name; + + if (obj_data->comp_ctx->enable_llvm_pgo + && (!strcmp(name, ".rela__llvm_prf_data") + || !strcmp(name, ".rel__llvm_prf_data"))) { + char buf[32]; + snprintf(buf, sizeof(buf), "%s%u", name, 
llvm_prf_data_idx); + size = strlen(buf) + 1; + if (!(relocation_group->section_name = + wasm_runtime_malloc(size))) { + aot_set_last_error( + "allocate memory for section name failed."); + LLVMDisposeSectionIterator(sec_itr); + return false; + } + bh_memcpy_s(relocation_group->section_name, size, buf, size); + relocation_group->is_section_name_allocated = true; + } + if (!aot_resolve_object_relocation_group(obj_data, relocation_group, sec_itr)) { LLVMDisposeSectionIterator(sec_itr); return false; } + + if (obj_data->comp_ctx->enable_llvm_pgo + && (!strcmp(name, ".rela__llvm_prf_data") + || !strcmp(name, ".rel__llvm_prf_data"))) { + llvm_prf_data_idx++; + } + + if (!strcmp(relocation_group->section_name, ".rela.text.unlikely.") + || !strcmp(relocation_group->section_name, ".rela.text.hot.")) { + relocation_group->section_name = ".rela.text"; + } + else if (!strcmp(relocation_group->section_name, + ".rel.text.unlikely.") + || !strcmp(relocation_group->section_name, + ".rel.text.hot.")) { + relocation_group->section_name = ".rel.text"; + } + relocation_group++; } LLVMMoveToNextSection(sec_itr); @@ -2633,12 +2863,21 @@ static void destroy_relocation_groups(AOTRelocationGroup *relocation_groups, uint32 relocation_group_count) { - uint32 i; + uint32 i, j; AOTRelocationGroup *relocation_group = relocation_groups; - for (i = 0; i < relocation_group_count; i++, relocation_group++) - if (relocation_group->relocations) + for (i = 0; i < relocation_group_count; i++, relocation_group++) { + if (relocation_group->relocations) { + for (j = 0; j < relocation_group->relocation_count; j++) { + if (relocation_group->relocations[j].is_symbol_name_allocated) + wasm_runtime_free( + relocation_group->relocations[j].symbol_name); + } wasm_runtime_free(relocation_group->relocations); + } + if (relocation_group->is_section_name_allocated) + wasm_runtime_free(relocation_group->section_name); + } wasm_runtime_free(relocation_groups); } @@ -2664,8 +2903,20 @@ 
aot_obj_data_destroy(AOTObjectData *obj_data) LLVMDisposeMemoryBuffer(obj_data->mem_buf); if (obj_data->funcs) wasm_runtime_free(obj_data->funcs); - if (obj_data->data_sections) + if (obj_data->data_sections) { + uint32 i; + for (i = 0; i < obj_data->data_sections_count; i++) { + if (obj_data->data_sections[i].name + && obj_data->data_sections[i].is_name_allocated) { + wasm_runtime_free(obj_data->data_sections[i].name); + } + if (obj_data->data_sections[i].data + && obj_data->data_sections[i].is_data_allocated) { + wasm_runtime_free(obj_data->data_sections[i].data); + } + } wasm_runtime_free(obj_data->data_sections); + } if (obj_data->relocation_groups) destroy_relocation_groups(obj_data->relocation_groups, obj_data->relocation_group_count); @@ -2688,6 +2939,7 @@ aot_obj_data_create(AOTCompContext *comp_ctx) return false; } memset(obj_data, 0, sizeof(AOTObjectData)); + obj_data->comp_ctx = comp_ctx; bh_print_time("Begin to emit object file"); if (comp_ctx->external_llc_compiler || comp_ctx->external_asm_compiler) { @@ -2821,8 +3073,8 @@ aot_obj_data_create(AOTCompContext *comp_ctx) if (!aot_resolve_target_info(comp_ctx, obj_data) || !aot_resolve_text(obj_data) || !aot_resolve_literal(obj_data) || !aot_resolve_object_data_sections(obj_data) - || !aot_resolve_object_relocation_groups(obj_data) - || !aot_resolve_functions(comp_ctx, obj_data)) + || !aot_resolve_functions(comp_ctx, obj_data) + || !aot_resolve_object_relocation_groups(obj_data)) goto fail; return obj_data; diff --git a/core/iwasm/compilation/aot_emit_function.c b/core/iwasm/compilation/aot_emit_function.c index 9ba8baa24..cce66429e 100644 --- a/core/iwasm/compilation/aot_emit_function.c +++ b/core/iwasm/compilation/aot_emit_function.c @@ -868,10 +868,6 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, #if LLVM_VERSION_MAJOR >= 14 LLVMTypeRef llvm_func_type; #endif - bool recursive_call = - (func_ctx == func_ctxes[func_idx - import_func_count]) ? 
true - : false; - if (comp_ctx->is_indirect_mode) { LLVMTypeRef func_ptr_type; @@ -971,7 +967,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* Check whether there was exception thrown when executing the function */ - if (!tail_call && !recursive_call && comp_ctx->enable_bound_check + if (!tail_call && comp_ctx->enable_bound_check && !check_exception_thrown(comp_ctx, func_ctx)) goto fail; } diff --git a/core/iwasm/compilation/aot_emit_memory.c b/core/iwasm/compilation/aot_emit_memory.c index 4da4cc807..c11989ebf 100644 --- a/core/iwasm/compilation/aot_emit_memory.c +++ b/core/iwasm/compilation/aot_emit_memory.c @@ -81,7 +81,7 @@ get_memory_curr_page_count(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); LLVMValueRef aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint32 offset, uint32 bytes) + uint32 offset, uint32 bytes, bool enable_segue) { LLVMValueRef offset_const = I32_CONST(offset); LLVMValueRef addr, maddr, offset1, cmp1, cmp2, cmp; @@ -162,11 +162,20 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* inside memory space */ offset1 = I32_CONST((uint32)mem_offset); CHECK_LLVM_CONST(offset1); - if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, - mem_base_addr, &offset1, 1, - "maddr"))) { - aot_set_last_error("llvm build add failed."); - goto fail; + if (!enable_segue) { + if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, + INT8_TYPE, mem_base_addr, + &offset1, 1, "maddr"))) { + aot_set_last_error("llvm build add failed."); + goto fail; + } + } + else { + if (!(maddr = LLVMBuildIntToPtr(comp_ctx->builder, offset1, + INT8_PTR_TYPE_GS, "maddr"))) { + aot_set_last_error("llvm build IntToPtr failed."); + goto fail; + } } return maddr; } @@ -244,11 +253,29 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } } - /* maddr = mem_base_addr + offset1 */ - if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, + 
if (!enable_segue) { + /* maddr = mem_base_addr + offset1 */ + if (!(maddr = + LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, mem_base_addr, &offset1, 1, "maddr"))) { - aot_set_last_error("llvm build add failed."); - goto fail; + aot_set_last_error("llvm build add failed."); + goto fail; + } + } + else { + LLVMValueRef maddr_base; + + if (!(maddr_base = LLVMBuildIntToPtr(comp_ctx->builder, addr, + INT8_PTR_TYPE_GS, "maddr_base"))) { + aot_set_last_error("llvm build int to ptr failed."); + goto fail; + } + if (!(maddr = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, + maddr_base, &offset_const, 1, + "maddr"))) { + aot_set_last_error("llvm build inboundgep failed."); + goto fail; + } } return maddr; fail: @@ -388,13 +415,18 @@ aot_compile_op_i32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, { LLVMValueRef maddr, value = NULL; LLVMTypeRef data_type; + bool enable_segue = comp_ctx->enable_segue_i32_load; - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + enable_segue))) return false; switch (bytes) { case 4: - BUILD_PTR_CAST(INT32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT32_PTR_TYPE); + else + BUILD_PTR_CAST(INT32_PTR_TYPE_GS); #if WASM_ENABLE_SHARED_MEMORY != 0 if (atomic) BUILD_ATOMIC_LOAD(align, I32_TYPE); @@ -405,11 +437,17 @@ aot_compile_op_i32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, case 2: case 1: if (bytes == 2) { - BUILD_PTR_CAST(INT16_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT16_PTR_TYPE); + else + BUILD_PTR_CAST(INT16_PTR_TYPE_GS); data_type = INT16_TYPE; } else { - BUILD_PTR_CAST(INT8_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT8_PTR_TYPE); + else + BUILD_PTR_CAST(INT8_PTR_TYPE_GS); data_type = INT8_TYPE; } @@ -447,13 +485,18 @@ aot_compile_op_i64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, { LLVMValueRef maddr, value = NULL; LLVMTypeRef data_type; + bool enable_segue = 
comp_ctx->enable_segue_i64_load; - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + enable_segue))) return false; switch (bytes) { case 8: - BUILD_PTR_CAST(INT64_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT64_PTR_TYPE); + else + BUILD_PTR_CAST(INT64_PTR_TYPE_GS); #if WASM_ENABLE_SHARED_MEMORY != 0 if (atomic) BUILD_ATOMIC_LOAD(align, I64_TYPE); @@ -465,15 +508,24 @@ aot_compile_op_i64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, case 2: case 1: if (bytes == 4) { - BUILD_PTR_CAST(INT32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT32_PTR_TYPE); + else + BUILD_PTR_CAST(INT32_PTR_TYPE_GS); data_type = I32_TYPE; } else if (bytes == 2) { - BUILD_PTR_CAST(INT16_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT16_PTR_TYPE); + else + BUILD_PTR_CAST(INT16_PTR_TYPE_GS); data_type = INT16_TYPE; } else { - BUILD_PTR_CAST(INT8_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT8_PTR_TYPE); + else + BUILD_PTR_CAST(INT8_PTR_TYPE_GS); data_type = INT8_TYPE; } @@ -509,12 +561,18 @@ aot_compile_op_f32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset) { LLVMValueRef maddr, value; + bool enable_segue = comp_ctx->enable_segue_f32_load; - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4, + enable_segue))) return false; - BUILD_PTR_CAST(F32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(F32_PTR_TYPE); + else + BUILD_PTR_CAST(F32_PTR_TYPE_GS); BUILD_LOAD(F32_TYPE); + PUSH_F32(value); return true; fail: @@ -526,12 +584,18 @@ aot_compile_op_f64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset) { LLVMValueRef maddr, value; + bool enable_segue = comp_ctx->enable_segue_f64_load; - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8))) + if (!(maddr = 
aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8, + enable_segue))) return false; - BUILD_PTR_CAST(F64_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(F64_PTR_TYPE); + else + BUILD_PTR_CAST(F64_PTR_TYPE_GS); BUILD_LOAD(F64_TYPE); + PUSH_F64(value); return true; fail: @@ -543,22 +607,33 @@ aot_compile_op_i32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset, uint32 bytes, bool atomic) { LLVMValueRef maddr, value; + bool enable_segue = comp_ctx->enable_segue_i32_store; POP_I32(value); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + enable_segue))) return false; switch (bytes) { case 4: - BUILD_PTR_CAST(INT32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT32_PTR_TYPE); + else + BUILD_PTR_CAST(INT32_PTR_TYPE_GS); break; case 2: - BUILD_PTR_CAST(INT16_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT16_PTR_TYPE); + else + BUILD_PTR_CAST(INT16_PTR_TYPE_GS); BUILD_TRUNC(value, INT16_TYPE); break; case 1: - BUILD_PTR_CAST(INT8_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT8_PTR_TYPE); + else + BUILD_PTR_CAST(INT8_PTR_TYPE_GS); BUILD_TRUNC(value, INT8_TYPE); break; default: @@ -582,26 +657,40 @@ aot_compile_op_i64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset, uint32 bytes, bool atomic) { LLVMValueRef maddr, value; + bool enable_segue = comp_ctx->enable_segue_i64_store; POP_I64(value); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + enable_segue))) return false; switch (bytes) { case 8: - BUILD_PTR_CAST(INT64_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT64_PTR_TYPE); + else + BUILD_PTR_CAST(INT64_PTR_TYPE_GS); break; case 4: - BUILD_PTR_CAST(INT32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT32_PTR_TYPE); + else + BUILD_PTR_CAST(INT32_PTR_TYPE_GS); 
BUILD_TRUNC(value, I32_TYPE); break; case 2: - BUILD_PTR_CAST(INT16_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT16_PTR_TYPE); + else + BUILD_PTR_CAST(INT16_PTR_TYPE_GS); BUILD_TRUNC(value, INT16_TYPE); break; case 1: - BUILD_PTR_CAST(INT8_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT8_PTR_TYPE); + else + BUILD_PTR_CAST(INT8_PTR_TYPE_GS); BUILD_TRUNC(value, INT8_TYPE); break; default: @@ -625,13 +714,18 @@ aot_compile_op_f32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset) { LLVMValueRef maddr, value; + bool enable_segue = comp_ctx->enable_segue_f32_store; POP_F32(value); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4, + enable_segue))) return false; - BUILD_PTR_CAST(F32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(F32_PTR_TYPE); + else + BUILD_PTR_CAST(F32_PTR_TYPE_GS); BUILD_STORE(); return true; fail: @@ -643,13 +737,18 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset) { LLVMValueRef maddr, value; + bool enable_segue = comp_ctx->enable_segue_f64_store; POP_F64(value); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8, + enable_segue))) return false; - BUILD_PTR_CAST(F64_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(F64_PTR_TYPE); + else + BUILD_PTR_CAST(F64_PTR_TYPE_GS); BUILD_STORE(); return true; fail: @@ -1140,13 +1239,19 @@ aot_compile_op_atomic_rmw(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 offset, uint32 bytes) { LLVMValueRef maddr, value, result; + bool enable_segue = (op_type == VALUE_TYPE_I32) + ? 
comp_ctx->enable_segue_i32_load + && comp_ctx->enable_segue_i32_store + : comp_ctx->enable_segue_i64_load + && comp_ctx->enable_segue_i64_store; if (op_type == VALUE_TYPE_I32) POP_I32(value); else POP_I64(value); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + enable_segue))) return false; if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align)) @@ -1154,19 +1259,31 @@ aot_compile_op_atomic_rmw(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, switch (bytes) { case 8: - BUILD_PTR_CAST(INT64_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT64_PTR_TYPE); + else + BUILD_PTR_CAST(INT64_PTR_TYPE_GS); break; case 4: - BUILD_PTR_CAST(INT32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT32_PTR_TYPE); + else + BUILD_PTR_CAST(INT32_PTR_TYPE_GS); if (op_type == VALUE_TYPE_I64) BUILD_TRUNC(value, I32_TYPE); break; case 2: - BUILD_PTR_CAST(INT16_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT16_PTR_TYPE); + else + BUILD_PTR_CAST(INT16_PTR_TYPE_GS); BUILD_TRUNC(value, INT16_TYPE); break; case 1: - BUILD_PTR_CAST(INT8_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT8_PTR_TYPE); + else + BUILD_PTR_CAST(INT8_PTR_TYPE_GS); BUILD_TRUNC(value, INT8_TYPE); break; default: @@ -1208,6 +1325,11 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx, uint32 align, uint32 offset, uint32 bytes) { LLVMValueRef maddr, value, expect, result; + bool enable_segue = (op_type == VALUE_TYPE_I32) + ? 
comp_ctx->enable_segue_i32_load + && comp_ctx->enable_segue_i32_store + : comp_ctx->enable_segue_i64_load + && comp_ctx->enable_segue_i64_store; if (op_type == VALUE_TYPE_I32) { POP_I32(value); @@ -1218,7 +1340,8 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx, POP_I64(expect); } - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + enable_segue))) return false; if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align)) @@ -1226,22 +1349,34 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx, switch (bytes) { case 8: - BUILD_PTR_CAST(INT64_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT64_PTR_TYPE); + else + BUILD_PTR_CAST(INT64_PTR_TYPE_GS); break; case 4: - BUILD_PTR_CAST(INT32_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT32_PTR_TYPE); + else + BUILD_PTR_CAST(INT32_PTR_TYPE_GS); if (op_type == VALUE_TYPE_I64) { BUILD_TRUNC(value, I32_TYPE); BUILD_TRUNC(expect, I32_TYPE); } break; case 2: - BUILD_PTR_CAST(INT16_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT16_PTR_TYPE); + else + BUILD_PTR_CAST(INT16_PTR_TYPE_GS); BUILD_TRUNC(value, INT16_TYPE); BUILD_TRUNC(expect, INT16_TYPE); break; case 1: - BUILD_PTR_CAST(INT8_PTR_TYPE); + if (!enable_segue) + BUILD_PTR_CAST(INT8_PTR_TYPE); + else + BUILD_PTR_CAST(INT8_PTR_TYPE_GS); BUILD_TRUNC(value, INT8_TYPE); BUILD_TRUNC(expect, INT8_TYPE); break; @@ -1318,7 +1453,8 @@ aot_compile_op_atomic_wait(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, CHECK_LLVM_CONST(is_wait64); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + false))) return false; if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align)) @@ -1393,7 +1529,8 @@ aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx, POP_I32(count); - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes))) 
+ if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, + false))) return false; if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align)) diff --git a/core/iwasm/compilation/aot_emit_memory.h b/core/iwasm/compilation/aot_emit_memory.h index e49582e3c..1c2db503a 100644 --- a/core/iwasm/compilation/aot_emit_memory.h +++ b/core/iwasm/compilation/aot_emit_memory.h @@ -53,7 +53,7 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMValueRef aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint32 offset, uint32 bytes); + uint32 offset, uint32 bytes, bool enable_segue); bool aot_compile_op_memory_size(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); diff --git a/core/iwasm/compilation/aot_emit_variable.c b/core/iwasm/compilation/aot_emit_variable.c index 70487d4de..31d803553 100644 --- a/core/iwasm/compilation/aot_emit_variable.c +++ b/core/iwasm/compilation/aot_emit_variable.c @@ -112,7 +112,7 @@ static bool compile_global(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 global_idx, bool is_set, bool is_aux_stack) { - AOTCompData *comp_data = comp_ctx->comp_data; + const AOTCompData *comp_data = comp_ctx->comp_data; uint32 import_global_count = comp_data->import_global_count; uint32 global_base_offset; uint32 global_offset; diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index dc3fe7f59..81b7e8c36 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -15,7 +15,7 @@ #endif LLVMTypeRef -wasm_type_to_llvm_type(AOTLLVMTypes *llvm_types, uint8 wasm_type) +wasm_type_to_llvm_type(const AOTLLVMTypes *llvm_types, uint8 wasm_type) { switch (wasm_type) { case VALUE_TYPE_I32: @@ -42,8 +42,8 @@ wasm_type_to_llvm_type(AOTLLVMTypes *llvm_types, uint8 wasm_type) * Add LLVM function */ static LLVMValueRef -aot_add_llvm_func(AOTCompContext *comp_ctx, LLVMModuleRef module, - AOTFuncType *aot_func_type, uint32 
func_index, +aot_add_llvm_func(const AOTCompContext *comp_ctx, LLVMModuleRef module, + const AOTFuncType *aot_func_type, uint32 func_index, LLVMTypeRef *p_func_type) { LLVMValueRef func = NULL; @@ -177,8 +177,9 @@ free_block_memory(AOTBlock *block) * Create first AOTBlock, or function block for the function */ static AOTBlock * -aot_create_func_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - AOTFunc *func, AOTFuncType *aot_func_type) +aot_create_func_block(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx, const AOTFunc *func, + const AOTFuncType *aot_func_type) { AOTBlock *aot_block; uint32 param_count = aot_func_type->param_count, @@ -266,7 +267,8 @@ create_argv_buf(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) } static bool -create_native_stack_bound(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +create_native_stack_bound(const AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) { LLVMValueRef stack_bound_offset = I32_FOUR, stack_bound_addr; @@ -288,7 +290,8 @@ create_native_stack_bound(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) } static bool -create_native_stack_top_min(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +create_native_stack_top_min(const AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) { LLVMValueRef offset = I32_NINE; @@ -303,7 +306,7 @@ create_native_stack_top_min(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) } static bool -create_aux_stack_info(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +create_aux_stack_info(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { LLVMValueRef aux_stack_bound_offset = I32_SIX, aux_stack_bound_addr; LLVMValueRef aux_stack_bottom_offset = I32_SEVEN, aux_stack_bottom_addr; @@ -355,7 +358,7 @@ create_aux_stack_info(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) } static bool -create_native_symbol(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +create_native_symbol(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { 
LLVMValueRef native_symbol_offset = I32_EIGHT, native_symbol_addr; @@ -384,8 +387,9 @@ create_native_symbol(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) } static bool -create_local_variables(AOTCompData *comp_data, AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, AOTFunc *func) +create_local_variables(const AOTCompData *comp_data, + const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + const AOTFunc *func) { AOTFuncType *aot_func_type = comp_data->func_types[func->func_type_index]; char local_name[32]; @@ -475,7 +479,7 @@ create_local_variables(AOTCompData *comp_data, AOTCompContext *comp_ctx, } static bool -create_memory_info(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, +create_memory_info(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMTypeRef int8_ptr_type, uint32 func_index) { LLVMValueRef offset, mem_info_base; @@ -807,7 +811,7 @@ create_memory_info(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } static bool -create_cur_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +create_cur_exception(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { LLVMValueRef offset; @@ -823,7 +827,8 @@ create_cur_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) } static bool -create_func_type_indexes(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +create_func_type_indexes(const AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) { LLVMValueRef offset, func_type_indexes_ptr; LLVMTypeRef int32_ptr_type; @@ -861,7 +866,7 @@ create_func_type_indexes(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) } static bool -create_func_ptrs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +create_func_ptrs(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { LLVMValueRef offset; @@ -903,7 +908,7 @@ create_func_ptrs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) * Create function compiler context */ static AOTFuncContext * -aot_create_func_context(AOTCompData *comp_data, AOTCompContext *comp_ctx, 
+aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx, AOTFunc *func, uint32 func_index) { AOTFuncContext *func_ctx; @@ -1059,7 +1064,7 @@ aot_destroy_func_contexts(AOTFuncContext **func_ctxes, uint32 count) * Create function compiler contexts */ static AOTFuncContext ** -aot_create_func_contexts(AOTCompData *comp_data, AOTCompContext *comp_ctx) +aot_create_func_contexts(const AOTCompData *comp_data, AOTCompContext *comp_ctx) { AOTFuncContext **func_ctxes; uint64 size; @@ -1127,6 +1132,28 @@ aot_set_llvm_basic_types(AOTLLVMTypes *basic_types, LLVMContextRef context) basic_types->v128_type = basic_types->i64x2_vec_type; basic_types->v128_ptr_type = LLVMPointerType(basic_types->v128_type, 0); + basic_types->int8_ptr_type_gs = + LLVMPointerType(basic_types->int8_type, 256); + basic_types->int16_ptr_type_gs = + LLVMPointerType(basic_types->int16_type, 256); + basic_types->int32_ptr_type_gs = + LLVMPointerType(basic_types->int32_type, 256); + basic_types->int64_ptr_type_gs = + LLVMPointerType(basic_types->int64_type, 256); + basic_types->float32_ptr_type_gs = + LLVMPointerType(basic_types->float32_type, 256); + basic_types->float64_ptr_type_gs = + LLVMPointerType(basic_types->float64_type, 256); + basic_types->v128_ptr_type_gs = + LLVMPointerType(basic_types->v128_type, 256); + if (!basic_types->int8_ptr_type_gs || !basic_types->int16_ptr_type_gs + || !basic_types->int32_ptr_type_gs || !basic_types->int64_ptr_type_gs + || !basic_types->float32_ptr_type_gs + || !basic_types->float64_ptr_type_gs + || !basic_types->v128_ptr_type_gs) { + return false; + } + basic_types->i1x2_vec_type = LLVMVectorType(basic_types->int1_type, 2); basic_types->funcref_type = LLVMInt32TypeInContext(context); @@ -1536,7 +1563,7 @@ aot_compiler_destroy(void) } AOTCompContext * -aot_create_comp_context(AOTCompData *comp_data, aot_comp_option_t option) +aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) { AOTCompContext *comp_ctx, *ret = NULL; 
LLVMTargetRef target; @@ -1643,6 +1670,12 @@ aot_create_comp_context(AOTCompData *comp_data, aot_comp_option_t option) if (option->disable_llvm_lto) comp_ctx->disable_llvm_lto = true; + if (option->enable_llvm_pgo) + comp_ctx->enable_llvm_pgo = true; + + if (option->use_prof_file) + comp_ctx->use_prof_file = option->use_prof_file; + if (option->enable_stack_estimation) comp_ctx->enable_stack_estimation = true; @@ -2007,6 +2040,7 @@ aot_create_comp_context(AOTCompData *comp_data, aot_comp_option_t option) os_printf("Create AoT compiler with:\n"); os_printf(" target: %s\n", comp_ctx->target_arch); os_printf(" target cpu: %s\n", cpu); + os_printf(" target triple: %s\n", triple_norm); os_printf(" cpu features: %s\n", features); os_printf(" opt level: %d\n", opt_level); os_printf(" size level: %d\n", size_level); @@ -2025,6 +2059,8 @@ aot_create_comp_context(AOTCompData *comp_data, aot_comp_option_t option) break; } + LLVMSetTarget(comp_ctx->module, triple_norm); + if (!LLVMTargetHasTargetMachine(target)) { snprintf(buf, sizeof(buf), "no target machine for this target (%s).", triple_norm); @@ -2065,6 +2101,37 @@ aot_create_comp_context(AOTCompData *comp_data, aot_comp_option_t option) } } + triple = LLVMGetTargetMachineTriple(comp_ctx->target_machine); + if (!triple) { + aot_set_last_error("get target machine triple failed."); + goto fail; + } + if (strstr(triple, "linux") && !strcmp(comp_ctx->target_arch, "x86_64")) { + if (option->segue_flags) { + if (option->segue_flags & (1 << 0)) + comp_ctx->enable_segue_i32_load = true; + if (option->segue_flags & (1 << 1)) + comp_ctx->enable_segue_i64_load = true; + if (option->segue_flags & (1 << 2)) + comp_ctx->enable_segue_f32_load = true; + if (option->segue_flags & (1 << 3)) + comp_ctx->enable_segue_f64_load = true; + if (option->segue_flags & (1 << 4)) + comp_ctx->enable_segue_v128_load = true; + if (option->segue_flags & (1 << 8)) + comp_ctx->enable_segue_i32_store = true; + if (option->segue_flags & (1 << 9)) + 
comp_ctx->enable_segue_i64_store = true; + if (option->segue_flags & (1 << 10)) + comp_ctx->enable_segue_f32_store = true; + if (option->segue_flags & (1 << 11)) + comp_ctx->enable_segue_f64_store = true; + if (option->segue_flags & (1 << 12)) + comp_ctx->enable_segue_v128_store = true; + } + } + LLVMDisposeMessage(triple); + if (option->enable_simd && strcmp(comp_ctx->target_arch, "x86_64") != 0 && strncmp(comp_ctx->target_arch, "aarch64", 7) != 0) { /* Disable simd if it isn't supported by target arch */ @@ -2098,6 +2165,7 @@ aot_create_comp_context(AOTCompData *comp_data, aot_comp_option_t option) aot_set_last_error("create LLVM target data layout failed."); goto fail; } + LLVMSetModuleDataLayout(comp_ctx->module, target_data_ref); comp_ctx->pointer_size = LLVMPointerSize(target_data_ref); LLVMDisposeTargetData(target_data_ref); @@ -2768,3 +2836,23 @@ aot_load_const_from_table(AOTCompContext *comp_ctx, LLVMValueRef base, (void)const_type; return const_value; } + +bool +aot_set_cond_br_weights(AOTCompContext *comp_ctx, LLVMValueRef cond_br, + int32 weights_true, int32 weights_false) +{ + LLVMMetadataRef md_nodes[3], meta_data; + LLVMValueRef meta_data_as_value; + + md_nodes[0] = LLVMMDStringInContext2(comp_ctx->context, "branch_weights", + strlen("branch_weights")); + md_nodes[1] = LLVMValueAsMetadata(I32_CONST(weights_true)); + md_nodes[2] = LLVMValueAsMetadata(I32_CONST(weights_false)); + + meta_data = LLVMMDNodeInContext2(comp_ctx->context, md_nodes, 3); + meta_data_as_value = LLVMMetadataAsValue(comp_ctx->context, meta_data); + + LLVMSetMetadata(cond_br, 2, meta_data_as_value); + + return true; +} diff --git a/core/iwasm/compilation/aot_llvm.h b/core/iwasm/compilation/aot_llvm.h index 2a1564019..76fedcc62 100644 --- a/core/iwasm/compilation/aot_llvm.h +++ b/core/iwasm/compilation/aot_llvm.h @@ -154,8 +154,6 @@ typedef struct AOTFuncContext { AOTFunc *aot_func; LLVMValueRef func; LLVMTypeRef func_type; - /* LLVM module for this function, note that in LAZY JIT 
mode, - each aot function belongs to an individual module */ LLVMModuleRef module; AOTBlockStack block_stack; @@ -214,6 +212,14 @@ typedef struct AOTLLVMTypes { LLVMTypeRef f32x4_vec_type; LLVMTypeRef f64x2_vec_type; + LLVMTypeRef int8_ptr_type_gs; + LLVMTypeRef int16_ptr_type_gs; + LLVMTypeRef int32_ptr_type_gs; + LLVMTypeRef int64_ptr_type_gs; + LLVMTypeRef float32_ptr_type_gs; + LLVMTypeRef float64_ptr_type_gs; + LLVMTypeRef v128_ptr_type_gs; + LLVMTypeRef i1x2_vec_type; LLVMTypeRef meta_data_type; @@ -275,7 +281,7 @@ typedef struct AOTLLVMConsts { * Compiler context */ typedef struct AOTCompContext { - AOTCompData *comp_data; + const AOTCompData *comp_data; /* LLVM variables required to emit LLVM IR */ LLVMContextRef context; @@ -341,6 +347,25 @@ typedef struct AOTCompContext { /* Disable LLVM link time optimization */ bool disable_llvm_lto; + /* Enable LLVM PGO (Profile-Guided Optimization) */ + bool enable_llvm_pgo; + + /* Use profile file collected by LLVM PGO */ + char *use_prof_file; + + /* Enable to use segment register as the base addr + of linear memory for load/store operations */ + bool enable_segue_i32_load; + bool enable_segue_i64_load; + bool enable_segue_f32_load; + bool enable_segue_f64_load; + bool enable_segue_v128_load; + bool enable_segue_i32_store; + bool enable_segue_i64_store; + bool enable_segue_f32_store; + bool enable_segue_f64_store; + bool enable_segue_v128_store; + /* Whether optimize the JITed code */ bool optimize; @@ -407,12 +432,15 @@ typedef struct AOTCompOption { bool enable_aux_stack_frame; bool disable_llvm_intrinsics; bool disable_llvm_lto; + bool enable_llvm_pgo; bool enable_stack_estimation; + char *use_prof_file; uint32 opt_level; uint32 size_level; uint32 output_format; uint32 bounds_checks; uint32 stack_bounds_checks; + uint32 segue_flags; char **custom_sections; uint32 custom_sections_count; const char *stack_usage_file; @@ -425,7 +453,7 @@ void aot_compiler_destroy(void); AOTCompContext * 
-aot_create_comp_context(AOTCompData *comp_data, aot_comp_option_t option); +aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option); void aot_destroy_comp_context(AOTCompContext *comp_ctx); @@ -464,7 +492,7 @@ void aot_block_destroy(AOTBlock *block); LLVMTypeRef -wasm_type_to_llvm_type(AOTLLVMTypes *llvm_types, uint8 wasm_type); +wasm_type_to_llvm_type(const AOTLLVMTypes *llvm_types, uint8 wasm_type); bool aot_checked_addr_list_add(AOTFuncContext *func_ctx, uint32 local_idx, @@ -519,6 +547,13 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module); void aot_handle_llvm_errmsg(const char *string, LLVMErrorRef err); +char * +aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size); + +bool +aot_set_cond_br_weights(AOTCompContext *comp_ctx, LLVMValueRef cond_br, + int32 weights_true, int32 weights_false); + #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/compilation/aot_llvm_extra.cpp b/core/iwasm/compilation/aot_llvm_extra.cpp index 9b77f5e6a..a8843ccea 100644 --- a/core/iwasm/compilation/aot_llvm_extra.cpp +++ b/core/iwasm/compilation/aot_llvm_extra.cpp @@ -5,6 +5,8 @@ #include #include +#include +#include #include #include #include @@ -44,6 +46,7 @@ #if LLVM_VERSION_MAJOR >= 12 #include #endif +#include #include #include "../aot/aot_runtime.h" @@ -232,14 +235,26 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module) PTO.SLPVectorization = true; PTO.LoopUnrolling = true; + Optional PGO = None; + if (comp_ctx->enable_llvm_pgo) { + /* Disable static counter allocation for value profiler, + it will be allocated by runtime */ + const char *argv[] = { "", "-vp-static-alloc=false" }; + cl::ParseCommandLineOptions(2, argv); + PGO = PGOOptions("", "", "", PGOOptions::IRInstr); + } + else if (comp_ctx->use_prof_file) { + PGO = PGOOptions(comp_ctx->use_prof_file, "", "", PGOOptions::IRUse); + } + #ifdef DEBUG_PASS PassInstrumentationCallbacks PIC; - 
PassBuilder PB(TM, PTO, None, &PIC); + PassBuilder PB(TM, PTO, PGO, &PIC); #else #if LLVM_VERSION_MAJOR == 12 - PassBuilder PB(false, TM, PTO); + PassBuilder PB(false, TM, PTO, PGO); #else - PassBuilder PB(TM, PTO); + PassBuilder PB(TM, PTO, PGO); #endif #endif @@ -334,8 +349,20 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module) FPM.addPass(SLPVectorizerPass()); FPM.addPass(LoadStoreVectorizerPass()); + if (comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) { + /* LICM pass: loop invariant code motion, attempting to remove + as much code from the body of a loop as possible. Experiments + show it is good to enable it when pgo is enabled. */ +#if LLVM_VERSION_MAJOR >= 15 + LICMOptions licm_opt; + FPM.addPass( + createFunctionToLoopPassAdaptor(LICMPass(licm_opt), true)); +#else + FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), true)); +#endif + } + /* - FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass())); FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass())); FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass())); */ @@ -344,9 +371,10 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module) if (!disable_llvm_lto) { /* Apply LTO for AOT mode */ - if (comp_ctx->comp_data->func_count >= 10) - /* Adds the pre-link optimizations if the func count - is large enough */ + if (comp_ctx->comp_data->func_count >= 10 + || comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) + /* Add the pre-link optimizations if the func count + is large enough or PGO is enabled */ MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL)); else MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL)); @@ -358,3 +386,34 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module) MPM.run(*M, MAM); } + +char * +aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size) +{ + std::vector NameStrs; + std::string Result; + char buf[32], *compressed_str; + uint32 compressed_str_len, i; 
+ + for (i = 0; i < comp_ctx->func_ctx_count; i++) { + snprintf(buf, sizeof(buf), "%s%d", AOT_FUNC_PREFIX, i); + std::string str(buf); + NameStrs.push_back(str); + } + + if (collectPGOFuncNameStrings(NameStrs, true, Result)) { + aot_set_last_error("collect pgo func name strings failed"); + return NULL; + } + + compressed_str_len = Result.size(); + if (!(compressed_str = (char *)wasm_runtime_malloc(compressed_str_len))) { + aot_set_last_error("allocate memory failed"); + return NULL; + } + + bh_memcpy_s(compressed_str, compressed_str_len, Result.c_str(), + compressed_str_len); + *p_size = compressed_str_len; + return compressed_str; +} diff --git a/core/iwasm/compilation/aot_llvm_extra2.cpp b/core/iwasm/compilation/aot_llvm_extra2.cpp index 9bd44bbff..8c3f3a395 100644 --- a/core/iwasm/compilation/aot_llvm_extra2.cpp +++ b/core/iwasm/compilation/aot_llvm_extra2.cpp @@ -4,7 +4,13 @@ */ #include +#include +#include +#if LLVM_VERSION_MAJOR >= 14 #include +#else +#include +#endif #include #include "bh_assert.h" diff --git a/core/iwasm/compilation/aot_orc_extra.cpp b/core/iwasm/compilation/aot_orc_extra.cpp index 8cf253e94..b778b634e 100644 --- a/core/iwasm/compilation/aot_orc_extra.cpp +++ b/core/iwasm/compilation/aot_orc_extra.cpp @@ -8,6 +8,8 @@ #include "llvm-c/OrcEE.h" #include "llvm-c/TargetMachine.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" #include "llvm/ExecutionEngine/Orc/LLJIT.h" #include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h" diff --git a/core/iwasm/compilation/debug/dwarf_extractor.cpp b/core/iwasm/compilation/debug/dwarf_extractor.cpp index d5a1be85e..160aadee0 100644 --- a/core/iwasm/compilation/debug/dwarf_extractor.cpp +++ b/core/iwasm/compilation/debug/dwarf_extractor.cpp @@ -114,7 +114,7 @@ destroy_dwarf_extractor(dwar_extractor_handle_t handle) } LLVMMetadataRef -dwarf_gen_file_info(AOTCompContext *comp_ctx) +dwarf_gen_file_info(const AOTCompContext *comp_ctx) { 
dwar_extractor *extractor; int units_number; @@ -191,7 +191,7 @@ dwarf_gen_mock_vm_info(AOTCompContext *comp_ctx) #endif LLVMMetadataRef -dwarf_gen_comp_unit_info(AOTCompContext *comp_ctx) +dwarf_gen_comp_unit_info(const AOTCompContext *comp_ctx) { dwar_extractor *extractor; int units_number; @@ -257,7 +257,7 @@ lldb_get_basic_type_encoding(BasicType basic_type) } static LLVMMetadataRef -lldb_type_to_type_dbi(AOTCompContext *comp_ctx, SBType &type) +lldb_type_to_type_dbi(const AOTCompContext *comp_ctx, SBType &type) { LLVMMetadataRef type_info = NULL; BasicType basic_type = type.GetBasicType(); @@ -282,8 +282,9 @@ lldb_type_to_type_dbi(AOTCompContext *comp_ctx, SBType &type) } static LLVMMetadataRef -lldb_function_to_function_dbi(AOTCompContext *comp_ctx, SBSymbolContext &sc, - AOTFuncContext *func_ctx) +lldb_function_to_function_dbi(const AOTCompContext *comp_ctx, + SBSymbolContext &sc, + const AOTFuncContext *func_ctx) { SBFunction function(sc.GetFunction()); const char *function_name = function.GetName(); @@ -388,7 +389,8 @@ lldb_function_to_function_dbi(AOTCompContext *comp_ctx, SBSymbolContext &sc, } LLVMMetadataRef -dwarf_gen_func_info(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +dwarf_gen_func_info(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx) { LLVMMetadataRef func_info = NULL; dwar_extractor *extractor; @@ -417,8 +419,8 @@ dwarf_gen_func_info(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) } void -dwarf_get_func_name(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - char *name, int len) +dwarf_get_func_name(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx, char *name, int len) { LLVMMetadataRef func_info = NULL; dwar_extractor *extractor; @@ -448,8 +450,8 @@ dwarf_get_func_name(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } LLVMMetadataRef -dwarf_gen_location(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint64_t vm_offset) +dwarf_gen_location(const AOTCompContext *comp_ctx, + const 
AOTFuncContext *func_ctx, uint64_t vm_offset) { LLVMMetadataRef location_info = NULL; dwar_extractor *extractor; @@ -487,7 +489,8 @@ dwarf_gen_location(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } LLVMMetadataRef -dwarf_gen_func_ret_location(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +dwarf_gen_func_ret_location(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx) { LLVMMetadataRef func_info = NULL; dwar_extractor *extractor; diff --git a/core/iwasm/compilation/debug/dwarf_extractor.h b/core/iwasm/compilation/debug/dwarf_extractor.h index 449d4d57c..c48e8f5c0 100644 --- a/core/iwasm/compilation/debug/dwarf_extractor.h +++ b/core/iwasm/compilation/debug/dwarf_extractor.h @@ -30,24 +30,26 @@ dwar_extractor_handle_t create_dwarf_extractor(aot_comp_data_t comp_data, char *file_name); LLVMMetadataRef -dwarf_gen_file_info(AOTCompContext *comp_ctx); +dwarf_gen_file_info(const AOTCompContext *comp_ctx); LLVMMetadataRef -dwarf_gen_comp_unit_info(AOTCompContext *comp_ctx); +dwarf_gen_comp_unit_info(const AOTCompContext *comp_ctx); LLVMMetadataRef -dwarf_gen_func_info(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +dwarf_gen_func_info(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx); LLVMMetadataRef -dwarf_gen_location(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint64_t vm_offset); +dwarf_gen_location(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx, uint64_t vm_offset); LLVMMetadataRef -dwarf_gen_func_ret_location(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +dwarf_gen_func_ret_location(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx); void -dwarf_get_func_name(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - char *name, int len); +dwarf_get_func_name(const AOTCompContext *comp_ctx, + const AOTFuncContext *func_ctx, char *name, int len); #ifdef __cplusplus } diff --git a/core/iwasm/compilation/simd/simd_load_store.c b/core/iwasm/compilation/simd/simd_load_store.c 
index d166e954c..0e869727a 100644 --- a/core/iwasm/compilation/simd/simd_load_store.c +++ b/core/iwasm/compilation/simd/simd_load_store.c @@ -14,12 +14,12 @@ static LLVMValueRef simd_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset, uint32 data_length, LLVMTypeRef ptr_type, - LLVMTypeRef data_type) + LLVMTypeRef data_type, bool enable_segue) { LLVMValueRef maddr, data; if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, - data_length))) { + data_length, enable_segue))) { HANDLE_FAILURE("aot_check_memory_overflow"); return NULL; } @@ -44,10 +44,12 @@ bool aot_compile_simd_v128_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset) { + bool enable_segue = comp_ctx->enable_segue_v128_load; + LLVMTypeRef v128_ptr_type = enable_segue ? V128_PTR_TYPE_GS : V128_PTR_TYPE; LLVMValueRef result; if (!(result = simd_load(comp_ctx, func_ctx, align, offset, 16, - V128_PTR_TYPE, V128_TYPE))) { + v128_ptr_type, V128_TYPE, enable_segue))) { return false; } @@ -75,6 +77,7 @@ aot_compile_simd_load_extend(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMVectorType(I32_TYPE, 2), LLVMVectorType(I32_TYPE, 2), }; LLVMTypeRef sub_vector_type, sub_vector_ptr_type; + bool enable_segue = comp_ctx->enable_segue_v128_load; bh_assert(opcode_index < 6); @@ -82,13 +85,15 @@ aot_compile_simd_load_extend(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* to vector ptr type */ if (!sub_vector_type - || !(sub_vector_ptr_type = LLVMPointerType(sub_vector_type, 0))) { + || !(sub_vector_ptr_type = + LLVMPointerType(sub_vector_type, enable_segue ? 
256 : 0))) { HANDLE_FAILURE("LLVMPointerType"); return false; } - if (!(sub_vector = simd_load(comp_ctx, func_ctx, align, offset, 8, - sub_vector_ptr_type, sub_vector_type))) { + if (!(sub_vector = + simd_load(comp_ctx, func_ctx, align, offset, 8, + sub_vector_ptr_type, sub_vector_type, enable_segue))) { return false; } @@ -118,6 +123,9 @@ aot_compile_simd_load_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMValueRef element, result; LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE, INT32_PTR_TYPE, INT64_PTR_TYPE }; + LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS, + INT32_PTR_TYPE_GS, + INT64_PTR_TYPE_GS }; LLVMTypeRef element_data_types[] = { INT8_TYPE, INT16_TYPE, I32_TYPE, I64_TYPE }; uint32 data_lengths[] = { 1, 2, 4, 8 }; @@ -133,13 +141,16 @@ aot_compile_simd_load_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVM_CONST(i32x4_zero), LLVM_CONST(i32x2_zero), }; + bool enable_segue = comp_ctx->enable_segue_v128_load; bh_assert(opcode_index < 4); - if (!(element = simd_load(comp_ctx, func_ctx, align, offset, - data_lengths[opcode_index], - element_ptr_types[opcode_index], - element_data_types[opcode_index]))) { + if (!(element = simd_load( + comp_ctx, func_ctx, align, offset, data_lengths[opcode_index], + comp_ctx->enable_segue_v128_load + ? 
element_ptr_types_gs[opcode_index] + : element_ptr_types[opcode_index], + element_data_types[opcode_index], enable_segue))) { return false; } @@ -170,11 +181,15 @@ aot_compile_simd_load_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 data_lengths[] = { 1, 2, 4, 8 }; LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE, INT32_PTR_TYPE, INT64_PTR_TYPE }; + LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS, + INT32_PTR_TYPE_GS, + INT64_PTR_TYPE_GS }; LLVMTypeRef element_data_types[] = { INT8_TYPE, INT16_TYPE, I32_TYPE, I64_TYPE }; LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE, V128_i32x4_TYPE, V128_i64x2_TYPE }; LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id); + bool enable_segue = comp_ctx->enable_segue_v128_load; bh_assert(opcode_index < 4); @@ -183,10 +198,12 @@ aot_compile_simd_load_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return false; } - if (!(element = simd_load(comp_ctx, func_ctx, align, offset, - data_lengths[opcode_index], - element_ptr_types[opcode_index], - element_data_types[opcode_index]))) { + if (!(element = simd_load( + comp_ctx, func_ctx, align, offset, data_lengths[opcode_index], + comp_ctx->enable_segue_v128_load + ? 
element_ptr_types_gs[opcode_index] + : element_ptr_types[opcode_index], + element_data_types[opcode_index], enable_segue))) { return false; } @@ -207,6 +224,8 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 opcode_index = opcode - SIMD_v128_load32_zero; uint32 data_lengths[] = { 4, 8 }; LLVMTypeRef element_ptr_types[] = { INT32_PTR_TYPE, INT64_PTR_TYPE }; + LLVMTypeRef element_ptr_types_gs[] = { INT32_PTR_TYPE_GS, + INT64_PTR_TYPE_GS }; LLVMTypeRef element_data_types[] = { I32_TYPE, I64_TYPE }; LLVMValueRef zero[] = { LLVM_CONST(i32x4_vec_zero), @@ -222,13 +241,16 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVM_CONST(i32_six) }, { LLVM_CONST(i32_zero), LLVM_CONST(i32_two) }, }; + bool enable_segue = comp_ctx->enable_segue_v128_load; bh_assert(opcode_index < 2); - if (!(element = simd_load(comp_ctx, func_ctx, align, offset, - data_lengths[opcode_index], - element_ptr_types[opcode_index], - element_data_types[opcode_index]))) { + if (!(element = simd_load( + comp_ctx, func_ctx, align, offset, data_lengths[opcode_index], + comp_ctx->enable_segue_v128_load + ? 
element_ptr_types_gs[opcode_index] + : element_ptr_types[opcode_index], + element_data_types[opcode_index], enable_segue))) { return false; } @@ -260,12 +282,12 @@ aot_compile_simd_load_zero(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, static bool simd_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset, uint32 data_length, LLVMValueRef value, - LLVMTypeRef value_ptr_type) + LLVMTypeRef value_ptr_type, bool enable_segue) { LLVMValueRef maddr, result; if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, - data_length))) + data_length, enable_segue))) return false; if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, value_ptr_type, @@ -288,12 +310,14 @@ bool aot_compile_simd_v128_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset) { + bool enable_segue = comp_ctx->enable_segue_v128_store; + LLVMTypeRef v128_ptr_type = enable_segue ? V128_PTR_TYPE_GS : V128_PTR_TYPE; LLVMValueRef value; POP_V128(value); return simd_store(comp_ctx, func_ctx, align, offset, 16, value, - V128_PTR_TYPE); + v128_ptr_type, enable_segue); fail: return false; } @@ -307,10 +331,14 @@ aot_compile_simd_store_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 data_lengths[] = { 1, 2, 4, 8 }; LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE, INT32_PTR_TYPE, INT64_PTR_TYPE }; + LLVMTypeRef element_ptr_types_gs[] = { INT8_PTR_TYPE_GS, INT16_PTR_TYPE_GS, + INT32_PTR_TYPE_GS, + INT64_PTR_TYPE_GS }; uint32 opcode_index = opcode - SIMD_v128_store8_lane; LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE, V128_i32x4_TYPE, V128_i64x2_TYPE }; LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id); + bool enable_segue = comp_ctx->enable_segue_v128_store; bh_assert(opcode_index < 4); @@ -327,5 +355,7 @@ aot_compile_simd_store_lane(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return simd_store(comp_ctx, func_ctx, align, offset, data_lengths[opcode_index], 
element, - element_ptr_types[opcode_index]); + enable_segue ? element_ptr_types_gs[opcode_index] + : element_ptr_types[opcode_index], + enable_segue); } diff --git a/core/iwasm/include/aot_export.h b/core/iwasm/include/aot_export.h index e58873bfd..dca26aa6e 100644 --- a/core/iwasm/include/aot_export.h +++ b/core/iwasm/include/aot_export.h @@ -55,12 +55,15 @@ typedef struct AOTCompOption { bool enable_aux_stack_frame; bool disable_llvm_intrinsics; bool disable_llvm_lto; + bool enable_llvm_pgo; bool enable_stack_estimation; + char *use_prof_file; uint32_t opt_level; uint32_t size_level; uint32_t output_format; uint32_t bounds_checks; uint32_t stack_bounds_checks; + uint32_t segue_flags; char **custom_sections; uint32_t custom_sections_count; const char *stack_usage_file; diff --git a/core/iwasm/include/wasm_export.h b/core/iwasm/include/wasm_export.h index f6c0107b9..28b952e8f 100644 --- a/core/iwasm/include/wasm_export.h +++ b/core/iwasm/include/wasm_export.h @@ -167,6 +167,8 @@ typedef struct RuntimeInitArgs { /* LLVM JIT opt and size level */ uint32_t llvm_jit_opt_level; uint32_t llvm_jit_size_level; + /* Segue optimization flags for LLVM JIT */ + uint32_t segue_flags; } RuntimeInitArgs; #ifndef WASM_VALKIND_T_DEFINED @@ -1329,6 +1331,30 @@ WASM_RUNTIME_API_EXTERN uint32_t wasm_runtime_dump_call_stack_to_buf(wasm_exec_env_t exec_env, char *buf, uint32_t len); +/** + * Get the size required to store the LLVM PGO profile data + * + * @param module_inst the WASM module instance + * + * @return size required to store the contents, 0 means error + */ +WASM_RUNTIME_API_EXTERN uint32_t +wasm_runtime_get_pgo_prof_data_size(wasm_module_inst_t module_inst); + +/** + * Dump the LLVM PGO profile data to buffer + * + * @param module_inst the WASM module instance + * @param buf buffer to store the dumped content + * @param len length of the buffer + * + * @return bytes dumped to the buffer, 0 means error and data in buf + * may be invalid + */ +WASM_RUNTIME_API_EXTERN uint32_t 
+wasm_runtime_dump_pgo_prof_data_to_buf(wasm_module_inst_t module_inst, + char *buf, uint32_t len); + /** * Get a custom section by name * @@ -1351,20 +1377,21 @@ WASM_RUNTIME_API_EXTERN void wasm_runtime_get_version(uint32_t *major, uint32_t *minor, uint32_t *patch); /** - * Check whether an import func `(import (func ...))` is linked or not - * with runtime registered natvie functions + * Check whether an import func `(import (func ...))` + * is linked or not with runtime registered natvie functions */ WASM_RUNTIME_API_EXTERN bool wasm_runtime_is_import_func_linked(const char *module_name, const char *func_name); /** - * Check whether an import global `(import (global ...))` is linked or not - * with runtime registered natvie globals + * Check whether an import global `(import (global ...))` + * is linked or not with runtime registered natvie globals */ WASM_RUNTIME_API_EXTERN bool wasm_runtime_is_import_global_linked(const char *module_name, const char *global_name); + /* clang-format on */ #ifdef __cplusplus diff --git a/core/iwasm/interpreter/wasm_interp_classic.c b/core/iwasm/interpreter/wasm_interp_classic.c index 9e326e737..653ee5b79 100644 --- a/core/iwasm/interpreter/wasm_interp_classic.c +++ b/core/iwasm/interpreter/wasm_interp_classic.c @@ -270,7 +270,7 @@ local_copysignf(float x, float y) { union { float f; - uint32_t i; + uint32 i; } ux = { x }, uy = { y }; ux.i &= 0x7fffffff; ux.i |= uy.i & 0x80000000; @@ -282,9 +282,9 @@ local_copysign(double x, double y) { union { double f; - uint64_t i; + uint64 i; } ux = { x }, uy = { y }; - ux.i &= -1ULL / 2; + ux.i &= UINT64_MAX / 2; ux.i |= uy.i & 1ULL << 63; return ux.f; } @@ -4002,6 +4002,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } #if WASM_ENABLE_FAST_JIT != 0 +/* ASAN is not designed to work with custom stack unwind or other low-level \ + things. > Ignore a function that does some low-level magic. (e.g. 
walking \ + through the thread's stack bypassing the frame boundaries) */ +#if defined(__GNUC__) +__attribute__((no_sanitize_address)) +#endif static void fast_jit_call_func_bytecode(WASMModuleInstance *module_inst, WASMExecEnv *exec_env, @@ -4241,6 +4247,15 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env, wasm_exec_env_set_cur_frame(exec_env, frame); +#if defined(os_writegsbase) + { + WASMMemoryInstance *memory_inst = wasm_get_default_memory(module_inst); + if (memory_inst) + /* write base addr of linear memory to GS segment register */ + os_writegsbase(memory_inst->memory_data); + } +#endif + if (function->is_import_func) { #if WASM_ENABLE_MULTI_MODULE != 0 if (function->import_module_inst) { diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index d36804e46..49d5e412c 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -232,7 +232,7 @@ local_copysignf(float x, float y) { union { float f; - uint32_t i; + uint32 i; } ux = { x }, uy = { y }; ux.i &= 0x7fffffff; ux.i |= uy.i & 0x80000000; @@ -244,9 +244,9 @@ local_copysign(double x, double y) { union { double f; - uint64_t i; + uint64 i; } ux = { x }, uy = { y }; - ux.i &= -1ULL / 2; + ux.i &= UINT64_MAX / 2; ux.i |= uy.i & 1ULL << 63; return ux.f; } @@ -3995,6 +3995,15 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env, wasm_exec_env_set_cur_frame(exec_env, frame); +#if defined(os_writegsbase) + { + WASMMemoryInstance *memory_inst = wasm_get_default_memory(module_inst); + if (memory_inst) + /* write base addr of linear memory to GS segment register */ + os_writegsbase(memory_inst->memory_data); + } +#endif + if (function->is_import_func) { #if WASM_ENABLE_MULTI_MODULE != 0 if (function->import_module_inst) { diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 9094cc248..64d723868 100644 --- 
a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -3000,7 +3000,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, if (module->function_count == 0) return true; -#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LLVM_JIT != 0 +#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LAZY_JIT != 0 if (os_mutex_init(&module->tierup_wait_lock) != 0) { set_error_buf(error_buf, error_buf_size, "init jit tierup lock failed"); return false; @@ -3035,6 +3035,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, llvm_jit_options = wasm_runtime_get_llvm_jit_options(); option.opt_level = llvm_jit_options.opt_level; option.size_level = llvm_jit_options.size_level; + option.segue_flags = llvm_jit_options.segue_flags; #if WASM_ENABLE_BULK_MEMORY != 0 option.enable_bulk_memory = true; diff --git a/core/iwasm/interpreter/wasm_mini_loader.c b/core/iwasm/interpreter/wasm_mini_loader.c index 6a62a12f6..ebce01edb 100644 --- a/core/iwasm/interpreter/wasm_mini_loader.c +++ b/core/iwasm/interpreter/wasm_mini_loader.c @@ -1843,7 +1843,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, if (module->function_count == 0) return true; -#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LLVM_JIT != 0 +#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_LAZY_JIT != 0 if (os_mutex_init(&module->tierup_wait_lock) != 0) { set_error_buf(error_buf, error_buf_size, "init jit tierup lock failed"); return false; @@ -1876,6 +1876,7 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, option.is_jit_mode = true; option.opt_level = llvm_jit_options.opt_level; option.size_level = llvm_jit_options.size_level; + option.segue_flags = llvm_jit_options.segue_flags; #if WASM_ENABLE_BULK_MEMORY != 0 option.enable_bulk_memory = true; diff --git a/core/iwasm/interpreter/wasm_runtime.c b/core/iwasm/interpreter/wasm_runtime.c index 29365024d..ef0128dbe 100644 --- a/core/iwasm/interpreter/wasm_runtime.c +++ 
b/core/iwasm/interpreter/wasm_runtime.c @@ -3048,12 +3048,14 @@ wasm_interp_dump_call_stack(struct WASMExecEnv *exec_env, bool print, char *buf, /* function name not exported, print number instead */ if (frame.func_name_wp == NULL) { - line_length = snprintf(line_buf, sizeof(line_buf), "#%02d $f%d\n", - n, frame.func_index); + line_length = + snprintf(line_buf, sizeof(line_buf), + "#%02" PRIu32 " $f%" PRIu32 "\n", n, frame.func_index); } else { - line_length = snprintf(line_buf, sizeof(line_buf), "#%02d %s\n", n, - frame.func_name_wp); + line_length = + snprintf(line_buf, sizeof(line_buf), "#%02" PRIu32 " %s\n", n, + frame.func_name_wp); } if (line_length >= sizeof(line_buf)) { diff --git a/core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c b/core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c index ae1fd94f7..206479c2a 100644 --- a/core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c +++ b/core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c @@ -561,7 +561,6 @@ pthread_create_wrapper(wasm_exec_env_t exec_env, #if WASM_ENABLE_LIBC_WASI != 0 WASIContext *wasi_ctx; #endif - CApiFuncImport **new_c_api_func_imports = NULL; bh_assert(module); bh_assert(module_inst); diff --git a/core/iwasm/libraries/libc-wasi/libc_wasi_wrapper.c b/core/iwasm/libraries/libc-wasi/libc_wasi_wrapper.c index afb11925a..70ac4dc54 100644 --- a/core/iwasm/libraries/libc-wasi/libc_wasi_wrapper.c +++ b/core/iwasm/libraries/libc-wasi/libc_wasi_wrapper.c @@ -56,11 +56,13 @@ typedef struct WASIContext *wasi_ctx_t; wasi_ctx_t wasm_runtime_get_wasi_ctx(wasm_module_inst_t module_inst); +#if WASM_ENABLE_THREAD_MGR != 0 static inline uint64_t min_uint64(uint64_t a, uint64_t b) { return a > b ? 
b : a; } +#endif static inline uint32_t min_uint32(uint32_t a, uint32_t b) diff --git a/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/ssp_config.h b/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/ssp_config.h index d5babd02b..7f6e9b941 100644 --- a/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/ssp_config.h +++ b/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/ssp_config.h @@ -41,7 +41,7 @@ #endif #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__EMSCRIPTEN__) \ - && !defined(ESP_PLATFORM) + && !defined(ESP_PLATFORM) && !defined(DISABLE_CLOCK_NANOSLEEP) #define CONFIG_HAS_CLOCK_NANOSLEEP 1 #else #define CONFIG_HAS_CLOCK_NANOSLEEP 0 diff --git a/core/iwasm/libraries/wasi-nn/test/CMakeLists.txt b/core/iwasm/libraries/wasi-nn/test/CMakeLists.txt index 33fad71eb..30be48a22 100644 --- a/core/iwasm/libraries/wasi-nn/test/CMakeLists.txt +++ b/core/iwasm/libraries/wasi-nn/test/CMakeLists.txt @@ -8,7 +8,7 @@ project (iwasm) set (CMAKE_VERBOSE_MAKEFILE OFF) # Reset default linker flags set (CMAKE_C_STANDARD 99) -set (CMAKE_CXX_STANDARD 14) +set (CMAKE_CXX_STANDARD 17) set (CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "") set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") diff --git a/core/shared/mem-alloc/ems/ems_alloc.c b/core/shared/mem-alloc/ems/ems_alloc.c index 5c2a628a2..a29539dd5 100644 --- a/core/shared/mem-alloc/ems/ems_alloc.c +++ b/core/shared/mem-alloc/ems/ems_alloc.c @@ -564,6 +564,7 @@ gc_realloc_vo_internal(void *vheap, void *ptr, gc_size_t size, const char *file, os_mutex_unlock(&heap->lock); return NULL; } + hmu_mark_pinuse(hmu_next); } os_mutex_unlock(&heap->lock); return obj_old; diff --git a/core/shared/mem-alloc/ems/ems_gc_internal.h b/core/shared/mem-alloc/ems/ems_gc_internal.h index e1ff9d61d..68b505453 100644 --- a/core/shared/mem-alloc/ems/ems_gc_internal.h +++ b/core/shared/mem-alloc/ems/ems_gc_internal.h @@ -214,13 +214,16 @@ set_hmu_normal_node_next(hmu_normal_node_t *node, 
hmu_normal_node_t *next) #if defined(_MSC_VER) __pragma(pack(push, 1)); #define __attr_packed +#define __attr_aligned(a) #elif defined(__GNUC__) || defined(__clang__) #define __attr_packed __attribute__((packed)) +#define __attr_aligned(a) __attribute__((aligned(a))) #else #error "packed attribute isn't used to define struct hmu_tree_node" #endif #else /* else of UINTPTR_MAX == UINT64_MAX */ #define __attr_packed +#define __attr_aligned(a) #endif typedef struct hmu_tree_node { @@ -229,7 +232,7 @@ typedef struct hmu_tree_node { struct hmu_tree_node *right; struct hmu_tree_node *parent; gc_size_t size; -} __attr_packed hmu_tree_node_t; +} __attr_packed __attr_aligned(4) hmu_tree_node_t; #if UINTPTR_MAX == UINT64_MAX #if defined(_MSC_VER) diff --git a/core/shared/platform/common/posix/posix_thread.c b/core/shared/platform/common/posix/posix_thread.c index 5e814c418..4fb566d6e 100644 --- a/core/shared/platform/common/posix/posix_thread.c +++ b/core/shared/platform/common/posix/posix_thread.c @@ -492,6 +492,12 @@ destroy_stack_guard_pages() } #endif /* end of WASM_DISABLE_STACK_HW_BOUND_CHECK == 0 */ +/* ASAN is not designed to work with custom stack unwind or other low-level \ + things. > Ignore a function that does some low-level magic. (e.g. walking \ + through the thread's stack bypassing the frame boundaries) */ +#if defined(__GNUC__) +__attribute__((no_sanitize_address)) +#endif static void mask_signals(int how) { @@ -506,6 +512,12 @@ mask_signals(int how) static os_thread_local_attribute struct sigaction prev_sig_act_SIGSEGV; static os_thread_local_attribute struct sigaction prev_sig_act_SIGBUS; +/* ASAN is not designed to work with custom stack unwind or other low-level \ + things. > Ignore a function that does some low-level magic. (e.g. 
walking \ + through the thread's stack bypassing the frame boundaries) */ +#if defined(__GNUC__) +__attribute__((no_sanitize_address)) +#endif static void signal_callback(int sig_num, siginfo_t *sig_info, void *sig_ucontext) { diff --git a/core/shared/platform/include/platform_api_extension.h b/core/shared/platform/include/platform_api_extension.h index 94fe16ea3..7029bb8d7 100644 --- a/core/shared/platform/include/platform_api_extension.h +++ b/core/shared/platform/include/platform_api_extension.h @@ -130,6 +130,7 @@ os_thread_exit(void *retval); #define os_memory_order_release memory_order_release #define os_memory_order_seq_cst memory_order_seq_cst #define os_atomic_thread_fence atomic_thread_fence +#define os_atomic_cmpxchg atomic_compare_exchange_strong #endif #endif /* end of os_atomic_thread_fence */ diff --git a/core/shared/platform/linux/platform_internal.h b/core/shared/platform/linux/platform_internal.h index 0ac63cf5e..8439f8723 100644 --- a/core/shared/platform/linux/platform_internal.h +++ b/core/shared/platform/linux/platform_internal.h @@ -63,6 +63,22 @@ typedef sem_t korp_sem; #define bh_socket_t int +#if WASM_DISABLE_WRITE_GS_BASE == 0 +#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) +#define os_writegsbase(base_addr) \ + do { \ + uint64 __gs_value = (uint64)(uintptr_t)base_addr; \ + asm volatile("wrgsbase %0" ::"r"(__gs_value) : "memory"); \ + } while (0) +#if 0 +/* _writegsbase_u64 also works, but need to add -mfsgsbase flag for gcc */ +#include +#define os_writegsbase(base_addr) \ + _writegsbase_u64(((uint64)(uintptr_t)base_addr)) +#endif +#endif +#endif + #if WASM_DISABLE_HW_BOUND_CHECK == 0 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \ || defined(BUILD_TARGET_AARCH64) || defined(BUILD_TARGET_RISCV64_LP64D) \ diff --git a/core/shared/platform/windows/platform_internal.h b/core/shared/platform/windows/platform_internal.h index 8d5c1488b..500ab200c 100644 --- 
a/core/shared/platform/windows/platform_internal.h +++ b/core/shared/platform/windows/platform_internal.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/core/shared/platform/windows/shared_platform.cmake b/core/shared/platform/windows/shared_platform.cmake index 414c7b1a1..a68d63177 100644 --- a/core/shared/platform/windows/shared_platform.cmake +++ b/core/shared/platform/windows/shared_platform.cmake @@ -5,7 +5,7 @@ set (PLATFORM_SHARED_DIR ${CMAKE_CURRENT_LIST_DIR}) add_definitions(-DBH_PLATFORM_WINDOWS) add_definitions(-DHAVE_STRUCT_TIMESPEC) - +add_definitions(-D_WINSOCK_DEPRECATED_NO_WARNINGS) include_directories(${PLATFORM_SHARED_DIR}) include_directories(${PLATFORM_SHARED_DIR}/../include) diff --git a/core/version.h b/core/version.h index a999d534e..de24b30bc 100644 --- a/core/version.h +++ b/core/version.h @@ -7,5 +7,5 @@ #define _WAMR_VERSION_H_ #define WAMR_VERSION_MAJOR 1 #define WAMR_VERSION_MINOR 2 -#define WAMR_VERSION_PATCH 1 +#define WAMR_VERSION_PATCH 2 #endif diff --git a/doc/build_wamr.md b/doc/build_wamr.md index a66f27be0..a9d337164 100644 --- a/doc/build_wamr.md +++ b/doc/build_wamr.md @@ -98,7 +98,7 @@ cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM #### **Disable boundary check with hardware trap** - **WAMR_DISABLE_HW_BOUND_CHECK**=1/0, default to enable if not set and supported by platform -> Note: by default only platform linux/darwin/android/windows/vxworks 64-bit will enable the boundary check with hardware trap feature, and the wamrc tool will generate AOT code without boundary check instructions in all 64-bit targets except SGX to improve performance. The boundary check includes linear memory access boundary and native stack access boundary, if `WAMR_DISABLE_STACK_HW_BOUND_CHECK` below isn't set. 
+> Note: by default only platform [linux/darwin/android/windows/vxworks 64-bit](https://github.com/bytecodealliance/wasm-micro-runtime/blob/5fb5119239220b0803e7045ca49b0a29fe65e70e/core/shared/platform/linux/platform_internal.h#L81) will enable the boundary check with hardware trap feature, for 32-bit platforms it's automatically disabled even when the flag is set to 0, and the wamrc tool will generate AOT code without boundary check instructions in all 64-bit targets except SGX to improve performance. The boundary check includes linear memory access boundary and native stack access boundary, if `WAMR_DISABLE_STACK_HW_BOUND_CHECK` below isn't set. #### **Disable native stack boundary check with hardware trap** - **WAMR_DISABLE_STACK_HW_BOUND_CHECK**=1/0, default to enable if not set and supported by platform, same as `WAMR_DISABLE_HW_BOUND_CHECK`. @@ -198,6 +198,13 @@ Currently we only profile the memory consumption of module, module_instance and - **WAMR_BUILD_STACK_GUARD_SIZE**=n, default to N/A if not set. > Note: By default, the stack guard size is 1K (1024) or 24K (if uvwasi enabled). +### **Disable the writing linear memory base address to x86 GS segment register** +- **WAMR_DISABLE_WRITE_GS_BASE**=1/0, default to enable if not set and supported by platform +> Note: by default only platform [linux x86-64](https://github.com/bytecodealliance/wasm-micro-runtime/blob/5fb5119239220b0803e7045ca49b0a29fe65e70e/core/shared/platform/linux/platform_internal.h#L67) will enable this feature, for 32-bit platforms it's automatically disabled even when the flag is set to 0. In linux x86-64, writing the linear memory base address to x86 GS segment register may be used to speed up the linear memory access for LLVM AOT/JIT, when `--enable-segue=[<flags>]` option is added for `wamrc` or `iwasm`.
+ +### **Enable running PGO(Profile-Guided Optimization) instrumented AOT file** +- **WAMR_BUILD_STATIC_PGO**=1/0, default to disable if not set + **Combination of configurations:** We can combine the configurations. For example, if we want to disable interpreter, enable AOT and WASI, we can run command: diff --git a/doc/build_wasm_app.md b/doc/build_wasm_app.md index 40f1b89dd..4475824dc 100644 --- a/doc/build_wasm_app.md +++ b/doc/build_wasm_app.md @@ -394,7 +394,7 @@ Examples: wamrc -o test.aot test.wasm ### Usage example ``` bash -WAMRC_LLC_COMPILER= ./wamrc -o test.aot test.wasm +WAMRC_LLC_COMPILER=/usr/local/opt/llvm@14/bin/clang WAMRC_LLC_FLAGS="--target=x86_64-pc-linux-gnu -mcmodel=medium -c -O3" ./wamrc -o test.aot test.wasm ``` > Note: `wamrc` will verify whether the specified file exists and executable. If verification failed, `wamrc` will report a warning and fallback to normal pipeline. Since the verification is based on file, you **must specify the absolute path to the binary** even if it's in `$PATH` @@ -403,6 +403,8 @@ WAMRC_LLC_COMPILER= ./wamrc -o test.aot test.wasm > Note: the `LLC` and `ASM` in the env name just means this compiler will be used to compile the `LLVM IR file`/`assembly file` to object file, usually passing the compiler driver is the simplest way. (e.g. for LLVM toolchain, you don't need to pass `/usr/bin/llc`, using `/usr/bin/clang` is OK) +> Note: You might need to set `WAMRC_LLC_FLAGS`/`WAMRC_ASM_FLAGS` to match whatever the `wamrc` command would automatically do. In the above example, `-mcmodel=medium` corresponds to `wamrc --size-level=1`, which is the default of `wamrc` on macOS. + Run WASM app in WAMR mini product build ======================================= diff --git a/doc/perf_tune.md b/doc/perf_tune.md new file mode 100644 index 000000000..05a6433a7 --- /dev/null +++ b/doc/perf_tune.md @@ -0,0 +1,74 @@ +# Tune the performance of running wasm/aot file + +Normally there are some methods to tune the performance: + +## 1. 
Use `wasm-opt` tool + +Download the [binaryen release](https://github.com/WebAssembly/binaryen/releases), and use the `wasm-opt` tool in it to optimize the wasm file, for example: + +```bash +wasm-opt -O4 -o test_opt.wasm test.wasm +``` + +## 2. Enable `simd128` option when compiling wasm source files + +WebAssembly [128-bit SIMD](https://github.com/WebAssembly/simd) is supported by WAMR on x86-64 and aarch64 targets, enabling it when compiling wasm source files may greatly improve the performance. For [wasi-sdk](https://github.com/WebAssembly/wasi-sdk) and [emsdk](https://github.com/emscripten-core/emsdk), please add `-msimd128` flag for `clang` and `emcc/em++`: + +```bash +/opt/wasi-sdk/bin/clang -msimd128 -O3 -o <wasm_file> <c/c++ source files> + +emcc -msimd128 -O3 -o <wasm_file> <c/c++ source files> +``` + +## 3. Enable segue optimization for wamrc when generating the aot file + +[Segue](https://plas2022.github.io/files/pdf/SegueColorGuard.pdf) is an optimization technology which uses x86 segment register to store the WebAssembly linear memory base address, so as to remove most of the cost of SFI (Software-based Fault Isolation) base addition and free up a general purpose register. In this way it may: +- Improve the performance of JIT/AOT +- Reduce the footprint of JIT/AOT, the JIT/AOT code generated is smaller +- Reduce the compilation time of JIT/AOT + +Currently it is supported on linux x86-64; developers can use `--enable-segue=[<flags>]` for wamrc: +```bash +wamrc --enable-segue -o aot_file wasm_file +# or +wamrc --enable-segue=[<flags>] -o aot_file wasm_file +``` +`flags` can be: i32.load, i64.load, f32.load, f64.load, v128.load, i32.store, i64.store, f32.store, f64.store and v128.store, use comma to separate them, e.g. `--enable-segue=i32.load,i64.store`, and `--enable-segue` means all flags are added.
+ +> Note: Normally for most cases, using `--enable-segue` is enough, but for some cases, using `--enable-segue=<flags>` may be better, for example for CoreMark benchmark, `--enable-segue=i32.store` may lead to better performance than `--enable-segue`. + +## 4. Enable segue optimization for iwasm when running wasm file + +Similar to segue optimization for wamrc, run: +``` bash +iwasm --enable-segue wasm_file (iwasm is built with llvm-jit enabled) +# or +iwasm --enable-segue=[<flags>] wasm_file +``` + +## 5. Use the AOT static PGO method + +LLVM PGO (Profile-Guided Optimization) allows the compiler to better optimize code for how it actually runs. WAMR supports AOT static PGO; currently it is tested on Linux x86-64 and x86-32. The basic steps are: + +1. Use `wamrc --enable-llvm-pgo -o <aot_file_of_pgo> <wasm_file>` to generate an instrumented aot file. + +2. Compile iwasm with `cmake -DWAMR_BUILD_STATIC_PGO=1` and run `iwasm --gen-prof-file=<raw_profile_file> <aot_file_of_pgo>` to generate the raw profile file. + +> Note: Directly dumping raw profile data to file system may be unsupported in some environments; developers can dump the profile data into memory buffer instead and try outputting it through network (e.g. uart or socket): +```C +uint32_t +wasm_runtime_get_pgo_prof_data_size(wasm_module_inst_t module_inst); + +uint32_t +wasm_runtime_dump_pgo_prof_data_to_buf(wasm_module_inst_t module_inst, char *buf, uint32_t len); +``` + +3. Install or compile `llvm-profdata` tool, refer to [here](../tests/benchmarks/README.md#install-llvm-profdata) for the details. + +4. Run `llvm-profdata merge -output=<profile_file> <raw_profile_file>` to merge the raw profile file into the profile file. + +5. Run `wamrc --use-prof-file=<profile_file> -o <aot_file> <wasm_file>` to generate the optimized aot file. + +6. Run the optimized aot_file: `iwasm <aot_file>`. + +Developers can refer to the `test_pgo.sh` files under each benchmark folder for more details, e.g. [test_pgo.sh](../tests/benchmarks/coremark/test_pgo.sh) of CoreMark benchmark.
diff --git a/language-bindings/python/README.md b/language-bindings/python/README.md index ec82ee191..96b7a7ff9 100644 --- a/language-bindings/python/README.md +++ b/language-bindings/python/README.md @@ -4,6 +4,8 @@ The WAMR Python package contains a set of high-level bindings for WAMR API and W ## Installation +* **Notice**: This Python package needs Python >= `3.9`. + To Install from local source tree in _development mode_ run the following command, ```bash diff --git a/language-bindings/python/setup.py b/language-bindings/python/setup.py index fb7993e68..ec080e4ee 100755 --- a/language-bindings/python/setup.py +++ b/language-bindings/python/setup.py @@ -62,4 +62,5 @@ setup( 'install': PreInstallCommand, 'egg_info': PreEggInfoCommand, }, + python_requires='>=3.9' ) diff --git a/language-bindings/python/wamr-api/README.md b/language-bindings/python/wamr-api/README.md index 5ee672e29..801c8aa1e 100644 --- a/language-bindings/python/wamr-api/README.md +++ b/language-bindings/python/wamr-api/README.md @@ -1,5 +1,7 @@ # WARM API +* **Notice**: The Python package `wamr.wamrapi.wamr` needs Python >= `3.9`.
+ ## Setup ### Pre-requisites diff --git a/language-bindings/python/wamr-api/samples/compile.sh b/language-bindings/python/wamr-api/samples/compile.sh old mode 100644 new mode 100755 diff --git a/product-mini/platforms/darwin/CMakeLists.txt b/product-mini/platforms/darwin/CMakeLists.txt index 4d68066b0..865e516fc 100644 --- a/product-mini/platforms/darwin/CMakeLists.txt +++ b/product-mini/platforms/darwin/CMakeLists.txt @@ -34,7 +34,7 @@ if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif () -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) if (NOT DEFINED WAMR_BUILD_INTERP) # Enable Interpreter by default diff --git a/product-mini/platforms/freebsd/CMakeLists.txt b/product-mini/platforms/freebsd/CMakeLists.txt index fee2934c0..dd1bbc41a 100644 --- a/product-mini/platforms/freebsd/CMakeLists.txt +++ b/product-mini/platforms/freebsd/CMakeLists.txt @@ -34,7 +34,7 @@ if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif () -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) if (NOT DEFINED WAMR_BUILD_INTERP) # Enable Interpreter by default diff --git a/product-mini/platforms/ios/CMakeLists.txt b/product-mini/platforms/ios/CMakeLists.txt index 764bc7f65..4bbff4cff 100644 --- a/product-mini/platforms/ios/CMakeLists.txt +++ b/product-mini/platforms/ios/CMakeLists.txt @@ -41,7 +41,7 @@ if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif () -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) if (NOT DEFINED WAMR_BUILD_INTERP) # Enable Interpreter by default diff --git a/product-mini/platforms/linux-sgx/CMakeLists.txt b/product-mini/platforms/linux-sgx/CMakeLists.txt index e1cbe2cc7..a9aef355c 100644 --- a/product-mini/platforms/linux-sgx/CMakeLists.txt +++ b/product-mini/platforms/linux-sgx/CMakeLists.txt @@ -89,6 +89,11 @@ if (NOT DEFINED WAMR_BUILD_SGX_IPFS) set (WAMR_BUILD_SGX_IPFS 0) endif () +if (NOT DEFINED WAMR_BUILD_STATIC_PGO) + # Disable static PGO by default + set (WAMR_BUILD_STATIC_PGO 0) +endif () + set 
(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu11 -ffunction-sections -fdata-sections \ -Wall -Wno-unused-parameter -Wno-pedantic \ @@ -107,6 +112,18 @@ add_custom_command ( add_custom_target (vmlib_untrusted ALL DEPENDS libvmlib_untrusted.a) +if ((WAMR_BUILD_STATIC_PGO EQUAL 1) AND (WAMR_BUILD_AOT EQUAL 1)) + execute_process( + COMMAND bash -c "sed -i -E 's/^WAMR_BUILD_STATIC_PGO = 0/WAMR_BUILD_STATIC_PGO = 1/g' ${CMAKE_CURRENT_SOURCE_DIR}/enclave-sample/Makefile" + OUTPUT_VARIABLE cmdOutput + ) +else() + execute_process( + COMMAND bash -c "sed -i -E 's/^WAMR_BUILD_STATIC_PGO = 1/WAMR_BUILD_STATIC_PGO = 0/g' ${CMAKE_CURRENT_SOURCE_DIR}/enclave-sample/Makefile" + OUTPUT_VARIABLE cmdOutput + ) +endif() + if (DEFINED WAMR_BUILD_GLOBAL_HEAP_POOL) execute_process( COMMAND bash -c "sed -i -E 's/^WAMR_BUILD_GLOBAL_HEAP_POOL = .*/WAMR_BUILD_GLOBAL_HEAP_POOL = ${WAMR_BUILD_GLOBAL_HEAP_POOL}/g' ${CMAKE_CURRENT_SOURCE_DIR}/enclave-sample/Makefile" diff --git a/product-mini/platforms/linux-sgx/enclave-sample/App/App.cpp b/product-mini/platforms/linux-sgx/enclave-sample/App/App.cpp index e760518a1..2b5300ff3 100644 --- a/product-mini/platforms/linux-sgx/enclave-sample/App/App.cpp +++ b/product-mini/platforms/linux-sgx/enclave-sample/App/App.cpp @@ -232,6 +232,9 @@ print_help() printf(" for example:\n"); printf(" --addr-pool=1.2.3.4/15,2.3.4.5/16\n"); printf(" --max-threads=n Set maximum thread number per cluster, default is 4\n"); +#if WASM_ENABLE_STATIC_PGO != 0 + printf(" --gen-prof-file= Generate LLVM PGO (Profile-Guided Optimization) profile file\n"); +#endif printf(" --version Show version information\n"); return 1; } @@ -294,6 +297,10 @@ typedef enum EcallCmd { CMD_SET_WASI_ARGS, /* wasm_runtime_set_wasi_args() */ CMD_SET_LOG_LEVEL, /* bh_log_set_verbose_level() */ CMD_GET_VERSION, /* wasm_runtime_get_version() */ +#if WASM_ENABLE_STATIC_PGO != 0 + CMD_GET_PGO_PROF_BUF_SIZE, /* 
wasm_runtime_get_pro_prof_data_size() */ + CMD_DUMP_PGO_PROF_BUF_DATA, /* wasm_runtime_dump_pgo_prof_data_to_buf() */ +#endif } EcallCmd; static void @@ -598,6 +605,64 @@ get_version(uint64_t *major, uint64_t *minor, uint64_t *patch) *patch = ecall_args[2]; } +#if WASM_ENABLE_STATIC_PGO != 0 +static void +dump_pgo_prof_data(void *module_inst, const char *path) +{ + char *buf; + uint32_t len; + FILE *file; + + uint64_t ecall_args[1]; + ecall_args[0] = (uint64_t)(uintptr_t)module_inst; + if (SGX_SUCCESS + != ecall_handle_command(g_eid, CMD_GET_PGO_PROF_BUF_SIZE, + (uint8_t *)ecall_args, sizeof(ecall_args))) { + printf("Call ecall_handle_command() failed.\n"); + return; + } + if (!(len = ecall_args[0])) { + printf("failed to get LLVM PGO profile data size\n"); + return; + } + + if (!(buf = (char *)malloc(len))) { + printf("allocate memory failed\n"); + return; + } + + uint64_t ecall_args_2[3]; + ecall_args_2[0] = (uint64_t)(uintptr_t)module_inst; + ecall_args_2[1] = (uint64_t)(uintptr_t)buf; + ecall_args_2[2] = len; + if (SGX_SUCCESS + != ecall_handle_command(g_eid, CMD_DUMP_PGO_PROF_BUF_DATA, + (uint8_t *)ecall_args_2, + sizeof(ecall_args_2))) { + printf("Call ecall_handle_command() failed.\n"); + free(buf); + return; + } + if (!(len = ecall_args_2[0])) { + printf("failed to dump LLVM PGO profile data\n"); + free(buf); + return; + } + + if (!(file = fopen(path, "wb"))) { + printf("failed to create file %s", path); + free(buf); + return; + } + fwrite(buf, len, 1, file); + fclose(file); + + free(buf); + + printf("LLVM raw profile file %s was generated.\n", path); +} +#endif + int main(int argc, char *argv[]) { @@ -619,6 +684,9 @@ main(int argc, char *argv[]) const char *addr_pool[8] = { NULL }; uint32_t addr_pool_size = 0; uint32_t max_thread_num = 4; +#if WASM_ENABLE_STATIC_PGO != 0 + const char *gen_prof_file = NULL; +#endif if (enclave_init(&g_eid) < 0) { std::cout << "Fail to initialize enclave." 
<< std::endl; @@ -718,6 +786,13 @@ main(int argc, char *argv[]) return print_help(); max_thread_num = atoi(argv[0] + 14); } +#if WASM_ENABLE_STATIC_PGO != 0 + else if (!strncmp(argv[0], "--gen-prof-file=", 16)) { + if (argv[0][16] == '\0') + return print_help(); + gen_prof_file = argv[0] + 16; + } +#endif else if (!strncmp(argv[0], "--version", 9)) { uint64_t major = 0, minor = 0, patch = 0; get_version(&major, &minor, &patch); @@ -779,6 +854,11 @@ main(int argc, char *argv[]) else app_instance_main(wasm_module_inst, argc, argv); +#if WASM_ENABLE_STATIC_PGO != 0 + if (gen_prof_file) + dump_pgo_prof_data(wasm_module_inst, gen_prof_file); +#endif + ret = 0; /* Deinstantiate module */ @@ -836,7 +916,7 @@ wamr_pal_create_process(struct wamr_pal_create_process_args *args) int stdoutfd = -1; int stderrfd = -1; - int argc = 2; + const int argc = 2; char *argv[argc] = { (char *)"./iwasm", (char *)args->argv[0] }; uint8_t *wasm_files_buf = NULL; diff --git a/product-mini/platforms/linux-sgx/enclave-sample/Enclave/Enclave.cpp b/product-mini/platforms/linux-sgx/enclave-sample/Enclave/Enclave.cpp index 302743594..9ed17e1c8 100644 --- a/product-mini/platforms/linux-sgx/enclave-sample/Enclave/Enclave.cpp +++ b/product-mini/platforms/linux-sgx/enclave-sample/Enclave/Enclave.cpp @@ -49,6 +49,10 @@ typedef enum EcallCmd { CMD_SET_WASI_ARGS, /* wasm_runtime_set_wasi_args() */ CMD_SET_LOG_LEVEL, /* bh_log_set_verbose_level() */ CMD_GET_VERSION, /* wasm_runtime_get_version() */ +#if WASM_ENABLE_STATIC_PGO != 0 + CMD_GET_PGO_PROF_BUF_SIZE, /* wasm_runtime_get_pro_prof_data_size() */ + CMD_DUMP_PGO_PROF_BUF_DATA, /* wasm_runtime_dump_pgo_prof_data_to_buf() */ +#endif } EcallCmd; typedef struct EnclaveModule { @@ -597,6 +601,36 @@ handle_cmd_get_version(uint64 *args, uint32 argc) args[2] = patch; } +#if WASM_ENABLE_STATIC_PGO != 0 +static void +handle_cmd_get_pgo_prof_buf_size(uint64 *args, int32 argc) +{ + wasm_module_inst_t module_inst = *(wasm_module_inst_t *)args; + uint32 buf_len; + 
+ bh_assert(argc == 1); + + buf_len = wasm_runtime_get_pgo_prof_data_size(module_inst); + args[0] = buf_len; +} + +static void +handle_cmd_get_pro_prof_buf_data(uint64 *args, int32 argc) +{ + uint64 *args_org = args; + wasm_module_inst_t module_inst = *(wasm_module_inst_t *)args++; + char *buf = *(char **)args++; + uint32 len = *(uint32 *)args++; + uint32 bytes_dumped; + + bh_assert(argc == 3); + + bytes_dumped = + wasm_runtime_dump_pgo_prof_data_to_buf(module_inst, buf, len); + args_org[0] = bytes_dumped; +} +#endif + void ecall_handle_command(unsigned cmd, unsigned char *cmd_buf, unsigned cmd_buf_size) @@ -647,6 +681,14 @@ ecall_handle_command(unsigned cmd, unsigned char *cmd_buf, case CMD_GET_VERSION: handle_cmd_get_version(args, argc); break; +#if WASM_ENABLE_STATIC_PGO != 0 + case CMD_GET_PGO_PROF_BUF_SIZE: + handle_cmd_get_pgo_prof_buf_size(args, argc); + break; + case CMD_DUMP_PGO_PROF_BUF_DATA: + handle_cmd_get_pro_prof_buf_data(args, argc); + break; +#endif default: LOG_ERROR("Unknown command %d\n", cmd); break; diff --git a/product-mini/platforms/linux-sgx/enclave-sample/Makefile b/product-mini/platforms/linux-sgx/enclave-sample/Makefile index b598aad54..402545621 100644 --- a/product-mini/platforms/linux-sgx/enclave-sample/Makefile +++ b/product-mini/platforms/linux-sgx/enclave-sample/Makefile @@ -15,6 +15,7 @@ WAMR_BUILD_SGX_IPFS = 0 WAMR_BUILD_LIB_RATS = 0 WAMR_BUILD_GLOBAL_HEAP_POOL = 0 WAMR_BUILD_GLOBAL_HEAP_SIZE = 10485760 +WAMR_BUILD_STATIC_PGO = 0 VMLIB_BUILD_DIR ?= $(CURDIR)/../build LIB_RATS_SRC ?= $(VMLIB_BUILD_DIR)/_deps/librats-build @@ -65,7 +66,7 @@ ifeq ($(WAMR_BUILD_LIB_RATS), 1) App_Include_Paths += -I$(LIB_RATS_INCLUDE_DIR) endif -App_C_Flags := $(SGX_COMMON_CFLAGS) -fPIC -Wno-attributes $(App_Include_Paths) +App_C_Flags := $(SGX_COMMON_CFLAGS) -fPIC -Wno-attributes $(App_Include_Paths) -DWASM_ENABLE_STATIC_PGO=$(WAMR_BUILD_STATIC_PGO) # Three configuration modes - Debug, prerelease, release # Debug - Macro DEBUG enabled. 
@@ -134,7 +135,7 @@ ifeq ($(WAMR_BUILD_LIB_RATS), 1) Enclave_Include_Paths += -I$(LIB_RATS_INCLUDE_DIR) -I$(SGX_SSL)/include endif -Enclave_C_Flags := $(SGX_COMMON_CFLAGS) -nostdinc -fvisibility=hidden -fpie -fstack-protector $(Enclave_Include_Paths) -DWASM_GLOBAL_HEAP_SIZE=$(WAMR_BUILD_GLOBAL_HEAP_SIZE) -DWASM_ENABLE_GLOBAL_HEAP_POOL=$(WAMR_BUILD_GLOBAL_HEAP_POOL) -DWASM_ENABLE_LIB_RATS=$(WAMR_BUILD_LIB_RATS) +Enclave_C_Flags := $(SGX_COMMON_CFLAGS) -nostdinc -fvisibility=hidden -fpie -fstack-protector $(Enclave_Include_Paths) -DWASM_GLOBAL_HEAP_SIZE=$(WAMR_BUILD_GLOBAL_HEAP_SIZE) -DWASM_ENABLE_GLOBAL_HEAP_POOL=$(WAMR_BUILD_GLOBAL_HEAP_POOL) -DWASM_ENABLE_LIB_RATS=$(WAMR_BUILD_LIB_RATS) -DWASM_ENABLE_STATIC_PGO=$(WAMR_BUILD_STATIC_PGO) ifeq ($(SPEC_TEST), 1) Enclave_C_Flags += -DWASM_ENABLE_SPEC_TEST=1 else diff --git a/product-mini/platforms/linux/CMakeLists.txt b/product-mini/platforms/linux/CMakeLists.txt index 4c6af78ea..13efe27a9 100644 --- a/product-mini/platforms/linux/CMakeLists.txt +++ b/product-mini/platforms/linux/CMakeLists.txt @@ -16,7 +16,7 @@ set (CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "") set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") set (CMAKE_C_STANDARD 99) -set (CMAKE_CXX_STANDARD 14) +set (CMAKE_CXX_STANDARD 17) # Set WAMR_BUILD_TARGET, currently values supported: # "X86_64", "AMD_64", "X86_32", "AARCH64[sub]", "ARM[sub]", "THUMB[sub]", @@ -135,24 +135,6 @@ if (WAMR_BUILD_TARGET MATCHES "X86_.*" OR WAMR_BUILD_TARGET STREQUAL "AMD_64") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mindirect-branch-register") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch-register") # UNDEFINED BEHAVIOR, refer to https://en.cppreference.com/w/cpp/language/ub - if(CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT WAMR_BUILD_JIT EQUAL 1) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined \ - -fno-sanitize=bounds,bounds-strict,alignment \ - -fno-sanitize-recover") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined \ - 
-fno-sanitize=bounds,bounds-strict,alignment \ - -fno-sanitize-recover") - endif() - else () - # UNDEFINED BEHAVIOR, refer to https://en.cppreference.com/w/cpp/language/ub - if(CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT WAMR_BUILD_JIT EQUAL 1) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined \ - -fno-sanitize=bounds,alignment \ - -fno-sanitize-recover") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined \ - -fno-sanitize=bounds,alignment \ - -fno-sanitize-recover") - endif() endif () endif () diff --git a/product-mini/platforms/posix/main.c b/product-mini/platforms/posix/main.c index 2e96ccddd..752d235e6 100644 --- a/product-mini/platforms/posix/main.c +++ b/product-mini/platforms/posix/main.c @@ -54,6 +54,14 @@ print_help() #if WASM_ENABLE_JIT != 0 printf(" --llvm-jit-size-level=n Set LLVM JIT size level, default is 3\n"); printf(" --llvm-jit-opt-level=n Set LLVM JIT optimization level, default is 3\n"); +#if defined(os_writegsbase) + printf(" --enable-segue[=] Enable using segment register GS as the base address of\n"); + printf(" linear memory, which may improve performance, flags can be:\n"); + printf(" i32.load, i64.load, f32.load, f64.load, v128.load,\n"); + printf(" i32.store, i64.store, f32.store, f64.store, v128.store\n"); + printf(" Use comma to separate, e.g. 
--enable-segue=i32.load,i64.store\n"); + printf(" and --enable-segue means all flags are added.\n"); +#endif #endif printf(" --repl Start a very simple REPL (read-eval-print-loop) mode\n" " that runs commands in the form of \"FUNC ARG...\"\n"); @@ -89,6 +97,9 @@ print_help() #if WASM_ENABLE_DEBUG_INTERP != 0 printf(" -g=ip:port Set the debug sever address, default is debug disabled\n"); printf(" if port is 0, then a random port will be used\n"); +#endif +#if WASM_ENABLE_STATIC_PGO != 0 + printf(" --gen-prof-file= Generate LLVM PGO (Profile-Guided Optimization) profile file\n"); #endif printf(" --version Show version information\n"); return 1; @@ -117,13 +128,13 @@ app_instance_func(wasm_module_inst_t module_inst, const char *func_name) } /** - * Split a space separated strings into an array of strings + * Split a string into an array of strings * Returns NULL on failure * Memory must be freed by caller * Based on: http://stackoverflow.com/a/11198630/471795 */ static char ** -split_string(char *str, int *count) +split_string(char *str, int *count, const char *delimer) { char **res = NULL, **res1; char *p; @@ -131,7 +142,7 @@ split_string(char *str, int *count) /* split string and append tokens to 'res' */ do { - p = strtok(str, " "); + p = strtok(str, delimer); str = NULL; res1 = res; res = (char **)realloc(res1, sizeof(char *) * (uint32)(idx + 1)); @@ -180,7 +191,7 @@ app_instance_repl(wasm_module_inst_t module_inst) printf("exit repl mode\n"); break; } - app_argv = split_string(cmd, &app_argc); + app_argv = split_string(cmd, &app_argc, " "); if (app_argv == NULL) { LOG_ERROR("Wasm prepare param failed: split string failed.\n"); break; @@ -195,6 +206,59 @@ app_instance_repl(wasm_module_inst_t module_inst) return NULL; } +#if WASM_ENABLE_JIT != 0 +static uint32 +resolve_segue_flags(char *str_flags) +{ + uint32 segue_flags = 0; + int32 flag_count, i; + char **flag_list; + + flag_list = split_string(str_flags, &flag_count, ","); + if (flag_list) { + for (i = 0; i < 
flag_count; i++) { + if (!strcmp(flag_list[i], "i32.load")) { + segue_flags |= 1 << 0; + } + else if (!strcmp(flag_list[i], "i64.load")) { + segue_flags |= 1 << 1; + } + else if (!strcmp(flag_list[i], "f32.load")) { + segue_flags |= 1 << 2; + } + else if (!strcmp(flag_list[i], "f64.load")) { + segue_flags |= 1 << 3; + } + else if (!strcmp(flag_list[i], "v128.load")) { + segue_flags |= 1 << 4; + } + else if (!strcmp(flag_list[i], "i32.store")) { + segue_flags |= 1 << 8; + } + else if (!strcmp(flag_list[i], "i64.store")) { + segue_flags |= 1 << 9; + } + else if (!strcmp(flag_list[i], "f32.store")) { + segue_flags |= 1 << 10; + } + else if (!strcmp(flag_list[i], "f64.store")) { + segue_flags |= 1 << 11; + } + else if (!strcmp(flag_list[i], "v128.store")) { + segue_flags |= 1 << 12; + } + else { + /* invalid flag */ + segue_flags = (uint32)-1; + break; + } + } + free(flag_list); + } + return segue_flags; +} +#endif /* end of WASM_ENABLE_JIT != 0 */ + #if WASM_ENABLE_LIBC_WASI != 0 static bool validate_env_str(char *env) @@ -352,6 +416,44 @@ moudle_destroyer(uint8 *buffer, uint32 size) static char global_heap_buf[WASM_GLOBAL_HEAP_SIZE] = { 0 }; #endif +#if WASM_ENABLE_STATIC_PGO != 0 +static void +dump_pgo_prof_data(wasm_module_inst_t module_inst, const char *path) +{ + char *buf; + uint32 len; + FILE *file; + + if (!(len = wasm_runtime_get_pgo_prof_data_size(module_inst))) { + printf("failed to get LLVM PGO profile data size\n"); + return; + } + + if (!(buf = wasm_runtime_malloc(len))) { + printf("allocate memory failed\n"); + return; + } + + if (len != wasm_runtime_dump_pgo_prof_data_to_buf(module_inst, buf, len)) { + printf("failed to dump LLVM PGO profile data\n"); + wasm_runtime_free(buf); + return; + } + + if (!(file = fopen(path, "wb"))) { + printf("failed to create file %s", path); + wasm_runtime_free(buf); + return; + } + fwrite(buf, len, 1, file); + fclose(file); + + wasm_runtime_free(buf); + + printf("LLVM raw profile file %s was generated.\n", path); +} 
+#endif + int main(int argc, char *argv[]) { @@ -367,6 +469,7 @@ main(int argc, char *argv[]) #if WASM_ENABLE_JIT != 0 uint32 llvm_jit_size_level = 3; uint32 llvm_jit_opt_level = 3; + uint32 segue_flags = 0; #endif wasm_module_t wasm_module = NULL; wasm_module_inst_t wasm_module_inst = NULL; @@ -398,6 +501,9 @@ main(int argc, char *argv[]) char *ip_addr = NULL; int instance_port = 0; #endif +#if WASM_ENABLE_STATIC_PGO != 0 + const char *gen_prof_file = NULL; +#endif /* Process options. */ for (argc--, argv++; argc > 0 && argv[0][0] == '-'; argc--, argv++) { @@ -487,7 +593,16 @@ main(int argc, char *argv[]) llvm_jit_opt_level = 3; } } -#endif + else if (!strcmp(argv[0], "--enable-segue")) { + /* all flags are enabled */ + segue_flags = 0x1F1F; + } + else if (!strncmp(argv[0], "--enable-segue=", 15)) { + segue_flags = resolve_segue_flags(argv[0] + 15); + if (segue_flags == (uint32)-1) + return print_help(); + } +#endif /* end of WASM_ENABLE_JIT != 0 */ #if WASM_ENABLE_LIBC_WASI != 0 else if (!strncmp(argv[0], "--dir=", 6)) { if (argv[0][6] == '\0') @@ -592,6 +707,13 @@ main(int argc, char *argv[]) return print_help(); ip_addr = argv[0] + 3; } +#endif +#if WASM_ENABLE_STATIC_PGO != 0 + else if (!strncmp(argv[0], "--gen-prof-file=", 16)) { + if (argv[0][16] == '\0') + return print_help(); + gen_prof_file = argv[0] + 16; + } #endif else if (!strncmp(argv[0], "--version", 9)) { uint32 major, minor, patch; @@ -632,6 +754,7 @@ main(int argc, char *argv[]) #if WASM_ENABLE_JIT != 0 init_args.llvm_jit_size_level = llvm_jit_size_level; init_args.llvm_jit_opt_level = llvm_jit_opt_level; + init_args.segue_flags = segue_flags; #endif #if WASM_ENABLE_DEBUG_INTERP != 0 @@ -754,6 +877,12 @@ main(int argc, char *argv[]) } #endif +#if WASM_ENABLE_STATIC_PGO != 0 && WASM_ENABLE_AOT != 0 + if (get_package_type(wasm_file_buf, wasm_file_size) == Wasm_Module_AoT + && gen_prof_file) + dump_pgo_prof_data(wasm_module_inst, gen_prof_file); +#endif + #if WASM_ENABLE_DEBUG_INTERP != 0 fail4: 
#endif diff --git a/product-mini/platforms/windows/CMakeLists.txt b/product-mini/platforms/windows/CMakeLists.txt index 35b22a608..db88f42bc 100644 --- a/product-mini/platforms/windows/CMakeLists.txt +++ b/product-mini/platforms/windows/CMakeLists.txt @@ -102,7 +102,6 @@ include (${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake) add_library(vmlib ${WAMR_RUNTIME_LIB_SOURCE}) #set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWIN32_LEAN_AND_MEAN") -set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_WINSOCK_DEPRECATED_NO_WARNINGS") if (NOT MINGW) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /SAFESEH:NO") set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /SAFESEH:NO") diff --git a/samples/mem_allocator/CMakeLists.txt b/samples/mem_allocator/CMakeLists.txt new file mode 100644 index 000000000..f157dfbde --- /dev/null +++ b/samples/mem_allocator/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright (C) 2023 Midokura Japan KK. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required(VERSION 3.0) +project(mem_allocator_create) + +string (TOLOWER ${CMAKE_HOST_SYSTEM_NAME} WAMR_BUILD_PLATFORM) +if(APPLE) + add_definitions(-DBH_PLATFORM_DARWIN) +endif() + +set(WAMR_BUILD_INTERP 1) +set(WAMR_BUILD_LIBC_BUILTIN 0) + +set(WAMR_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../..) +include(${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake) + +add_library(vmlib ${WAMR_RUNTIME_LIB_SOURCE}) + +add_executable(mem_alloc_test main.c) + +target_link_libraries(mem_alloc_test vmlib -lm -lpthread) diff --git a/samples/mem_allocator/main.c b/samples/mem_allocator/main.c new file mode 100644 index 000000000..a309d2e62 --- /dev/null +++ b/samples/mem_allocator/main.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2023 Midokura Japan KK. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include +#include +#include + +#include "mem_alloc.h" + +char store[1000]; + +int +main(int argc, char **argv) +{ + mem_allocator_t a = mem_allocator_create(store, sizeof(store)); + uint8_t *p; + uint8_t *p2; + + p = mem_allocator_malloc(a, 256); + printf("%p\n", p); + if (p == NULL) { + exit(1); + } + p = mem_allocator_realloc(a, p, 256 + 12); + printf("%p\n", p); + if (p == NULL) { + exit(1); + } + + /* + * write some values to confuse the ems allocator. + * + * hmu = p + 256 + * hmu_set_ut(hmu, HMU_FC) + * hmu_set_size(hmu, 256) + * hmu_set_free_size(hmu) + */ + *(uint32_t *)(p + 256) = (1 << 30) | 0x20; + *(uint32_t *)(p + 256 + 12 - 4) = 12; + + p2 = mem_allocator_malloc(a, 256); + printf("%p\n", p2); + if (p2 == NULL) { + exit(1); + } + mem_allocator_free(a, p2); + + p2 = mem_allocator_malloc(a, 256); + printf("%p\n", p2); + if (p2 == NULL) { + exit(1); + } + mem_allocator_free(a, p2); + + mem_allocator_free(a, p); +} diff --git a/samples/ref-types/src/hello.c b/samples/ref-types/src/hello.c index db2f7997f..0ee1aee88 100644 --- a/samples/ref-types/src/hello.c +++ b/samples/ref-types/src/hello.c @@ -142,8 +142,8 @@ set_and_cmp(wasm_exec_env_t exec_env, wasm_module_inst_t inst, int32 i, wasm_set_externref(exec_env, inst, i, externref); local_set_externref(i, externref); - wasm_get_externref(exec_env, inst, 0, &wasm_externref); - if (!local_chk_externref(exec_env, 0, wasm_externref)) { + wasm_get_externref(exec_env, inst, i, &wasm_externref); + if (!local_chk_externref(exec_env, i, wasm_externref)) { printf("#%d, In host language world Wasm Externref 0x%lx Vs. 
Native " "Externref 0x%lx FAILED\n", i, wasm_externref, externref); diff --git a/samples/wasm-c-api/CMakeLists.txt b/samples/wasm-c-api/CMakeLists.txt index c528fe16d..4dab0185c 100644 --- a/samples/wasm-c-api/CMakeLists.txt +++ b/samples/wasm-c-api/CMakeLists.txt @@ -16,7 +16,7 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) ################ runtime settings ################ string (TOLOWER ${CMAKE_HOST_SYSTEM_NAME} WAMR_BUILD_PLATFORM) @@ -87,15 +87,6 @@ endif() set(WAMR_ROOT_DIR ${CMAKE_CURRENT_LIST_DIR}/../..) include (${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake) -if (NOT DEFINED SANITIZER) - set(SANITIZER "") -elseif (SANITIZER STREQUAL "ubsan") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O2 -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=alignment" ) - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined") -elseif (NOT (SANITIZER STREQUAL "") ) - message(SEND_ERROR "Unsupported sanitizer: ${SANITIZER}") -endif() - add_library(vmlib STATIC ${WAMR_RUNTIME_LIB_SOURCE}) if (MSVC) target_compile_definitions(vmlib PRIVATE WASM_API_EXTERN=) diff --git a/test-tools/wamr-ide/VSCode-Extension/package.json b/test-tools/wamr-ide/VSCode-Extension/package.json index bb8a5cee1..dfe37961b 100644 --- a/test-tools/wamr-ide/VSCode-Extension/package.json +++ b/test-tools/wamr-ide/VSCode-Extension/package.json @@ -6,7 +6,7 @@ }, "displayName": "WAMR-IDE", "description": "An Integrated Development Environment for WASM", - "version": "1.1.2", + "version": "1.2.1", "engines": { "vscode": "^1.59.0" }, diff --git a/test-tools/wamr-ide/VSCode-Extension/src/debugConfigurationProvider.ts b/test-tools/wamr-ide/VSCode-Extension/src/debugConfigurationProvider.ts index 6294f7efe..e7b42bf03 100644 --- a/test-tools/wamr-ide/VSCode-Extension/src/debugConfigurationProvider.ts +++ b/test-tools/wamr-ide/VSCode-Extension/src/debugConfigurationProvider.ts @@ -7,52 
+7,33 @@ import * as vscode from 'vscode'; import * as os from 'os'; export class WasmDebugConfigurationProvider - implements vscode.DebugConfigurationProvider -{ - /* default port set as 1234 */ - private port = 1234; - private hostPath!: string; - private providerPromise: Thenable | undefined = - undefined; + implements vscode.DebugConfigurationProvider { + private wasmDebugConfig = { + type: 'wamr-debug', + name: 'Attach', + request: 'attach', + stopOnEntry: true, + initCommands: os.platform() === 'win32' || os.platform() === 'darwin' ? + /* linux and windows has different debug configuration */ + ['platform select remote-linux'] : + undefined, + attachCommands: [ + /* default port 1234 */ + 'process connect -p wasm connect://127.0.0.1:1234', + ] + }; - private wasmDebugConfig!: vscode.DebugConfiguration; + public resolveDebugConfiguration( + _: vscode.WorkspaceFolder | undefined, + debugConfiguration: vscode.DebugConfiguration, + ): vscode.ProviderResult { - public resolveDebugConfiguration(): - | Thenable - | undefined { - if (!this.providerPromise) { - this.providerPromise = Promise.resolve(this.wasmDebugConfig); - return this.providerPromise; - } - return this.providerPromise; - } + this.wasmDebugConfig = { + ...this.wasmDebugConfig, + ...debugConfiguration + }; - public setDebugConfig(hostPath: string, port: number): void { - this.port = port; - this.hostPath = hostPath; - /* linux and windows has different debug configuration */ - if (os.platform() === 'win32' || os.platform() === 'darwin') { - this.wasmDebugConfig = { - type: 'wamr-debug', - name: 'Attach', - request: 'attach', - ['stopOnEntry']: true, - ['initCommands']: ['platform select remote-linux'], - ['attachCommands']: [ - 'process connect -p wasm connect://127.0.0.1:' + port + '', - ], - }; - } else if (os.platform() === 'linux') { - this.wasmDebugConfig = { - type: 'wamr-debug', - name: 'Attach', - request: 'attach', - ['stopOnEntry']: true, - ['attachCommands']: [ - 'process connect -p wasm 
connect://127.0.0.1:' + port + '', - ], - }; - } + return this.wasmDebugConfig; } public getDebugConfig(): vscode.DebugConfiguration { diff --git a/test-tools/wamr-ide/VSCode-Extension/src/extension.ts b/test-tools/wamr-ide/VSCode-Extension/src/extension.ts index 9d979b7ac..523b26b83 100644 --- a/test-tools/wamr-ide/VSCode-Extension/src/extension.ts +++ b/test-tools/wamr-ide/VSCode-Extension/src/extension.ts @@ -171,7 +171,6 @@ export async function activate(context: vscode.ExtensionContext) { /* register debug configuration */ wasmDebugConfigProvider = new WasmDebugConfigurationProvider(); - wasmDebugConfigProvider.setDebugConfig(currentPrjDir, 1234); vscode.debug.registerDebugConfigurationProvider( 'wamr-debug', diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md new file mode 100644 index 000000000..2112829e0 --- /dev/null +++ b/tests/benchmarks/README.md @@ -0,0 +1,62 @@ +# WAMR test benchmarks + +This folder contains test benchmarks for wamr. + +## Build and Run + +Refer to the `README.md` under each folder for how to build and run the benchmark. + +## Install `llvm-profdata` + +The tool `llvm-profdata` is used when running the `test_pgo.sh` script under the benchmark folder. There are two ways to install it: + +1. Refer to https://apt.llvm.org/, e.g. in Ubuntu 20.04, add the lines below to /etc/apt/sources.list + +```bash +deb http://apt.llvm.org/focal/ llvm-toolchain-focal main +deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal main +# 15 +deb http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main +deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main +``` + +Then run `sudo apt update`, `sudo apt install llvm`. And after installing: + +```bash +cd /usr/bin +sudo ln -s llvm-profdata-15 llvm-profdata +``` + +2. 
Build manually + +```bash +git clone --depth 1 --branch release/15.x https://github.com/llvm/llvm-project.git +cd llvm-project +mkdir build && cd build +cmake ../llvm \ + -DCMAKE_BUILD_TYPE:STRING="Release" \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DLLVM_APPEND_VC_REV:BOOL=ON \ + -DLLVM_BUILD_EXAMPLES:BOOL=OFF \ + -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \ + -DLLVM_BUILD_TESTS:BOOL=OFF \ + -DLLVM_CCACHE_BUILD:BOOL=ON \ + -DLLVM_ENABLE_BINDINGS:BOOL=OFF \ + -DLLVM_ENABLE_IDE:BOOL=OFF \ + -DLLVM_ENABLE_LIBEDIT=OFF \ + -DLLVM_ENABLE_TERMINFO:BOOL=OFF \ + -DLLVM_ENABLE_ZLIB:BOOL=ON \ + -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \ + -DLLVM_INCLUDE_DOCS:BOOL=OFF \ + -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \ + -DLLVM_INCLUDE_UTILS:BOOL=OFF \ + -DLLVM_INCLUDE_TESTS:BOOL=OFF \ + -DLLVM_BUILD_TESTS:BOOL=OFF \ + -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \ + -DLLVM_ENABLE_LIBXML2:BOOL=OFF \ + -DLLVM_TARGETS_TO_BUILD:STRING="X86" \ + -DLLVM_INCLUDE_TOOLS:BOOL=ON \ + -G'Ninja' +ninja -j 8 +# tool `llvm-profdata` is generated under this folder. +``` diff --git a/tests/benchmarks/coremark/README.md b/tests/benchmarks/coremark/README.md index 1631cc5c0..4e88069f8 100644 --- a/tests/benchmarks/coremark/README.md +++ b/tests/benchmarks/coremark/README.md @@ -17,3 +17,9 @@ And then run `./build.sh` to build the source code, file `coremark.exe`, `corema # Running Run `./run.sh` to test the benchmark, the native mode, iwasm aot mode and iwasm interpreter mode will be tested respectively. + +Run `./test_pgo.sh` to test the benchmark with AOT static PGO (Profile-Guided Optimization) enabled, please refer [here](../README.md#install-llvm-profdata) to install tool `llvm-profdata` and build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`. + +- For Linux, build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`, then run `./test_pgo.sh` to test the benchmark with AOT static PGO (Profile-Guided Optimization) enabled. 
+ +- For Linux-sgx, similarly, build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`, then `make` in the directory `enclave-sample`. And run `./test_pgo.sh --sgx` to test the benchmark. diff --git a/tests/benchmarks/coremark/build.sh b/tests/benchmarks/coremark/build.sh index 14c179ce5..ecada10d4 100755 --- a/tests/benchmarks/coremark/build.sh +++ b/tests/benchmarks/coremark/build.sh @@ -3,6 +3,8 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +PLATFORM=$(uname -s | tr A-Z a-z) + WAMRC="../../../wamr-compiler/build/wamrc" if [ ! -d coremark ]; then @@ -32,4 +34,9 @@ cd .. echo "Compile coremark.wasm to coremark.aot .." ${WAMRC} -o coremark.aot coremark.wasm +if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile coremark.wasm to coremark_segue.aot .." + ${WAMRC} --enable-segue -o coremark_segue.aot coremark.wasm +fi + echo "Done" diff --git a/tests/benchmarks/coremark/run.sh b/tests/benchmarks/coremark/run.sh index a1ea7f6b7..0d308bb68 100755 --- a/tests/benchmarks/coremark/run.sh +++ b/tests/benchmarks/coremark/run.sh @@ -3,14 +3,21 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -IWASM="../../../product-mini/platforms/linux/build/iwasm" +PLATFORM=$(uname -s | tr A-Z a-z) + +IWASM="../../../product-mini/platforms/${PLATFORM}/build/iwasm" WAMRC="../../../wamr-compiler/build/wamrc" echo "Run coremark with native .." ./coremark.exe -echo "Run coremark with iwasm mode .." +echo "Run coremark with iwasm aot mode .." ${IWASM} coremark.aot -echo "Run coremakr with iwasm interpreter .." +if [[ ${PLATFORM} == "linux" ]]; then + echo "Run coremark with iwasm aot-segue mode .." + ${IWASM} coremark_segue.aot +fi + +echo "Run coremark with iwasm interpreter mode .." 
${IWASM} coremark.wasm diff --git a/tests/benchmarks/coremark/test_pgo.sh b/tests/benchmarks/coremark/test_pgo.sh new file mode 100755 index 000000000..1c631312e --- /dev/null +++ b/tests/benchmarks/coremark/test_pgo.sh @@ -0,0 +1,55 @@ +#!/bin/sh + +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +PLATFORM=$(uname -s | tr A-Z a-z) + +if [ "$1" = "--sgx" ] && [ "$PLATFORM" = "linux" ]; then + IWASM="../../../product-mini/platforms/${PLATFORM}-sgx/enclave-sample/iwasm" + WAMRC="../../../wamr-compiler/build/wamrc -sgx" +else + IWASM="../../../product-mini/platforms/${PLATFORM}/build/iwasm" + WAMRC="../../../wamr-compiler/build/wamrc" +fi + +if [ ! -e "coremark.wasm" ]; then + echo "coremark.wasm doesn't exist, please run build.sh first" + exit 1 # missing prerequisite: report failure instead of the status of the echo above +fi + +echo "" +echo "Compile coremark.wasm to coremark.aot .." +${WAMRC} -o coremark.aot coremark.wasm + +echo "" +echo "Compile coremark.wasm to coremark_pgo.aot .." +${WAMRC} --enable-llvm-pgo -o coremark_pgo.aot coremark.wasm + +echo "" +echo "Run coremark_pgo.aot to generate the raw profile data .." +${IWASM} --gen-prof-file=coremark.profraw coremark_pgo.aot + +echo "" +echo "Merge the raw profile data to coremark.profdata .." +rm -f coremark.profdata && llvm-profdata merge -output=coremark.profdata coremark.profraw + +echo "" +echo "Compile coremark.wasm to coremark_opt.aot with the profile data .." 
+${WAMRC} --use-prof-file=coremark.profdata -o coremark_opt.aot coremark.wasm + +echo "" +echo "Run the coremark native" +./coremark.exe + +echo "" +echo "Run the original aot file coremark.aot" +${IWASM} coremark.aot + +echo "" +echo "Run the PGO optimized aot file coremark_opt.aot" +${IWASM} coremark_opt.aot + +# Show the profile data: +# llvm-profdata show --all-functions --detailed-summary --binary-ids --counts \ +# --hot-func-list --memop-sizes --show-prof-sym-list coremark.profraw diff --git a/tests/benchmarks/dhrystone/LICENSE b/tests/benchmarks/dhrystone/LICENSE new file mode 100644 index 000000000..9b3a7b2ce --- /dev/null +++ b/tests/benchmarks/dhrystone/LICENSE @@ -0,0 +1,7 @@ +Dhrystone +------------------------------------------------------------------------------ +There is no explicit license defined. They were originally +written in ADA by Reinhold P. Weicker and translated to C by Rick Richardson . + +The source obtained from the following site: +https://fossies.org/linux/privat/old/dhrystone-2.1.tar.gz diff --git a/tests/benchmarks/dhrystone/build.sh b/tests/benchmarks/dhrystone/build.sh new file mode 100755 index 000000000..eea33d584 --- /dev/null +++ b/tests/benchmarks/dhrystone/build.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Copyright (C) 2019 Intel Corporation. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +PLATFORM=$(uname -s | tr A-Z a-z) + +WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc + +echo "===> compile dhrystone src to dhrystone_native" +gcc -O3 -o dhrystone_native src/dhry_1.c src/dhry_2.c -I include + +echo "===> compile dhrystone src to dhrystone.wasm" +/opt/wasi-sdk/bin/clang -O3 \ + -o dhrystone.wasm src/dhry_1.c src/dhry_2.c -I include \ + -Wl,--export=__heap_base -Wl,--export=__data_end + +echo "===> compile dhrystone.wasm to dhrystone.aot" +${WAMRC_CMD} -o dhrystone.aot dhrystone.wasm + +if [[ ${PLATFORM} == "linux" ]]; then + echo "===> compile dhrystone.wasm to dhrystone_segue.aot" + ${WAMRC_CMD} --enable-segue -o dhrystone_segue.aot dhrystone.wasm +fi diff --git a/tests/benchmarks/dhrystone/include/dhry.h b/tests/benchmarks/dhrystone/include/dhry.h new file mode 100644 index 000000000..0eb5ec64c --- /dev/null +++ b/tests/benchmarks/dhrystone/include/dhry.h @@ -0,0 +1,306 @@ +/* + ************************************************************************** + * DHRYSTONE 2.1 BENCHMARK PC VERSION + ************************************************************************** + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry.h (part 1 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. Weicker + * Siemens AG, AUT E 51 + * Postfach 3220 + * 8520 Erlangen + * Germany (West) + * Phone: [+49]-9131-7-20330 + * (8-17 Central European Time) + * Usenet: ..!mcsun!unido!estevax!weicker + * + * Original Version (in Ada) published in + * "Communications of the ACM" vol. 27., no. 10 (Oct. 1984), + * pp. 1013 - 1030, together with the statistics + * on which the distribution of statements etc. is based. 
+ * + * In this C version, the following C library functions are used: + * - strcpy, strcmp (inside the measurement loop) + * - printf, scanf (outside the measurement loop) + * In addition, Berkeley UNIX system calls "times ()" or "time ()" + * are used for execution time measurement. For measurements + * on other systems, these calls have to be changed. + * + * Collection of Results: + * Reinhold Weicker (address see above) and + * + * Rick Richardson + * PC Research. Inc. + * 94 Apple Orchard Drive + * Tinton Falls, NJ 07724 + * Phone: (201) 389-8963 (9-17 EST) + * Usenet: ...!uunet!pcrat!rick + * + * Please send results to Rick Richardson and/or Reinhold Weicker. + * Complete information should be given on hardware and software used. + * Hardware information includes: Machine type, CPU, type and size + * of caches; for microprocessors: clock frequency, memory speed + * (number of wait states). + * Software information includes: Compiler (and runtime library) + * manufacturer and version, compilation switches, OS version. + * The Operating System version may give an indication about the + * compiler; Dhrystone itself performs no OS calls in the measurement + * loop. + * + * The complete output generated by the program should be mailed + * such that at least some checks for correctness can be made. + * + ************************************************************************** + * + * This version has changes made by Roy Longbottom to conform to a common + * format for a series of standard benchmarks for PCs: + * + * Running time greater than 5 seconds due to inaccuracy of the PC clock. + * + * Automatic adjustment of run time, no manually inserted parameters. + * + * Initial display of calibration times to confirm linearity. + * + * Display of results within one screen (or at a slow speed as the test + * progresses) so that it can be seen to have run successfully. + * + * Facilities to type in details of system used etc. 
+ * + * All results and details appended to a results file. + * + * + * Roy Longbottom + * 101323.2241@compuserve.com + * + ************************************************************************** + * + * For details of history, changes, other defines, benchmark construction + * statistics see official versions from ftp.nosc.mil/pub/aburto where + * the latest table of results (dhry.tbl) are available. See also + * netlib@ornl.gov + * + ************************************************************************** + * + * Defines: The following "Defines" are possible: + * -DREG=register (default: Not defined) + * As an approximation to what an average C programmer + * might do, the "register" storage class is applied + * (if enabled by -DREG=register) + * - for local variables, if they are used (dynamically) + * five or more times + * - for parameters if they are used (dynamically) + * six or more times + * Note that an optimal "register" strategy is + * compiler-dependent, and that "register" declarations + * do not necessarily lead to faster execution. + * -DNOSTRUCTASSIGN (default: Not defined) + * Define if the C compiler does not support + * assignment of structures. + * -DNOENUMS (default: Not defined) + * Define if the C compiler does not support + * enumeration types. 
+ *************************************************************************** + * + * Compilation model and measurement (IMPORTANT): + * + * This C version of Dhrystone consists of three files: + * - dhry.h (this file, containing global definitions and comments) + * - dhry_1.c (containing the code corresponding to Ada package Pack_1) + * - dhry_2.c (containing the code corresponding to Ada package Pack_2) + * + * The following "ground rules" apply for measurements: + * - Separate compilation + * - No procedure merging + * - Otherwise, compiler optimizations are allowed but should be indicated + * - Default results are those without register declarations + * See the companion paper "Rationale for Dhrystone Version 2" for a more + * detailed discussion of these ground rules. + * + * For 16-Bit processors (e.g. 80186, 80286), times for all compilation + * models ("small", "medium", "large" etc.) should be given if possible, + * together with a definition of these models for the compiler system used. + * + ************************************************************************** + * Examples of Pentium Results + * + * Dhrystone Benchmark Version 2.1 (Language: C) + * + * Month run 4/1996 + * PC model Escom + * CPU Pentium + * Clock MHz 100 + * Cache 256K + * Options Neptune chipset + * OS/DOS Windows 95 + * Compiler Watcom C/ C++ 10.5 Win386 + * OptLevel -otexan -zp8 -fp5 -5r + * Run by Roy Longbottom + * From UK + * Mail 101323.2241@compuserve.com + * + * Final values (* implementation-dependent): + * + * Int_Glob: O.K. 5 + * Bool_Glob: O.K. 1 + * Ch_1_Glob: O.K. A + * Ch_2_Glob: O.K. B + * Arr_1_Glob[8]: O.K. 7 + * Arr_2_Glob8/7: O.K. 1600010 + * Ptr_Glob-> + * Ptr_Comp: * 98008 + * Discr: O.K. 0 + * Enum_Comp: O.K. 2 + * Int_Comp: O.K. 17 + * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING + * Next_Ptr_Glob-> + * Ptr_Comp: * 98008 same as above + * Discr: O.K. 0 + * Enum_Comp: O.K. 1 + * Int_Comp: O.K. 18 + * Str_Comp: O.K. 
DHRYSTONE PROGRAM, SOME STRING + * Int_1_Loc: O.K. 5 + * Int_2_Loc: O.K. 13 + * Int_3_Loc: O.K. 7 + * Enum_Loc: O.K. 1 + * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING + * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING + * + * Register option Selected. + * + * Microseconds 1 loop: 4.53 + * Dhrystones / second: 220690 + * VAX MIPS rating: 125.61 + * + * + * Dhrystone Benchmark Version 2.1 (Language: C) + * + * Month run 4/1996 + * PC model Escom + * CPU Pentium + * Clock MHz 100 + * Cache 256K + * Options Neptune chipset + * OS/DOS Windows 95 + * Compiler Watcom C/ C++ 10.5 Win386 + * OptLevel No optimisation + * Run by Roy Longbottom + * From UK + * Mail 101323.2241@compuserve.com + * + * Final values (* implementation-dependent): + * + * Int_Glob: O.K. 5 + * Bool_Glob: O.K. 1 + * Ch_1_Glob: O.K. A + * Ch_2_Glob: O.K. B + * Arr_1_Glob[8]: O.K. 7 + * Arr_2_Glob8/7: O.K. 320010 + * Ptr_Glob-> + * Ptr_Comp: * 98004 + * Discr: O.K. 0 + * Enum_Comp: O.K. 2 + * Int_Comp: O.K. 17 + * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING + * Next_Ptr_Glob-> + * Ptr_Comp: * 98004 same as above + * Discr: O.K. 0 + * Enum_Comp: O.K. 1 + * Int_Comp: O.K. 18 + * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING + * Int_1_Loc: O.K. 5 + * Int_2_Loc: O.K. 13 + * Int_3_Loc: O.K. 7 + * Enum_Loc: O.K. 1 + * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING + * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING + * + * Register option Not selected. 
+ * + * Microseconds 1 loop: 20.06 + * Dhrystones / second: 49844 + * VAX MIPS rating: 28.37 + * + ************************************************************************** + */ + +/* Compiler and system dependent definitions: */ + +#ifndef TIME +#define TIMES +#endif +/* Use times(2) time function unless */ +/* explicitly defined otherwise */ + +#ifdef TIMES +/* #include + #include */ +/* for "times" */ +#endif + +#define Mic_secs_Per_Second 1000000.0 +/* Berkeley UNIX C returns process times in seconds/HZ */ + +#ifdef NOSTRUCTASSIGN +#define structassign(d, s) memcpy(&(d), &(s), sizeof(d)) +#else +#define structassign(d, s) d = s +#endif + +#ifdef NOENUM +#define Ident_1 0 +#define Ident_2 1 +#define Ident_3 2 +#define Ident_4 3 +#define Ident_5 4 +typedef int Enumeration; +#else +typedef enum { Ident_1, Ident_2, Ident_3, Ident_4, Ident_5 } Enumeration; +#endif +/* for boolean and enumeration types in Ada, Pascal */ + +/* General definitions: */ + +#include +#include + +/* for strcpy, strcmp */ + +#define Null 0 +/* Value of a Null pointer */ +#define true 1 +#define false 0 + +typedef int One_Thirty; +typedef int One_Fifty; +typedef char Capital_Letter; +typedef int Boolean; +typedef char Str_30[31]; +typedef int Arr_1_Dim[50]; +typedef int Arr_2_Dim[50][50]; + +typedef struct record { + struct record *Ptr_Comp; + Enumeration Discr; + union { + struct { + Enumeration Enum_Comp; + int Int_Comp; + char Str_Comp[31]; + } var_1; + struct { + Enumeration E_Comp_2; + char Str_2_Comp[31]; + } var_2; + struct { + char Ch_1_Comp; + char Ch_2_Comp; + } var_3; + } variant; +} Rec_Type, *Rec_Pointer; diff --git a/tests/benchmarks/dhrystone/run.sh b/tests/benchmarks/dhrystone/run.sh new file mode 100755 index 000000000..a9ac1d0b5 --- /dev/null +++ b/tests/benchmarks/dhrystone/run.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Copyright (C) 2019 Intel Corporation. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +PLATFORM=$(uname -s | tr A-Z a-z) + +readonly IWASM_CMD="../../../product-mini/platforms/${PLATFORM}/build/iwasm" + +echo "============> run dhrystone native" +./dhrystone_native + +echo "============> run dhrystone.aot" +${IWASM_CMD} dhrystone.aot + +if [[ ${PLATFORM} == "linux" ]]; then + echo "============> run dhrystone_segue.aot" + ${IWASM_CMD} dhrystone_segue.aot +fi diff --git a/tests/benchmarks/dhrystone/src/dhry_1.c b/tests/benchmarks/dhrystone/src/dhry_1.c new file mode 100644 index 000000000..92f6e7e85 --- /dev/null +++ b/tests/benchmarks/dhrystone/src/dhry_1.c @@ -0,0 +1,485 @@ +/* + ************************************************************************* + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry_1.c (part 2 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. Weicker + * + ************************************************************************* + */ + +#include +#include +#include +#include "dhry.h" + +/* Global Variables: */ + +Rec_Pointer Ptr_Glob, Next_Ptr_Glob; +int Int_Glob; +Boolean Bool_Glob; +char Ch_1_Glob, Ch_2_Glob; +int Arr_1_Glob[50]; +int Arr_2_Glob[50][50]; + +Enumeration +Func_1(Capital_Letter Ch_1_Par_Val, Capital_Letter Ch_2_Par_Val); +/* +forward declaration necessary since Enumeration may not simply be int +*/ + +#ifndef ROPT +#define REG +/* REG becomes defined as empty */ +/* i.e. 
no register variables */ +#else +#define REG register +#endif + +void +Proc_1(REG Rec_Pointer Ptr_Val_Par); +void +Proc_2(One_Fifty *Int_Par_Ref); +void +Proc_3(Rec_Pointer *Ptr_Ref_Par); +void +Proc_4(); +void +Proc_5(); +void +Proc_6(Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par); +void +Proc_7(One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, + One_Fifty *Int_Par_Ref); +void +Proc_8(Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, int Int_1_Par_Val, + int Int_2_Par_Val); + +Boolean +Func_2(Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref); + +/* variables for time measurement: */ + +#define Too_Small_Time 2 +/* Measurements should last at least 2 seconds */ + +#define BILLION 1000000000L +#define MILLION 1000000 +struct timespec Begin_Time, End_Time; +double User_Time; + +double Microseconds, Dhrystones_Per_Second, Vax_Mips; + +/* end of variables for time measurement */ + +int +main(int argc, char *argv[]) +/*****/ + +/* main program, corresponds to procedures */ +/* Main and Proc_0 in the Ada version */ +{ + One_Fifty Int_1_Loc; + REG One_Fifty Int_2_Loc; + One_Fifty Int_3_Loc; + REG char Ch_Index; + Enumeration Enum_Loc; + Str_30 Str_1_Loc; + Str_30 Str_2_Loc; + REG int Run_Index; + REG int Number_Of_Runs; + int endit, count = 10; + char general[9][80] = { " " }; + + /*********************************************************************** + * Change for compiler and optimisation used * + ***********************************************************************/ + + Next_Ptr_Glob = (Rec_Pointer)malloc(sizeof(Rec_Type)); + Ptr_Glob = (Rec_Pointer)malloc(sizeof(Rec_Type)); + + Ptr_Glob->Ptr_Comp = Next_Ptr_Glob; + Ptr_Glob->Discr = Ident_1; + Ptr_Glob->variant.var_1.Enum_Comp = Ident_3; + Ptr_Glob->variant.var_1.Int_Comp = 40; + strcpy(Ptr_Glob->variant.var_1.Str_Comp, "DHRYSTONE PROGRAM, SOME STRING"); + strcpy(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING"); + + Arr_2_Glob[8][7] = 10; + /* Was missing in published program. 
Without this statement, */ + /* Arr_2_Glob [8][7] would have an undefined value. */ + /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */ + /* overflow may occur for this array element. */ + + printf("\n"); + printf("Dhrystone Benchmark, Version 2.1 (Language: C or C++)\n"); + printf("\n"); + + Number_Of_Runs = 5000; + + do { + + Number_Of_Runs = Number_Of_Runs * 2; + count = count - 1; + Arr_2_Glob[8][7] = 10; + + /***************/ + /* Start timer */ + /***************/ + + clock_gettime(CLOCK_MONOTONIC, &Begin_Time); + + for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index) { + + Proc_5(); + Proc_4(); + /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */ + Int_1_Loc = 2; + Int_2_Loc = 3; + strcpy(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING"); + Enum_Loc = Ident_2; + Bool_Glob = !Func_2(Str_1_Loc, Str_2_Loc); + /* Bool_Glob == 1 */ + while (Int_1_Loc < Int_2_Loc) /* loop body executed once */ + { + Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc; + /* Int_3_Loc == 7 */ + Proc_7(Int_1_Loc, Int_2_Loc, &Int_3_Loc); + /* Int_3_Loc == 7 */ + Int_1_Loc += 1; + } /* while */ + /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ + Proc_8(Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc); + /* Int_Glob == 5 */ + Proc_1(Ptr_Glob); + for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index) + /* loop body executed twice */ + { + if (Enum_Loc == Func_1(Ch_Index, 'C')) + /* then, not executed */ + { + Proc_6(Ident_1, &Enum_Loc); + strcpy(Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING"); + Int_2_Loc = Run_Index; + Int_Glob = Run_Index; + } + } + /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ + Int_2_Loc = Int_2_Loc * Int_1_Loc; + Int_1_Loc = Int_2_Loc / Int_3_Loc; + Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc; + /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */ + Proc_2(&Int_1_Loc); + /* Int_1_Loc == 5 */ + + } /* loop "for Run_Index" */ + + /**************/ + /* Stop timer */ + /**************/ + + clock_gettime(CLOCK_MONOTONIC, &End_Time); + + 
User_Time = (End_Time.tv_sec - Begin_Time.tv_sec) * MILLION + + (End_Time.tv_nsec - Begin_Time.tv_nsec) / 1000; + User_Time = User_Time / MILLION; /* convert to seconds */ + + printf("%ld runs %lf seconds \n", (long)Number_Of_Runs, User_Time); + if (User_Time > 5.0) { + count = 0; + } + else { + if (User_Time < 0.1) { + Number_Of_Runs = Number_Of_Runs * 5; + } + } + } /* calibrate/run do while */ + while (count > 0); + + printf("\n"); + printf("Final values (* implementation-dependent):\n"); + printf("\n"); + printf("Int_Glob: "); + if (Int_Glob == 5) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Int_Glob); + + printf("Bool_Glob: "); + if (Bool_Glob == 1) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Bool_Glob); + + printf("Ch_1_Glob: "); + if (Ch_1_Glob == 'A') + printf("O.K. "); + else + printf("WRONG "); + printf("%c ", Ch_1_Glob); + + printf("Ch_2_Glob: "); + if (Ch_2_Glob == 'B') + printf("O.K. "); + else + printf("WRONG "); + printf("%c\n", Ch_2_Glob); + + printf("Arr_1_Glob[8]: "); + if (Arr_1_Glob[8] == 7) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Arr_1_Glob[8]); + + printf("Arr_2_Glob8/7: "); + if (Arr_2_Glob[8][7] == Number_Of_Runs + 10) + printf("O.K. "); + else + printf("WRONG "); + printf("%10d\n", Arr_2_Glob[8][7]); + + printf("Ptr_Glob-> "); + printf(" Ptr_Comp: * %p\n", Ptr_Glob->Ptr_Comp); + + printf(" Discr: "); + if (Ptr_Glob->Discr == 0) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Ptr_Glob->Discr); + + printf("Enum_Comp: "); + if (Ptr_Glob->variant.var_1.Enum_Comp == 2) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Ptr_Glob->variant.var_1.Enum_Comp); + + printf(" Int_Comp: "); + if (Ptr_Glob->variant.var_1.Int_Comp == 17) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Ptr_Glob->variant.var_1.Int_Comp); + + printf("Str_Comp: "); + if (strcmp(Ptr_Glob->variant.var_1.Str_Comp, + "DHRYSTONE PROGRAM, SOME STRING") + == 0) + printf("O.K. 
"); + else + printf("WRONG "); + printf("%s\n", Ptr_Glob->variant.var_1.Str_Comp); + + printf("Next_Ptr_Glob-> "); + printf(" Ptr_Comp: * %p", Next_Ptr_Glob->Ptr_Comp); + printf(" same as above\n"); + + printf(" Discr: "); + if (Next_Ptr_Glob->Discr == 0) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Next_Ptr_Glob->Discr); + + printf("Enum_Comp: "); + if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp); + + printf(" Int_Comp: "); + if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Next_Ptr_Glob->variant.var_1.Int_Comp); + + printf("Str_Comp: "); + if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp, + "DHRYSTONE PROGRAM, SOME STRING") + == 0) + printf("O.K. "); + else + printf("WRONG "); + printf("%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp); + + printf("Int_1_Loc: "); + if (Int_1_Loc == 5) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Int_1_Loc); + + printf("Int_2_Loc: "); + if (Int_2_Loc == 13) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Int_2_Loc); + + printf("Int_3_Loc: "); + if (Int_3_Loc == 7) + printf("O.K. "); + else + printf("WRONG "); + printf("%d ", Int_3_Loc); + + printf("Enum_Loc: "); + if (Enum_Loc == 1) + printf("O.K. "); + else + printf("WRONG "); + printf("%d\n", Enum_Loc); + + printf("Str_1_Loc: "); + if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0) + printf("O.K. "); + else + printf("WRONG "); + printf("%s\n", Str_1_Loc); + + printf("Str_2_Loc: "); + if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0) + printf("O.K. 
"); + else + printf("WRONG "); + printf("%s\n", Str_2_Loc); + + printf("\n"); + + if (User_Time < Too_Small_Time) { + printf("Measured time too small to obtain meaningful results\n"); + printf("Please increase number of runs\n"); + printf("\n"); + } + else { + Microseconds = User_Time * Mic_secs_Per_Second / (double)Number_Of_Runs; + Dhrystones_Per_Second = (double)Number_Of_Runs / User_Time; + Vax_Mips = Dhrystones_Per_Second / 1757.0; + + printf("Microseconds for one run through Dhrystone: "); + printf("%lf \n", Microseconds); + printf("Dhrystones per Second: "); + printf("%lf \n", Dhrystones_Per_Second); + printf("VAX MIPS rating = "); + printf("%lf \n", Vax_Mips); + printf("\n"); + } + + free(Next_Ptr_Glob); + free(Ptr_Glob); + return 1; +} + +void +Proc_1(REG Rec_Pointer Ptr_Val_Par) +/******************/ + +/* executed once */ +{ + REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp; + /* == Ptr_Glob_Next */ + /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */ + /* corresponds to "rename" in Ada, "with" in Pascal */ + + structassign(*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob); + Ptr_Val_Par->variant.var_1.Int_Comp = 5; + Next_Record->variant.var_1.Int_Comp = Ptr_Val_Par->variant.var_1.Int_Comp; + Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp; + Proc_3(&Next_Record->Ptr_Comp); + /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp + == Ptr_Glob->Ptr_Comp */ + if (Next_Record->Discr == Ident_1) + /* then, executed */ + { + Next_Record->variant.var_1.Int_Comp = 6; + Proc_6(Ptr_Val_Par->variant.var_1.Enum_Comp, + &Next_Record->variant.var_1.Enum_Comp); + Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp; + Proc_7(Next_Record->variant.var_1.Int_Comp, 10, + &Next_Record->variant.var_1.Int_Comp); + } + else { /* not executed */ + structassign(*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp); + } +} /* Proc_1 */ + +void +Proc_2(One_Fifty *Int_Par_Ref) +/******************/ +/* executed once */ +/* *Int_Par_Ref == 1, becomes 4 */ + +{ + One_Fifty Int_Loc; + Enumeration Enum_Loc; + + Int_Loc = 
*Int_Par_Ref + 10; + do /* executed once */ + if (Ch_1_Glob == 'A') + /* then, executed */ + { + Int_Loc -= 1; + *Int_Par_Ref = Int_Loc - Int_Glob; + Enum_Loc = Ident_1; + } /* if */ + while (Enum_Loc != Ident_1); /* true */ +} /* Proc_2 */ + +void +Proc_3(Rec_Pointer *Ptr_Ref_Par) +/******************/ +/* executed once */ +/* Ptr_Ref_Par becomes Ptr_Glob */ + +{ + if (Ptr_Glob != Null) + /* then, executed */ + *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp; + Proc_7(10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp); +} /* Proc_3 */ + +void +Proc_4() /* without parameters */ +/*******/ +/* executed once */ +{ + Boolean Bool_Loc; + + Bool_Loc = Ch_1_Glob == 'A'; + Bool_Glob = Bool_Loc | Bool_Glob; + Ch_2_Glob = 'B'; +} /* Proc_4 */ + +void +Proc_5() /* without parameters */ +/*******/ +/* executed once */ +{ + Ch_1_Glob = 'A'; + Bool_Glob = false; +} /* Proc_5 */ + +/* Procedure for the assignment of structures, */ +/* if the C compiler doesn't support this feature */ +#ifdef NOSTRUCTASSIGN +memcpy(d, s, l) register char *d; +register char *s; +register int l; +{ + while (l--) + *d++ = *s++; +} +#endif diff --git a/tests/benchmarks/dhrystone/src/dhry_2.c b/tests/benchmarks/dhrystone/src/dhry_2.c new file mode 100644 index 000000000..5378799eb --- /dev/null +++ b/tests/benchmarks/dhrystone/src/dhry_2.c @@ -0,0 +1,187 @@ +/* + ************************************************************************* + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry_2.c (part 3 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. Weicker + * + ************************************************************************* + */ + +#include "dhry.h" + +#ifndef REG +#define REG +/* REG becomes defined as empty */ +/* i.e. 
no register variables */ +#else +#define REG register +#endif + +extern int Int_Glob; +extern char Ch_1_Glob; + +Boolean +Func_3(Enumeration Enum_Par_Val); + +void +Proc_6(Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par) +/*********************************/ +/* executed once */ +/* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */ + +{ + *Enum_Ref_Par = Enum_Val_Par; + if (!Func_3(Enum_Val_Par)) + /* then, not executed */ + *Enum_Ref_Par = Ident_4; + switch (Enum_Val_Par) { + case Ident_1: + *Enum_Ref_Par = Ident_1; + break; + case Ident_2: + if (Int_Glob > 100) + /* then */ + *Enum_Ref_Par = Ident_1; + else + *Enum_Ref_Par = Ident_4; + break; + case Ident_3: /* executed */ + *Enum_Ref_Par = Ident_2; + break; + case Ident_4: + break; + case Ident_5: + *Enum_Ref_Par = Ident_3; + break; + } /* switch */ +} /* Proc_6 */ + +void +Proc_7(One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, One_Fifty *Int_Par_Ref) +/**********************************************/ +/* executed three times */ +/* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */ +/* Int_Par_Ref becomes 7 */ +/* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */ +/* Int_Par_Ref becomes 17 */ +/* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */ +/* Int_Par_Ref becomes 18 */ + +{ + One_Fifty Int_Loc; + + Int_Loc = Int_1_Par_Val + 2; + *Int_Par_Ref = Int_2_Par_Val + Int_Loc; +} /* Proc_7 */ + +void +Proc_8(Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, int Int_1_Par_Val, + int Int_2_Par_Val) +/*********************************************************************/ +/* executed once */ +/* Int_Par_Val_1 == 3 */ +/* Int_Par_Val_2 == 7 */ + +{ + REG One_Fifty Int_Index; + REG One_Fifty Int_Loc; + + Int_Loc = Int_1_Par_Val + 5; + Arr_1_Par_Ref[Int_Loc] = Int_2_Par_Val; + Arr_1_Par_Ref[Int_Loc + 1] = Arr_1_Par_Ref[Int_Loc]; + Arr_1_Par_Ref[Int_Loc + 30] = Int_Loc; + for (Int_Index = Int_Loc; Int_Index <= Int_Loc + 1; ++Int_Index) + Arr_2_Par_Ref[Int_Loc][Int_Index] = Int_Loc; + 
Arr_2_Par_Ref[Int_Loc][Int_Loc - 1] += 1; + Arr_2_Par_Ref[Int_Loc + 20][Int_Loc] = Arr_1_Par_Ref[Int_Loc]; + Int_Glob = 5; +} /* Proc_8 */ + +Enumeration +Func_1(Capital_Letter Ch_1_Par_Val, Capital_Letter Ch_2_Par_Val) +/*************************************************/ +/* executed three times */ +/* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */ +/* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */ +/* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */ + +{ + Capital_Letter Ch_1_Loc; + Capital_Letter Ch_2_Loc; + + Ch_1_Loc = Ch_1_Par_Val; + Ch_2_Loc = Ch_1_Loc; + if (Ch_2_Loc != Ch_2_Par_Val) + /* then, executed */ + return (Ident_1); + else /* not executed */ + { + Ch_1_Glob = Ch_1_Loc; + return (Ident_2); + } +} /* Func_1 */ + +Boolean +Func_2(Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref) +/*************************************************/ +/* executed once */ +/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */ +/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */ + +{ + REG One_Thirty Int_Loc; + Capital_Letter Ch_Loc; + + Int_Loc = 2; + while (Int_Loc <= 2) /* loop body executed once */ + if (Func_1(Str_1_Par_Ref[Int_Loc], Str_2_Par_Ref[Int_Loc + 1]) + == Ident_1) + /* then, executed */ + { + Ch_Loc = 'A'; + Int_Loc += 1; + } /* if, while */ + if (Ch_Loc >= 'W' && Ch_Loc < 'Z') + /* then, not executed */ + Int_Loc = 7; + if (Ch_Loc == 'R') + /* then, not executed */ + return (true); + else /* executed */ + { + if (strcmp(Str_1_Par_Ref, Str_2_Par_Ref) > 0) + /* then, not executed */ + { + Int_Loc += 7; + Int_Glob = Int_Loc; + return (true); + } + else /* executed */ + return (false); + } /* if Ch_Loc */ +} /* Func_2 */ + +Boolean +Func_3(Enumeration Enum_Par_Val) +/***************************/ +/* executed once */ +/* Enum_Par_Val == Ident_3 */ + +{ + Enumeration Enum_Loc; + + Enum_Loc = Enum_Par_Val; + if (Enum_Loc == Ident_3) + /* then, executed */ + return (true); + else /* not executed */ + return (false); +} /* Func_3 */ diff 
--git a/tests/benchmarks/dhrystone/test_pgo.sh b/tests/benchmarks/dhrystone/test_pgo.sh new file mode 100755 index 000000000..8bca19757 --- /dev/null +++ b/tests/benchmarks/dhrystone/test_pgo.sh @@ -0,0 +1,55 @@ +#!/bin/sh + +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +PLATFORM=$(uname -s | tr A-Z a-z) + +if [ "$1" = "--sgx" ] && [ "$PLATFORM" = "linux" ]; then + IWASM="../../../product-mini/platforms/${PLATFORM}-sgx/enclave-sample/iwasm" + WAMRC="../../../wamr-compiler/build/wamrc -sgx" +else + IWASM="../../../product-mini/platforms/${PLATFORM}/build/iwasm" + WAMRC="../../../wamr-compiler/build/wamrc" +fi + +if [ ! -e "dhrystone.wasm" ]; then + echo "dhrystone.wasm doesn't exist, please run build.sh first" + exit +fi + +echo "" +echo "Compile dhrystone.wasm to dhrystone.aot .." +${WAMRC} -o dhrystone.aot dhrystone.wasm + +echo "" +echo "Compile dhrystone.wasm to dhrystone_pgo.aot .." +${WAMRC} --enable-llvm-pgo -o dhrystone_pgo.aot dhrystone.wasm + +echo "" +echo "Run dhrystone_pgo.aot to generate the raw profile data .." +${IWASM} --gen-prof-file=dhrystone.profraw dhrystone_pgo.aot + +echo "" +echo "Merge the raw profile data to dhrystone.profdata .." +rm -f dhrystone.profdata && llvm-profdata merge -output=dhrystone.profdata dhrystone.profraw + +echo "" +echo "Compile dhrystone.wasm to dhrystone_opt.aot with the profile data .." 
+${WAMRC} --use-prof-file=dhrystone.profdata -o dhrystone_opt.aot dhrystone.wasm + +echo "" +echo "Run the dhrystone native" +./dhrystone_native + +echo "" +echo "Run the original aot file dhrystone.aot" +${IWASM} dhrystone.aot + +echo "" +echo "Run the PGO optimized aot file dhrystone_opt.aot" +${IWASM} dhrystone_opt.aot + +# Show the profile data: +# llvm-profdata show --all-functions --detailed-summary --binary-ids --counts \ +# --hot-func-list --memop-sizes --show-prof-sym-list dhrystone.profraw diff --git a/tests/benchmarks/jetstream/README.md b/tests/benchmarks/jetstream/README.md index f6c593d11..1bf438c2b 100644 --- a/tests/benchmarks/jetstream/README.md +++ b/tests/benchmarks/jetstream/README.md @@ -27,3 +27,9 @@ And then run `./build.sh` to build the source code, the folder `out` will be cre # Running Run `./run_aot.sh` to test the benchmark, the native mode and iwasm aot mode will be tested for each workload, and the file `report.txt` will be generated. + +Run `./test_pgo.sh` to test the benchmark with AOT static PGO (Profile-Guided Optimization) enabled, please refer [here](../README.md#install-llvm-profdata) to install tool `llvm-profdata` and build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`. + +- For Linux, build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`, then run `./test_pgo.sh` to test the benchmark with AOT static PGO (Profile-Guided Optimization) enabled. + +- For Linux-sgx, similarly, build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`, then `make` in the directory `enclave-sample`. And run `./test_pgo.sh --sgx` to test the benchmark. diff --git a/tests/benchmarks/jetstream/build.sh b/tests/benchmarks/jetstream/build.sh index 030b8d3a0..ca8401cda 100755 --- a/tests/benchmarks/jetstream/build.sh +++ b/tests/benchmarks/jetstream/build.sh @@ -3,27 +3,45 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +source /opt/emsdk/emsdk_env.sh + +PLATFORM=$(uname -s | tr A-Z a-z) + OUT_DIR=$PWD/out WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc mkdir -p jetstream +mkdir -p tsf-src mkdir -p ${OUT_DIR} +if [[ $1 != "--no-simd" ]];then + NATIVE_SIMD_FLAGS="-msse2 -msse3 -msse4" + WASM_SIMD_FLAGS="-msimd128 -msse2 -msse3 -msse4" +else + NATIVE_SIMD_FLAGS="" + WASM_SIMD_FLAGS="" +fi + cd jetstream echo "Download source files .." -wget https://browserbench.org/JetStream/wasm/gcc-loops.cpp -wget https://browserbench.org/JetStream/wasm/quicksort.c -wget https://browserbench.org/JetStream/wasm/HashSet.cpp -wget https://browserbench.org/JetStream/simple/float-mm.c +wget -N https://browserbench.org/JetStream/wasm/gcc-loops.cpp +wget -N https://browserbench.org/JetStream/wasm/quicksort.c +wget -N https://browserbench.org/JetStream/wasm/HashSet.cpp +wget -N https://browserbench.org/JetStream/simple/float-mm.c -patch -p1 < ../jetstream.patch +if [[ $? != 0 ]]; then + exit +fi + +echo "Patch source files .." +patch -p1 -N < ../jetstream.patch echo "Build gcc-loops with g++ .." -g++ -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/gcc-loops_native gcc-loops.cpp +g++ -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/gcc-loops_native gcc-loops.cpp echo "Build gcc-loops with em++ .." -em++ -O3 -s STANDALONE_WASM=1 -msimd128 \ +em++ -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \ -s INITIAL_MEMORY=1048576 \ -s TOTAL_STACK=32768 \ -s "EXPORTED_FUNCTIONS=['_main']" \ @@ -33,11 +51,16 @@ em++ -O3 -s STANDALONE_WASM=1 -msimd128 \ echo "Compile gcc-loops.wasm to gcc-loops.aot" ${WAMRC_CMD} -o ${OUT_DIR}/gcc-loops.aot ${OUT_DIR}/gcc-loops.wasm +if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile gcc-loops.wasm to gcc-loops_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/gcc-loops_segue.aot ${OUT_DIR}/gcc-loops.wasm +fi + echo "Build quicksort with gcc .." 
-gcc -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/quicksort_native quicksort.c +gcc -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/quicksort_native quicksort.c echo "Build quicksort with emcc .." -emcc -O3 -s STANDALONE_WASM=1 -msimd128 \ +emcc -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \ -s INITIAL_MEMORY=1048576 \ -s TOTAL_STACK=32768 \ -s "EXPORTED_FUNCTIONS=['_main']" \ @@ -46,12 +69,17 @@ emcc -O3 -s STANDALONE_WASM=1 -msimd128 \ echo "Compile quicksort.wasm to quicksort.aot" ${WAMRC_CMD} -o ${OUT_DIR}/quicksort.aot ${OUT_DIR}/quicksort.wasm +if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile quicksort.wasm to quicksort_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/quicksort_segue.aot ${OUT_DIR}/quicksort.wasm +fi + echo "Build HashSet with g++ .." -g++ -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/HashSet_native HashSet.cpp \ +g++ -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/HashSet_native HashSet.cpp \ -lstdc++ echo "Build HashSet with em++ .." -em++ -O3 -s STANDALONE_WASM=1 -msimd128 \ +em++ -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \ -s INITIAL_MEMORY=1048576 \ -s TOTAL_STACK=32768 \ -s "EXPORTED_FUNCTIONS=['_main']" \ @@ -60,11 +88,16 @@ em++ -O3 -s STANDALONE_WASM=1 -msimd128 \ echo "Compile HashSet.wasm to HashSet.aot" ${WAMRC_CMD} -o ${OUT_DIR}/HashSet.aot ${OUT_DIR}/HashSet.wasm +if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile HashSet.wasm to HashSet_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/HashSet_segue.aot ${OUT_DIR}/HashSet.wasm +fi + echo "Build float-mm with gcc .." -gcc -O3 -msse2 -msse3 -msse4 -o ${OUT_DIR}/float-mm_native float-mm.c +gcc -O3 ${NATIVE_SIMD_FLAGS} -o ${OUT_DIR}/float-mm_native float-mm.c echo "Build float-mm with emcc .." 
-emcc -O3 -s STANDALONE_WASM=1 -msimd128 \ +emcc -O3 -s STANDALONE_WASM=1 ${WASM_SIMD_FLAGS} \ -s INITIAL_MEMORY=1048576 \ -s TOTAL_STACK=32768 \ -s "EXPORTED_FUNCTIONS=['_main']" \ @@ -72,3 +105,70 @@ emcc -O3 -s STANDALONE_WASM=1 -msimd128 \ echo "Compile float-mm.wasm to float-mm.aot" ${WAMRC_CMD} -o ${OUT_DIR}/float-mm.aot ${OUT_DIR}/float-mm.wasm + +if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile float-mm.wasm to float-mm_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/float-mm_segue.aot ${OUT_DIR}/float-mm.wasm +fi + +cd ../tsf-src + +tsf_srcs="tsf_asprintf.c tsf_buffer.c tsf_error.c tsf_reflect.c tsf_st.c \ + tsf_type.c tsf_io.c tsf_native.c tsf_generator.c tsf_st_typetable.c \ + tsf_parser.c tsf_buf_writer.c tsf_buf_reader.c tsf_primitive.c \ + tsf_type_table.c tsf_copier.c tsf_destructor.c tsf_gpc_code_gen.c \ + gpc_code_gen_util.c gpc_threaded.c gpc_intable.c gpc_instruction.c \ + gpc_program.c gpc_proto.c gpc_stack_height.c tsf_serial_in_man.c \ + tsf_serial_out_man.c tsf_type_in_map.c tsf_type_out_map.c \ + tsf_stream_file_input.c tsf_stream_file_output.c tsf_sort.c \ + tsf_version.c tsf_named_type.c tsf_io_utils.c tsf_zip_attr.c \ + tsf_zip_reader.c tsf_zip_writer.c tsf_zip_abstract.c tsf_limits.c \ + tsf_ra_type_man.c tsf_adaptive_reader.c tsf_sha1.c tsf_sha1_writer.c \ + tsf_fsdb.c tsf_fsdb_protocol.c tsf_define_helpers.c tsf_ir.c \ + tsf_ir_different.c tsf_ir_speed.c" + +tsf_files="${tsf_srcs} config.h gpc_worklist.h \ + tsf_config_stub.h tsf.h tsf_internal.h tsf_region.h tsf_types.h \ + gpc.h tsf_atomics.h tsf_define_helpers.h tsf_indent.h tsf_inttypes.h \ + tsf_serial_protocol.h tsf_util.h gpc_int_common.h tsf_build_defines.h \ + tsf_format.h tsf_internal_config.h tsf_ir_different.h tsf_sha1.h \ + tsf_zip_abstract.h gpc_internal.h tsf_config.h tsf_fsdb_protocol.h \ + tsf_internal_config_stub.h tsf_ir.h tsf_st.h \ + gpc_instruction_dispatch.gen gpc_instruction_stack_effects.gen \ + gpc_instruction_to_string.gen gpc_instruction_size.gen 
\ + gpc_instruction_static_size.gen gpc_interpreter.gen" + +echo "Download tsf source files .." +for t in ${tsf_files} +do + wget -N "https://browserbench.org/JetStream/wasm/TSF/${t}" + if [[ $? != 0 ]]; then + exit + fi +done + +patch -p1 -N < ../tsf.patch + +echo "Build tsf with gcc .." +gcc \ + -o ${OUT_DIR}/tsf_native -O3 ${NATIVE_SIMD_FLAGS} \ + -I. -DTSF_BUILD_SYSTEM=1 \ + ${tsf_srcs} -lm + +echo "Build tsf standalone with wasi-sdk .." +/opt/wasi-sdk/bin/clang -O3 ${WASM_SIMD_FLAGS} -z stack-size=1048576 \ + -Wl,--initial-memory=52428800 \ + -Wl,--export=main \ + -Wl,--export=__heap_base,--export=__data_end \ + -I. -DTSF_BUILD_SYSTEM=1 \ + -Wl,--allow-undefined \ + -o ${OUT_DIR}/tsf.wasm \ + ${tsf_srcs} + +echo "Compile tsf.wasm to tsf.aot" +${WAMRC_CMD} -o ${OUT_DIR}/tsf.aot ${OUT_DIR}/tsf.wasm + +if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile tsf.wasm to tsf_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/tsf_segue.aot ${OUT_DIR}/tsf.wasm +fi diff --git a/tests/benchmarks/jetstream/jetstream.patch b/tests/benchmarks/jetstream/jetstream.patch index 34431de08..bc680d98a 100644 --- a/tests/benchmarks/jetstream/jetstream.patch +++ b/tests/benchmarks/jetstream/jetstream.patch @@ -1,15 +1,18 @@ diff -urN jetstream-org/HashSet.cpp jetstream/HashSet.cpp ---- jetstream-org/HashSet.cpp 2020-10-30 04:12:42.000000000 +0800 -+++ jetstream/HashSet.cpp 2022-01-24 17:11:08.619831711 +0800 -@@ -24,6 +24,7 @@ +--- jetstream-org/HashSet.cpp 2020-10-30 04:12:42.000000000 +0800 ++++ jetstream/HashSet.cpp 2022-01-24 17:11:08.619831711 +0800 +@@ -22,8 +22,10 @@ + + #include #include ++#include #include #include +#include #include - + // Compile with: xcrun clang++ -o HashSet HashSet.cpp -O2 -W -framework Foundation -licucore -std=c++11 -fvisibility=hidden -DNDEBUG=1 -@@ -76,7 +77,7 @@ +@@ -76,7 +78,7 @@ inline ToType bitwise_cast(FromType from) { typename std::remove_const::type to { }; @@ -17,4 +20,4 @@ diff -urN jetstream-org/HashSet.cpp jetstream/HashSet.cpp + 
memcpy(&to, &from, sizeof(to)); return to; } - + diff --git a/tests/benchmarks/jetstream/run_aot.sh b/tests/benchmarks/jetstream/run_aot.sh index d62a5da90..85ef3fba5 100755 --- a/tests/benchmarks/jetstream/run_aot.sh +++ b/tests/benchmarks/jetstream/run_aot.sh @@ -3,6 +3,8 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +PLATFORM=$(uname -s | tr A-Z a-z) + CUR_DIR=$PWD OUT_DIR=$CUR_DIR/out REPORT=$CUR_DIR/report.txt @@ -13,7 +15,7 @@ IWASM_CMD=$CUR_DIR/../../../product-mini/platforms/${PLATFORM}/build/iwasm BENCH_NAME_MAX_LEN=20 -JETSTREAM_CASES="gcc-loops quicksort HashSet float-mm" +JETSTREAM_CASES="gcc-loops HashSet tsf float-mm quicksort" rm -f $REPORT touch $REPORT @@ -34,7 +36,11 @@ echo "Start to run cases, the result is written to report.txt" #run benchmarks cd $OUT_DIR -echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +if [[ ${PLATFORM} == "linux" ]]; then + echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT +else + echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +fi for t in $JETSTREAM_CASES do @@ -46,7 +52,13 @@ do echo "run $t with iwasm aot .." echo -en "\t" >> $REPORT - $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD --dir=. ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + + if [[ ${PLATFORM} == "linux" ]]; then + echo "run $t with iwasm aot segue .." + echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD --dir=. ${t}_segue.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + fi echo -en "\n" >> $REPORT done diff --git a/tests/benchmarks/jetstream/test_pgo.sh b/tests/benchmarks/jetstream/test_pgo.sh new file mode 100755 index 000000000..a11018212 --- /dev/null +++ b/tests/benchmarks/jetstream/test_pgo.sh @@ -0,0 +1,92 @@ +#!/bin/bash + +# Copyright (C) 2019 Intel Corporation. 
All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +CUR_DIR=$PWD +OUT_DIR=$CUR_DIR/out +REPORT=$CUR_DIR/report.txt +TIME=/usr/bin/time + +PLATFORM=$(uname -s | tr A-Z a-z) +if [ "$1" = "--sgx" ] && [ "$PLATFORM" = "linux" ]; then + IWASM_CMD="$CUR_DIR/../../../product-mini/platforms/${PLATFORM}-sgx/enclave-sample/iwasm" + WAMRC_CMD="$CUR_DIR/../../../wamr-compiler/build/wamrc -sgx" +else + IWASM_CMD="$CUR_DIR/../../../product-mini/platforms/${PLATFORM}/build/iwasm" + WAMRC_CMD="$CUR_DIR/../../../wamr-compiler/build/wamrc" +fi + +BENCH_NAME_MAX_LEN=20 + +JETSTREAM_CASES="gcc-loops HashSet tsf float-mm quicksort" + +rm -f $REPORT +touch $REPORT + +function print_bench_name() +{ + name=$1 + echo -en "$name" >> $REPORT + name_len=${#name} + if [ $name_len -lt $BENCH_NAME_MAX_LEN ] + then + spaces=$(( $BENCH_NAME_MAX_LEN - $name_len )) + for i in $(eval echo "{1..$spaces}"); do echo -n " " >> $REPORT; done + fi +} + +pushd $OUT_DIR > /dev/null 2>&1 +for t in $JETSTREAM_CASES +do + if [ ! -e "${t}.wasm" ]; then + echo "${t}.wasm doesn't exist, please run build.sh first" + exit + fi + + echo "" + echo "Compile ${t}.wasm to ${t}.aot .." + ${WAMRC_CMD} -o ${t}.aot ${t}.wasm + + echo "" + echo "Compile ${t}.wasm to ${t}_pgo.aot .." + ${WAMRC_CMD} --enable-llvm-pgo -o ${t}_pgo.aot ${t}.wasm + + echo "" + echo "Run ${t}_pgo.aot to generate the raw profile data .." + ${IWASM_CMD} --gen-prof-file=${t}.profraw --dir=. ${t}_pgo.aot + + echo "" + echo "Merge the raw profile data to ${t}.profdata .." + rm -f ${t}.profdata && llvm-profdata merge -output=${t}.profdata ${t}.profraw + + echo "" + echo "Compile ${t}.wasm to ${t}_opt.aot with the profile data .." 
+ ${WAMRC_CMD} --use-prof-file=${t}.profdata -o ${t}_opt.aot ${t}.wasm +done +popd > /dev/null 2>&1 + +echo "Start to run cases, the result is written to report.txt" + +#run benchmarks +cd $OUT_DIR +echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-pgo\n" >> $REPORT + +for t in $JETSTREAM_CASES +do + print_bench_name $t + + echo "run $t with native .." + echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" ./${t}_native 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + + echo "run $t with iwasm aot .." + echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD --dir=. ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + + echo "run $t with iwasm aot opt .." + echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD --dir=. ${t}_opt.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + + echo -en "\n" >> $REPORT +done diff --git a/tests/benchmarks/jetstream/tsf.patch b/tests/benchmarks/jetstream/tsf.patch new file mode 100644 index 000000000..e52c3cdc5 --- /dev/null +++ b/tests/benchmarks/jetstream/tsf.patch @@ -0,0 +1,24 @@ +diff -urN tsf-src-org/tsf_internal.h tsf-src/tsf_internal.h +--- tsf-src-org/tsf_internal.h 2023-03-31 10:49:45.000000000 +0800 ++++ tsf-src/tsf_internal.h 2023-05-11 08:18:35.000000000 +0800 +@@ -429,6 +429,7 @@ + #endif + tsf_fsdb_connection_t *connection; + #endif ++ uint32_t __padding; + } remote; + } u; + tsf_limits_t *limits; +diff -urN tsf-src-org/tsf_ir_speed.c tsf-src/tsf_ir_speed.c +--- tsf-src-org/tsf_ir_speed.c 2023-03-31 10:49:45.000000000 +0800 ++++ tsf-src/tsf_ir_speed.c 2023-05-11 08:18:35.000000000 +0800 +@@ -63,6 +63,9 @@ + Program_t *program; + unsigned elementIndex; + ++ if (!(programIndex % 100)) ++ printf("##programIndex: %u\n", programIndex); ++ + CS(program = tsf_region_create(sizeof(Program_t))); + + program->globals.len = numDecls + numDefns; diff --git a/tests/benchmarks/libsodium/README.md b/tests/benchmarks/libsodium/README.md 
index 19500afe6..a21e679c8 100644 --- a/tests/benchmarks/libsodium/README.md +++ b/tests/benchmarks/libsodium/README.md @@ -18,6 +18,12 @@ And then run `./build.sh` to build the source code, the libsodium source code wi Run `./run_aot.sh` to test the benchmark, the native mode and iwasm aot mode will be tested respectively. +Run `./test_pgo.sh` to test the benchmark with AOT static PGO (Profile-Guided Optimization) enabled, please refer [here](../README.md#install-llvm-profdata) to install tool `llvm-profdata` and build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`. + +- For Linux, build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`, then run `./test_pgo.sh` to test the benchmark with AOT static PGO (Profile-Guided Optimization) enabled. + +- For Linux-sgx, similarly, build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`, then `make` in the directory `enclave-sample`. And run `./test_pgo.sh --sgx` to test the benchmark. + # Others Refer to [Performance of WebAssembly runtimes in 2023](https://00f.net/2023/01/04/webassembly-benchmark-2023) for more about the performance comparison of wasm runtimes on running the libsodium benchmarks. diff --git a/tests/benchmarks/libsodium/build.sh b/tests/benchmarks/libsodium/build.sh index 1e9cc21a7..3049f2c73 100755 --- a/tests/benchmarks/libsodium/build.sh +++ b/tests/benchmarks/libsodium/build.sh @@ -16,6 +16,8 @@ libsodium_CASES="aead_aes256gcm2 aead_aes256gcm aead_chacha20poly13052 aead_chac sodium_utils3 sodium_utils sodium_version stream2 stream3 stream4 stream verify1 \ xchacha20" +PLATFORM=$(uname -s | tr A-Z a-z) + readonly WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc readonly OUT_DIR=$PWD/libsodium/zig-out/bin @@ -34,9 +36,16 @@ zig build -Drelease-fast -Denable_benchmarks=true -Dtarget=wasm32-wasi for case in ${libsodium_CASES} do ${WAMRC_CMD} -o ${OUT_DIR}/${case}.aot ${OUT_DIR}/${case}.wasm - if [ "$?" 
!= 0 ]; then echo -e "Error while compiling ${case}.wasm to ${case}.aot" exit fi + + if [[ ${PLATFORM} == "linux" ]]; then + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/${case}_segue.aot ${OUT_DIR}/${case}.wasm + if [ "$?" != 0 ]; then + echo -e "Error while compiling ${case}.wasm to ${case}_segue.aot" + exit + fi + fi done diff --git a/tests/benchmarks/libsodium/test_aot.sh b/tests/benchmarks/libsodium/run_aot.sh similarity index 50% rename from tests/benchmarks/libsodium/test_aot.sh rename to tests/benchmarks/libsodium/run_aot.sh index 2e4e3e357..8859d0634 100755 --- a/tests/benchmarks/libsodium/test_aot.sh +++ b/tests/benchmarks/libsodium/run_aot.sh @@ -13,12 +13,14 @@ libsodium_CASES="aead_aes256gcm2 aead_aes256gcm aead_chacha20poly13052 aead_chac scalarmult6 scalarmult7 scalarmult8 scalarmult_ed25519 scalarmult_ristretto255 \ scalarmult secretbox2 secretbox7 secretbox8 secretbox_easy2 secretbox_easy \ secretbox secretstream shorthash sign siphashx24 sodium_core sodium_utils2 \ - sodium_utils3 sodium_utils sodium_version stream2 stream3 stream4 stream verify1 \ - xchacha20" + sodium_utils stream2 stream3 stream4 stream verify1 xchacha20" + +PLATFORM=$(uname -s | tr A-Z a-z) readonly OUT_DIR=$PWD/libsodium/zig-out/bin readonly REPORT=$PWD/report.txt -readonly IWASM_CMD=$PWD/../../../product-mini/platforms/linux/build/iwasm +readonly IWASM_CMD=$PWD/../../../product-mini/platforms/${PLATFORM}/build/iwasm +readonly TIME=/usr/bin/time BENCH_NAME_MAX_LEN=20 @@ -40,7 +42,11 @@ function print_bench_name() # run benchmarks cd $OUT_DIR -echo -en "\t\t\t\t\t\tnative\tiwasm-aot\n" >> $REPORT +if [[ ${PLATFORM} == "linux" ]]; then + echo -en "\t\t\t\t\t\tnative\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT +else + echo -en "\t\t\t\t\t\tnative\tiwasm-aot\n" >> $REPORT +fi for t in $libsodium_CASES do @@ -48,11 +54,38 @@ do echo "run $t with native..." 
echo -en "\t" >> $REPORT - ./${t} | awk -F '-' 'BEGIN{FIELDWIDTHS="10"}{ORS=""; print $1 / 1000000.0}' >> $REPORT + if [[ $t != "sodium_utils2" ]]; then + ./${t} | awk '{printf "%-10.2f", $0/1000000.0}' >> $REPORT + else + # sodium_utils2 doesn't print the result, + # use time command to get result instead + $TIME -f "real-%e-time" ./${t} 2>&1 | grep "real-.*-time" | + awk -F '-' '{printf "%-10.2f", $2}' >> $REPORT + fi echo "run $t with iwasm aot..." echo -en "\t \t" >> $REPORT - $IWASM_CMD ${t}.aot | awk -F '-' 'BEGIN{FIELDWIDTHS="10"}{ORS=""; print $1 / 1000000.0}' >> $REPORT + if [[ $t != "sodium_utils2" ]]; then + $IWASM_CMD ${t}.aot | awk '{printf "%-10.2f", $0/1000000.0}' >> $REPORT + else + # sodium_utils2 doesn't print the result, + # use time command to get result instead + $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | + awk -F '-' '{printf "%-10.2f", $2}' >> $REPORT + fi + + if [[ ${PLATFORM} == "linux" ]]; then + echo "run $t with iwasm aot segue..." + echo -en "\t \t" >> $REPORT + if [[ $t != "sodium_utils2" ]]; then + $IWASM_CMD ${t}_segue.aot | awk '{printf "%.2f", $0/1000000.0}' >> $REPORT + else + # sodium_utils2 doesn't print the result, + # use time command to get result instead + $TIME -f "real-%e-time" $IWASM_CMD ${t}_segue.aot 2>&1 | grep "real-.*-time" | + awk -F '-' '{printf "%.2f", $2}' >> $REPORT + fi + fi echo -en "\n" >> $REPORT done diff --git a/tests/benchmarks/libsodium/test_pgo.sh b/tests/benchmarks/libsodium/test_pgo.sh new file mode 100755 index 000000000..2dda6a5f9 --- /dev/null +++ b/tests/benchmarks/libsodium/test_pgo.sh @@ -0,0 +1,121 @@ +#!/bin/bash + +# Copyright (C) 2019 Intel Corporation. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +libsodium_CASES="aead_aes256gcm2 aead_aes256gcm aead_chacha20poly13052 aead_chacha20poly1305 \ + aead_xchacha20poly1305 auth2 auth3 auth5 auth6 auth7 auth box2 box7 box8 \ + box_easy2 box_easy box_seal box_seed box chacha20 codecs core1 core2 core3 \ + core4 core5 core6 core_ed25519 core_ristretto255 ed25519_convert generichash2 \ + generichash3 generichash hash3 hash kdf keygen kx metamorphic misuse \ + onetimeauth2 onetimeauth7 onetimeauth pwhash_argon2id pwhash_argon2i \ + pwhash_scrypt_ll pwhash_scrypt randombytes scalarmult2 scalarmult5 \ + scalarmult6 scalarmult7 scalarmult8 scalarmult_ed25519 scalarmult_ristretto255 \ + scalarmult secretbox2 secretbox7 secretbox8 secretbox_easy2 secretbox_easy \ + secretbox secretstream shorthash sign siphashx24 sodium_core sodium_utils2 \ + sodium_utils stream2 stream3 stream4 stream verify1 xchacha20" + +PLATFORM=$(uname -s | tr A-Z a-z) + +readonly OUT_DIR=$PWD/libsodium/zig-out/bin +readonly REPORT=$PWD/report.txt +if [ "$1" = "--sgx" ] && [ "$PLATFORM" = "linux" ]; then + readonly IWASM_CMD="$PWD/../../../product-mini/platforms/${PLATFORM}-sgx/enclave-sample/iwasm" + readonly WAMRC_CMD="$PWD/../../../wamr-compiler/build/wamrc -sgx" +else + readonly IWASM_CMD="$PWD/../../../product-mini/platforms/${PLATFORM}/build/iwasm" + readonly WAMRC_CMD="$PWD/../../../wamr-compiler/build/wamrc" +fi +readonly TIME=/usr/bin/time + +BENCH_NAME_MAX_LEN=20 + +rm -f $REPORT +touch $REPORT + +function print_bench_name() +{ + name=$1 + echo -en "$name" >> $REPORT + name_len=${#name} + if [ $name_len -lt $BENCH_NAME_MAX_LEN ] + then + spaces=$(( $BENCH_NAME_MAX_LEN - $name_len )) + for i in $(eval echo "{1..$spaces}"); do echo -n " " >> $REPORT; done + fi +} + +pushd $OUT_DIR > /dev/null 2>&1 +for t in $libsodium_CASES +do + if [ ! -e "${t}.wasm" ]; then + echo "${t}.wasm doesn't exist, please run build.sh first" + exit + fi + + echo "" + echo "Compile ${t}.wasm to ${t}.aot .." 
+ ${WAMRC_CMD} -o ${t}.aot ${t}.wasm + + echo "" + echo "Compile ${t}.wasm to ${t}_pgo.aot .." + ${WAMRC_CMD} --enable-llvm-pgo -o ${t}_pgo.aot ${t}.wasm + + echo "" + echo "Run ${t}_pgo.aot to generate the raw profile data .." + ${IWASM_CMD} --gen-prof-file=${t}.profraw --dir=. ${t}_pgo.aot + + echo "" + echo "Merge the raw profile data to ${t}.profdata .." + rm -f ${t}.profdata && llvm-profdata merge -output=${t}.profdata ${t}.profraw + + echo "" + echo "Compile ${t}.wasm to ${t}_opt.aot with the profile data .." + ${WAMRC_CMD} --use-prof-file=${t}.profdata -o ${t}_opt.aot ${t}.wasm +done + +# run benchmarks +cd $OUT_DIR + +echo -en "\t\t\t\t\t\tnative\tiwasm-aot\tiwasm-aot-pgo\n" >> $REPORT + +for t in $libsodium_CASES +do + print_bench_name $t + + echo "run $t with native..." + echo -en "\t" >> $REPORT + if [[ $t != "sodium_utils2" ]]; then + ./${t} | awk '{printf "%-10.2f", $0/1000000.0}' >> $REPORT + else + # sodium_utils2 doesn't print the result, + # use time command to get result instead + $TIME -f "real-%e-time" ./${t} 2>&1 | grep "real-.*-time" | + awk -F '-' '{printf "%-10.2f", $2}' >> $REPORT + fi + + echo "run $t with iwasm aot..." + echo -en "\t \t" >> $REPORT + if [[ $t != "sodium_utils2" ]]; then + $IWASM_CMD ${t}.aot | awk '{printf "%-10.2f", $0/1000000.0}' >> $REPORT + else + # sodium_utils2 doesn't print the result, + # use time command to get result instead + $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | + awk -F '-' '{printf "%-10.2f", $2}' >> $REPORT + fi + + echo "run $t with iwasm aot opt..." 
+ echo -en "\t \t" >> $REPORT + if [[ $t != "sodium_utils2" ]]; then + $IWASM_CMD ${t}_opt.aot | awk '{printf "%-10.2f", $0/1000000.0}' >> $REPORT + else + # sodium_utils2 doesn't print the result, + # use time command to get result instead + $TIME -f "real-%e-time" $IWASM_CMD ${t}_opt.aot 2>&1 | grep "real-.*-time" | + awk -F '-' '{printf "%-10.2f", $2}' >> $REPORT + fi + + echo -en "\n" >> $REPORT +done + diff --git a/tests/benchmarks/polybench/README.md b/tests/benchmarks/polybench/README.md index 7808e17d9..d8b3db961 100644 --- a/tests/benchmarks/polybench/README.md +++ b/tests/benchmarks/polybench/README.md @@ -19,3 +19,9 @@ And then run `./build.sh` to build the source code, the folder `out` will be cre Run `./run_aot.sh` to test the benchmark, the native mode and iwasm aot mode will be tested for each workload, and the file `report.txt` will be generated. Run `./run_interp.sh` to test the benchmark, the native mode and iwasm interpreter mode will be tested for each workload, and the file `report.txt` will be generated. + +Run `./test_pgo.sh` to test the benchmark with AOT static PGO (Profile-Guided Optimization) enabled, please refer [here](../README.md#install-llvm-profdata) to install tool `llvm-profdata` and build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`. + +- For Linux, build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`, then run `./test_pgo.sh` to test the benchmark with AOT static PGO (Profile-Guided Optimization) enabled. + +- For Linux-sgx, similarly, build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`, then `make` in the directory `enclave-sample`. And run `./test_pgo.sh --sgx` to test the benchmark. diff --git a/tests/benchmarks/polybench/build.sh b/tests/benchmarks/polybench/build.sh index bc7bf4c10..3e59a9bc5 100755 --- a/tests/benchmarks/polybench/build.sh +++ b/tests/benchmarks/polybench/build.sh @@ -3,6 +3,8 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +PLATFORM=$(uname -s | tr A-Z a-z) + OUT_DIR=$PWD/out WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc POLYBENCH_CASES="datamining linear-algebra medley stencils" @@ -40,6 +42,12 @@ do echo "Compile ${file_name%.*}.wasm into ${file_name%.*}.aot" ${WAMRC_CMD} -o ${OUT_DIR}/${file_name%.*}.aot \ ${OUT_DIR}/${file_name%.*}.wasm + + if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile ${file_name%.*}.wasm into ${file_name%.*}_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/${file_name%.*}_segue.aot \ + ${OUT_DIR}/${file_name%.*}.wasm + fi done done diff --git a/tests/benchmarks/polybench/run_aot.sh b/tests/benchmarks/polybench/run_aot.sh index 17cc098ad..7eb301b2b 100755 --- a/tests/benchmarks/polybench/run_aot.sh +++ b/tests/benchmarks/polybench/run_aot.sh @@ -37,7 +37,11 @@ echo "Start to run cases, the result is written to report.txt" #run benchmarks cd $OUT_DIR -echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +if [[ ${PLATFORM} == "linux" ]]; then + echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT +else + echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +fi for t in $POLYBENCH_CASES do @@ -51,5 +55,11 @@ do echo -en "\t" >> $REPORT $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + if [[ ${PLATFORM} == "linux" ]]; then + echo "run $t with iwasm aot segue .." 
+ echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD ${t}_segue.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + fi + echo -en "\n" >> $REPORT done diff --git a/tests/benchmarks/polybench/run_interp.sh b/tests/benchmarks/polybench/run_interp.sh index f6d5c254d..5dfe760e2 100755 --- a/tests/benchmarks/polybench/run_interp.sh +++ b/tests/benchmarks/polybench/run_interp.sh @@ -37,7 +37,7 @@ echo "Start to run cases, the result is written to report.txt" #run benchmarks cd $OUT_DIR -echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +echo -en "\t\t\t\t\t native\tiwasm-interp\n" >> $REPORT for t in $POLYBENCH_CASES do diff --git a/tests/benchmarks/polybench/test_pgo.sh b/tests/benchmarks/polybench/test_pgo.sh new file mode 100755 index 000000000..6451a5be2 --- /dev/null +++ b/tests/benchmarks/polybench/test_pgo.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +CUR_DIR=$PWD +OUT_DIR=$CUR_DIR/out +REPORT=$CUR_DIR/report.txt +TIME=/usr/bin/time + +PLATFORM=$(uname -s | tr A-Z a-z) +if [ "$1" = "--sgx" ] && [ "$PLATFORM" = "linux" ]; then + IWASM_CMD="$CUR_DIR/../../../product-mini/platforms/${PLATFORM}-sgx/enclave-sample/iwasm" + WAMRC_CMD="$CUR_DIR/../../../wamr-compiler/build/wamrc -sgx" +else + IWASM_CMD="$CUR_DIR/../../../product-mini/platforms/${PLATFORM}/build/iwasm" + WAMRC_CMD="$CUR_DIR/../../../wamr-compiler/build/wamrc" +fi + +BENCH_NAME_MAX_LEN=20 + +POLYBENCH_CASES="2mm 3mm adi atax bicg cholesky correlation covariance \ + deriche doitgen durbin fdtd-2d floyd-warshall gemm gemver \ + gesummv gramschmidt heat-3d jacobi-1d jacobi-2d ludcmp lu \ + mvt nussinov seidel-2d symm syr2k syrk trisolv trmm" + +rm -f $REPORT +touch $REPORT + +function print_bench_name() +{ + name=$1 + echo -en "$name" >> $REPORT + name_len=${#name} + if [ $name_len -lt $BENCH_NAME_MAX_LEN ] + then + spaces=$(( $BENCH_NAME_MAX_LEN - $name_len 
)) + for i in $(eval echo "{1..$spaces}"); do echo -n " " >> $REPORT; done + fi +} + +pushd $OUT_DIR > /dev/null 2>&1 +for t in $POLYBENCH_CASES +do + if [ ! -e "${t}.wasm" ]; then + echo "${t}.wasm doesn't exist, please run build.sh first" + exit + fi + + echo "" + echo "Compile ${t}.wasm to ${t}.aot .." + ${WAMRC_CMD} -o ${t}.aot ${t}.wasm + + echo "" + echo "Compile ${t}.wasm to ${t}_pgo.aot .." + ${WAMRC_CMD} --enable-llvm-pgo -o ${t}_pgo.aot ${t}.wasm + + echo "" + echo "Run ${t}_pgo.aot to generate the raw profile data .." + ${IWASM_CMD} --gen-prof-file=${t}.profraw --dir=. ${t}_pgo.aot + + echo "" + echo "Merge the raw profile data to ${t}.profdata .." + rm -f ${t}.profdata && llvm-profdata merge -output=${t}.profdata ${t}.profraw + + echo "" + echo "Compile ${t}.wasm to ${t}_opt.aot with the profile data .." + ${WAMRC_CMD} --use-prof-file=${t}.profdata -o ${t}_opt.aot ${t}.wasm +done +popd > /dev/null 2>&1 + +echo "Start to run cases, the result is written to report.txt" + +#run benchmarks +cd $OUT_DIR +echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-pgo\n" >> $REPORT + +for t in $POLYBENCH_CASES +do + print_bench_name $t + + echo "run $t with native .." + echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" ./${t}_native 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + + echo "run $t with iwasm aot .." + echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + + echo "run $t with iwasm aot opt .." 
+ echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD ${t}_opt.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + + echo -en "\n" >> $REPORT +done diff --git a/tests/benchmarks/sightglass/README.md b/tests/benchmarks/sightglass/README.md index a446d80ea..ff34c7d70 100644 --- a/tests/benchmarks/sightglass/README.md +++ b/tests/benchmarks/sightglass/README.md @@ -19,3 +19,9 @@ And then run `./build.sh` to build the source code, the folder `out` will be cre Run `./run_aot.sh` to test the benchmark, the native mode and iwasm aot mode will be tested for each workload, and the file `report.txt` will be generated. Run `./run_interp.sh` to test the benchmark, the native mode and iwasm interpreter mode will be tested for each workload, and the file `report.txt` will be generated. + +Run `./test_pgo.sh` to test the benchmark with AOT static PGO (Profile-Guided Optimization) enabled, please refer [here](../README.md#install-llvm-profdata) to install tool `llvm-profdata` and build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`. + +- For Linux, build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`, then run `./test_pgo.sh` to test the benchmark with AOT static PGO (Profile-Guided Optimization) enabled. + +- For Linux-sgx, similarly, build `iwasm` with `cmake -DWAMR_BUILD_STATIC_PGO=1`, then `make` in the directory `enclave-sample`. And run `./test_pgo.sh --sgx` to test the benchmark. diff --git a/tests/benchmarks/sightglass/build.sh b/tests/benchmarks/sightglass/build.sh index c7192c16f..540882285 100755 --- a/tests/benchmarks/sightglass/build.sh +++ b/tests/benchmarks/sightglass/build.sh @@ -3,6 +3,8 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +PLATFORM=$(uname -s | tr A-Z a-z) + OUT_DIR=$PWD/out WAMRC_CMD=$PWD/../../../wamr-compiler/build/wamrc SHOOTOUT_CASES="base64 fib2 gimli heapsort matrix memmove nestedloop \ @@ -34,9 +36,12 @@ do -Wl,--export=app_main -Wl,--export=_start \ ${bench}.c main/main_${bench}.c main/my_libc.c - echo "Compile ${bench}.wasm into ${bench}.aot" ${WAMRC_CMD} -o ${OUT_DIR}/${bench}.aot ${OUT_DIR}/${bench}.wasm + if [[ ${PLATFORM} == "linux" ]]; then + echo "Compile ${bench}.wasm into ${bench}_segue.aot" + ${WAMRC_CMD} --enable-segue -o ${OUT_DIR}/${bench}_segue.aot ${OUT_DIR}/${bench}.wasm + fi done cd .. diff --git a/tests/benchmarks/sightglass/run_aot.sh b/tests/benchmarks/sightglass/run_aot.sh index 7a74a7912..44945b91b 100755 --- a/tests/benchmarks/sightglass/run_aot.sh +++ b/tests/benchmarks/sightglass/run_aot.sh @@ -36,7 +36,11 @@ echo "Start to run cases, the result is written to report.txt" #run benchmarks cd $OUT_DIR -echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +if [[ ${PLATFORM} == "linux" ]]; then + echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-segue\n" >> $REPORT +else + echo -en "\t\t\t\t\t native\tiwasm-aot\n" >> $REPORT +fi for t in $SHOOTOUT_CASES do @@ -50,5 +54,11 @@ do echo -en "\t" >> $REPORT $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + if [[ ${PLATFORM} == "linux" ]]; then + echo "run $t with iwasm aot segue .." 
+ echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD ${t}_segue.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + fi + echo -en "\n" >> $REPORT done diff --git a/tests/benchmarks/sightglass/run_interp.sh b/tests/benchmarks/sightglass/run_interp.sh index 50e94a5db..c3dbb3024 100755 --- a/tests/benchmarks/sightglass/run_interp.sh +++ b/tests/benchmarks/sightglass/run_interp.sh @@ -46,9 +46,9 @@ do echo -en "\t" >> $REPORT $TIME -f "real-%e-time" ./${t}_native 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT - echo "run $t with iwasm aot .." + echo "run $t with iwasm interp .." echo -en "\t" >> $REPORT - $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD ${t}.wasm 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT echo -en "\n" >> $REPORT done diff --git a/tests/benchmarks/sightglass/test_pgo.sh b/tests/benchmarks/sightglass/test_pgo.sh new file mode 100755 index 000000000..4baedf9fd --- /dev/null +++ b/tests/benchmarks/sightglass/test_pgo.sh @@ -0,0 +1,94 @@ +#!/bin/bash + +# Copyright (C) 2019 Intel Corporation. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +CUR_DIR=$PWD +OUT_DIR=$CUR_DIR/out +REPORT=$CUR_DIR/report.txt +TIME=/usr/bin/time + +PLATFORM=$(uname -s | tr A-Z a-z) +if [ "$1" = "--sgx" ] && [ "$PLATFORM" = "linux" ]; then + IWASM_CMD="$CUR_DIR/../../../product-mini/platforms/${PLATFORM}-sgx/enclave-sample/iwasm" + WAMRC_CMD="$CUR_DIR/../../../wamr-compiler/build/wamrc -sgx" +else + IWASM_CMD="$CUR_DIR/../../../product-mini/platforms/${PLATFORM}/build/iwasm" + WAMRC_CMD="$CUR_DIR/../../../wamr-compiler/build/wamrc" +fi + +BENCH_NAME_MAX_LEN=20 + +SHOOTOUT_CASES="base64 fib2 gimli heapsort matrix memmove nestedloop \ + nestedloop2 nestedloop3 random seqhash sieve strchr \ + switch2" + +rm -f $REPORT +touch $REPORT + +function print_bench_name() +{ + name=$1 + echo -en "$name" >> $REPORT + name_len=${#name} + if [ $name_len -lt $BENCH_NAME_MAX_LEN ] + then + spaces=$(( $BENCH_NAME_MAX_LEN - $name_len )) + for i in $(eval echo "{1..$spaces}"); do echo -n " " >> $REPORT; done + fi +} + +pushd $OUT_DIR > /dev/null 2>&1 +for t in $SHOOTOUT_CASES +do + if [ ! -e "${t}.wasm" ]; then + echo "${t}.wasm doesn't exist, please run build.sh first" + exit + fi + + echo "" + echo "Compile ${t}.wasm to ${t}.aot .." + ${WAMRC_CMD} -o ${t}.aot ${t}.wasm + + echo "" + echo "Compile ${t}.wasm to ${t}_pgo.aot .." + ${WAMRC_CMD} --enable-llvm-pgo -o ${t}_pgo.aot ${t}.wasm + + echo "" + echo "Run ${t}_pgo.aot to generate the raw profile data .." + ${IWASM_CMD} --gen-prof-file=${t}.profraw --dir=. ${t}_pgo.aot + + echo "" + echo "Merge the raw profile data to ${t}.profdata .." + rm -f ${t}.profdata && llvm-profdata merge -output=${t}.profdata ${t}.profraw + + echo "" + echo "Compile ${t}.wasm to ${t}_opt.aot with the profile data .." 
+ ${WAMRC_CMD} --use-prof-file=${t}.profdata -o ${t}_opt.aot ${t}.wasm +done +popd > /dev/null 2>&1 + +echo "Start to run cases, the result is written to report.txt" + +#run benchmarks +cd $OUT_DIR +echo -en "\t\t\t\t\t native\tiwasm-aot\tiwasm-aot-pgo\n" >> $REPORT + +for t in $SHOOTOUT_CASES +do + print_bench_name $t + + echo "run $t with native .." + echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" ./${t}_native 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + + echo "run $t with iwasm aot .." + echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD ${t}.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + + echo "run $t with iwasm aot opt .." + echo -en "\t" >> $REPORT + $TIME -f "real-%e-time" $IWASM_CMD ${t}_opt.aot 2>&1 | grep "real-.*-time" | awk -F '-' '{ORS=""; print $2}' >> $REPORT + + echo -en "\n" >> $REPORT +done diff --git a/tests/wamr-test-suites/spec-test-script/all.py b/tests/wamr-test-suites/spec-test-script/all.py index 8b26d6892..bd5b89ce4 100644 --- a/tests/wamr-test-suites/spec-test-script/all.py +++ b/tests/wamr-test-suites/spec-test-script/all.py @@ -73,7 +73,8 @@ def ignore_the_case( if not multi_module_flag and case_name in ["imports", "linking"]: return True - if "i386" == target and case_name in ["float_exprs"]: + # Note: x87 doesn't preserve sNaN and makes some relevant tests fail. 
+ if "i386" == target and case_name in ["float_exprs", "conversions"]: return True if gc_flag: diff --git a/tests/wamr-test-suites/spec-test-script/thread_proposal_ignore_cases.patch b/tests/wamr-test-suites/spec-test-script/thread_proposal_ignore_cases.patch index 41a0d25b8..c411d89c1 100644 --- a/tests/wamr-test-suites/spec-test-script/thread_proposal_ignore_cases.patch +++ b/tests/wamr-test-suites/spec-test-script/thread_proposal_ignore_cases.patch @@ -1,5 +1,22 @@ +diff --git a/test/core/atomic_wait_notify.wast b/test/core/atomic_wait_notify.wast +index 2e312c3..4f35ac5 100644 +--- a/test/core/atomic_wait_notify.wast ++++ b/test/core/atomic_wait_notify.wast +@@ -70,6 +70,7 @@ + (memory (export "shared") 1 1 shared) + ) + ++(; + (thread $T1 (shared (module $Mem)) + (register "mem" $Mem) + (module +@@ -106,3 +107,4 @@ + + (wait $T1) + (wait $T2) ++;) diff --git a/test/core/binary.wast b/test/core/binary.wast -index b9fa438c..a5711dd3 100644 +index b9fa438..a5711dd 100644 --- a/test/core/binary.wast +++ b/test/core/binary.wast @@ -45,7 +45,7 @@ @@ -163,7 +180,7 @@ index b9fa438c..a5711dd3 100644 ;; 1 elem segment declared, 2 given (assert_malformed diff --git a/test/core/elem.wast b/test/core/elem.wast -index 1ea2b061..8eded377 100644 +index 1ea2b06..8eded37 100644 --- a/test/core/elem.wast +++ b/test/core/elem.wast @@ -12,10 +12,10 @@ @@ -195,7 +212,7 @@ index 1ea2b061..8eded377 100644 (assert_return (invoke $module1 "call-9") (i32.const 70)) +;) diff --git a/test/core/thread.wast b/test/core/thread.wast -index c3456a61..83fc2815 100644 +index c3456a6..83fc281 100644 --- a/test/core/thread.wast +++ b/test/core/thread.wast @@ -2,6 +2,7 @@ diff --git a/tests/wamr-test-suites/test_wamr.sh b/tests/wamr-test-suites/test_wamr.sh index 67868b9c9..e263e5ab6 100755 --- a/tests/wamr-test-suites/test_wamr.sh +++ b/tests/wamr-test-suites/test_wamr.sh @@ -430,7 +430,7 @@ function spec_test() # sgx only enable in interp mode and aot mode if [[ ${SGX_OPT} == "--sgx" ]];then - 
if [[ $1 == 'classic-interp' || $1 == 'fast-interp' || $1 == 'aot' ]]; then + if [[ $1 == 'classic-interp' || $1 == 'fast-interp' || $1 == 'aot' || $1 == 'fast-jit' ]]; then ARGS_FOR_SPEC_TEST+="-x " fi fi @@ -628,7 +628,7 @@ function standalone_test() function build_iwasm_with_cfg() { - echo "Build iwasm with compile flags with " $* " for spec test" \ + echo "Build iwasm with compile flags " $* " for spec test" \ | tee -a ${REPORT_DIR}/spec_test_report.txt if [[ ${SGX_OPT} == "--sgx" ]];then @@ -754,6 +754,23 @@ function trigger() EXTRA_COMPILE_FLAGS+=" -DWAMR_BUILD_LIB_WASI_THREADS=1" fi + echo "SANITIZER IS" $WAMR_BUILD_SANITIZER + + if [[ "$WAMR_BUILD_SANITIZER" == "ubsan" ]]; then + echo "Setting run with ubsan" + EXTRA_COMPILE_FLAGS+=" -DWAMR_BUILD_SANITIZER=ubsan" + fi + + if [[ "$WAMR_BUILD_SANITIZER" == "asan" ]]; then + echo "Setting run with asan" + EXTRA_COMPILE_FLAGS+=" -DWAMR_BUILD_SANITIZER=asan" + fi + + if [[ "$WAMR_BUILD_SANITIZER" == "tsan" ]]; then + echo "Setting run with tsan" + EXTRA_COMPILE_FLAGS+=" -DWAMR_BUILD_SANITIZER=tsan" + fi + for t in "${TYPE[@]}"; do case $t in "classic-interp") diff --git a/wamr-compiler/CMakeLists.txt b/wamr-compiler/CMakeLists.txt index 0ae821af6..08f935bb6 100644 --- a/wamr-compiler/CMakeLists.txt +++ b/wamr-compiler/CMakeLists.txt @@ -21,7 +21,7 @@ else() add_definitions(-DCOMPILING_WASM_RUNTIME_API=1) endif() -set (CMAKE_CXX_STANDARD 14) +set (CMAKE_CXX_STANDARD 17) if (NOT DEFINED WAMR_BUILD_PLATFORM) set (WAMR_BUILD_PLATFORM "linux") diff --git a/wamr-compiler/main.c b/wamr-compiler/main.c index bd8691c4b..ce6ed7006 100644 --- a/wamr-compiler/main.c +++ b/wamr-compiler/main.c @@ -37,6 +37,9 @@ print_help() printf(" by default it is disabled in all 64-bit platforms except SGX and\n"); printf(" in these platforms runtime does bounds checks with hardware trap,\n"); printf(" and by default it is enabled in all 32-bit platforms\n"); + printf(" CAVEAT: --bounds-checks=0 enables some optimizations\n"); + printf(" 
which make the compiled AOT module incompatible\n"); + printf(" with a runtime without the hardware bounds checks.\n"); printf(" --stack-bounds-checks=1/0 Enable or disable the bounds checks for native stack:\n"); printf(" if the option isn't set, the status is same as `--bounds-check`,\n"); printf(" if the option is set:\n"); @@ -65,6 +68,14 @@ print_help() printf(" --enable-indirect-mode Enalbe call function through symbol table but not direct call\n"); printf(" --disable-llvm-intrinsics Disable the LLVM built-in intrinsics\n"); printf(" --disable-llvm-lto Disable the LLVM link time optimization\n"); + printf(" --enable-llvm-pgo Enable LLVM PGO (Profile-Guided Optimization)\n"); + printf(" --use-prof-file= Use profile file collected by LLVM PGO (Profile-Guided Optimization)\n"); + printf(" --enable-segue[=] Enable using segment register GS as the base address of linear memory,\n"); + printf(" only available on linux/linux-sgx x86-64, which may improve performance,\n"); + printf(" flags can be: i32.load, i64.load, f32.load, f64.load, v128.load,\n"); + printf(" i32.store, i64.store, f32.store, f64.store, v128.store\n"); + printf(" Use comma to separate, e.g. --enable-segue=i32.load,i64.store\n"); + printf(" and --enable-segue means all flags are added.\n"); printf(" --emit-custom-sections=
\n"); printf(" Emit the specified custom sections to AoT file, using comma to separate\n"); printf(" multiple names, e.g.\n"); @@ -84,7 +95,7 @@ print_help() } while (0) /** - * Split a strings into an array of strings + * Split a string into an array of strings * Returns NULL on failure * Memory must be freed by caller * Based on: http://stackoverflow.com/a/11198630/471795 @@ -126,6 +137,57 @@ split_string(char *str, int *count, const char *delimer) return res; } +static uint32 +resolve_segue_flags(char *str_flags) +{ + uint32 segue_flags = 0; + int32 flag_count, i; + char **flag_list; + + flag_list = split_string(str_flags, &flag_count, ","); + if (flag_list) { + for (i = 0; i < flag_count; i++) { + if (!strcmp(flag_list[i], "i32.load")) { + segue_flags |= 1 << 0; + } + else if (!strcmp(flag_list[i], "i64.load")) { + segue_flags |= 1 << 1; + } + else if (!strcmp(flag_list[i], "f32.load")) { + segue_flags |= 1 << 2; + } + else if (!strcmp(flag_list[i], "f64.load")) { + segue_flags |= 1 << 3; + } + else if (!strcmp(flag_list[i], "v128.load")) { + segue_flags |= 1 << 4; + } + else if (!strcmp(flag_list[i], "i32.store")) { + segue_flags |= 1 << 8; + } + else if (!strcmp(flag_list[i], "i64.store")) { + segue_flags |= 1 << 9; + } + else if (!strcmp(flag_list[i], "f32.store")) { + segue_flags |= 1 << 10; + } + else if (!strcmp(flag_list[i], "f64.store")) { + segue_flags |= 1 << 11; + } + else if (!strcmp(flag_list[i], "v128.store")) { + segue_flags |= 1 << 12; + } + else { + /* invalid flag */ + segue_flags = (uint32)-1; + break; + } + } + free(flag_list); + } + return segue_flags; +} + int main(int argc, char *argv[]) { @@ -272,6 +334,23 @@ main(int argc, char *argv[]) else if (!strcmp(argv[0], "--disable-llvm-lto")) { option.disable_llvm_lto = true; } + else if (!strcmp(argv[0], "--enable-llvm-pgo")) { + option.enable_llvm_pgo = true; + } + else if (!strncmp(argv[0], "--use-prof-file=", 16)) { + if (argv[0][16] == '\0') + PRINT_HELP_AND_EXIT(); + option.use_prof_file 
= argv[0] + 16; + } + else if (!strcmp(argv[0], "--enable-segue")) { + /* all flags are enabled */ + option.segue_flags = 0x1F1F; + } + else if (!strncmp(argv[0], "--enable-segue=", 15)) { + option.segue_flags = resolve_segue_flags(argv[0] + 15); + if (option.segue_flags == (uint32)-1) + PRINT_HELP_AND_EXIT(); + } else if (!strncmp(argv[0], "--emit-custom-sections=", 23)) { int len = 0; if (option.custom_sections) {