diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml deleted file mode 100644 index f55e66eb1..000000000 --- a/.github/workflows/android.yml +++ /dev/null @@ -1,112 +0,0 @@ -# This is a basic workflow to help you get started with Actions - -name: Android - -# Controls when the action will run. -on: - # Triggers the workflow on push or pull request events but only for the master branch - push: - branches: [ main ] - pull_request: - branches: [ main ] - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel -jobs: - # This workflow contains a single job called "build" - build: - # The type of runner that the job will run on - runs-on: ubuntu-latest - - # Steps represent a sequence of tasks that will be executed as part of the job - steps: - - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v2 - - - name: Build iwasm [default] - run: | - cd product-mini/platforms/android - mkdir build && cd build - cmake .. - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [classic interp] - run: | - cd product-mini/platforms/android - mkdir build && cd build - cmake .. -DWAMR_BUILD_FAST_INTERP=0 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [multi module] - run: | - cd product-mini/platforms/android - mkdir build && cd build - cmake .. -DWAMR_BUILD_MULTI_MODULE=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [lib-pthread] - run: | - cd product-mini/platforms/android - mkdir build && cd build - cmake .. -DWAMR_BUILD_LIB_PTHREAD=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [aot only] - run: | - cd product-mini/platforms/android - mkdir build && cd build - cmake .. -DWAMR_BUILD_AOT=1 -DWAMR_BUILD_INTERP=0 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [interp only] - run: | - cd product-mini/platforms/android - mkdir build && cd build - cmake .. -DWAMR_BUILD_AOT=0 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [memory profiling] - run: | - cd product-mini/platforms/android - mkdir build && cd build - cmake .. -DWAMR_BUILD_MEMORY_PROFILING=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [tail call] - run: | - cd product-mini/platforms/android - mkdir build && cd build - cmake .. -DWAMR_BUILD_TAIL_CALL=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [custom name section] - run: | - cd product-mini/platforms/android - mkdir build && cd build - cmake .. -DWAMR_BUILD_CUSTOM_NAME_SECTION=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [disable hardware boundary check] - run: | - cd product-mini/platforms/android - mkdir build && cd build - cmake .. -DWAMR_DISABLE_HW_BOUND_CHECK=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [reference types] - run: | - cd product-mini/platforms/android - mkdir build && cd build - cmake .. -DWAMR_BUILD_REF_TYPES=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [128-bit SIMD] - run: | - cd product-mini/platforms/android - mkdir build && cd build - cmake .. -DWAMR_BUILD_SIMD=1 - make -j $(nproc) - cd .. 
&& rm -rf build diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index c73b3a867..ed701d1d9 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -16,119 +16,154 @@ on: - 'doc/**' jobs: - build: + build_llvm_libraries: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-18.04, ubuntu-20.04] steps: - - uses: actions/checkout@v2 - - - name: Build iwasm [default] - run: | - cd product-mini/platforms/linux - mkdir build && cd build - cmake .. - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [classic interp] - run: | - cd product-mini/platforms/linux - mkdir build && cd build - cmake .. -DWAMR_BUILD_FAST_INTERP=0 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [multi module] - run: | - cd product-mini/platforms/linux - mkdir build && cd build - cmake .. -DWAMR_BUILD_MULTI_MODULE=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [lib-pthread] - run: | - cd product-mini/platforms/linux - mkdir build && cd build - cmake .. -DWAMR_BUILD_LIB_PTHREAD=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [aot only] - run: | - cd product-mini/platforms/linux - mkdir build && cd build - cmake .. -DWAMR_BUILD_AOT=1 -DWAMR_BUILD_INTERP=0 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [interp only] - run: | - cd product-mini/platforms/linux - mkdir build && cd build - cmake .. -DWAMR_BUILD_AOT=0 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [memory profiling] - run: | - cd product-mini/platforms/linux - mkdir build && cd build - cmake .. -DWAMR_BUILD_MEMORY_PROFILING=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [tail call] - run: | - cd product-mini/platforms/linux - mkdir build && cd build - cmake .. -DWAMR_BUILD_TAIL_CALL=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [custom name section] - run: | - cd product-mini/platforms/linux - mkdir build && cd build - cmake .. -DWAMR_BUILD_CUSTOM_NAME_SECTION=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [disable hardware boundary check] - run: | - cd product-mini/platforms/linux - mkdir build && cd build - cmake .. -DWAMR_DISABLE_HW_BOUND_CHECK=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [reference types] - run: | - cd product-mini/platforms/linux - mkdir build && cd build - cmake .. -DWAMR_BUILD_REF_TYPES=1 - make -j $(nproc) - cd .. && rm -rf build - - name: Build iwasm [128-bit SIMD] - run: | - cd product-mini/platforms/linux - mkdir build && cd build - cmake .. -DWAMR_BUILD_SIMD=1 - make -j $(nproc) - cd .. 
&& rm -rf build + - name: checkout + uses: actions/checkout@v2 - name: Cache LLVM libraries - uses: actions/cache@v2 id: cache_llvm + uses: actions/cache@v2 env: cache-name: llvm_libraries with: - path: ./core/deps/llvm - key: ${{ runner.os }}-build-${{env.cache-name}} - restore-keys: ${{ runner.os }}-build-${{env.cache-name}} + path: ./core/deps/llvm/build/LLVM-13.0.0-Linux.tar.gz + key: ${{ matrix.os }}-build-${{env.cache-name}} + restore-keys: ${{ matrix.os }}-build-${{env.cache-name}} - name: Build llvm and clang from source + id: build_llvm if: steps.cache_llvm.outputs.cache-hit != 'true' run: | cd wamr-compiler ./build_llvm.sh + cd ../core/deps/llvm/build/ + make package + + build_wamrc: + needs: build_llvm_libraries + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-18.04, ubuntu-20.04] + steps: + - name: checkout + uses: actions/checkout@v2 + - name: Get LLVM libraries + id: cache_llvm + uses: actions/cache@v2 + env: + cache-name: llvm_libraries + with: + path: ./core/deps/llvm/build/LLVM-13.0.0-Linux.tar.gz + key: ${{ matrix.os }}-build-${{env.cache-name}} + restore-keys: ${{ matrix.os }}-build-${{env.cache-name}} + - name: Quit if cache miss + if: steps.cache_llvm.outputs.cache-hit != 'true' + run: exit 1 + - name: Extract the LLVM package + run: tar xf LLVM-13.0.0-Linux.tar.gz --strip-components=1 + working-directory: ./core/deps/llvm/build - name: Build wamrc run: | - cd wamr-compiler mkdir build && cd build - cmake .. - make -j $(nproc) - cd .. + cmake .. && make -j $(nproc) + working-directory: wamr-compiler + - name: Upload Wamrc + uses: actions/upload-artifact@v2 + with: + name: wamrc_bin-${{ matrix.os }} + path: ./wamr-compiler/build/wamrc + retention-days: 1 + + build_iwasm: + needs: build_llvm_libraries + runs-on: ${{ matrix.os }} + strategy: + matrix: + make_options: [ + # Running mode + "-DWAMR_BUILD_INERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=1", + "-DWAMR_BUILD_INERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=0", + "-DWAMR_BUILD_INERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_AOT=0", + "-DWAMR_BUILD_INERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_AOT=1", + # Features + "-DWAMR_BUILD_CUSTOM_NAME_SECTION=1", + "-DWAMR_BUILD_LIB_PTHREAD=1", + "-DWAMR_BUILD_MEMORY_PROFILING=1", + "-DWAMR_BUILD_MULTI_MODULE=1", + "-DWAMR_BUILD_REF_TYPES=1", + "-DWAMR_BUILD_SIMD=1", + "-DWAMR_BUILD_TAIL_CALL=1", + "-DWAMR_DISABLE_HW_BOUND_CHECK=1", + ] + os: [ubuntu-18.04, ubuntu-20.04] + platform: [linux, android] + steps: + - name: checkout + uses: actions/checkout@v2 + - name: Get LLVM libraries + id: cache_llvm + uses: actions/cache@v2 + env: + cache-name: llvm_libraries + with: + path: ./core/deps/llvm/build/LLVM-13.0.0-Linux.tar.gz + key: ${{ matrix.os }}-build-${{env.cache-name}} + restore-keys: ${{ matrix.os }}-build-${{env.cache-name}} + - name: Quit if cache miss + if: steps.cache_llvm.outputs.cache-hit != 'true' + run: exit 1 + - name: Extract the LLVM package + run: tar xf LLVM-13.0.0-Linux.tar.gz --strip-components=1 + working-directory: ./core/deps/llvm/build + - name: Build iwasm + run: | + mkdir build && cd build + cmake .. ${{ matrix.make_options }} && make -j $(nproc) + cd .. 
&& rm -rf build + working-directory: product-mini/platforms/${{ matrix.platform }} + + build_samples: + needs: [build_llvm_libraries, build_wamrc] + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-18.04, ubuntu-20.04] + make_options: [ + # Running mode + "-DWAMR_BUILD_INERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=1", + "-DWAMR_BUILD_INERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=0", + "-DWAMR_BUILD_INERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_AOT=0", + "-DWAMR_BUILD_INERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_AOT=1", + ] + steps: + - name: checkout + uses: actions/checkout@v2 + - name: Get LLVM libraries + id: cache_llvm + uses: actions/cache@v2 + env: + cache-name: llvm_libraries + with: + path: ./core/deps/llvm/build/LLVM-13.0.0-Linux.tar.gz + key: ${{ matrix.os }}-build-${{env.cache-name}} + restore-keys: ${{ matrix.os }}-build-${{env.cache-name}} + - name: Quit if cache miss + if: steps.cache_llvm.outputs.cache-hit != 'true' + run: exit 1 + - name: Extract the LLVM package + run: tar xf LLVM-13.0.0-Linux.tar.gz --strip-components=1 + working-directory: ./core/deps/llvm/build + - name: Download Wamrc + uses: actions/download-artifact@v2 + with: + name: wamrc_bin-${{ matrix.os }} + path: ./wamr-compiler/build + - name: Give execution rights + run: chmod a+x ./wamr-compiler/build/wamrc - name: download and install wasi-sdk run: | cd /opt @@ -147,39 +182,7 @@ jobs: run: | cd samples/wasm-c-api mkdir build && cd build - cmake .. - make -j $(nproc) - ./callback - ./callback_chain - ./global - ./hello - ./hostref - ./memory - ./reflect - ./table - ./trap - cd .. && rm -r build - - name: Build Sample [wasm-c-api] [Jit] - run: | - cd samples/wasm-c-api - mkdir build && cd build - cmake -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_AOT=1 .. - make -j $(nproc) - ./callback - ./callback_chain - ./global - ./hello - ./hostref - ./memory - ./reflect - ./table - ./trap - cd .. && rm -r build - - name: Build Sample [wasm-c-api] [Aot] - run: | - cd samples/wasm-c-api - mkdir build && cd build - cmake -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_AOT=1 .. + cmake .. ${{ matrix.make_options }} make -j $(nproc) ./callback ./callback_chain diff --git a/README.md b/README.md index 61ff6b3f5..a85e99fd6 100644 --- a/README.md +++ b/README.md @@ -77,8 +77,6 @@ For **Windows**: ```shell cd wamr-compiler python build_llvm.py -open LLVM.sln in wasm-micro-runtime\core\deps\llvm\win32build with Visual Studio -build LLVM.sln Release mkdir build && cd build cmake .. cmake --build . --config Release diff --git a/build-scripts/build_llvm.py b/build-scripts/build_llvm.py new file mode 100755 index 000000000..1a06e04a4 --- /dev/null +++ b/build-scripts/build_llvm.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2019 Intel Corporation. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + +import argparse +import os +import pathlib +import shlex +import shutil +import subprocess +import sys + + +def clone_llvm(dst_dir, llvm_repo, llvm_branch): + """ + any error will raise CallProcessError + """ + llvm_dir = dst_dir.joinpath("llvm").resolve() + + if not llvm_dir.exists(): + print(f"Clone llvm to {llvm_dir} ...") + GIT_CLONE_CMD = f"git clone --depth 1 --branch {llvm_branch} {llvm_repo} llvm" + subprocess.check_output(shlex.split(GIT_CLONE_CMD), cwd=dst_dir) + else: + print(f"There is an LLVM local repo in {llvm_dir}, keep using it") + + return llvm_dir + + +def build_llvm(llvm_dir, platform, backends): + LLVM_COMPILE_OPTIONS = [ + '-DCMAKE_BUILD_TYPE:STRING="Release"', + "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", + "-DLLVM_APPEND_VC_REV:BOOL=ON", + "-DLLVM_BUILD_BENCHMARKS:BOOL=OFF", + "-DLLVM_BUILD_DOCS:BOOL=OFF", + "-DLLVM_BUILD_EXAMPLES:BOOL=OFF", + "-DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF", + "-DLLVM_BUILD_TESTS:BOOL=OFF", + "-DLLVM_CCACHE_BUILD:BOOL=OFF", + "-DLLVM_ENABLE_BINDINGS:BOOL=OFF", + "-DLLVM_ENABLE_IDE:BOOL=OFF", + "-DLLVM_ENABLE_LIBXML2:BOOL=OFF", + "-DLLVM_ENABLE_TERMINFO:BOOL=OFF", + "-DLLVM_ENABLE_ZLIB:BOOL=OFF", + "-DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF", + "-DLLVM_INCLUDE_DOCS:BOOL=OFF", + "-DLLVM_INCLUDE_EXAMPLES:BOOL=OFF", + "-DLLVM_INCLUDE_UTILS:BOOL=OFF", + "-DLLVM_INCLUDE_TESTS:BOOL=OFF", + "-DLLVM_INCLUDE_TOOLS:BOOL=OFF", + "-DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON", + ] + + LLVM_EXTRA_COMPILER_OPTIONS = { + "arc": [ + '-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="ARC"', + "-DLLVM_ENABLE_LIBICUUC:BOOL=OFF", + "-DLLVM_ENABLE_LIBICUDATA:BOOL=OFF", + ], + "xtensa": [ + '-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="Xtensa"', + ], + "windows": [ + "-DCMAKE_INSTALL_PREFIX=LLVM-install", + ], + "default": [], + } + + LLVM_TARGETS_TO_BUILD = [ + "-DLLVM_TARGETS_TO_BUILD:STRING=" + ";".join(backends) + if backends + else '-DLLVM_TARGETS_TO_BUILD:STRING="AArch64;ARM;Mips;RISCV;X86"' + ] + + if not llvm_dir.exists(): + raise Exception(f"{llvm_dir} doesn't exist") + + build_dir = llvm_dir.joinpath( + "win32build" if "windows" == platform else "build" + ).resolve() + build_dir.mkdir(exist_ok=True) + + lib_llvm_core_library = build_dir.joinpath("lib/libLLVMCore.a").resolve() + if lib_llvm_core_library.exists(): + print(f"Please remove {build_dir} manually and try again") + return + + compile_options = " ".join( + LLVM_COMPILE_OPTIONS + + LLVM_EXTRA_COMPILER_OPTIONS.get( + platform, LLVM_EXTRA_COMPILER_OPTIONS["default"] + ) + + LLVM_TARGETS_TO_BUILD + ) + + CONFIG_CMD = f"cmake {compile_options} ../llvm " + subprocess.check_call(shlex.split(CONFIG_CMD), cwd=build_dir) + + BUILD_CMD = f"cmake --build . 
--parallel {os.cpu_count()}" + ( + " --config Release" if "windows" == platform else "" + ) + + subprocess.check_call(shlex.split(BUILD_CMD), cwd=build_dir) + + return build_dir + + +def main(): + parser = argparse.ArgumentParser(description="build necessary LLVM libraries") + parser.add_argument( + "--platform", + type=str, + choices=["android", "arc", "darwin", "linux", "windows", "xtensa"], + help="identify current platform", + ) + parser.add_argument( + "--arch", + nargs="+", + type=str, + choices=["AArch64", "ARC", "ARM", "Mips", "RISCV", "X86", "Xtensa"], + help="identify LLVM supported backends, separate by space, like '--arch ARM Mips X86'", + ) + options = parser.parse_args() + + # if the "platform" is not identified in the command line option, + # detect it + if not options.platform: + if sys.platform.startswith("win32") or sys.platform.startswith("msys"): + platform = "windows" + elif sys.platform.startswith("darwin"): + platform = "darwin" + else: + platform = "linux" + else: + platform = options.platform + + print(f"========== Build LLVM for {platform} ==========\n") + + llvm_repo_and_branch = { + "arc": { + "repo": "https://github.com/llvm/llvm-project.git", + "branch": "release/13.x" + }, + "xtensa": { + "repo": "https://github.com/espressif/llvm-project.git", + "branch": "xtensa_release_11.0.0", + }, + "default": { + "repo": "https://github.com/llvm/llvm-project.git", + "branch": "release/13.x", + }, + } + + # retrieve the real file + current_file = pathlib.Path(__file__) + if current_file.is_symlink(): + current_file = pathlib.Path(os.readlink(current_file)) + + current_dir = current_file.parent.resolve() + deps_dir = current_dir.joinpath("../core/deps").resolve() + + print(f"==================== CLONE LLVM ====================") + llvm_info = llvm_repo_and_branch.get(platform, llvm_repo_and_branch["default"]) + llvm_dir = clone_llvm(deps_dir, llvm_info["repo"], llvm_info["branch"]) + + print() + print(f"==================== BUILD LLVM ====================") + build_llvm(llvm_dir, platform, options.arch) + + print() + + +if __name__ == "__main__": + main() diff --git a/ci/Dockerfile b/ci/Dockerfile index 7c3bed58a..1a176d352 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -1,18 +1,83 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -FROM ubuntu:18.04 +ARG VARIANT=need_to_assign +FROM ubuntu:${VARIANT} + +ARG DEBIAN_FRONTEND=noninteractive +ENV TZ=Asian/Shanghai RUN apt update \ && apt install -y apt-transport-https ca-certificates gnupg \ - software-properties-common wget lsb-release curl build-essential + tzdata lsb-release software-properties-common build-essential \ + apt-utils curl wget git tree unzip zip vim # # CMAKE (https://apt.kitware.com/) -RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null \ - && apt purge --auto-remove cmake \ - && apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' \ +RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg > /dev/null \ + && echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null \ && apt update \ - && apt-get install -y kitware-archive-keyring \ - && rm /etc/apt/trusted.gpg.d/kitware.gpg \ - && apt-get install -y cmake + && rm /usr/share/keyrings/kitware-archive-keyring.gpg \ + && apt install -y kitware-archive-keyring \ + && apt install -y cmake + +# +# install emsdk (may not necessary ?) +RUN cd /opt \ + && git clone https://github.com/emscripten-core/emsdk.git +RUN cd /opt/emsdk \ + && git pull \ + && ./emsdk install 2.0.26 \ + && ./emsdk activate 2.0.26 \ + && echo "source /opt/emsdk/emsdk_env.sh" >> /root/.bashrc + +# +#install wabt +ARG WABT_VER=1.0.23 +RUN wget -c https://github.com/WebAssembly/wabt/releases/download/${WABT_VER}/wabt-${WABT_VER}-ubuntu.tar.gz -P /opt +RUN tar xf /opt/wabt-${WABT_VER}-ubuntu.tar.gz -C /opt \ + && ln -fs /opt/wabt-${WABT_VER} /opt/wabt +RUN rm /opt/wabt-${WABT_VER}-ubuntu.tar.gz + +# +# install binaryen +ARG BINARYEN_VER=version_101 +RUN wget -c https://github.com/WebAssembly/binaryen/releases/download/${BINARYEN_VER}/binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz -P /opt +RUN tar xf /opt/binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz -C /opt \ + && ln -fs /opt/binaryen-${BINARYEN_VER} /opt/binaryen +RUN rm /opt/binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz + + +# +# install bazelisk +ARG BAZELISK_VER=1.10.1 +RUN mkdir /opt/bazelisk +RUN wget -c https://github.com/bazelbuild/bazelisk/releases/download/v${BAZELISK_VER}/bazelisk-linux-amd64 -P /opt/bazelisk +RUN chmod a+x /opt/bazelisk/bazelisk-linux-amd64 \ + && ln -fs /opt/bazelisk/bazelisk-linux-amd64 /opt/bazelisk/bazel + +# set path +RUN echo "PATH=/opt/clang_llvm/bin:/opt/wasi-sdk/bin:/opt/wabt/bin:/opt/binaryen/bin:/opt/bazelisk:${PATH}" >> /root/.bashrc + +# +# install +RUN apt update \ + && apt install -y ninja-build python2.7 valgrind + +# +# ocaml +RUN apt update \ + && apt install -y ocaml ocamlbuild + +# +# PS +RUN echo "PS1='\n[ \u@wamr-dev-docker \W ]\n$ '" >> /root/.bashrc + +# Clean up +RUN apt-get autoremove -y \ + && apt-get clean -y \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /tmp/* + +VOLUME workspace +WORKDIR workspace diff --git a/ci/build_wamr.sh b/ci/build_wamr.sh index 9d02aba84..9b944662e 100755 --- a/ci/build_wamr.sh +++ b/ci/build_wamr.sh @@ -1,24 +1,33 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright (C) 2019 Intel Corporation. All rights reserved. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -docker build -t wamr_dev:0.1 -f Dockerfile . 
\ - && docker run --rm -it \ - --name wamr_building \ - --mount type=bind,src=$(realpath .)/..,dst=/source \ - --workdir /source \ - wamr_dev:0.1 \ - /bin/bash -c "\ - pushd product-mini/platforms/linux \ - && mkdir -p build \ - && pushd build \ - && rm -rf * \ - && cmake .. \ - && make \ - && popd \ - && popd \ - && echo 'Copying binary for image build' \ - && mkdir -p build_out \ - && rm build_out/* \ - && cp -f product-mini/platforms/linux/build/iwasm build_out/iwasm" +readonly CURRENT_PATH=$(dirname "$(realpath "$0")") +readonly ROOT=$(realpath "${CURRENT_PATH}/..") +readonly VARIANT=$(lsb_release -c | awk '{print $2}') + +docker build \ + --build-arg VARIANT=${VARIANT} \ + --memory=4G --cpu-quota=50000 \ + -t wamr_dev_${VARIANT}:0.1 -f "${CURRENT_PATH}"/Dockerfile "${CURRENT_PATH}" && + docker run --rm -it \ + --cpus=".5" \ + --memory=4G \ + --name wamr_build_env \ + --mount type=bind,src="${ROOT}",dst=/workspace \ + wamr_dev_${VARIANT}:0.1 \ + /bin/bash -c "\ + pwd \ + && pushd product-mini/platforms/linux \ + && rm -rf build \ + && mkdir build \ + && pushd build \ + && cmake .. \ + && make \ + && popd \ + && popd \ + && echo 'Copying the binary ...' \ + && rm -rf build_out \ + && mkdir build_out \ + && cp product-mini/platforms/linux/build/iwasm build_out/iwasm" diff --git a/core/iwasm/compilation/aot_compiler.c b/core/iwasm/compilation/aot_compiler.c index 097d4f901..695752ee6 100644 --- a/core/iwasm/compilation/aot_compiler.c +++ b/core/iwasm/compilation/aot_compiler.c @@ -21,6 +21,7 @@ #include "simd/simd_bitwise_ops.h" #include "simd/simd_bool_reductions.h" #include "simd/simd_comparisons.h" +#include "simd/simd_conversions.h" #include "simd/simd_construct_values.h" #include "simd/simd_conversions.h" #include "simd/simd_floating_point.h" @@ -603,7 +604,7 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) break; case WASM_OP_F32_CONST: - p_f32 = (uint8*)&f32_const; + p_f32 = (uint8 *)&f32_const; for (i = 0; i < sizeof(float32); i++) *p_f32++ = *frame_ip++; if (!aot_compile_op_f32_const(comp_ctx, func_ctx, f32_const)) @@ -611,7 +612,7 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) break; case WASM_OP_F64_CONST: - p_f64 = (uint8*)&f64_const; + p_f64 = (uint8 *)&f64_const; for (i = 0; i < sizeof(float64); i++) *p_f64++ = *frame_ip++; if (!aot_compile_op_f64_const(comp_ctx, func_ctx, f64_const)) @@ -989,7 +990,7 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) { uint32 seg_index; read_leb_uint32(frame_ip, frame_ip_end, seg_index); - frame_ip ++; + frame_ip++; if (!aot_compile_op_memory_init(comp_ctx, func_ctx, seg_index)) return false; break; @@ -1011,7 +1012,7 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) } case WASM_OP_MEMORY_FILL: { - frame_ip ++; + frame_ip++; if (!aot_compile_op_memory_fill(comp_ctx, func_ctx)) return false; break; @@ -1121,9 +1122,9 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) goto op_atomic_i32_load; case WASM_OP_ATOMIC_I32_LOAD16_U: bytes = 2; - op_atomic_i32_load: - if (!aot_compile_op_i32_load(comp_ctx, func_ctx, align, - offset, bytes, sign, true)) +op_atomic_i32_load: + if (!aot_compile_op_i32_load(comp_ctx, func_ctx, align, offset, + bytes, sign, true)) return false; break; @@ -1138,9 +1139,9 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) goto op_atomic_i64_load; case WASM_OP_ATOMIC_I64_LOAD32_U: bytes = 4; - op_atomic_i64_load: - if (!aot_compile_op_i64_load(comp_ctx, func_ctx, align, - offset, bytes, sign, true)) +op_atomic_i64_load: + if 
(!aot_compile_op_i64_load(comp_ctx, func_ctx, align, offset, + bytes, sign, true)) return false; break; @@ -1152,9 +1153,9 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) goto op_atomic_i32_store; case WASM_OP_ATOMIC_I32_STORE16: bytes = 2; - op_atomic_i32_store: - if (!aot_compile_op_i32_store(comp_ctx, func_ctx, align, - offset, bytes, true)) +op_atomic_i32_store: + if (!aot_compile_op_i32_store(comp_ctx, func_ctx, align, offset, + bytes, true)) return false; break; @@ -1169,9 +1170,9 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) goto op_atomic_i64_store; case WASM_OP_ATOMIC_I64_STORE32: bytes = 4; - op_atomic_i64_store: - if (!aot_compile_op_i64_store(comp_ctx, func_ctx, align, - offset, bytes, true)) +op_atomic_i64_store: + if (!aot_compile_op_i64_store(comp_ctx, func_ctx, align, offset, + bytes, true)) return false; break; @@ -1202,10 +1203,9 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) case WASM_OP_ATOMIC_RMW_I64_CMPXCHG32_U: bytes = 4; op_type = VALUE_TYPE_I64; - op_atomic_cmpxchg: - if (!aot_compile_op_atomic_cmpxchg(comp_ctx, func_ctx, - op_type, align, - offset, bytes)) +op_atomic_cmpxchg: + if (!aot_compile_op_atomic_cmpxchg(comp_ctx, func_ctx, op_type, + align, offset, bytes)) return false; break; @@ -1217,8 +1217,7 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) COMPILE_ATOMIC_RMW(Xchg, XCHG); build_atomic_rmw: - if (!aot_compile_op_atomic_rmw(comp_ctx, func_ctx, - bin_op, op_type, + if (!aot_compile_op_atomic_rmw(comp_ctx, func_ctx, bin_op, op_type, align, offset, bytes)) return false; break; @@ -1239,7 +1238,9 @@ build_atomic_rmw: } opcode = *frame_ip++; + /* follow the order of enum WASMSimdEXTOpcode in wasm_opcode.h */ switch (opcode) { + /* Memory instruction */ case SIMD_v128_load: { read_leb_uint32(frame_ip, frame_ip_end, align); @@ -1249,30 +1250,30 @@ build_atomic_rmw: break; } - case SIMD_i16x8_load8x8_s: - case SIMD_i16x8_load8x8_u: - case SIMD_i32x4_load16x4_s: - case SIMD_i32x4_load16x4_u: - case SIMD_i64x2_load32x2_s: - case SIMD_i64x2_load32x2_u: + case SIMD_v128_load8x8_s: + case SIMD_v128_load8x8_u: + case SIMD_v128_load16x4_s: + case SIMD_v128_load16x4_u: + case SIMD_v128_load32x2_s: + case SIMD_v128_load32x2_u: { read_leb_uint32(frame_ip, frame_ip_end, align); read_leb_uint32(frame_ip, frame_ip_end, offset); - if (!aot_compile_simd_load_extend(comp_ctx, func_ctx, - opcode, align, offset)) + if (!aot_compile_simd_load_extend(comp_ctx, func_ctx, opcode, + align, offset)) return false; break; } - case SIMD_v8x16_load_splat: - case SIMD_v16x8_load_splat: - case SIMD_v32x4_load_splat: - case SIMD_v64x2_load_splat: + case SIMD_v128_load8_splat: + case SIMD_v128_load16_splat: + case SIMD_v128_load32_splat: + case SIMD_v128_load64_splat: { read_leb_uint32(frame_ip, frame_ip_end, align); read_leb_uint32(frame_ip, frame_ip_end, offset); - if (!aot_compile_simd_load_splat(comp_ctx, func_ctx, - opcode, align, offset)) + if (!aot_compile_simd_load_splat(comp_ctx, func_ctx, opcode, align, + offset)) return false; break; } @@ -1281,11 +1282,13 @@ build_atomic_rmw: { read_leb_uint32(frame_ip, frame_ip_end, align); read_leb_uint32(frame_ip, frame_ip_end, offset); - if (!aot_compile_simd_v128_store(comp_ctx, func_ctx, align, offset)) + if (!aot_compile_simd_v128_store(comp_ctx, func_ctx, align, + offset)) return false; break; } + /* Basic operation */ case SIMD_v128_const: { if (!aot_compile_simd_v128_const(comp_ctx, func_ctx, frame_ip)) @@ -1309,6 +1312,7 @@ build_atomic_rmw: break; } + /* Splat 
operation */ case SIMD_i8x16_splat: case SIMD_i16x8_splat: case SIMD_i32x4_splat: @@ -1321,96 +1325,108 @@ build_atomic_rmw: break; } + /* Lane operation */ case SIMD_i8x16_extract_lane_s: - { - if (!aot_compile_simd_extract_i8x16(comp_ctx, func_ctx, *frame_ip++, - true)) - return false; - break; - } case SIMD_i8x16_extract_lane_u: { - if (!aot_compile_simd_extract_i8x16(comp_ctx, func_ctx, *frame_ip++, - false)) - return false; - break; - } - case SIMD_i16x8_extract_lane_s: - { - if (!aot_compile_simd_extract_i16x8(comp_ctx, func_ctx, *frame_ip++, - true)) - return false; - break; - } - case SIMD_i16x8_extract_lane_u: - { - if (!aot_compile_simd_extract_i16x8(comp_ctx, func_ctx, *frame_ip++, - false)) - return false; - break; - } - case SIMD_i32x4_extract_lane: - { - if (!aot_compile_simd_extract_i32x4(comp_ctx, func_ctx, *frame_ip++)) - return false; - break; - } - case SIMD_i64x2_extract_lane: - { - if (!aot_compile_simd_extract_i64x2(comp_ctx, func_ctx, *frame_ip++)) - return false; - break; - } - case SIMD_f32x4_extract_lane: - { - if (!aot_compile_simd_extract_f32x4(comp_ctx, func_ctx, *frame_ip++)) - return false; - break; - } - case SIMD_f64x2_extract_lane: - { - if (!aot_compile_simd_extract_f64x2(comp_ctx, func_ctx, *frame_ip++)) + if (!aot_compile_simd_extract_i8x16( + comp_ctx, func_ctx, *frame_ip++, + SIMD_i8x16_extract_lane_s == opcode)) return false; break; } case SIMD_i8x16_replace_lane: { - if (!aot_compile_simd_replace_i8x16(comp_ctx, func_ctx, *frame_ip++)) - return false; - break; - } - case SIMD_i16x8_replace_lane: - { - if (!aot_compile_simd_replace_i16x8(comp_ctx, func_ctx, *frame_ip++)) - return false; - break; - } - case SIMD_i32x4_replace_lane: - { - if (!aot_compile_simd_replace_i32x4(comp_ctx, func_ctx, *frame_ip++)) - return false; - break; - } - case SIMD_i64x2_replace_lane: - { - if (!aot_compile_simd_replace_i64x2(comp_ctx, func_ctx, *frame_ip++)) - return false; - break; - } - case SIMD_f32x4_replace_lane: - { - if (!aot_compile_simd_replace_f32x4(comp_ctx, func_ctx, *frame_ip++)) - return false; - break; - } - case SIMD_f64x2_replace_lane: - { - if (!aot_compile_simd_replace_f64x2(comp_ctx, func_ctx, *frame_ip++)) + if (!aot_compile_simd_replace_i8x16(comp_ctx, func_ctx, + *frame_ip++)) return false; break; } + case SIMD_i16x8_extract_lane_s: + case SIMD_i16x8_extract_lane_u: + { + if (!aot_compile_simd_extract_i16x8( + comp_ctx, func_ctx, *frame_ip++, + SIMD_i16x8_extract_lane_s == opcode)) + return false; + break; + } + + case SIMD_i16x8_replace_lane: + { + if (!aot_compile_simd_replace_i16x8(comp_ctx, func_ctx, + *frame_ip++)) + return false; + break; + } + + case SIMD_i32x4_extract_lane: + { + if (!aot_compile_simd_extract_i32x4(comp_ctx, func_ctx, + *frame_ip++)) + return false; + break; + } + + case SIMD_i32x4_replace_lane: + { + if (!aot_compile_simd_replace_i32x4(comp_ctx, func_ctx, + *frame_ip++)) + return false; + break; + } + + case SIMD_i64x2_extract_lane: + { + if (!aot_compile_simd_extract_i64x2(comp_ctx, func_ctx, + *frame_ip++)) + return false; + break; + } + + case SIMD_i64x2_replace_lane: + { + if (!aot_compile_simd_replace_i64x2(comp_ctx, func_ctx, + *frame_ip++)) + return false; + break; + } + + case SIMD_f32x4_extract_lane: + { + if (!aot_compile_simd_extract_f32x4(comp_ctx, func_ctx, + *frame_ip++)) + return false; + break; + } + + case SIMD_f32x4_replace_lane: + { + if (!aot_compile_simd_replace_f32x4(comp_ctx, func_ctx, + *frame_ip++)) + return false; + break; + } + + case SIMD_f64x2_extract_lane: + { + if 
(!aot_compile_simd_extract_f64x2(comp_ctx, func_ctx, + *frame_ip++)) + return false; + break; + } + + case SIMD_f64x2_replace_lane: + { + if (!aot_compile_simd_replace_f64x2(comp_ctx, func_ctx, + *frame_ip++)) + return false; + break; + } + + /* i8x16 Cmp */ case SIMD_i8x16_eq: case SIMD_i8x16_ne: case SIMD_i8x16_lt_s: @@ -1422,12 +1438,13 @@ build_atomic_rmw: case SIMD_i8x16_ge_s: case SIMD_i8x16_ge_u: { - if (!aot_compile_simd_i8x16_compare(comp_ctx, func_ctx, - INT_EQ + opcode - SIMD_i8x16_eq)) + if (!aot_compile_simd_i8x16_compare( + comp_ctx, func_ctx, INT_EQ + opcode - SIMD_i8x16_eq)) return false; break; } + /* i16x8 Cmp */ case SIMD_i16x8_eq: case SIMD_i16x8_ne: case SIMD_i16x8_lt_s: @@ -1439,12 +1456,13 @@ build_atomic_rmw: case SIMD_i16x8_ge_s: case SIMD_i16x8_ge_u: { - if (!aot_compile_simd_i16x8_compare(comp_ctx, func_ctx, - INT_EQ + opcode - SIMD_i16x8_eq)) + if (!aot_compile_simd_i16x8_compare( + comp_ctx, func_ctx, INT_EQ + opcode - SIMD_i16x8_eq)) return false; break; } + /* i32x4 Cmp */ case SIMD_i32x4_eq: case SIMD_i32x4_ne: case SIMD_i32x4_lt_s: @@ -1456,12 +1474,13 @@ build_atomic_rmw: case SIMD_i32x4_ge_s: case SIMD_i32x4_ge_u: { - if (!aot_compile_simd_i32x4_compare(comp_ctx, func_ctx, - INT_EQ + opcode - SIMD_i32x4_eq)) + if (!aot_compile_simd_i32x4_compare( + comp_ctx, func_ctx, INT_EQ + opcode - SIMD_i32x4_eq)) return false; break; } + /* f32x4 Cmp */ case SIMD_f32x4_eq: case SIMD_f32x4_ne: case SIMD_f32x4_lt: @@ -1469,12 +1488,13 @@ build_atomic_rmw: case SIMD_f32x4_le: case SIMD_f32x4_ge: { - if (!aot_compile_simd_f32x4_compare(comp_ctx, func_ctx, - FLOAT_EQ + opcode - SIMD_f32x4_eq)) + if (!aot_compile_simd_f32x4_compare( + comp_ctx, func_ctx, FLOAT_EQ + opcode - SIMD_f32x4_eq)) return false; break; } + /* f64x2 Cmp */ case SIMD_f64x2_eq: case SIMD_f64x2_ne: case SIMD_f64x2_lt: @@ -1482,12 +1502,13 @@ build_atomic_rmw: case SIMD_f64x2_le: case SIMD_f64x2_ge: { - if (!aot_compile_simd_f64x2_compare(comp_ctx, func_ctx, - FLOAT_EQ + opcode - SIMD_f64x2_eq)) + if (!aot_compile_simd_f64x2_compare( + comp_ctx, func_ctx, FLOAT_EQ + opcode - SIMD_f64x2_eq)) return false; break; } + /* v128 Op */ case SIMD_v128_not: case SIMD_v128_and: case SIMD_v128_andnot: @@ -1495,60 +1516,76 @@ build_atomic_rmw: case SIMD_v128_xor: case SIMD_v128_bitselect: { - if (!aot_compile_simd_v128_bitwise(comp_ctx, func_ctx, - V128_NOT + opcode - SIMD_v128_not)) + if (!aot_compile_simd_v128_bitwise( + comp_ctx, func_ctx, V128_NOT + opcode - SIMD_v128_not)) return false; break; } - case SIMD_i8x16_add: - case SIMD_i8x16_sub: + case SIMD_v128_any_true: { - V128Arithmetic arith_op = (opcode == SIMD_i8x16_add) - ? 
V128_ADD : V128_SUB; - if (!aot_compile_simd_i8x16_arith(comp_ctx, func_ctx, arith_op)) + if (!aot_compile_simd_v128_any_true(comp_ctx, func_ctx)) return false; break; } - case SIMD_i16x8_add: - case SIMD_i16x8_sub: - case SIMD_i16x8_mul: + /* Load Lane Op */ + case SIMD_v128_load8_lane: + case SIMD_v128_load16_lane: + case SIMD_v128_load32_lane: + case SIMD_v128_load64_lane: { - V128Arithmetic arith_op = V128_ADD; - if (opcode == SIMD_i16x8_sub) - arith_op = V128_SUB; - else if (opcode == SIMD_i16x8_mul) - arith_op = V128_MUL; - if (!aot_compile_simd_i16x8_arith(comp_ctx, func_ctx, arith_op)) + read_leb_uint32(frame_ip, frame_ip_end, align); + read_leb_uint32(frame_ip, frame_ip_end, offset); + if (!aot_compile_simd_load_lane(comp_ctx, func_ctx, opcode, align, + offset, *frame_ip++)) return false; break; } - case SIMD_i32x4_add: - case SIMD_i32x4_sub: - case SIMD_i32x4_mul: + case SIMD_v128_store8_lane: + case SIMD_v128_store16_lane: + case SIMD_v128_store32_lane: + case SIMD_v128_store64_lane: { - V128Arithmetic arith_op = V128_ADD; - if (opcode == SIMD_i32x4_sub) - arith_op = V128_SUB; - else if (opcode == SIMD_i32x4_mul) - arith_op = V128_MUL; - if (!aot_compile_simd_i32x4_arith(comp_ctx, func_ctx, arith_op)) + read_leb_uint32(frame_ip, frame_ip_end, align); + read_leb_uint32(frame_ip, frame_ip_end, offset); + if (!aot_compile_simd_store_lane(comp_ctx, func_ctx, opcode, align, + offset, *frame_ip++)) return false; break; } - case SIMD_i64x2_add: - case SIMD_i64x2_sub: - case SIMD_i64x2_mul: + case SIMD_v128_load32_zero: + case SIMD_v128_load64_zero: { - V128Arithmetic arith_op = V128_ADD; - if (opcode == SIMD_i64x2_sub) - arith_op = V128_SUB; - else if (opcode == SIMD_i64x2_mul) - arith_op = V128_MUL; - if (!aot_compile_simd_i64x2_arith(comp_ctx, func_ctx, arith_op)) + read_leb_uint32(frame_ip, frame_ip_end, align); + read_leb_uint32(frame_ip, frame_ip_end, offset); + if (!aot_compile_simd_load_zero(comp_ctx, func_ctx, opcode, align, + offset)) + return false; + break; + } + + /* Float conversion */ + case SIMD_f32x4_demote_f64x2_zero: + { + if (!aot_compile_simd_f64x2_demote(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_f64x2_promote_low_f32x4_zero: + { + if (!aot_compile_simd_f32x4_promote(comp_ctx, func_ctx)) + return false; + break; + } + + /* i8x16 Op */ + case SIMD_i8x16_abs: + { + if (!aot_compile_simd_i8x16_abs(comp_ctx, func_ctx)) return false; break; } @@ -1559,381 +1596,33 @@ build_atomic_rmw: return false; break; } - case SIMD_i16x8_neg: + + case SIMD_i8x16_popcnt: { - if (!aot_compile_simd_i16x8_neg(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_i32x4_neg: - { - if (!aot_compile_simd_i32x4_neg(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_i64x2_neg: - { - if (!aot_compile_simd_i64x2_neg(comp_ctx, func_ctx)) + if (!aot_compile_simd_i8x16_popcnt(comp_ctx, func_ctx)) return false; break; } - case SIMD_i8x16_add_saturate_s: - case SIMD_i8x16_add_saturate_u: - { - if (!aot_compile_simd_i8x16_saturate(comp_ctx, func_ctx, V128_ADD, - opcode == SIMD_i8x16_add_saturate_s - ? true : false)) - return false; - break; - } - case SIMD_i8x16_sub_saturate_s: - case SIMD_i8x16_sub_saturate_u: - { - if (!aot_compile_simd_i8x16_saturate(comp_ctx, func_ctx, V128_SUB, - opcode == SIMD_i8x16_sub_saturate_s - ? true : false)) - return false; - break; - } - case SIMD_i16x8_add_saturate_s: - case SIMD_i16x8_add_saturate_u: - { - if (!aot_compile_simd_i16x8_saturate(comp_ctx, func_ctx, V128_ADD, - opcode == SIMD_i16x8_add_saturate_s - ? 
true : false)) - return false; - break; - } - case SIMD_i16x8_sub_saturate_s: - case SIMD_i16x8_sub_saturate_u: - { - if (!aot_compile_simd_i16x8_saturate(comp_ctx, func_ctx, V128_SUB, - opcode == SIMD_i16x8_sub_saturate_s - ? true : false)) - return false; - break; - } - - case SIMD_i8x16_min_s: - case SIMD_i8x16_min_u: - { - if (!aot_compile_simd_i8x16_cmp(comp_ctx, func_ctx, V128_MIN, - opcode == SIMD_i8x16_min_s - ? true : false)) - return false; - break; - } - case SIMD_i8x16_max_s: - case SIMD_i8x16_max_u: - { - if (!aot_compile_simd_i8x16_cmp(comp_ctx, func_ctx, V128_MAX, - opcode == SIMD_i8x16_max_s - ? true : false)) - return false; - break; - } - case SIMD_i16x8_min_s: - case SIMD_i16x8_min_u: - { - if (!aot_compile_simd_i16x8_cmp(comp_ctx, func_ctx, V128_MIN, - opcode == SIMD_i16x8_min_s - ? true : false)) - return false; - break; - } - case SIMD_i16x8_max_s: - case SIMD_i16x8_max_u: - { - if (!aot_compile_simd_i16x8_cmp(comp_ctx, func_ctx, V128_MAX, - opcode == SIMD_i16x8_max_s - ? true : false)) - return false; - break; - } - case SIMD_i32x4_min_s: - case SIMD_i32x4_min_u: - { - if (!aot_compile_simd_i32x4_cmp(comp_ctx, func_ctx, V128_MIN, - opcode == SIMD_i32x4_min_s - ? true : false)) - return false; - break; - } - case SIMD_i32x4_max_s: - case SIMD_i32x4_max_u: - { - if (!aot_compile_simd_i32x4_cmp(comp_ctx, func_ctx, V128_MAX, - opcode == SIMD_i32x4_max_s - ? true : false)) - return false; - break; - } - - case SIMD_i8x16_abs: - { - if (!aot_compile_simd_i8x16_abs(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_i16x8_abs: - { - if (!aot_compile_simd_i16x8_abs(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_i32x4_abs: - { - if (!aot_compile_simd_i32x4_abs(comp_ctx, func_ctx)) - return false; - break; - } - - case SIMD_i8x16_avgr_u: - { - if (!aot_compile_simd_i8x16_avgr_u(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_i16x8_avgr_u: - { - if (!aot_compile_simd_i16x8_avgr_u(comp_ctx, func_ctx)) - return false; - break; - } - - case SIMD_i8x16_any_true: - { - if (!aot_compile_simd_i8x16_any_true(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_i16x8_any_true: - { - if (!aot_compile_simd_i16x8_any_true(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_i32x4_any_true: - { - if (!aot_compile_simd_i32x4_any_true(comp_ctx, func_ctx)) - return false; - break; - } case SIMD_i8x16_all_true: { if (!aot_compile_simd_i8x16_all_true(comp_ctx, func_ctx)) return false; break; } - case SIMD_i16x8_all_true: - { - if (!aot_compile_simd_i16x8_all_true(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_i32x4_all_true: - { - if (!aot_compile_simd_i32x4_all_true(comp_ctx, func_ctx)) - return false; - break; - } + case SIMD_i8x16_bitmask: { if (!aot_compile_simd_i8x16_bitmask(comp_ctx, func_ctx)) return false; break; } - case SIMD_i16x8_bitmask: - { - if (!aot_compile_simd_i16x8_bitmask(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_i32x4_bitmask: - { - if (!aot_compile_simd_i32x4_bitmask(comp_ctx, func_ctx)) - return false; - break; - } - - case SIMD_i8x16_shl: - case SIMD_i8x16_shr_s: - case SIMD_i8x16_shr_u: - { - if (!aot_compile_simd_i8x16_shift(comp_ctx, func_ctx, - INT_SHL + opcode - SIMD_i8x16_shl)) - return false; - break; - } - case SIMD_i16x8_shl: - case SIMD_i16x8_shr_s: - case SIMD_i16x8_shr_u: - { - if (!aot_compile_simd_i16x8_shift(comp_ctx, func_ctx, - INT_SHL + opcode - SIMD_i16x8_shl)) - return false; - break; - } - case SIMD_i32x4_shl: - case SIMD_i32x4_shr_s: - case SIMD_i32x4_shr_u: - 
{ - if (!aot_compile_simd_i32x4_shift(comp_ctx, func_ctx, - INT_SHL + opcode - SIMD_i32x4_shl)) - return false; - break; - } - case SIMD_i64x2_shl: - case SIMD_i64x2_shr_s: - case SIMD_i64x2_shr_u: - { - if (!aot_compile_simd_i64x2_shift(comp_ctx, func_ctx, - INT_SHL + opcode - SIMD_i64x2_shl)) - return false; - break; - } case SIMD_i8x16_narrow_i16x8_s: case SIMD_i8x16_narrow_i16x8_u: { - bool is_signed = (opcode == SIMD_i8x16_narrow_i16x8_s) - ? true : false; - if (!aot_compile_simd_i8x16_narrow_i16x8(comp_ctx, func_ctx, - is_signed)) - return false; - break; - } - case SIMD_i16x8_narrow_i32x4_s: - case SIMD_i16x8_narrow_i32x4_u: - { - bool is_signed = (opcode == SIMD_i16x8_narrow_i32x4_s) - ? true : false; - if (!aot_compile_simd_i16x8_narrow_i32x4(comp_ctx, func_ctx, - is_signed)) - return false; - break; - } - case SIMD_i16x8_widen_low_i8x16_s: - case SIMD_i16x8_widen_high_i8x16_s: - { - bool is_low = (opcode == SIMD_i16x8_widen_low_i8x16_s) - ? true : false; - if (!aot_compile_simd_i16x8_widen_i8x16(comp_ctx, func_ctx, - is_low, true)) - return false; - break; - } - case SIMD_i16x8_widen_low_i8x16_u: - case SIMD_i16x8_widen_high_i8x16_u: - { - bool is_low = (opcode == SIMD_i16x8_widen_low_i8x16_u) - ? true : false; - if (!aot_compile_simd_i16x8_widen_i8x16(comp_ctx, func_ctx, - is_low, false)) - return false; - break; - } - case SIMD_i32x4_widen_low_i16x8_s: - case SIMD_i32x4_widen_high_i16x8_s: - { - bool is_low = (opcode == SIMD_i32x4_widen_low_i16x8_s) - ? true : false; - if (!aot_compile_simd_i32x4_widen_i16x8(comp_ctx, func_ctx, - is_low, true)) - return false; - break; - } - case SIMD_i32x4_widen_low_i16x8_u: - case SIMD_i32x4_widen_high_i16x8_u: - { - bool is_low = (opcode == SIMD_i32x4_widen_low_i16x8_u) - ? true : false; - if (!aot_compile_simd_i32x4_widen_i16x8(comp_ctx, func_ctx, - is_low, false)) - return false; - break; - } - - case SIMD_i32x4_trunc_sat_f32x4_s: - case SIMD_i32x4_trunc_sat_f32x4_u: - { - bool is_signed = (opcode == SIMD_i32x4_trunc_sat_f32x4_s) - ? true : false; - if (!aot_compile_simd_i32x4_trunc_sat_f32x4(comp_ctx, func_ctx, - is_signed)) - return false; - break; - } - case SIMD_f32x4_convert_i32x4_s: - case SIMD_f32x4_convert_i32x4_u: - { - bool is_signed = (opcode == SIMD_f32x4_convert_i32x4_s) - ? 
true : false; - if (!aot_compile_simd_f32x4_convert_i32x4(comp_ctx, func_ctx, - is_signed)) - return false; - break; - } - - case SIMD_f32x4_add: - case SIMD_f32x4_sub: - case SIMD_f32x4_mul: - case SIMD_f32x4_div: - case SIMD_f32x4_min: - case SIMD_f32x4_max: - { - if (!aot_compile_simd_f32x4_arith(comp_ctx, func_ctx, - FLOAT_ADD + opcode - SIMD_f32x4_add)) - return false; - break; - } - case SIMD_f64x2_add: - case SIMD_f64x2_sub: - case SIMD_f64x2_mul: - case SIMD_f64x2_div: - case SIMD_f64x2_min: - case SIMD_f64x2_max: - { - if (!aot_compile_simd_f64x2_arith(comp_ctx, func_ctx, - FLOAT_ADD + opcode - SIMD_f64x2_add)) - return false; - break; - } - - case SIMD_f32x4_neg: - { - if (!aot_compile_simd_f32x4_neg(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_f64x2_neg: - { - if (!aot_compile_simd_f64x2_neg(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_f32x4_abs: - { - if (!aot_compile_simd_f32x4_abs(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_f64x2_abs: - { - if (!aot_compile_simd_f64x2_abs(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_f32x4_sqrt: - { - if (!aot_compile_simd_f32x4_sqrt(comp_ctx, func_ctx)) - return false; - break; - } - case SIMD_f64x2_sqrt: - { - if (!aot_compile_simd_f64x2_sqrt(comp_ctx, func_ctx)) + if (!aot_compile_simd_i8x16_narrow_i16x8( + comp_ctx, func_ctx, (opcode == SIMD_i8x16_narrow_i16x8_s))) return false; break; } @@ -1944,42 +1633,247 @@ build_atomic_rmw: return false; break; } - case SIMD_f64x2_ceil: - { - if (!aot_compile_simd_f64x2_ceil(comp_ctx, func_ctx)) - return false; - break; - } + case SIMD_f32x4_floor: { if (!aot_compile_simd_f32x4_floor(comp_ctx, func_ctx)) return false; break; } - case SIMD_f64x2_floor: - { - if (!aot_compile_simd_f64x2_floor(comp_ctx, func_ctx)) - return false; - break; - } + case SIMD_f32x4_trunc: { if (!aot_compile_simd_f32x4_trunc(comp_ctx, func_ctx)) return false; break; } - case SIMD_f64x2_trunc: - { - if (!aot_compile_simd_f64x2_trunc(comp_ctx, func_ctx)) - return false; - break; - } + case SIMD_f32x4_nearest: { if (!aot_compile_simd_f32x4_nearest(comp_ctx, func_ctx)) return false; break; } + + case SIMD_i8x16_shl: + case SIMD_i8x16_shr_s: + case SIMD_i8x16_shr_u: + { + if (!aot_compile_simd_i8x16_shift( + comp_ctx, func_ctx, INT_SHL + opcode - SIMD_i8x16_shl)) + return false; + break; + } + + case SIMD_i8x16_add: + { + if (!aot_compile_simd_i8x16_arith(comp_ctx, func_ctx, V128_ADD)) + return false; + break; + } + + case SIMD_i8x16_add_sat_s: + case SIMD_i8x16_add_sat_u: + { + if (!aot_compile_simd_i8x16_saturate(comp_ctx, func_ctx, V128_ADD, + opcode + == SIMD_i8x16_add_sat_s)) + return false; + break; + } + + case SIMD_i8x16_sub: + { + if (!aot_compile_simd_i8x16_arith(comp_ctx, func_ctx, V128_SUB)) + return false; + break; + } + + case SIMD_i8x16_sub_sat_s: + case SIMD_i8x16_sub_sat_u: + { + if (!aot_compile_simd_i8x16_saturate(comp_ctx, func_ctx, V128_SUB, + opcode + == SIMD_i8x16_sub_sat_s)) + return false; + break; + } + + case SIMD_f64x2_ceil: + { + if (!aot_compile_simd_f64x2_ceil(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_f64x2_floor: + { + if (!aot_compile_simd_f64x2_floor(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i8x16_min_s: + case SIMD_i8x16_min_u: + { + if (!aot_compile_simd_i8x16_cmp(comp_ctx, func_ctx, V128_MIN, + opcode == SIMD_i8x16_min_s)) + return false; + break; + } + + case SIMD_i8x16_max_s: + case SIMD_i8x16_max_u: + { + if (!aot_compile_simd_i8x16_cmp(comp_ctx, func_ctx, V128_MAX, + opcode == 
SIMD_i8x16_max_s)) + return false; + break; + } + + case SIMD_f64x2_trunc: + { + if (!aot_compile_simd_f64x2_trunc(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i8x16_avgr_u: + { + if (!aot_compile_simd_i8x16_avgr_u(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i16x8_extadd_pairwise_i8x16_s: + case SIMD_i16x8_extadd_pairwise_i8x16_u: + { + if (!aot_compile_simd_i16x8_extadd_pairwise_i8x16( + comp_ctx, func_ctx, + SIMD_i16x8_extadd_pairwise_i8x16_s == opcode)) + return false; + break; + } + + case SIMD_i32x4_extadd_pairwise_i16x8_s: + case SIMD_i32x4_extadd_pairwise_i16x8_u: + { + if (!aot_compile_simd_i32x4_extadd_pairwise_i16x8( + comp_ctx, func_ctx, + SIMD_i32x4_extadd_pairwise_i16x8_s == opcode)) + return false; + break; + } + + /* i16x8 Op */ + case SIMD_i16x8_abs: + { + if (!aot_compile_simd_i16x8_abs(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i16x8_neg: + { + if (!aot_compile_simd_i16x8_neg(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i16x8_q15mulr_sat_s: + { + if (!aot_compile_simd_i16x8_q15mulr_sat(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i16x8_all_true: + { + if (!aot_compile_simd_i16x8_all_true(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i16x8_bitmask: + { + if (!aot_compile_simd_i16x8_bitmask(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i16x8_narrow_i32x4_s: + case SIMD_i16x8_narrow_i32x4_u: + { + if (!aot_compile_simd_i16x8_narrow_i32x4( + comp_ctx, func_ctx, SIMD_i16x8_narrow_i32x4_s == opcode)) + return false; + break; + } + + case SIMD_i16x8_extend_low_i8x16_s: + case SIMD_i16x8_extend_high_i8x16_s: + { + if (!aot_compile_simd_i16x8_extend_i8x16( + comp_ctx, func_ctx, SIMD_i16x8_extend_low_i8x16_s == opcode, + true)) + return false; + break; + } + + case SIMD_i16x8_extend_low_i8x16_u: + case SIMD_i16x8_extend_high_i8x16_u: + { + if (!aot_compile_simd_i16x8_extend_i8x16( + comp_ctx, func_ctx, SIMD_i16x8_extend_low_i8x16_u == opcode, + false)) + return false; + break; + } + + case SIMD_i16x8_shl: + case SIMD_i16x8_shr_s: + case SIMD_i16x8_shr_u: + { + if (!aot_compile_simd_i16x8_shift( + comp_ctx, func_ctx, INT_SHL + opcode - SIMD_i16x8_shl)) + return false; + break; + } + + case SIMD_i16x8_add: + { + if (!aot_compile_simd_i16x8_arith(comp_ctx, func_ctx, V128_ADD)) + return false; + break; + } + + case SIMD_i16x8_add_sat_s: + case SIMD_i16x8_add_sat_u: + { + if (!aot_compile_simd_i16x8_saturate( + comp_ctx, func_ctx, V128_ADD, + opcode == SIMD_i16x8_add_sat_s ? true : false)) + return false; + break; + } + + case SIMD_i16x8_sub: + { + if (!aot_compile_simd_i16x8_arith(comp_ctx, func_ctx, V128_SUB)) + return false; + break; + } + + case SIMD_i16x8_sub_sat_s: + case SIMD_i16x8_sub_sat_u: + { + if (!aot_compile_simd_i16x8_saturate( + comp_ctx, func_ctx, V128_SUB, + opcode == SIMD_i16x8_sub_sat_s ? 
true : false)) + return false; + break; + } + case SIMD_f64x2_nearest: { if (!aot_compile_simd_f64x2_nearest(comp_ctx, func_ctx)) @@ -1987,6 +1881,490 @@ build_atomic_rmw: break; } + case SIMD_i16x8_mul: + { + if (!aot_compile_simd_i16x8_arith(comp_ctx, func_ctx, V128_MUL)) + return false; + break; + } + + case SIMD_i16x8_min_s: + case SIMD_i16x8_min_u: + { + if (!aot_compile_simd_i16x8_cmp(comp_ctx, func_ctx, V128_MIN, + opcode == SIMD_i16x8_min_s)) + return false; + break; + } + + case SIMD_i16x8_max_s: + case SIMD_i16x8_max_u: + { + if (!aot_compile_simd_i16x8_cmp(comp_ctx, func_ctx, V128_MAX, + opcode == SIMD_i16x8_max_s)) + return false; + break; + } + + case SIMD_i16x8_avgr_u: + { + if (!aot_compile_simd_i16x8_avgr_u(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i16x8_extmul_low_i8x16_s: + case SIMD_i16x8_extmul_high_i8x16_s: + { + if (!(aot_compile_simd_i16x8_extmul_i8x16( + comp_ctx, func_ctx, SIMD_i16x8_extmul_low_i8x16_s == opcode, + true))) + return false; + break; + } + + case SIMD_i16x8_extmul_low_i8x16_u: + case SIMD_i16x8_extmul_high_i8x16_u: + { + if (!(aot_compile_simd_i16x8_extmul_i8x16( + comp_ctx, func_ctx, SIMD_i16x8_extmul_low_i8x16_u == opcode, + false))) + return false; + break; + } + + /* i32x4 Op */ + case SIMD_i32x4_abs: + { + if (!aot_compile_simd_i32x4_abs(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i32x4_neg: + { + if (!aot_compile_simd_i32x4_neg(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i32x4_all_true: + { + if (!aot_compile_simd_i32x4_all_true(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i32x4_bitmask: + { + if (!aot_compile_simd_i32x4_bitmask(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i32x4_narrow_i64x2_s: + case SIMD_i32x4_narrow_i64x2_u: + { + if (!aot_compile_simd_i32x4_narrow_i64x2( + comp_ctx, func_ctx, SIMD_i32x4_narrow_i64x2_s == opcode)) + return false; + break; + } + + case SIMD_i32x4_extend_low_i16x8_s: + case SIMD_i32x4_extend_high_i16x8_s: + { + if (!aot_compile_simd_i32x4_extend_i16x8( + comp_ctx, func_ctx, SIMD_i32x4_extend_low_i16x8_s == opcode, + true)) + return false; + break; + } + + case SIMD_i32x4_extend_low_i16x8_u: + case SIMD_i32x4_extend_high_i16x8_u: + { + if (!aot_compile_simd_i32x4_extend_i16x8( + comp_ctx, func_ctx, SIMD_i32x4_extend_low_i16x8_u == opcode, + false)) + return false; + break; + } + + case SIMD_i32x4_shl: + case SIMD_i32x4_shr_s: + case SIMD_i32x4_shr_u: + { + if (!aot_compile_simd_i32x4_shift( + comp_ctx, func_ctx, INT_SHL + opcode - SIMD_i32x4_shl)) + return false; + break; + } + + case SIMD_i32x4_add: + { + if (!aot_compile_simd_i32x4_arith(comp_ctx, func_ctx, V128_ADD)) + return false; + break; + } + + case SIMD_i32x4_add_sat_s: + case SIMD_i32x4_add_sat_u: + { + if (!aot_compile_simd_i32x4_saturate(comp_ctx, func_ctx, V128_ADD, + opcode + == SIMD_i32x4_add_sat_s)) + return false; + break; + } + + case SIMD_i32x4_sub: + { + if (!aot_compile_simd_i32x4_arith(comp_ctx, func_ctx, V128_SUB)) + return false; + break; + } + + case SIMD_i32x4_sub_sat_s: + case SIMD_i32x4_sub_sat_u: + { + if (!aot_compile_simd_i32x4_saturate(comp_ctx, func_ctx, V128_SUB, + opcode + == SIMD_i32x4_add_sat_s)) + return false; + break; + } + + case SIMD_i32x4_mul: + { + if (!aot_compile_simd_i32x4_arith(comp_ctx, func_ctx, V128_MUL)) + return false; + break; + } + + case SIMD_i32x4_min_s: + case SIMD_i32x4_min_u: + { + if (!aot_compile_simd_i32x4_cmp(comp_ctx, func_ctx, V128_MIN, + SIMD_i32x4_min_s == opcode)) + return false; + break; + } + + 
case SIMD_i32x4_max_s: + case SIMD_i32x4_max_u: + { + if (!aot_compile_simd_i32x4_cmp(comp_ctx, func_ctx, V128_MAX, + SIMD_i32x4_max_s == opcode)) + return false; + break; + } + + case SIMD_i32x4_dot_i16x8_s: + { + if (!aot_compile_simd_i32x4_dot_i16x8(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i32x4_avgr_u: + { + if (!aot_compile_simd_i32x4_avgr_u(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i32x4_extmul_low_i16x8_s: + case SIMD_i32x4_extmul_high_i16x8_s: + { + if (!aot_compile_simd_i32x4_extmul_i16x8( + comp_ctx, func_ctx, SIMD_i32x4_extmul_low_i16x8_s == opcode, + true)) + return false; + break; + } + + case SIMD_i32x4_extmul_low_i16x8_u: + case SIMD_i32x4_extmul_high_i16x8_u: + { + if (!aot_compile_simd_i32x4_extmul_i16x8( + comp_ctx, func_ctx, SIMD_i32x4_extmul_low_i16x8_u == opcode, + false)) + return false; + break; + } + + /* i64x2 Op */ + case SIMD_i64x2_abs: + { + if (!aot_compile_simd_i64x2_abs(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i64x2_neg: + { + if (!aot_compile_simd_i64x2_neg(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i64x2_all_true: + { + if (!aot_compile_simd_i64x2_all_true(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i64x2_bitmask: + { + if (!aot_compile_simd_i64x2_bitmask(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_i64x2_extend_low_i32x4_s: + case SIMD_i64x2_extend_high_i32x4_s: + { + if (!aot_compile_simd_i64x2_extend_i32x4( + comp_ctx, func_ctx, SIMD_i64x2_extend_low_i32x4_s == opcode, + true)) + return false; + break; + } + + case SIMD_i64x2_extend_low_i32x4_u: + case SIMD_i64x2_extend_high_i32x4_u: + { + if (!aot_compile_simd_i64x2_extend_i32x4( + comp_ctx, func_ctx, SIMD_i64x2_extend_low_i32x4_u == opcode, + false)) + return false; + break; + } + + case SIMD_i64x2_shl: + case SIMD_i64x2_shr_s: + case SIMD_i64x2_shr_u: + { + if (!aot_compile_simd_i64x2_shift( + comp_ctx, func_ctx, INT_SHL + opcode - SIMD_i64x2_shl)) + return false; + break; + } + + case SIMD_i64x2_add: + { + if (!aot_compile_simd_i64x2_arith(comp_ctx, func_ctx, V128_ADD)) + return false; + break; + } + + case SIMD_i64x2_sub: + { + if (!aot_compile_simd_i64x2_arith(comp_ctx, func_ctx, V128_SUB)) + return false; + break; + } + + case SIMD_i64x2_mul: + { + if (!aot_compile_simd_i64x2_arith(comp_ctx, func_ctx, V128_MUL)) + return false; + break; + } + + case SIMD_i64x2_eq: + case SIMD_i64x2_ne: + case SIMD_i64x2_lt_s: + case SIMD_i64x2_gt_s: + case SIMD_i64x2_le_s: + case SIMD_i64x2_ge_s: + { + IntCond icond[] = { INT_EQ, INT_NE, INT_LT_S, + INT_GT_S, INT_LE_S, INT_GE_S }; + if (!aot_compile_simd_i64x2_compare(comp_ctx, func_ctx, + icond[opcode - SIMD_i64x2_eq])) + return false; + break; + } + + case SIMD_i64x2_extmul_low_i32x4_s: + case SIMD_i64x2_extmul_high_i32x4_s: + { + if (!aot_compile_simd_i64x2_extmul_i32x4( + comp_ctx, func_ctx, SIMD_i64x2_extmul_low_i32x4_s == opcode, + true)) + return false; + break; + } + + case SIMD_i64x2_extmul_low_i32x4_u: + case SIMD_i64x2_extmul_high_i32x4_u: + { + if (!aot_compile_simd_i64x2_extmul_i32x4( + comp_ctx, func_ctx, SIMD_i64x2_extmul_low_i32x4_u == opcode, + false)) + return false; + break; + } + + /* f32x4 Op */ + case SIMD_f32x4_abs: + { + if (!aot_compile_simd_f32x4_abs(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_f32x4_neg: + { + if (!aot_compile_simd_f32x4_neg(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_f32x4_round: + { + if (!aot_compile_simd_f32x4_round(comp_ctx, func_ctx)) + return false; 
+ break; + } + + case SIMD_f32x4_sqrt: + { + if (!aot_compile_simd_f32x4_sqrt(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_f32x4_add: + case SIMD_f32x4_sub: + case SIMD_f32x4_mul: + case SIMD_f32x4_div: + { + if (!aot_compile_simd_f32x4_arith( + comp_ctx, func_ctx, FLOAT_ADD + opcode - SIMD_f32x4_add)) + return false; + break; + } + + case SIMD_f32x4_min: + case SIMD_f32x4_max: + { + if (!aot_compile_simd_f32x4_min_max(comp_ctx, func_ctx, + SIMD_f32x4_min == opcode)) + return false; + break; + } + + case SIMD_f32x4_pmin: + case SIMD_f32x4_pmax: + { + if (!aot_compile_simd_f32x4_pmin_pmax(comp_ctx, func_ctx, + SIMD_f32x4_pmin == opcode)) + return false; + break; + } + + /* f64x2 Op */ + + case SIMD_f64x2_abs: + { + if (!aot_compile_simd_f64x2_abs(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_f64x2_neg: + { + if (!aot_compile_simd_f64x2_neg(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_f64x2_round: + { + if (!aot_compile_simd_f64x2_round(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_f64x2_sqrt: + { + if (!aot_compile_simd_f64x2_sqrt(comp_ctx, func_ctx)) + return false; + break; + } + + case SIMD_f64x2_add: + case SIMD_f64x2_sub: + case SIMD_f64x2_mul: + case SIMD_f64x2_div: + { + if (!aot_compile_simd_f64x2_arith( + comp_ctx, func_ctx, FLOAT_ADD + opcode - SIMD_f64x2_add)) + return false; + break; + } + + case SIMD_f64x2_min: + case SIMD_f64x2_max: + { + if (!aot_compile_simd_f64x2_min_max(comp_ctx, func_ctx, + SIMD_f64x2_min == opcode)) + return false; + break; + } + + case SIMD_f64x2_pmin: + case SIMD_f64x2_pmax: + { + if (!aot_compile_simd_f64x2_pmin_pmax(comp_ctx, func_ctx, + SIMD_f64x2_pmin == opcode)) + return false; + break; + } + + /* Conversion Op */ + case SIMD_i32x4_trunc_sat_f32x4_s: + case SIMD_i32x4_trunc_sat_f32x4_u: + { + if (!aot_compile_simd_i32x4_trunc_sat_f32x4( + comp_ctx, func_ctx, + SIMD_i32x4_trunc_sat_f32x4_s == opcode)) + return false; + break; + } + + case SIMD_f32x4_convert_i32x4_s: + case SIMD_f32x4_convert_i32x4_u: + { + if (!aot_compile_simd_f32x4_convert_i32x4( + comp_ctx, func_ctx, SIMD_f32x4_convert_i32x4_s == opcode)) + return false; + break; + } + + case SIMD_i32x4_trunc_sat_f64x2_s_zero: + case SIMD_i32x4_trunc_sat_f64x2_u_zero: + { + if (!aot_compile_simd_i32x4_trunc_sat_f64x2( + comp_ctx, func_ctx, + SIMD_i32x4_trunc_sat_f64x2_s_zero == opcode)) + return false; + break; + } + + case SIMD_f64x2_convert_low_i32x4_s: + case SIMD_f64x2_convert_low_i32x4_u: + { + if (!aot_compile_simd_f64x2_convert_i32x4( + comp_ctx, func_ctx, SIMD_f64x2_convert_low_i32x4_s == opcode)) + return false; + break; + } + default: aot_set_last_error("unsupported opcode"); return false; @@ -2012,11 +2390,9 @@ build_atomic_rmw: /* Move got_exception block to the bottom */ if (func_ctx->got_exception_block) { - LLVMBasicBlockRef last_block = - LLVMGetLastBasicBlock(func_ctx->func); + LLVMBasicBlockRef last_block = LLVMGetLastBasicBlock(func_ctx->func); if (last_block != func_ctx->got_exception_block) - LLVMMoveBasicBlockAfter(func_ctx->got_exception_block, - last_block); + LLVMMoveBasicBlockAfter(func_ctx->got_exception_block, last_block); } return true; @@ -2068,21 +2444,21 @@ aot_compile_wasm(AOTCompContext *comp_ctx) ret = LLVMVerifyModule(comp_ctx->module, LLVMPrintMessageAction, &msg); if (!ret && msg) { - if (msg[0] != '\0') { - aot_set_last_error(msg); - LLVMDisposeMessage(msg); - return false; - } + if (msg[0] != '\0') { + aot_set_last_error(msg); LLVMDisposeMessage(msg); + return false; + } + 
LLVMDisposeMessage(msg); } bh_print_time("Begin to run function optimization passes"); if (comp_ctx->optimize) { - LLVMInitializeFunctionPassManager(comp_ctx->pass_mgr); - for (i = 0; i < comp_ctx->func_ctx_count; i++) - LLVMRunFunctionPassManager(comp_ctx->pass_mgr, - comp_ctx->func_ctxes[i]->func); + LLVMInitializeFunctionPassManager(comp_ctx->pass_mgr); + for (i = 0; i < comp_ctx->func_ctx_count; i++) + LLVMRunFunctionPassManager(comp_ctx->pass_mgr, + comp_ctx->func_ctxes[i]->func); } return true; @@ -2091,50 +2467,50 @@ aot_compile_wasm(AOTCompContext *comp_ctx) bool aot_emit_llvm_file(AOTCompContext *comp_ctx, const char *file_name) { - char *err = NULL; + char *err = NULL; - bh_print_time("Begin to emit LLVM IR file"); + bh_print_time("Begin to emit LLVM IR file"); - if (LLVMPrintModuleToFile(comp_ctx->module, file_name, &err) != 0) { - if (err) { - LLVMDisposeMessage(err); - err = NULL; - } - aot_set_last_error("emit llvm ir to file failed."); - return false; + if (LLVMPrintModuleToFile(comp_ctx->module, file_name, &err) != 0) { + if (err) { + LLVMDisposeMessage(err); + err = NULL; } + aot_set_last_error("emit llvm ir to file failed."); + return false; + } - return true; + return true; } bool aot_emit_object_file(AOTCompContext *comp_ctx, char *file_name) { - char *err = NULL; - LLVMCodeGenFileType file_type = LLVMObjectFile; - LLVMTargetRef target = - LLVMGetTargetMachineTarget(comp_ctx->target_machine); + char *err = NULL; + LLVMCodeGenFileType file_type = LLVMObjectFile; + LLVMTargetRef target = + LLVMGetTargetMachineTarget(comp_ctx->target_machine); - bh_print_time("Begin to emit object file"); + bh_print_time("Begin to emit object file"); - if (!strncmp(LLVMGetTargetName(target), "arc", 3)) - /* Emit to assmelby file instead for arc target - as it cannot emit to object file */ - file_type = LLVMAssemblyFile; + if (!strncmp(LLVMGetTargetName(target), "arc", 3)) + /* Emit to assmelby file instead for arc target + as it cannot emit to object file */ + file_type = LLVMAssemblyFile; - if (LLVMTargetMachineEmitToFile(comp_ctx->target_machine, - comp_ctx->module, - file_name, file_type, - &err) != 0) { - if (err) { - LLVMDisposeMessage(err); - err = NULL; - } - aot_set_last_error("emit elf to object file failed."); - return false; + if (LLVMTargetMachineEmitToFile(comp_ctx->target_machine, + comp_ctx->module, + file_name, file_type, + &err) != 0) { + if (err) { + LLVMDisposeMessage(err); + err = NULL; } + aot_set_last_error("emit elf to object file failed."); + return false; + } - return true; + return true; } #if WASM_ENABLE_REF_TYPES != 0 @@ -2143,11 +2519,11 @@ wasm_set_ref_types_flag(bool enable); #endif typedef struct AOTFileMap { - uint8 *wasm_file_buf; - uint32 wasm_file_size; - uint8 *aot_file_buf; - uint32 aot_file_size; - struct AOTFileMap *next; + uint8 *wasm_file_buf; + uint32 wasm_file_size; + uint8 *aot_file_buf; + uint32 aot_file_size; + struct AOTFileMap *next; } AOTFileMap; static bool aot_compile_wasm_file_inited = false; @@ -2157,80 +2533,82 @@ static korp_mutex aot_file_map_lock; bool aot_compile_wasm_file_init() { - if (aot_compile_wasm_file_inited) { - return true; - } - - if (BHT_OK != os_mutex_init(&aot_file_map_lock)) { - return false; - } - - aot_file_maps = NULL; - aot_compile_wasm_file_inited = true; + if (aot_compile_wasm_file_inited) { return true; + } + + if (BHT_OK != os_mutex_init(&aot_file_map_lock)) { + return false; + } + + aot_file_maps = NULL; + aot_compile_wasm_file_inited = true; + return true; } void aot_compile_wasm_file_destroy() { - 
AOTFileMap *file_map = aot_file_maps, *file_map_next; + AOTFileMap *file_map = aot_file_maps, *file_map_next; - if (!aot_compile_wasm_file_inited) { - return; - } + if (!aot_compile_wasm_file_inited) { + return; + } - while (file_map) { - file_map_next = file_map->next; + while (file_map) { + file_map_next = file_map->next; - wasm_runtime_free(file_map->wasm_file_buf); - wasm_runtime_free(file_map->aot_file_buf); - wasm_runtime_free(file_map); + wasm_runtime_free(file_map->wasm_file_buf); + wasm_runtime_free(file_map->aot_file_buf); + wasm_runtime_free(file_map); - file_map = file_map_next; - } + file_map = file_map_next; + } - aot_file_maps = NULL; - os_mutex_destroy(&aot_file_map_lock); - aot_compile_wasm_file_inited = false; + aot_file_maps = NULL; + os_mutex_destroy(&aot_file_map_lock); + aot_compile_wasm_file_inited = false; } static void set_error_buf(char *error_buf, uint32 error_buf_size, const char *string) { - if (error_buf != NULL) { - snprintf(error_buf, error_buf_size, - "WASM module load failed: %s", string); - } + if (error_buf != NULL) { + snprintf(error_buf, error_buf_size, "WASM module load failed: %s", string); + } } -uint8* -aot_compile_wasm_file(const uint8 *wasm_file_buf, uint32 wasm_file_size, - uint32 opt_level, uint32 size_level, - char *error_buf, uint32 error_buf_size, +uint8 * +aot_compile_wasm_file(const uint8 *wasm_file_buf, + uint32 wasm_file_size, + uint32 opt_level, + uint32 size_level, + char *error_buf, + uint32 error_buf_size, uint32 *p_aot_file_size) { - WASMModule *wasm_module = NULL; - AOTCompData *comp_data = NULL; - AOTCompContext *comp_ctx = NULL; - RuntimeInitArgs init_args; - AOTCompOption option = { 0 }; - AOTFileMap *file_map = NULL, *file_map_next; - uint8 *wasm_file_buf_cloned = NULL; - uint8 *aot_file_buf = NULL; - uint32 aot_file_size; + WASMModule *wasm_module = NULL; + AOTCompData *comp_data = NULL; + AOTCompContext *comp_ctx = NULL; + RuntimeInitArgs init_args; + AOTCompOption option = { 0 }; + AOTFileMap *file_map = NULL, *file_map_next; + uint8 *wasm_file_buf_cloned = NULL; + uint8 *aot_file_buf = NULL; + uint32 aot_file_size; - option.is_jit_mode = false; - option.opt_level = opt_level; - option.size_level = size_level; - option.output_format = AOT_FORMAT_FILE; - /* default value, enable or disable depends on the platform */ - option.bounds_checks = 2; - option.enable_aux_stack_check = true; + option.is_jit_mode = false; + option.opt_level = opt_level; + option.size_level = size_level; + option.output_format = AOT_FORMAT_FILE; + /* default value, enable or disable depends on the platform */ + option.bounds_checks = 2; + option.enable_aux_stack_check = true; #if WASM_ENABLE_BULK_MEMORY != 0 - option.enable_bulk_memory = true; + option.enable_bulk_memory = true; #endif #if WASM_ENABLE_THREAD_MGR != 0 - option.enable_thread_mgr = true; + option.enable_thread_mgr = true; #endif #if WASM_ENABLE_TAIL_CALL != 0 option.enable_tail_call = true; @@ -2239,111 +2617,111 @@ aot_compile_wasm_file(const uint8 *wasm_file_buf, uint32 wasm_file_size, option.enable_simd = true; #endif #if WASM_ENABLE_REF_TYPES != 0 - option.enable_ref_types = true; + option.enable_ref_types = true; #endif #if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0) - option.enable_aux_stack_frame = true; + option.enable_aux_stack_frame = true; #endif #if WASM_ENABLE_REF_TYPES != 0 - wasm_set_ref_types_flag(option.enable_ref_types); + wasm_set_ref_types_flag(option.enable_ref_types); #endif - memset(&init_args, 0, sizeof(RuntimeInitArgs)); + 
memset(&init_args, 0, sizeof(RuntimeInitArgs)); - init_args.mem_alloc_type = Alloc_With_Allocator; - init_args.mem_alloc_option.allocator.malloc_func = malloc; - init_args.mem_alloc_option.allocator.realloc_func = realloc; - init_args.mem_alloc_option.allocator.free_func = free; + init_args.mem_alloc_type = Alloc_With_Allocator; + init_args.mem_alloc_option.allocator.malloc_func = malloc; + init_args.mem_alloc_option.allocator.realloc_func = realloc; + init_args.mem_alloc_option.allocator.free_func = free; - os_mutex_lock(&aot_file_map_lock); + os_mutex_lock(&aot_file_map_lock); - /* lookup the file maps */ - file_map = aot_file_maps; - while (file_map) { - file_map_next = file_map->next; + /* lookup the file maps */ + file_map = aot_file_maps; + while (file_map) { + file_map_next = file_map->next; - if (wasm_file_size == file_map->wasm_file_size - && memcmp(wasm_file_buf, file_map->wasm_file_buf, - wasm_file_size) == 0) { - os_mutex_unlock(&aot_file_map_lock); - /* found */ - *p_aot_file_size = file_map->aot_file_size; - return file_map->aot_file_buf; - } - - file_map = file_map_next; + if (wasm_file_size == file_map->wasm_file_size + && memcmp(wasm_file_buf, file_map->wasm_file_buf, wasm_file_size) + == 0) { + os_mutex_unlock(&aot_file_map_lock); + /* found */ + *p_aot_file_size = file_map->aot_file_size; + return file_map->aot_file_buf; } - /* not found, initialize file map and clone wasm file */ - if (!(file_map = wasm_runtime_malloc(sizeof(AOTFileMap))) - || !(wasm_file_buf_cloned = wasm_runtime_malloc(wasm_file_size))) { - set_error_buf(error_buf, error_buf_size, "allocate memory failed"); - goto fail1; - } + file_map = file_map_next; + } - bh_memcpy_s(wasm_file_buf_cloned, wasm_file_size, - wasm_file_buf, wasm_file_size); - memset(file_map, 0, sizeof(AOTFileMap)); - file_map->wasm_file_buf = wasm_file_buf_cloned; - file_map->wasm_file_size = wasm_file_size; + /* not found, initialize file map and clone wasm file */ + if (!(file_map = wasm_runtime_malloc(sizeof(AOTFileMap))) + || !(wasm_file_buf_cloned = wasm_runtime_malloc(wasm_file_size))) { + set_error_buf(error_buf, error_buf_size, "allocate memory failed"); + goto fail1; + } - /* load WASM module */ - if (!(wasm_module = wasm_load(wasm_file_buf, wasm_file_size, - error_buf, sizeof(error_buf)))) { - goto fail1; - } + bh_memcpy_s(wasm_file_buf_cloned, wasm_file_size, wasm_file_buf, + wasm_file_size); + memset(file_map, 0, sizeof(AOTFileMap)); + file_map->wasm_file_buf = wasm_file_buf_cloned; + file_map->wasm_file_size = wasm_file_size; - if (!(comp_data = aot_create_comp_data(wasm_module))) { - set_error_buf(error_buf, error_buf_size, aot_get_last_error()); - goto fail2; - } + /* load WASM module */ + if (!(wasm_module = wasm_load(wasm_file_buf, wasm_file_size, error_buf, + sizeof(error_buf)))) { + goto fail1; + } - if (!(comp_ctx = aot_create_comp_context(comp_data, &option))) { - set_error_buf(error_buf, error_buf_size, aot_get_last_error()); - goto fail3; - } + if (!(comp_data = aot_create_comp_data(wasm_module))) { + set_error_buf(error_buf, error_buf_size, aot_get_last_error()); + goto fail2; + } - if (!aot_compile_wasm(comp_ctx)) { - set_error_buf(error_buf, error_buf_size, aot_get_last_error()); - goto fail4; - } + if (!(comp_ctx = aot_create_comp_context(comp_data, &option))) { + set_error_buf(error_buf, error_buf_size, aot_get_last_error()); + goto fail3; + } - if (!(aot_file_buf = aot_emit_aot_file_buf(comp_ctx, comp_data, - &aot_file_size))) { - set_error_buf(error_buf, error_buf_size, aot_get_last_error()); - goto 
fail4; - } + if (!aot_compile_wasm(comp_ctx)) { + set_error_buf(error_buf, error_buf_size, aot_get_last_error()); + goto fail4; + } - file_map->aot_file_buf = aot_file_buf; - file_map->aot_file_size = aot_file_size; + if (!(aot_file_buf = + aot_emit_aot_file_buf(comp_ctx, comp_data, &aot_file_size))) { + set_error_buf(error_buf, error_buf_size, aot_get_last_error()); + goto fail4; + } - if (!aot_file_maps) - aot_file_maps = file_map; - else { - file_map->next = aot_file_maps; - aot_file_maps = file_map; - } + file_map->aot_file_buf = aot_file_buf; + file_map->aot_file_size = aot_file_size; - *p_aot_file_size = aot_file_size; + if (!aot_file_maps) + aot_file_maps = file_map; + else { + file_map->next = aot_file_maps; + aot_file_maps = file_map; + } + + *p_aot_file_size = aot_file_size; fail4: - /* Destroy compiler context */ - aot_destroy_comp_context(comp_ctx); + /* Destroy compiler context */ + aot_destroy_comp_context(comp_ctx); fail3: /* Destroy compile data */ - aot_destroy_comp_data(comp_data); + aot_destroy_comp_data(comp_data); fail2: - wasm_unload(wasm_module); + wasm_unload(wasm_module); fail1: - if (!aot_file_buf) { - if (wasm_file_buf_cloned) - wasm_runtime_free(wasm_file_buf_cloned); - if (file_map) - wasm_runtime_free(file_map); - } + if (!aot_file_buf) { + if (wasm_file_buf_cloned) + wasm_runtime_free(wasm_file_buf_cloned); + if (file_map) + wasm_runtime_free(file_map); + } - os_mutex_unlock(&aot_file_map_lock); + os_mutex_unlock(&aot_file_map_lock); - return aot_file_buf; + return aot_file_buf; } diff --git a/core/iwasm/compilation/aot_compiler.h b/core/iwasm/compilation/aot_compiler.h index d30c89908..7058473f9 100644 --- a/core/iwasm/compilation/aot_compiler.h +++ b/core/iwasm/compilation/aot_compiler.h @@ -28,11 +28,7 @@ typedef enum IntArithmetic { typedef enum V128Arithmetic { V128_ADD = 0, - V128_ADD_SATURATE_S, - V128_ADD_SATURATE_U, V128_SUB, - V128_SUB_SATURATE_S, - V128_SUB_SATURATE_U, V128_MUL, V128_DIV, V128_NEG, @@ -52,7 +48,7 @@ typedef enum V128Bitwise { V128_ANDNOT, V128_OR, V128_XOR, - V128_BITSELECT + V128_BITSELECT, } V128Bitwise; typedef enum IntShift { @@ -79,7 +75,7 @@ typedef enum FloatArithmetic { FLOAT_MUL, FLOAT_DIV, FLOAT_MIN, - FLOAT_MAX + FLOAT_MAX, } FloatArithmetic; static inline bool @@ -246,27 +242,29 @@ check_type_compatible(uint8 src_type, uint8 dst_type) #define F64_CONST(v) LLVMConstReal(F64_TYPE, v) #define I8_CONST(v) LLVMConstInt(INT8_TYPE, v, true) -#define I8_ZERO (comp_ctx->llvm_consts.i8_zero) -#define I32_ZERO (comp_ctx->llvm_consts.i32_zero) -#define I64_ZERO (comp_ctx->llvm_consts.i64_zero) -#define F32_ZERO (comp_ctx->llvm_consts.f32_zero) -#define F64_ZERO (comp_ctx->llvm_consts.f64_zero) -#define I32_ONE (comp_ctx->llvm_consts.i32_one) -#define I32_TWO (comp_ctx->llvm_consts.i32_two) -#define I32_THREE (comp_ctx->llvm_consts.i32_three) -#define I32_FOUR (comp_ctx->llvm_consts.i32_four) -#define I32_FIVE (comp_ctx->llvm_consts.i32_five) -#define I32_SIX (comp_ctx->llvm_consts.i32_six) -#define I32_SEVEN (comp_ctx->llvm_consts.i32_seven) -#define I32_EIGHT (comp_ctx->llvm_consts.i32_eight) -#define I32_NEG_ONE (comp_ctx->llvm_consts.i32_neg_one) -#define I64_NEG_ONE (comp_ctx->llvm_consts.i64_neg_one) -#define I32_MIN (comp_ctx->llvm_consts.i32_min) -#define I64_MIN (comp_ctx->llvm_consts.i64_min) -#define I32_31 (comp_ctx->llvm_consts.i32_31) -#define I32_32 (comp_ctx->llvm_consts.i32_32) -#define I64_63 (comp_ctx->llvm_consts.i64_63) -#define I64_64 (comp_ctx->llvm_consts.i64_64) +#define LLVM_CONST(name) 
(comp_ctx->llvm_consts.name) +#define I8_ZERO LLVM_CONST(i8_zero) +#define I32_ZERO LLVM_CONST(i32_zero) +#define I64_ZERO LLVM_CONST(i64_zero) +#define F32_ZERO LLVM_CONST(f32_zero) +#define F64_ZERO LLVM_CONST(f64_zero) +#define I32_ONE LLVM_CONST(i32_one) +#define I32_TWO LLVM_CONST(i32_two) +#define I32_THREE LLVM_CONST(i32_three) +#define I32_FOUR LLVM_CONST(i32_four) +#define I32_FIVE LLVM_CONST(i32_five) +#define I32_SIX LLVM_CONST(i32_six) +#define I32_SEVEN LLVM_CONST(i32_seven) +#define I32_EIGHT LLVM_CONST(i32_eight) +#define I32_NEG_ONE LLVM_CONST(i32_neg_one) +#define I64_NEG_ONE LLVM_CONST(i64_neg_one) +#define I32_MIN LLVM_CONST(i32_min) +#define I64_MIN LLVM_CONST(i64_min) +#define I32_31 LLVM_CONST(i32_31) +#define I32_32 LLVM_CONST(i32_32) +#define I64_63 LLVM_CONST(i64_63) +#define I64_64 LLVM_CONST(i64_64) +#define REF_NULL I32_NEG_ONE #define V128_TYPE comp_ctx->basic_types.v128_type #define V128_PTR_TYPE comp_ctx->basic_types.v128_ptr_type @@ -277,15 +275,12 @@ check_type_compatible(uint8 src_type, uint8 dst_type) #define V128_f32x4_TYPE comp_ctx->basic_types.f32x4_vec_type #define V128_f64x2_TYPE comp_ctx->basic_types.f64x2_vec_type -#define V128_ZERO (comp_ctx->llvm_consts.v128_zero) -#define V128_i8x16_ZERO (comp_ctx->llvm_consts.i8x16_vec_zero) -#define V128_i16x8_ZERO (comp_ctx->llvm_consts.i16x8_vec_zero) -#define V128_i32x4_ZERO (comp_ctx->llvm_consts.i32x4_vec_zero) -#define V128_i64x2_ZERO (comp_ctx->llvm_consts.i64x2_vec_zero) -#define V128_f32x4_ZERO (comp_ctx->llvm_consts.f32x4_vec_zero) -#define V128_f64x2_ZERO (comp_ctx->llvm_consts.f64x2_vec_zero) - -#define REF_NULL (comp_ctx->llvm_consts.i32_neg_one) +#define V128_i8x16_ZERO LLVM_CONST(i8x16_vec_zero) +#define V128_i16x8_ZERO LLVM_CONST(i16x8_vec_zero) +#define V128_i32x4_ZERO LLVM_CONST(i32x4_vec_zero) +#define V128_i64x2_ZERO LLVM_CONST(i64x2_vec_zero) +#define V128_f32x4_ZERO LLVM_CONST(f32x4_vec_zero) +#define V128_f64x2_ZERO LLVM_CONST(f64x2_vec_zero) #define TO_V128_i8x16(v) LLVMBuildBitCast(comp_ctx->builder, v, \ V128_i8x16_TYPE, "i8x16_val") diff --git a/core/iwasm/compilation/aot_emit_control.c b/core/iwasm/compilation/aot_emit_control.c index 39f882d41..51a5648d2 100644 --- a/core/iwasm/compilation/aot_emit_control.c +++ b/core/iwasm/compilation/aot_emit_control.c @@ -435,6 +435,20 @@ aot_compile_op_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } else if (label_type == LABEL_TYPE_IF) { POP_COND(value); + + if (LLVMIsUndef(value) +#if LLVM_VERSION_NUMBER >= 12 + || LLVMIsPoison(value) +#endif + ) { + if (!(aot_emit_exception(comp_ctx, func_ctx, EXCE_INTEGER_OVERFLOW, + false, NULL, NULL))) { + goto fail; + } + return aot_handle_next_reachable_block(comp_ctx, func_ctx, + p_frame_ip); + } + if (!LLVMIsConstant(value)) { /* Compare value is not constant, create condition br IR */ /* Create entry block */ @@ -791,6 +805,19 @@ aot_compile_op_br_if(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, #endif POP_COND(value_cmp); + + if (LLVMIsUndef(value_cmp) +#if LLVM_VERSION_NUMBER >= 12 + || LLVMIsPoison(value_cmp) +#endif + ) { + if (!(aot_emit_exception(comp_ctx, func_ctx, EXCE_INTEGER_OVERFLOW, + false, NULL, NULL))) { + goto fail; + } + return aot_handle_next_reachable_block(comp_ctx, func_ctx, p_frame_ip); + } + if (!LLVMIsConstant(value_cmp)) { /* Compare value is not constant, create condition br IR */ if (!(block_dst = get_target_block(func_ctx, br_depth))) { @@ -917,6 +944,19 @@ aot_compile_op_br_table(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, #endif 
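/*
 * Illustrative aside, not part of this patch: the same "reject undef/poison
 * condition" guard is inserted in this file before the if, br_if and br_table
 * conditions. An undef or poison value here normally stems from an
 * out-of-range float-to-int conversion that has already been reported, so the
 * compiler emits an integer-overflow exception instead of branching on it.
 * Below is a hedged sketch of how the repeated check could be factored into
 * one helper; the WAMR call shapes are copied from this diff, while the helper
 * itself and its exact parameter types are hypothetical.
 */
#if 0 /* sketch only */
static bool
guard_undef_condition(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                      uint8 **p_frame_ip, LLVMValueRef cond, bool *p_handled)
{
    *p_handled = false;

    if (LLVMIsUndef(cond)
#if LLVM_VERSION_NUMBER >= 12
        || LLVMIsPoison(cond)
#endif
    ) {
        /* report as integer overflow, mirroring the in-line checks above */
        if (!aot_emit_exception(comp_ctx, func_ctx, EXCE_INTEGER_OVERFLOW,
                                false, NULL, NULL))
            return false;
        *p_handled = true;
        return aot_handle_next_reachable_block(comp_ctx, func_ctx, p_frame_ip);
    }
    return true;
}
#endif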
POP_I32(value_cmp); + + if (LLVMIsUndef(value_cmp) +#if LLVM_VERSION_NUMBER >= 12 + || LLVMIsPoison(value_cmp) +#endif + ) { + if (!(aot_emit_exception(comp_ctx, func_ctx, EXCE_INTEGER_OVERFLOW, + false, NULL, NULL))) { + goto fail; + } + return aot_handle_next_reachable_block(comp_ctx, func_ctx, p_frame_ip); + } + if (!LLVMIsConstant(value_cmp)) { /* Compare value is not constant, create switch IR */ for (i = 0; i <= br_count; i++) { diff --git a/core/iwasm/compilation/aot_emit_memory.c b/core/iwasm/compilation/aot_emit_memory.c index d7b194ce2..206183539 100644 --- a/core/iwasm/compilation/aot_emit_memory.c +++ b/core/iwasm/compilation/aot_emit_memory.c @@ -126,8 +126,16 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, POP_I32(addr); + /* + * Note: not throw the integer-overflow-exception here since it must + * have been thrown when converting float to integer before + */ /* return addres directly if constant offset and inside memory space */ - if (LLVMIsConstant(addr)) { + if (LLVMIsConstant(addr) && !LLVMIsUndef(addr) +#if LLVM_VERSION_NUMBER >= 12 + && !LLVMIsPoison(addr) +#endif + ) { uint64 mem_offset = (uint64)LLVMConstIntGetZExtValue(addr) + (uint64)offset; uint32 num_bytes_per_page = @@ -764,8 +772,16 @@ check_bulk_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } } + /* + * Note: not throw the integer-overflow-exception here since it must + * have been thrown when converting float to integer before + */ /* return addres directly if constant offset and inside memory space */ - if (LLVMIsConstant(offset) && LLVMIsConstant(bytes)) { + if (!LLVMIsUndef(offset) && !LLVMIsUndef(bytes) +#if LLVM_VERSION_NUMBER >= 12 + && !LLVMIsPoison(offset) && !LLVMIsPoison(bytes) +#endif + && LLVMIsConstant(offset) && LLVMIsConstant(bytes)) { uint64 mem_offset = (uint64)LLVMConstIntGetZExtValue(offset); uint64 mem_len = (uint64)LLVMConstIntGetZExtValue(bytes); uint32 num_bytes_per_page = diff --git a/core/iwasm/compilation/aot_emit_numberic.c b/core/iwasm/compilation/aot_emit_numberic.c index 147e58135..b6b6016b4 100644 --- a/core/iwasm/compilation/aot_emit_numberic.c +++ b/core/iwasm/compilation/aot_emit_numberic.c @@ -36,10 +36,17 @@ LLVMMoveBasicBlockAfter(block, LLVMGetInsertBlock(comp_ctx->builder)); \ } while (0) -#define IS_CONST_ZERO(val) \ - (LLVMIsConstant(val) \ - && ((is_i32 && (int32)LLVMConstIntGetZExtValue(val) == 0) \ +#if LLVM_VERSION_NUMBER >= 12 +#define IS_CONST_ZERO(val) \ + (!LLVMIsUndef(val) && !LLVMIsPoison(val) && LLVMIsConstant(val) \ + && ((is_i32 && (int32)LLVMConstIntGetZExtValue(val) == 0) \ || (!is_i32 && (int64)LLVMConstIntGetSExtValue(val) == 0))) +#else +#define IS_CONST_ZERO(val) \ + (!LLVMIsUndef(val) && LLVMIsConstant(val) \ + && ((is_i32 && (int32)LLVMConstIntGetZExtValue(val) == 0) \ + || (!is_i32 && (int64)LLVMConstIntGetSExtValue(val) == 0))) +#endif #define CHECK_INT_OVERFLOW(type) do { \ LLVMValueRef cmp_min_int, cmp_neg_one; \ @@ -399,6 +406,18 @@ compile_int_div(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, POP_INT(right); POP_INT(left); + if (LLVMIsUndef(right) || LLVMIsUndef(left) +#if LLVM_VERSION_NUMBER >= 12 + || LLVMIsPoison(right) || LLVMIsPoison(left) +#endif + ) { + if (!(aot_emit_exception(comp_ctx, func_ctx, EXCE_INTEGER_OVERFLOW, + false, NULL, NULL))) { + goto fail; + } + return aot_handle_next_reachable_block(comp_ctx, func_ctx, p_frame_ip); + } + if (LLVMIsConstant(right)) { int64 right_val = (int64)LLVMConstIntGetSExtValue(right); switch (right_val) { diff --git 
a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index acce56fb3..780c45dd8 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -811,7 +811,7 @@ aot_create_func_context(AOTCompData *comp_data, AOTCompContext *comp_ctx, local_value = F64_ZERO; break; case VALUE_TYPE_V128: - local_value = V128_ZERO; + local_value = V128_i64x2_ZERO; break; case VALUE_TYPE_FUNCREF: case VALUE_TYPE_EXTERNREF: @@ -963,6 +963,8 @@ aot_set_llvm_basic_types(AOTLLVMTypes *basic_types, LLVMContextRef context) basic_types->v128_type = basic_types->i64x2_vec_type; basic_types->v128_ptr_type = LLVMPointerType(basic_types->v128_type, 0); + basic_types->i1x2_vec_type = LLVMVectorType(basic_types->int1_type, 2); + basic_types->funcref_type = LLVMInt32TypeInContext(context); basic_types->externref_type = LLVMInt32TypeInContext(context); @@ -979,6 +981,7 @@ aot_set_llvm_basic_types(AOTLLVMTypes *basic_types, LLVMContextRef context) && basic_types->i64x2_vec_type && basic_types->f32x4_vec_type && basic_types->f64x2_vec_type + && basic_types->i1x2_vec_type && basic_types->meta_data_type && basic_types->funcref_type && basic_types->externref_type) ? true : false; @@ -987,73 +990,89 @@ aot_set_llvm_basic_types(AOTLLVMTypes *basic_types, LLVMContextRef context) static bool aot_create_llvm_consts(AOTLLVMConsts *consts, AOTCompContext *comp_ctx) { - LLVMValueRef i64_consts[2]; +#define CREATE_I1_CONST(name, value) \ + if (!(consts->i1_##name = \ + LLVMConstInt(comp_ctx->basic_types.int1_type, value, true))) \ + return false; - consts->i8_zero = I8_CONST(0); - consts->i32_zero = I32_CONST(0); - consts->i64_zero = I64_CONST(0); - consts->f32_zero = F32_CONST(0); - consts->f64_zero = F64_CONST(0); + CREATE_I1_CONST(zero, 0) + CREATE_I1_CONST(one, 1) +#undef CREATE_I1_CONST - if (consts->i64_zero) { - i64_consts[0] = i64_consts[1] = consts->i64_zero; - consts->v128_zero = consts->i64x2_vec_zero = - LLVMConstVector(i64_consts, 2); - if (consts->i64x2_vec_zero) { - consts->i8x16_vec_zero = TO_V128_i8x16(consts->i64x2_vec_zero); - consts->i16x8_vec_zero = TO_V128_i16x8(consts->i64x2_vec_zero); - consts->i32x4_vec_zero = TO_V128_i32x4(consts->i64x2_vec_zero); - consts->f32x4_vec_zero = TO_V128_f32x4(consts->i64x2_vec_zero); - consts->f64x2_vec_zero = TO_V128_f64x2(consts->i64x2_vec_zero); - } + if (!(consts->i8_zero = I8_CONST(0))) + return false; + + if (!(consts->f32_zero = F32_CONST(0))) + return false; + + if (!(consts->f64_zero = F64_CONST(0))) + return false; + +#define CREATE_I32_CONST(name, value) \ + if (!(consts->i32_##name = LLVMConstInt(I32_TYPE, value, true))) \ + return false; + + CREATE_I32_CONST(min, (uint32)INT32_MIN) + CREATE_I32_CONST(neg_one, (uint32)-1) + CREATE_I32_CONST(zero, 0) + CREATE_I32_CONST(one, 1) + CREATE_I32_CONST(two, 2) + CREATE_I32_CONST(three, 3) + CREATE_I32_CONST(four, 4) + CREATE_I32_CONST(five, 5) + CREATE_I32_CONST(six, 6) + CREATE_I32_CONST(seven, 7) + CREATE_I32_CONST(eight, 8) + CREATE_I32_CONST(nine, 9) + CREATE_I32_CONST(ten, 10) + CREATE_I32_CONST(eleven, 11) + CREATE_I32_CONST(twelve, 12) + CREATE_I32_CONST(thirteen, 13) + CREATE_I32_CONST(fourteen, 14) + CREATE_I32_CONST(fifteen, 15) + CREATE_I32_CONST(31, 31) + CREATE_I32_CONST(32, 32) +#undef CREATE_I32_CONST + +#define CREATE_I64_CONST(name, value) \ + if (!(consts->i64_##name = LLVMConstInt(I64_TYPE, value, true))) \ + return false; + + CREATE_I64_CONST(min, (uint64)INT64_MIN) + CREATE_I64_CONST(neg_one, (uint64)-1) + CREATE_I64_CONST(zero, 0) + CREATE_I64_CONST(63, 63) + 
CREATE_I64_CONST(64, 64) +#undef CREATE_I64_CONST + +#define CREATE_V128_CONST(name, type) \ + if (!(consts->name##_vec_zero = LLVMConstNull(type))) \ + return false; \ + if (!(consts->name##_undef = LLVMGetUndef(type))) \ + return false; + + CREATE_V128_CONST(i8x16, V128_i8x16_TYPE) + CREATE_V128_CONST(i16x8, V128_i16x8_TYPE) + CREATE_V128_CONST(i32x4, V128_i32x4_TYPE) + CREATE_V128_CONST(i64x2, V128_i64x2_TYPE) + CREATE_V128_CONST(f32x4, V128_f32x4_TYPE) + CREATE_V128_CONST(f64x2, V128_f64x2_TYPE) +#undef CREATE_V128_CONST + +#define CREATE_VEC_ZERO_MASK(slot) \ + { \ + LLVMTypeRef type = LLVMVectorType(I32_TYPE, slot); \ + if (!type || !(consts->i32x##slot##_zero = LLVMConstNull(type))) \ + return false; \ } - consts->i32_one = I32_CONST(1); - consts->i32_two = I32_CONST(2); - consts->i32_three = I32_CONST(3); - consts->i32_four = I32_CONST(4); - consts->i32_five = I32_CONST(5); - consts->i32_six = I32_CONST(6); - consts->i32_seven = I32_CONST(7); - consts->i32_eight = I32_CONST(8); - consts->i32_neg_one = I32_CONST((uint32)-1); - consts->i64_neg_one = I64_CONST((uint64)-1); - consts->i32_min = I32_CONST((uint32)INT32_MIN); - consts->i64_min = I64_CONST((uint64)INT64_MIN); - consts->i32_31 = I32_CONST(31); - consts->i32_32 = I32_CONST(32); - consts->i64_63 = I64_CONST(63); - consts->i64_64 = I64_CONST(64); - consts->ref_null = I32_CONST(NULL_REF); + CREATE_VEC_ZERO_MASK(16) + CREATE_VEC_ZERO_MASK(8) + CREATE_VEC_ZERO_MASK(4) + CREATE_VEC_ZERO_MASK(2) +#undef CREATE_VEC_ZERO_MASK - return (consts->i8_zero - && consts->i32_zero - && consts->i64_zero - && consts->f32_zero - && consts->f64_zero - && consts->i8x16_vec_zero - && consts->i16x8_vec_zero - && consts->i32x4_vec_zero - && consts->i64x2_vec_zero - && consts->f32x4_vec_zero - && consts->f64x2_vec_zero - && consts->i32_one - && consts->i32_two - && consts->i32_three - && consts->i32_four - && consts->i32_five - && consts->i32_six - && consts->i32_seven - && consts->i32_eight - && consts->i32_neg_one - && consts->i64_neg_one - && consts->i32_min - && consts->i64_min - && consts->i32_31 - && consts->i32_32 - && consts->i64_63 - && consts->i64_64 - && consts->ref_null) ? 
true : false; + return true; } typedef struct ArchItem { @@ -2213,7 +2232,8 @@ aot_build_zero_function_ret(AOTCompContext *comp_ctx, ret = LLVMBuildRet(comp_ctx->builder, F64_ZERO); break; case VALUE_TYPE_V128: - ret = LLVMBuildRet(comp_ctx->builder, V128_ZERO); + ret = + LLVMBuildRet(comp_ctx->builder, LLVM_CONST(i64x2_vec_zero)); break; case VALUE_TYPE_FUNCREF: case VALUE_TYPE_EXTERNREF: @@ -2315,7 +2335,7 @@ __call_llvm_intrinsic(const AOTCompContext *comp_ctx, LLVMValueRef aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx, const AOTFuncContext *func_ctx, - const char *name, + const char *intrinsic, LLVMTypeRef ret_type, LLVMTypeRef *param_types, int param_count, @@ -2340,8 +2360,8 @@ aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx, param_values[i++] = va_arg(argptr, LLVMValueRef); va_end(argptr); - ret = __call_llvm_intrinsic(comp_ctx, func_ctx, name, ret_type, param_types, - param_count, param_values); + ret = __call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, ret_type, + param_types, param_count, param_values); wasm_runtime_free(param_values); @@ -2351,7 +2371,7 @@ aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx, LLVMValueRef aot_call_llvm_intrinsic_v(const AOTCompContext *comp_ctx, const AOTFuncContext *func_ctx, - const char *name, + const char *intrinsic, LLVMTypeRef ret_type, LLVMTypeRef *param_types, int param_count, @@ -2373,8 +2393,8 @@ aot_call_llvm_intrinsic_v(const AOTCompContext *comp_ctx, while (i < param_count) param_values[i++] = va_arg(param_value_list, LLVMValueRef); - ret = __call_llvm_intrinsic(comp_ctx, func_ctx, name, ret_type, param_types, - param_count, param_values); + ret = __call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, ret_type, + param_types, param_count, param_values); wasm_runtime_free(param_values); diff --git a/core/iwasm/compilation/aot_llvm.h b/core/iwasm/compilation/aot_llvm.h index b05b6cf61..85cf40c70 100644 --- a/core/iwasm/compilation/aot_llvm.h +++ b/core/iwasm/compilation/aot_llvm.h @@ -178,6 +178,8 @@ typedef struct AOTLLVMTypes { LLVMTypeRef f32x4_vec_type; LLVMTypeRef f64x2_vec_type; + LLVMTypeRef i1x2_vec_type; + LLVMTypeRef meta_data_type; LLVMTypeRef funcref_type; @@ -185,18 +187,13 @@ typedef struct AOTLLVMTypes { } AOTLLVMTypes; typedef struct AOTLLVMConsts { + LLVMValueRef i1_zero; + LLVMValueRef i1_one; LLVMValueRef i8_zero; LLVMValueRef i32_zero; LLVMValueRef i64_zero; LLVMValueRef f32_zero; LLVMValueRef f64_zero; - LLVMValueRef v128_zero; - LLVMValueRef i8x16_vec_zero; - LLVMValueRef i16x8_vec_zero; - LLVMValueRef i32x4_vec_zero; - LLVMValueRef i64x2_vec_zero; - LLVMValueRef f32x4_vec_zero; - LLVMValueRef f64x2_vec_zero; LLVMValueRef i32_one; LLVMValueRef i32_two; LLVMValueRef i32_three; @@ -205,6 +202,13 @@ typedef struct AOTLLVMConsts { LLVMValueRef i32_six; LLVMValueRef i32_seven; LLVMValueRef i32_eight; + LLVMValueRef i32_nine; + LLVMValueRef i32_ten; + LLVMValueRef i32_eleven; + LLVMValueRef i32_twelve; + LLVMValueRef i32_thirteen; + LLVMValueRef i32_fourteen; + LLVMValueRef i32_fifteen; LLVMValueRef i32_neg_one; LLVMValueRef i64_neg_one; LLVMValueRef i32_min; @@ -213,7 +217,22 @@ typedef struct AOTLLVMConsts { LLVMValueRef i32_32; LLVMValueRef i64_63; LLVMValueRef i64_64; - LLVMValueRef ref_null; + LLVMValueRef i8x16_vec_zero; + LLVMValueRef i16x8_vec_zero; + LLVMValueRef i32x4_vec_zero; + LLVMValueRef i64x2_vec_zero; + LLVMValueRef f32x4_vec_zero; + LLVMValueRef f64x2_vec_zero; + LLVMValueRef i8x16_undef; + LLVMValueRef i16x8_undef; + LLVMValueRef i32x4_undef; + LLVMValueRef i64x2_undef; + 
LLVMValueRef f32x4_undef; + LLVMValueRef f64x2_undef; + LLVMValueRef i32x16_zero; + LLVMValueRef i32x8_zero; + LLVMValueRef i32x4_zero; + LLVMValueRef i32x2_zero; } AOTLLVMConsts; /** @@ -393,7 +412,7 @@ aot_build_zero_function_ret(AOTCompContext *comp_ctx, LLVMValueRef aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx, const AOTFuncContext *func_ctx, - const char *name, + const char *intrinsic, LLVMTypeRef ret_type, LLVMTypeRef *param_types, int param_count, @@ -402,7 +421,7 @@ aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx, LLVMValueRef aot_call_llvm_intrinsic_v(const AOTCompContext *comp_ctx, const AOTFuncContext *func_ctx, - const char *name, + const char *intrinsic, LLVMTypeRef ret_type, LLVMTypeRef *param_types, int param_count, diff --git a/core/iwasm/compilation/simd/simd_access_lanes.c b/core/iwasm/compilation/simd/simd_access_lanes.c index 1db75a838..89cc27ef0 100644 --- a/core/iwasm/compilation/simd/simd_access_lanes.c +++ b/core/iwasm/compilation/simd/simd_access_lanes.c @@ -8,39 +8,6 @@ #include "../aot_emit_exception.h" #include "../../aot/aot_runtime.h" -static bool -is_target_x86(AOTCompContext *comp_ctx) -{ - return !strncmp(comp_ctx->target_arch, "x86_64", 6) || - !strncmp(comp_ctx->target_arch, "i386", 4); -} - -static LLVMValueRef -build_intx16_vector(const AOTCompContext *comp_ctx, - const LLVMTypeRef element_type, - const int *element_value) -{ - LLVMValueRef vector, elements[16]; - unsigned i; - - for (i = 0; i < 16; i++) { - if (!(elements[i] = - LLVMConstInt(element_type, element_value[i], true))) { - HANDLE_FAILURE("LLVMConstInst"); - goto fail; - } - } - - if (!(vector = LLVMConstVector(elements, 16))) { - HANDLE_FAILURE("LLVMConstVector"); - goto fail; - } - - return vector; -fail: - return NULL; -} - bool aot_compile_simd_shuffle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, @@ -67,7 +34,8 @@ aot_compile_simd_shuffle(AOTCompContext *comp_ctx, } /* build a vector <16 x i32> */ - if (!(mask = build_intx16_vector(comp_ctx, I32_TYPE, values))) { + if (!(mask = + simd_build_const_integer_vector(comp_ctx, I32_TYPE, values, 16))) { goto fail; } @@ -77,29 +45,20 @@ aot_compile_simd_shuffle(AOTCompContext *comp_ctx, goto fail; } - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); - PUSH_V128(result); - - return true; fail: return false; } +/*TODO: llvm.experimental.vector.*/ /* shufflevector is not an option, since it requires *mask as a const */ bool -aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) { LLVMValueRef vector, mask, max_lanes, condition, mask_lanes, result; LLVMTypeRef param_types[2]; - int max_lane_id[16] = { 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16 }, - mask_lane_id[16] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; if (!(mask = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE, "mask"))) { @@ -112,7 +71,15 @@ aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) } /* icmp uge <16 x i8> mask, <16, 16, 16, 16, ...> */ - if (!(max_lanes = build_intx16_vector(comp_ctx, INT8_TYPE, max_lane_id))) { + if (!(max_lanes = simd_build_splat_const_integer_vector( + comp_ctx, INT8_TYPE, 16, 16))) { + goto fail; + } + + /* if the highest bit of every i8 
of mask is 1, means doesn't pick up from vector */ + /* select <16 x i1> %condition, <16 x i8> <0x80, 0x80, ...>, <16 x i8> %mask */ + if (!(mask_lanes = simd_build_splat_const_integer_vector( + comp_ctx, INT8_TYPE, 0x80, 16))) { goto fail; } @@ -122,13 +89,6 @@ aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) goto fail; } - /* if the highest bit of every i8 of mask is 1, means doesn't pick up from vector */ - /* select <16 x i1> %condition, <16 x i8> <0x80, 0x80, ...>, <16 x i8> %mask */ - if (!(mask_lanes = - build_intx16_vector(comp_ctx, INT8_TYPE, mask_lane_id))) { - goto fail; - } - if (!(mask = LLVMBuildSelect(comp_ctx->builder, condition, mask_lanes, mask, "mask"))) { HANDLE_FAILURE("LLVMBuildSelect"); @@ -158,17 +118,13 @@ fail: } static bool -aot_compile_simd_swizzle_common(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +aot_compile_simd_swizzle_common(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) { LLVMValueRef vector, mask, default_lane_value, condition, max_lane_id, result, idx, id, replace_with_zero, elem, elem_or_zero, undef; uint8 i; - int const_lane_ids[16] = { 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16 }, - const_zeors[16] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; - if (!(mask = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE, "mask"))) { goto fail; @@ -185,8 +141,8 @@ aot_compile_simd_swizzle_common(AOTCompContext *comp_ctx, AOTFuncContext *func_c } /* icmp uge <16 x i8> mask, <16, 16, 16, 16, ...> */ - if (!(max_lane_id = - build_intx16_vector(comp_ctx, INT8_TYPE, const_lane_ids))) { + if (!(max_lane_id = simd_build_splat_const_integer_vector( + comp_ctx, INT8_TYPE, 16, 16))) { goto fail; } @@ -197,8 +153,8 @@ aot_compile_simd_swizzle_common(AOTCompContext *comp_ctx, AOTFuncContext *func_c } /* if the id is out of range (>=16), set the id as 0 */ - if (!(default_lane_value = - build_intx16_vector(comp_ctx, INT8_TYPE, const_zeors))) { + if (!(default_lane_value = simd_build_splat_const_integer_vector( + comp_ctx, INT8_TYPE, 0, 16))) { goto fail; } @@ -277,9 +233,9 @@ aot_compile_simd_extract(AOTCompContext *comp_ctx, LLVMTypeRef result_type, unsigned aot_value_type) { - LLVMValueRef vector, idx, result; + LLVMValueRef vector, lane, result; - if (!(idx = I8_CONST(lane_id))) { + if (!(lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id))) { HANDLE_FAILURE("LLVMConstInt"); goto fail; } @@ -291,7 +247,7 @@ aot_compile_simd_extract(AOTCompContext *comp_ctx, } /* extractelement %vector, i8 lane_id*/ - if (!(result = LLVMBuildExtractElement(comp_ctx->builder, vector, idx, + if (!(result = LLVMBuildExtractElement(comp_ctx->builder, vector, lane, "element"))) { HANDLE_FAILURE("LLVMBuildExtractElement"); goto fail; @@ -390,23 +346,20 @@ aot_compile_simd_replace(AOTCompContext *comp_ctx, bool need_reduce, LLVMTypeRef element_type) { - LLVMValueRef vector, new_value, idx, result; + LLVMValueRef vector, new_value, lane, result; POP(new_value, new_value_type); - if (!(idx = I8_CONST(lane_id))) { - HANDLE_FAILURE("LLVMConstInt"); + if (!(lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id))) { goto fail; } - /* bitcast <2 x i64> %0 to */ - if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "vec"))) { goto fail; } - /* bitcast to */ + /* trunc to */ if (need_reduce) { if (!(new_value = LLVMBuildTrunc(comp_ctx->builder, new_value, element_type, "element"))) { @@ -415,23 +368,15 @@ aot_compile_simd_replace(AOTCompContext *comp_ctx, } } - /* 
insertelement %vector, %element, i8 idx */ + /* insertelement %vector, %element, i32 lane */ if (!(result = LLVMBuildInsertElement(comp_ctx->builder, vector, new_value, - idx, "new_vector"))) { + lane, "new_vector"))) { HANDLE_FAILURE("LLVMBuildInsertElement"); goto fail; } - /* bitcast %result to <2 x i64> */ - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "reesult"); - PUSH_V128(result); - - return true; fail: return false; } diff --git a/core/iwasm/compilation/simd/simd_access_lanes.h b/core/iwasm/compilation/simd/simd_access_lanes.h index ae90242df..06d023767 100644 --- a/core/iwasm/compilation/simd/simd_access_lanes.h +++ b/core/iwasm/compilation/simd/simd_access_lanes.h @@ -82,6 +82,26 @@ aot_compile_simd_replace_f64x2(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint8 lane_id); +bool +aot_compile_simd_load8_lane(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +bool +aot_compile_simd_load16_lane(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +bool +aot_compile_simd_load32_lane(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + +bool +aot_compile_simd_load64_lane(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 lane_id); + #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/compilation/simd/simd_bit_shifts.c b/core/iwasm/compilation/simd/simd_bit_shifts.c index 5b1ee9024..0bbb9c8cc 100644 --- a/core/iwasm/compilation/simd/simd_bit_shifts.c +++ b/core/iwasm/compilation/simd/simd_bit_shifts.c @@ -8,121 +8,112 @@ #include "../aot_emit_exception.h" #include "../../aot/aot_runtime.h" +enum integer_shift { + e_shift_i8x16, + e_shift_i16x8, + e_shift_i32x4, + e_shift_i64x2, +}; + static bool simd_shift(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, IntShift shift_op, - LLVMTypeRef vector_type, - LLVMTypeRef element_type, - unsigned lane_width) + enum integer_shift itype) { - LLVMValueRef vector, offset, width, undef, zeros, result; - LLVMTypeRef zeros_type; + LLVMValueRef vector, offset, result = NULL; + LLVMTypeRef vector_type[] = { V128_i8x16_TYPE, V128_i16x8_TYPE, + V128_i32x4_TYPE, V128_i64x2_TYPE }; + LLVMTypeRef element_type[] = { INT8_TYPE, INT16_TYPE, I32_TYPE, I64_TYPE }; + + LLVMValueRef undef[] = { LLVM_CONST(i8x16_undef), LLVM_CONST(i16x8_undef), + LLVM_CONST(i32x4_undef), + LLVM_CONST(i64x2_undef) }; + LLVMValueRef mask[] = { LLVM_CONST(i8x16_vec_zero), + LLVM_CONST(i16x8_vec_zero), + LLVM_CONST(i32x4_vec_zero), + LLVM_CONST(i64x2_vec_zero) }; + LLVMValueRef lane_bits[] = { + LLVM_CONST(i32_eight), + LLVMConstInt(I32_TYPE, 16, true), + LLVMConstInt(I32_TYPE, 32, true), + LLVMConstInt(I32_TYPE, 64, true), + }; POP_I32(offset); - if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "vec"))) { - goto fail; + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + vector_type[itype], "vec"))) { + return false; } - if (!(width = LLVMConstInt(I32_TYPE, lane_width, true))) { - HANDLE_FAILURE("LLVMConstInt"); - goto fail; + /* offset mod LaneBits */ + if (!lane_bits[itype] + || !(offset = LLVMBuildSRem(comp_ctx->builder, offset, + lane_bits[itype], "offset_fix"))) { + HANDLE_FAILURE("LLVMBuildSRem"); + return false; + } + + /* change type */ + if (itype < e_shift_i32x4) { + offset = LLVMBuildTrunc(comp_ctx->builder, offset, element_type[itype], + "offset_trunc"); + } + else 
if (itype == e_shift_i64x2) { + offset = LLVMBuildZExt(comp_ctx->builder, offset, element_type[itype], + "offset_ext"); + } + + if (!offset) { + HANDLE_FAILURE("LLVMBuildZext/LLVMBuildTrunc"); + return false; + } + + /* splat to a vector */ + if (!(offset = + LLVMBuildInsertElement(comp_ctx->builder, undef[itype], offset, + I32_ZERO, "offset_vector_base"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + return false; } if (!(offset = - LLVMBuildURem(comp_ctx->builder, offset, width, "remainder"))) { - HANDLE_FAILURE("LLVMBuildURem"); - goto fail; - } - - if (I64_TYPE == element_type) { - if (!(offset = LLVMBuildZExt(comp_ctx->builder, offset, element_type, - "offset_scalar"))) { - HANDLE_FAILURE("LLVMBuildZExt"); - goto fail; - } - } - else { - if (!(offset = LLVMBuildTruncOrBitCast( - comp_ctx->builder, offset, element_type, "offset_scalar"))) { - HANDLE_FAILURE("LLVMBuildTrunc"); - goto fail; - } - } - - /* create a vector with offset */ - if (!(undef = LLVMGetUndef(vector_type))) { - HANDLE_FAILURE("LLVMGetUndef"); - goto fail; - } - - if (!(zeros_type = LLVMVectorType(I32_TYPE, 128 / lane_width))) { - HANDLE_FAILURE("LVMVectorType"); - goto fail; - } - - if (!(zeros = LLVMConstNull(zeros_type))) { - HANDLE_FAILURE("LLVMConstNull"); - goto fail; - } - - if (!(offset = LLVMBuildInsertElement(comp_ctx->builder, undef, offset, - I32_ZERO, "base_vector"))) { - HANDLE_FAILURE("LLVMBuildInsertElement"); - goto fail; - } - - if (!(offset = LLVMBuildShuffleVector(comp_ctx->builder, offset, undef, - zeros, "offset_vector"))) { + LLVMBuildShuffleVector(comp_ctx->builder, offset, undef[itype], + mask[itype], "offset_vector"))) { HANDLE_FAILURE("LLVMBuildShuffleVector"); - goto fail; + return false; } switch (shift_op) { case INT_SHL: { - if (!(result = - LLVMBuildShl(comp_ctx->builder, vector, offset, "shl"))) { - HANDLE_FAILURE("LLVMBuildShl"); - goto fail; - } + result = LLVMBuildShl(comp_ctx->builder, vector, offset, "shl"); break; } case INT_SHR_S: { - if (!(result = LLVMBuildAShr(comp_ctx->builder, vector, offset, - "ashr"))) { - HANDLE_FAILURE("LLVMBuildAShr"); - goto fail; - } + result = LLVMBuildAShr(comp_ctx->builder, vector, offset, "ashr"); break; } case INT_SHR_U: { - if (!(result = LLVMBuildLShr(comp_ctx->builder, vector, offset, - "lshr"))) { - HANDLE_FAILURE("LLVMBuildLShr"); - goto fail; - } + result = LLVMBuildLShr(comp_ctx->builder, vector, offset, "lshr"); break; } default: { - bh_assert(0); - goto fail; + break; } } - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "result"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); + if (!result) { + HANDLE_FAILURE("LLVMBuildShl/LLVMBuildLShr/LLVMBuildAShr"); goto fail; } - PUSH_V128(result); - return true; + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); + fail: return false; } @@ -132,8 +123,7 @@ aot_compile_simd_i8x16_shift(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, IntShift shift_op) { - return simd_shift(comp_ctx, func_ctx, shift_op, V128_i8x16_TYPE, INT8_TYPE, - 8); + return simd_shift(comp_ctx, func_ctx, shift_op, e_shift_i8x16); } bool @@ -141,8 +131,7 @@ aot_compile_simd_i16x8_shift(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, IntShift shift_op) { - return simd_shift(comp_ctx, func_ctx, shift_op, V128_i16x8_TYPE, - INT16_TYPE, 16); + return simd_shift(comp_ctx, func_ctx, shift_op, e_shift_i16x8); } bool @@ -150,8 +139,7 @@ aot_compile_simd_i32x4_shift(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, IntShift shift_op) { - return simd_shift(comp_ctx, func_ctx, 
shift_op, V128_i32x4_TYPE, I32_TYPE, - 32); + return simd_shift(comp_ctx, func_ctx, shift_op, e_shift_i32x4); } bool @@ -159,6 +147,5 @@ aot_compile_simd_i64x2_shift(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, IntShift shift_op) { - return simd_shift(comp_ctx, func_ctx, shift_op, V128_i64x2_TYPE, I64_TYPE, - 64); + return simd_shift(comp_ctx, func_ctx, shift_op, e_shift_i64x2); } diff --git a/core/iwasm/compilation/simd/simd_bitmask_extracts.c b/core/iwasm/compilation/simd/simd_bitmask_extracts.c index 4e0534e06..6b5cd6e7d 100644 --- a/core/iwasm/compilation/simd/simd_bitmask_extracts.c +++ b/core/iwasm/compilation/simd/simd_bitmask_extracts.c @@ -8,70 +8,92 @@ #include "../aot_emit_exception.h" #include "../../aot/aot_runtime.h" +enum integer_bitmask_type { + e_bitmask_i8x16, + e_bitmask_i16x8, + e_bitmask_i32x4, + e_bitmask_i64x2, +}; + +/* TODO: should use a much clever intrinsic */ static bool simd_build_bitmask(const AOTCompContext *comp_ctx, const AOTFuncContext *func_ctx, - uint8 length, - LLVMTypeRef vector_type, - LLVMTypeRef element_type, - const char *intrinsic) + enum integer_bitmask_type itype) { - LLVMValueRef vector, zeros, mask, mask_elements[16], cond, result; - LLVMTypeRef param_types[1], vector_ext_type; - const uint32 numbers[16] = { 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, - 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, - 0x1000, 0x2000, 0x4000, 0x8000 }; + LLVMValueRef vector, mask, result; uint8 i; + LLVMTypeRef vector_ext_type; - if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "vec"))) { + uint32 lanes[] = { 16, 8, 4, 2 }; + uint32 lane_bits[] = { 8, 16, 32, 64 }; + LLVMTypeRef element_type[] = { INT8_TYPE, INT16_TYPE, I32_TYPE, I64_TYPE }; + LLVMTypeRef vector_type[] = { V128_i8x16_TYPE, V128_i16x8_TYPE, + V128_i32x4_TYPE, V128_i64x2_TYPE }; + int32 mask_element[16] = { 0 }; + const char *intrinsic[] = { + "llvm.vector.reduce.or.v16i64", + "llvm.vector.reduce.or.v8i64", + "llvm.vector.reduce.or.v4i64", + "llvm.vector.reduce.or.v2i64", + }; + + LLVMValueRef ashr_distance; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + vector_type[itype], "vec"))) { goto fail; } - if (!(vector_ext_type = LLVMVectorType(I32_TYPE, length))) { + /* fill every bit in a lange with its sign bit */ + if (!(ashr_distance = simd_build_splat_const_integer_vector( + comp_ctx, element_type[itype], lane_bits[itype] - 1, + lanes[itype]))) { + goto fail; + } + + if (!(vector = LLVMBuildAShr(comp_ctx->builder, vector, ashr_distance, + "vec_ashr"))) { + HANDLE_FAILURE("LLVMBuildAShr"); + goto fail; + } + + if (!(vector_ext_type = LLVMVectorType(I64_TYPE, lanes[itype]))) { HANDLE_FAILURE("LLVMVectorType"); goto fail; } - if (!(vector = LLVMBuildSExt(comp_ctx->builder, vector, vector_ext_type, - "vec_ext"))) { - HANDLE_FAILURE("LLVMBuildSExt"); - goto fail; - } - - if (!(zeros = LLVMConstNull(vector_ext_type))) { - HANDLE_FAILURE("LLVMConstNull"); - goto fail; - } - - for (i = 0; i < 16; i++) { - if (!(mask_elements[i] = LLVMConstInt(I32_TYPE, numbers[i], false))) { - HANDLE_FAILURE("LLVMConstInt"); + if (e_bitmask_i64x2 != itype) { + if (!(vector = LLVMBuildSExt(comp_ctx->builder, vector, + vector_ext_type, "zext_to_i64"))) { goto fail; } } - if (!(mask = LLVMConstVector(mask_elements, length))) { - HANDLE_FAILURE("LLVMConstVector"); + for (i = 0; i < 16; i++) { + mask_element[i] = 0x1 << i; + } + + if (!(mask = simd_build_const_integer_vector( + comp_ctx, I64_TYPE, mask_element, lanes[itype]))) { goto fail; } - if (!(cond = LLVMBuildICmp(comp_ctx->builder, 
LLVMIntSLT, vector, zeros, - "lt_zero"))) { - HANDLE_FAILURE("LLVMBuildICmp"); + if (!(vector = + LLVMBuildAnd(comp_ctx->builder, vector, mask, "mask_bits"))) { + HANDLE_FAILURE("LLVMBuildAnd"); goto fail; } if (!(result = - LLVMBuildSelect(comp_ctx->builder, cond, mask, zeros, "select"))) { - HANDLE_FAILURE("LLVMBuildSelect"); + aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic[itype], + I64_TYPE, &vector_ext_type, 1, vector))) { goto fail; } - param_types[0] = vector_ext_type; - if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, I32_TYPE, - param_types, 1, result))) { - HANDLE_FAILURE("LLVMBuildCall"); + if (!(result = + LLVMBuildTrunc(comp_ctx->builder, result, I32_TYPE, "to_i32"))) { + HANDLE_FAILURE("LLVMBuildTrunc"); goto fail; } @@ -86,24 +108,26 @@ bool aot_compile_simd_i8x16_bitmask(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_build_bitmask(comp_ctx, func_ctx, 16, V128_i8x16_TYPE, - INT8_TYPE, - "llvm.experimental.vector.reduce.or.v16i32"); + return simd_build_bitmask(comp_ctx, func_ctx, e_bitmask_i8x16); } bool aot_compile_simd_i16x8_bitmask(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_build_bitmask(comp_ctx, func_ctx, 8, V128_i16x8_TYPE, - INT16_TYPE, - "llvm.experimental.vector.reduce.or.v8i32"); + return simd_build_bitmask(comp_ctx, func_ctx, e_bitmask_i16x8); } bool aot_compile_simd_i32x4_bitmask(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_build_bitmask(comp_ctx, func_ctx, 4, V128_i32x4_TYPE, I32_TYPE, - "llvm.experimental.vector.reduce.or.v4i32"); + return simd_build_bitmask(comp_ctx, func_ctx, e_bitmask_i32x4); +} + +bool +aot_compile_simd_i64x2_bitmask(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_build_bitmask(comp_ctx, func_ctx, e_bitmask_i64x2); } diff --git a/core/iwasm/compilation/simd/simd_bitmask_extracts.h b/core/iwasm/compilation/simd/simd_bitmask_extracts.h index b8cd5e86f..aac4cc2ce 100644 --- a/core/iwasm/compilation/simd/simd_bitmask_extracts.h +++ b/core/iwasm/compilation/simd/simd_bitmask_extracts.h @@ -13,17 +13,23 @@ extern "C" { #endif bool -aot_compile_simd_i8x16_bitmask(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_i8x16_bitmask(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); bool -aot_compile_simd_i16x8_bitmask(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_i16x8_bitmask(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); bool -aot_compile_simd_i32x4_bitmask(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_i32x4_bitmask(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i64x2_bitmask(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); #ifdef __cplusplus } /* end of extern "C" */ #endif #endif /* end of _SIMD_BITMASK_EXTRACTS_H_ */ - diff --git a/core/iwasm/compilation/simd/simd_bitwise_ops.c b/core/iwasm/compilation/simd/simd_bitwise_ops.c index 69e82bb85..db8c3f321 100644 --- a/core/iwasm/compilation/simd/simd_bitwise_ops.c +++ b/core/iwasm/compilation/simd/simd_bitwise_ops.c @@ -86,7 +86,7 @@ fail: /* v128.or(v128.and(v1, c), v128.and(v2, v128.not(c))) */ static bool -v128_bitwise_bit_select(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +v128_bitwise_bitselect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { LLVMValueRef vector1, vector2, vector3, result; @@ -138,7 +138,7 @@ aot_compile_simd_v128_bitwise(AOTCompContext *comp_ctx, case V128_NOT: return v128_bitwise_not(comp_ctx, func_ctx); case V128_BITSELECT: - 
return v128_bitwise_bit_select(comp_ctx, func_ctx); + return v128_bitwise_bitselect(comp_ctx, func_ctx); default: bh_assert(0); return false; diff --git a/core/iwasm/compilation/simd/simd_bool_reductions.c b/core/iwasm/compilation/simd/simd_bool_reductions.c index 503f3ebef..a5d591a03 100644 --- a/core/iwasm/compilation/simd/simd_bool_reductions.c +++ b/core/iwasm/compilation/simd/simd_bool_reductions.c @@ -8,145 +8,62 @@ #include "../aot_emit_exception.h" #include "../../aot/aot_runtime.h" -static bool -simd_any_true(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - LLVMTypeRef vector_type, - LLVMTypeRef element_type, - const char *intrinsic) -{ - LLVMValueRef vector, zeros, non_zero, result; - - if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "vec"))) { - goto fail; - } - - if (!(zeros = LLVMConstNull(vector_type))) { - HANDLE_FAILURE("LLVMConstNull"); - goto fail; - } - - /* icmp eq %vector, zeroinitialize */ - if (!(non_zero = LLVMBuildICmp(comp_ctx->builder, LLVMIntNE, vector, zeros, - "non_zero"))) { - HANDLE_FAILURE("LLVMBuildICmp"); - goto fail; - } - - /* zext to */ - if (!(non_zero = LLVMBuildZExt(comp_ctx->builder, non_zero, vector_type, - "non_zero_ex"))) { - HANDLE_FAILURE("LLVMBuildZExt"); - goto fail; - } - - if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, element_type, - &vector_type, 1, non_zero))) { - HANDLE_FAILURE("LLVMBuildCall"); - goto fail; - } - - if (!(zeros = LLVMConstNull(element_type))) { - HANDLE_FAILURE("LLVMConstNull"); - goto fail; - } - - if (!(result = LLVMBuildICmp(comp_ctx->builder, LLVMIntNE, result, zeros, - "gt_zero"))) { - HANDLE_FAILURE("LLVMBuildICmp"); - goto fail; - } - - if (!(result = - LLVMBuildZExt(comp_ctx->builder, result, I32_TYPE, "ret"))) { - HANDLE_FAILURE("LLVMBuildZExt"); - goto fail; - } - - PUSH_I32(result); - - return true; -fail: - return false; -} - -bool -aot_compile_simd_i8x16_any_true(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx) -{ - return simd_any_true(comp_ctx, func_ctx, V128_i8x16_TYPE, INT8_TYPE, - "llvm.experimental.vector.reduce.add.v16i8"); -} - -bool -aot_compile_simd_i16x8_any_true(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx) -{ - return simd_any_true(comp_ctx, func_ctx, V128_i16x8_TYPE, INT16_TYPE, - "llvm.experimental.vector.reduce.add.v8i16"); -} - -bool -aot_compile_simd_i32x4_any_true(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx) -{ - return simd_any_true(comp_ctx, func_ctx, V128_i32x4_TYPE, I32_TYPE, - "llvm.experimental.vector.reduce.add.v4i32"); -} +enum integer_all_true { + e_int_all_true_v16i8, + e_int_all_true_v8i16, + e_int_all_true_v4i32, + e_int_all_true_v2i64, +}; static bool simd_all_true(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - LLVMTypeRef vector_type, - LLVMTypeRef element_type, - const char *intrinsic) + enum integer_all_true itype) { - LLVMValueRef vector, zeros, is_zero, result; + LLVMValueRef vector, result; + LLVMTypeRef vector_i1_type; + LLVMTypeRef vector_type[] = { V128_i8x16_TYPE, V128_i16x8_TYPE, + V128_i32x4_TYPE, V128_i64x2_TYPE }; + uint32 lanes[] = { 16, 8, 4, 2 }; + const char *intrinsic[] = { + "llvm.vector.reduce.and.v16i1", + "llvm.vector.reduce.and.v8i1", + "llvm.vector.reduce.and.v4i1", + "llvm.vector.reduce.and.v2i1", + }; + LLVMValueRef zero[] = { + LLVM_CONST(i8x16_vec_zero), + LLVM_CONST(i16x8_vec_zero), + LLVM_CONST(i32x4_vec_zero), + LLVM_CONST(i64x2_vec_zero), + }; - if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "vec"))) { + if (!(vector_i1_type = 
LLVMVectorType(INT1_TYPE, lanes[itype]))) { + HANDLE_FAILURE("LLVMVectorType"); goto fail; } - if (!(zeros = LLVMConstNull(vector_type))) { - HANDLE_FAILURE("LLVMConstNull"); + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + vector_type[itype], "vector"))) { goto fail; } - /* icmp eq %vector, zeroinitialize */ - if (!(is_zero = LLVMBuildICmp(comp_ctx->builder, LLVMIntEQ, vector, zeros, - "is_zero"))) { + /* compare with zero */ + if (!(result = LLVMBuildICmp(comp_ctx->builder, LLVMIntNE, vector, + zero[itype], "ne_zero"))) { HANDLE_FAILURE("LLVMBuildICmp"); goto fail; } - /* zext to */ - if (!(is_zero = LLVMBuildZExt(comp_ctx->builder, is_zero, vector_type, - "is_zero_ex"))) { - HANDLE_FAILURE("LLVMBuildZExt"); - goto fail; - } - - if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, element_type, - &vector_type, 1, is_zero))) { - HANDLE_FAILURE("LLVMBuildCall"); - goto fail; - } - - if (!(zeros = LLVMConstNull(element_type))) { - HANDLE_FAILURE("LLVMConstNull"); - goto fail; - } - - if (!(result = LLVMBuildICmp(comp_ctx->builder, LLVMIntEQ, result, zeros, - "none"))) { - HANDLE_FAILURE("LLVMBuildICmp"); + /* check zero */ + if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, + intrinsic[itype], INT1_TYPE, + &vector_i1_type, 1, result))) { goto fail; } if (!(result = - LLVMBuildZExt(comp_ctx->builder, result, I32_TYPE, "ret"))) { + LLVMBuildZExt(comp_ctx->builder, result, I32_TYPE, "to_i32"))) { HANDLE_FAILURE("LLVMBuildZExt"); goto fail; } @@ -162,22 +79,61 @@ bool aot_compile_simd_i8x16_all_true(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_all_true(comp_ctx, func_ctx, V128_i8x16_TYPE, INT8_TYPE, - "llvm.experimental.vector.reduce.add.v16i8"); + return simd_all_true(comp_ctx, func_ctx, e_int_all_true_v16i8); } bool aot_compile_simd_i16x8_all_true(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_all_true(comp_ctx, func_ctx, V128_i16x8_TYPE, INT16_TYPE, - "llvm.experimental.vector.reduce.add.v8i16"); + return simd_all_true(comp_ctx, func_ctx, e_int_all_true_v8i16); } bool aot_compile_simd_i32x4_all_true(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_all_true(comp_ctx, func_ctx, V128_i32x4_TYPE, I32_TYPE, - "llvm.experimental.vector.reduce.add.v4i32"); + return simd_all_true(comp_ctx, func_ctx, e_int_all_true_v4i32); +} + +bool +aot_compile_simd_i64x2_all_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_all_true(comp_ctx, func_ctx, e_int_all_true_v2i64); +} + +bool +aot_compile_simd_v128_any_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + LLVMTypeRef vector_type; + LLVMValueRef vector, result; + + if (!(vector_type = LLVMVectorType(INT1_TYPE, 128))) { + return false; + } + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vector"))) { + goto fail; + } + + if (!(result = aot_call_llvm_intrinsic( + comp_ctx, func_ctx, "llvm.vector.reduce.or.v128i1", INT1_TYPE, + &vector_type, 1, vector))) { + goto fail; + } + + if (!(result = + LLVMBuildZExt(comp_ctx->builder, result, I32_TYPE, "to_i32"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + goto fail; + } + + PUSH_I32(result); + + return true; +fail: + return false; } diff --git a/core/iwasm/compilation/simd/simd_bool_reductions.h b/core/iwasm/compilation/simd/simd_bool_reductions.h index e67f00e7e..649d5a5e2 100644 --- a/core/iwasm/compilation/simd/simd_bool_reductions.h +++ b/core/iwasm/compilation/simd/simd_bool_reductions.h @@ -12,18 +12,6 @@ extern "C" { #endif -bool 
-aot_compile_simd_i8x16_any_true(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx); - -bool -aot_compile_simd_i16x8_any_true(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx); - -bool -aot_compile_simd_i32x4_any_true(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx); - bool aot_compile_simd_i8x16_all_true(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); @@ -36,6 +24,14 @@ bool aot_compile_simd_i32x4_all_true(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +bool +aot_compile_simd_i64x2_all_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_v128_any_true(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/compilation/simd/simd_common.c b/core/iwasm/compilation/simd/simd_common.c index 81378fa00..524babe13 100644 --- a/core/iwasm/compilation/simd/simd_common.c +++ b/core/iwasm/compilation/simd/simd_common.c @@ -44,4 +44,119 @@ simd_bitcast_and_push_v128(const AOTCompContext *comp_ctx, return true; fail: return false; -} \ No newline at end of file +} + +LLVMValueRef +simd_lane_id_to_llvm_value(AOTCompContext *comp_ctx, uint8 lane_id) +{ + LLVMValueRef lane_indexes[] = { + LLVM_CONST(i32_zero), LLVM_CONST(i32_one), + LLVM_CONST(i32_two), LLVM_CONST(i32_three), + LLVM_CONST(i32_four), LLVM_CONST(i32_five), + LLVM_CONST(i32_six), LLVM_CONST(i32_seven), + LLVM_CONST(i32_eight), LLVM_CONST(i32_nine), + LLVM_CONST(i32_ten), LLVM_CONST(i32_eleven), + LLVM_CONST(i32_twelve), LLVM_CONST(i32_thirteen), + LLVM_CONST(i32_fourteen), LLVM_CONST(i32_fifteen), + }; + + return lane_id < 16 ? lane_indexes[lane_id] : NULL; +} + +LLVMValueRef +simd_build_const_integer_vector(const AOTCompContext *comp_ctx, + const LLVMTypeRef element_type, + const int *element_value, + uint32 length) +{ + LLVMValueRef vector = NULL; + LLVMValueRef *elements; + unsigned i; + + if (!(elements = wasm_runtime_malloc(sizeof(LLVMValueRef) * length))) { + return NULL; + } + + for (i = 0; i < length; i++) { + if (!(elements[i] = + LLVMConstInt(element_type, element_value[i], true))) { + HANDLE_FAILURE("LLVMConstInst"); + goto fail; + } + } + + if (!(vector = LLVMConstVector(elements, length))) { + HANDLE_FAILURE("LLVMConstVector"); + goto fail; + } + +fail: + wasm_runtime_free(elements); + return vector; +} + +LLVMValueRef +simd_build_splat_const_integer_vector(const AOTCompContext *comp_ctx, + const LLVMTypeRef element_type, + const int64 element_value, + uint32 length) +{ + LLVMValueRef vector = NULL, element; + LLVMValueRef *elements; + unsigned i; + + if (!(elements = wasm_runtime_malloc(sizeof(LLVMValueRef) * length))) { + return NULL; + } + + if (!(element = LLVMConstInt(element_type, element_value, true))) { + HANDLE_FAILURE("LLVMConstInt"); + goto fail; + } + + for (i = 0; i < length; i++) { + elements[i] = element; + } + + if (!(vector = LLVMConstVector(elements, length))) { + HANDLE_FAILURE("LLVMConstVector"); + goto fail; + } + +fail: + wasm_runtime_free(elements); + return vector; +} + +LLVMValueRef +simd_build_splat_const_float_vector(const AOTCompContext *comp_ctx, + const LLVMTypeRef element_type, + const float element_value, + uint32 length) +{ + LLVMValueRef vector = NULL, element; + LLVMValueRef *elements; + unsigned i; + + if (!(elements = wasm_runtime_malloc(sizeof(LLVMValueRef) * length))) { + return NULL; + } + + if (!(element = LLVMConstReal(element_type, element_value))) { + HANDLE_FAILURE("LLVMConstReal"); + goto fail; + } + + for (i = 0; i < length; i++) { + elements[i] = 
element; + } + + if (!(vector = LLVMConstVector(elements, length))) { + HANDLE_FAILURE("LLVMConstVector"); + goto fail; + } + +fail: + wasm_runtime_free(elements); + return vector; +} diff --git a/core/iwasm/compilation/simd/simd_common.h b/core/iwasm/compilation/simd/simd_common.h index 5f029b01e..18af9fd56 100644 --- a/core/iwasm/compilation/simd/simd_common.h +++ b/core/iwasm/compilation/simd/simd_common.h @@ -8,6 +8,13 @@ #include "../aot_compiler.h" +static inline bool +is_target_x86(AOTCompContext *comp_ctx) +{ + return !strncmp(comp_ctx->target_arch, "x86_64", 6) + || !strncmp(comp_ctx->target_arch, "i386", 4); +} + LLVMValueRef simd_pop_v128_and_bitcast(const AOTCompContext *comp_ctx, const AOTFuncContext *func_ctx, @@ -20,4 +27,24 @@ simd_bitcast_and_push_v128(const AOTCompContext *comp_ctx, LLVMValueRef vector, const char *name); +LLVMValueRef +simd_lane_id_to_llvm_value(AOTCompContext *comp_ctx, uint8 lane_id); + +LLVMValueRef +simd_build_const_integer_vector(const AOTCompContext *comp_ctx, + const LLVMTypeRef element_type, + const int *element_value, + uint32 length); + +LLVMValueRef +simd_build_splat_const_integer_vector(const AOTCompContext *comp_ctx, + const LLVMTypeRef element_type, + const int64 element_value, + uint32 length); + +LLVMValueRef +simd_build_splat_const_float_vector(const AOTCompContext *comp_ctx, + const LLVMTypeRef element_type, + const float element_value, + uint32 length); #endif /* _SIMD_COMMON_H_ */ \ No newline at end of file diff --git a/core/iwasm/compilation/simd/simd_comparisons.c b/core/iwasm/compilation/simd/simd_comparisons.c index 9b95a85b8..2eedd8ee4 100644 --- a/core/iwasm/compilation/simd/simd_comparisons.c +++ b/core/iwasm/compilation/simd/simd_comparisons.c @@ -160,6 +160,14 @@ aot_compile_simd_i32x4_compare(AOTCompContext *comp_ctx, return interger_vector_compare(comp_ctx, func_ctx, cond, V128_i32x4_TYPE); } +bool +aot_compile_simd_i64x2_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntCond cond) +{ + return interger_vector_compare(comp_ctx, func_ctx, cond, V128_i64x2_TYPE); +} + static bool float_vector_compare(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, diff --git a/core/iwasm/compilation/simd/simd_comparisons.h b/core/iwasm/compilation/simd/simd_comparisons.h index 46d816714..153fca198 100644 --- a/core/iwasm/compilation/simd/simd_comparisons.h +++ b/core/iwasm/compilation/simd/simd_comparisons.h @@ -27,6 +27,11 @@ aot_compile_simd_i32x4_compare(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, IntCond cond); +bool +aot_compile_simd_i64x2_compare(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + IntCond cond); + bool aot_compile_simd_f32x4_compare(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, diff --git a/core/iwasm/compilation/simd/simd_construct_values.c b/core/iwasm/compilation/simd/simd_construct_values.c index 1438a1639..392817631 100644 --- a/core/iwasm/compilation/simd/simd_construct_values.c +++ b/core/iwasm/compilation/simd/simd_construct_values.c @@ -3,6 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ +#include "simd_common.h" #include "simd_construct_values.h" #include "../aot_emit_exception.h" #include "../interpreter/wasm_opcode.h" @@ -14,23 +15,19 @@ aot_compile_simd_v128_const(AOTCompContext *comp_ctx, const uint8 *imm_bytes) { uint64 imm1, imm2; - LLVMValueRef undef, first_long, agg1, second_long, agg2; + LLVMValueRef first_long, agg1, second_long, agg2; wasm_runtime_read_v128(imm_bytes, &imm1, &imm2); - if (!(undef = LLVMGetUndef(V128_i64x2_TYPE))) { - 
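/* The helpers added to simd_common.c above are shared by the lowering code
 * in the rest of this patch: simd_lane_id_to_llvm_value() maps a lane index
 * (0..15) to a cached i32 constant, and the simd_build_*_vector() helpers
 * materialize constant vectors from a temporary element array. For example,
 * the signed i8x16.narrow path obtains its saturation maximum via
 *
 *   simd_build_splat_const_integer_vector(comp_ctx, INT16_TYPE, 0x007f, 8)
 *
 * which yields the <8 x i16> constant <127, 127, ..., 127>.
 */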
HANDLE_FAILURE("LLVMGetUndef"); - goto fail; - } - /* %agg1 = insertelement <2 x i64> undef, i16 0, i64 ${*imm} */ if (!(first_long = I64_CONST(imm1))) { HANDLE_FAILURE("LLVMConstInt"); goto fail; } - if (!(agg1 = LLVMBuildInsertElement(comp_ctx->builder, undef, first_long, - I32_ZERO, "agg1"))) { + if (!(agg1 = + LLVMBuildInsertElement(comp_ctx->builder, LLVM_CONST(i64x2_undef), + first_long, I32_ZERO, "agg1"))) { HANDLE_FAILURE("LLVMBuildInsertElement"); goto fail; } @@ -48,7 +45,6 @@ aot_compile_simd_v128_const(AOTCompContext *comp_ctx, } PUSH_V128(agg2); - return true; fail: return false; @@ -57,134 +53,88 @@ fail: bool aot_compile_simd_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint8 splat_opcode) + uint8 opcode) { - LLVMValueRef value, undef, base, mask, new_vector, result; - LLVMTypeRef all_zero_ty; + uint32 opcode_index = opcode - SIMD_i8x16_splat; + LLVMValueRef value = NULL, base, new_vector; + LLVMValueRef undefs[] = { + LLVM_CONST(i8x16_undef), LLVM_CONST(i16x8_undef), + LLVM_CONST(i32x4_undef), LLVM_CONST(i64x2_undef), + LLVM_CONST(f32x4_undef), LLVM_CONST(f64x2_undef), + }; + LLVMValueRef masks[] = { + LLVM_CONST(i32x16_zero), LLVM_CONST(i32x8_zero), + LLVM_CONST(i32x4_zero), LLVM_CONST(i32x2_zero), + LLVM_CONST(i32x4_zero), LLVM_CONST(i32x2_zero), + }; - switch (splat_opcode) { + switch (opcode) { case SIMD_i8x16_splat: { LLVMValueRef input; POP_I32(input); - /* trunc i32 %input to i8 */ - if (!(value = LLVMBuildTrunc(comp_ctx->builder, input, INT8_TYPE, - "trunc"))) { - HANDLE_FAILURE("LLVMBuildTrunc"); - goto fail; - } - undef = LLVMGetUndef(V128_i8x16_TYPE); - if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 16))) { - HANDLE_FAILURE("LLVMVectorType"); - goto fail; - } + value = + LLVMBuildTrunc(comp_ctx->builder, input, INT8_TYPE, "trunc"); break; } case SIMD_i16x8_splat: { LLVMValueRef input; POP_I32(input); - /* trunc i32 %input to i16 */ - if (!(value = LLVMBuildTrunc(comp_ctx->builder, input, INT16_TYPE, - "trunc"))) { - HANDLE_FAILURE("LLVMBuildTrunc"); - goto fail; - } - undef = LLVMGetUndef(V128_i16x8_TYPE); - if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 8))) { - HANDLE_FAILURE("LLVMVectorType"); - goto fail; - } + value = + LLVMBuildTrunc(comp_ctx->builder, input, INT16_TYPE, "trunc"); break; } case SIMD_i32x4_splat: { POP_I32(value); - undef = LLVMGetUndef(V128_i32x4_TYPE); - - if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 4))) { - HANDLE_FAILURE("LLVMVectorType"); - goto fail; - } break; } case SIMD_i64x2_splat: { POP(value, VALUE_TYPE_I64); - undef = LLVMGetUndef(V128_i64x2_TYPE); - - if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 2))) { - HANDLE_FAILURE("LLVMVectorType"); - goto fail; - } break; } case SIMD_f32x4_splat: { POP(value, VALUE_TYPE_F32); - undef = LLVMGetUndef(V128_f32x4_TYPE); - - if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 4))) { - HANDLE_FAILURE("LLVMVectorType"); - goto fail; - } break; } case SIMD_f64x2_splat: { POP(value, VALUE_TYPE_F64); - undef = LLVMGetUndef(V128_f64x2_TYPE); - - if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 2))) { - HANDLE_FAILURE("LLVMVectorType"); - goto fail; - } break; } default: { - bh_assert(0); - goto fail; + break; } } - if (!undef) { - HANDLE_FAILURE("LVMGetUndef"); + + if (!value) { goto fail; } /* insertelement undef, ty %value, i32 0 */ - if (!(base = LLVMBuildInsertElement(comp_ctx->builder, undef, value, - I32_ZERO, "base"))) { + if (!(base = + LLVMBuildInsertElement(comp_ctx->builder, undefs[opcode_index], + value, I32_ZERO, "base"))) { HANDLE_FAILURE("LLVMBuildInsertElement"); goto fail; } - /* 
zeroinitializer */ - if (!(mask = LLVMConstNull(all_zero_ty))) { - HANDLE_FAILURE("LLVMConstNull"); - goto fail; - } - /* shufflevector %base, undef, zeroinitializer */ - if (!(new_vector = LLVMBuildShuffleVector(comp_ctx->builder, base, undef, - mask, "new_vector"))) { + if (!(new_vector = LLVMBuildShuffleVector( + comp_ctx->builder, base, undefs[opcode_index], masks[opcode_index], + "new_vector"))) { HANDLE_FAILURE("LLVMBuildShuffleVector"); goto fail; } - /* bitcast to <2 x i64> */ - if (!(result = LLVMBuildBitCast(comp_ctx->builder, new_vector, - V128_i64x2_TYPE, "ret"))) { - HANDLE_FAILURE("LLVMBuidlCast"); - goto fail; - } - - /* push result into the stack */ - PUSH_V128(result); - - return true; + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, new_vector, + "result"); fail: return false; } diff --git a/core/iwasm/compilation/simd/simd_conversions.c b/core/iwasm/compilation/simd/simd_conversions.c index 67500d017..426b88f9d 100644 --- a/core/iwasm/compilation/simd/simd_conversions.c +++ b/core/iwasm/compilation/simd/simd_conversions.c @@ -10,278 +10,192 @@ #include "../../aot/aot_runtime.h" static bool -is_target_x86(AOTCompContext *comp_ctx) -{ - return !strncmp(comp_ctx->target_arch, "x86_64", 6) || - !strncmp(comp_ctx->target_arch, "i386", 4); -} - -static bool -simd_integer_narrow(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - bool is_signed, - LLVMTypeRef in_vector_type, - LLVMTypeRef out_vector_type, - const char *instrinsic) +simd_integer_narrow_x86(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef in_vector_type, + LLVMTypeRef out_vector_type, + const char *instrinsic) { LLVMValueRef vector1, vector2, result; LLVMTypeRef param_types[2] = { in_vector_type, in_vector_type }; if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, - in_vector_type, "vec2"))) { - goto fail; + in_vector_type, "vec2")) + || !(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + in_vector_type, "vec1"))) { + return false; } - if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, - in_vector_type, "vec1"))) { - goto fail; - } - - if (!(result = - aot_call_llvm_intrinsic(comp_ctx, func_ctx, instrinsic, out_vector_type, - param_types, 2, vector1, vector2))) { + if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, instrinsic, + out_vector_type, param_types, 2, + vector1, vector2))) { HANDLE_FAILURE("LLVMBuildCall"); - goto fail; + return false; } - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - PUSH_V128(result); - return true; -fail: - return false; + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); } +enum integer_sat_type { + e_sat_i16x8 = 0, + e_sat_i32x4, + e_sat_i64x2, + e_sat_i32x8, +}; + static LLVMValueRef -build_intx4_vector(const AOTCompContext *comp_ctx, - const LLVMTypeRef element_type, - const int *element_value) +simd_saturate(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + enum integer_sat_type itype, + LLVMValueRef vector, + LLVMValueRef min, + LLVMValueRef max, + bool is_signed) { - LLVMValueRef vector, elements[4]; - unsigned i; + LLVMValueRef result; + LLVMTypeRef vector_type; - for (i = 0; i < 4; i++) { - if (!(elements[i] = - LLVMConstInt(element_type, element_value[i], true))) { - HANDLE_FAILURE("LLVMConstInst"); - goto fail; + LLVMTypeRef param_types[][2] = { + { V128_i16x8_TYPE, V128_i16x8_TYPE }, + { V128_i32x4_TYPE, V128_i32x4_TYPE }, + { V128_i64x2_TYPE, V128_i64x2_TYPE }, + { 0 }, + }; 
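/* simd_saturate() clamps each lane of `vector` into [min, max]: it first
 * takes the lane-wise minimum with `max` (llvm.smin / llvm.umin), then the
 * lane-wise maximum with `min` (llvm.smax / llvm.umax), i.e. roughly
 * result = max(min(x, hi), lo) per lane. The extra e_sat_i32x8 entry covers
 * the <8 x i32> intermediate used by i16x8.q15mulr_sat further below.
 */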
+ + const char *smin_intrinsic[] = { + "llvm.smin.v8i16", + "llvm.smin.v4i32", + "llvm.smin.v2i64", + "llvm.smin.v8i32", + }; + + const char *umin_intrinsic[] = { + "llvm.umin.v8i16", + "llvm.umin.v4i32", + "llvm.umin.v2i64", + "llvm.umin.v8i32", + }; + + const char *smax_intrinsic[] = { + "llvm.smax.v8i16", + "llvm.smax.v4i32", + "llvm.smax.v2i64", + "llvm.smax.v8i32", + }; + + const char *umax_intrinsic[] = { + "llvm.umax.v8i16", + "llvm.umax.v4i32", + "llvm.umax.v2i64", + "llvm.umax.v8i32", + }; + + if (e_sat_i32x8 == itype) { + if (!(vector_type = LLVMVectorType(I32_TYPE, 8))) { + HANDLE_FAILURE("LLVMVectorType"); + return NULL; } + + param_types[itype][0] = vector_type; + param_types[itype][1] = vector_type; } - if (!(vector = LLVMConstVector(elements, 4))) { - HANDLE_FAILURE("LLVMConstVector"); - goto fail; - } - return vector; -fail: - return NULL; -} - -static LLVMValueRef -build_intx8_vector(const AOTCompContext *comp_ctx, - const LLVMTypeRef element_type, - const int *element_value) -{ - LLVMValueRef vector, elements[8]; - unsigned i; - - for (i = 0; i < 8; i++) { - if (!(elements[i] = - LLVMConstInt(element_type, element_value[i], true))) { - HANDLE_FAILURE("LLVMConstInst"); - goto fail; - } + if (!(result = aot_call_llvm_intrinsic( + comp_ctx, func_ctx, + is_signed ? smin_intrinsic[itype] : umin_intrinsic[itype], + param_types[itype][0], param_types[itype], 2, vector, max)) + || !(result = aot_call_llvm_intrinsic( + comp_ctx, func_ctx, + is_signed ? smax_intrinsic[itype] : umax_intrinsic[itype], + param_types[itype][0], param_types[itype], 2, result, min))) { + return NULL; } - if (!(vector = LLVMConstVector(elements, 8))) { - HANDLE_FAILURE("LLVMConstVector"); - goto fail; - } - - return vector; -fail: - return NULL; -} - -static LLVMValueRef -build_intx16_vector(const AOTCompContext *comp_ctx, - const LLVMTypeRef element_type, - const int *element_value) -{ - LLVMValueRef vector, elements[16]; - unsigned i; - - for (i = 0; i < 16; i++) { - if (!(elements[i] = - LLVMConstInt(element_type, element_value[i], true))) { - HANDLE_FAILURE("LLVMConstInst"); - goto fail; - } - } - - if (!(vector = LLVMConstVector(elements, 16))) { - HANDLE_FAILURE("LLVMConstVector"); - goto fail; - } - - return vector; -fail: - return NULL; -} - -bool -aot_compile_simd_i8x16_narrow_i16x8_x86(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - bool is_signed) -{ - return simd_integer_narrow( - comp_ctx, func_ctx, is_signed, V128_i16x8_TYPE, V128_i8x16_TYPE, - is_signed ? "llvm.x86.sse2.packsswb.128" : "llvm.x86.sse2.packuswb.128"); -} - -bool -aot_compile_simd_i16x8_narrow_i32x4_x86(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - bool is_signed) -{ - return simd_integer_narrow( - comp_ctx, func_ctx, is_signed, V128_i32x4_TYPE, V128_i16x8_TYPE, - is_signed ? 
"llvm.x86.sse2.packssdw.128" : "llvm.x86.sse41.packusdw"); + return result; } static bool -aot_compile_simd_i8x16_narrow_i16x8_common(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - bool is_signed) +simd_integer_narrow_common(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + enum integer_sat_type itype, + bool is_signed) { - LLVMValueRef vector1, vector2, result, vector_min, vector_max, shuffle, - vector1_clamped, vector2_clamped, vector1_trunced, vector2_trunced, - shuffle_vector; - LLVMValueRef v1_gt_max, v1_lt_min, v2_gt_max, v2_lt_min; + LLVMValueRef vec1, vec2, min, max, mask, result; + LLVMTypeRef in_vector_type[] = { V128_i16x8_TYPE, V128_i32x4_TYPE, + V128_i64x2_TYPE }; + LLVMTypeRef min_max_type[] = { INT16_TYPE, I32_TYPE, I64_TYPE }; + LLVMTypeRef trunc_type[3] = { 0 }; + uint8 length[] = { 8, 4, 2 }; - int min_s_array[8] = { 0xff80, 0xff80, 0xff80, 0xff80, - 0xff80, 0xff80, 0xff80, 0xff80 }; - int max_s_array[8] = { 0x007f, 0x007f, 0x007f, 0x007f, - 0x007f, 0x007f, 0x007f, 0x007f }; + int64 smin[] = { 0xff80, 0xffFF8000, 0xffFFffFF80000000 }; + int64 umin[] = { 0x0, 0x0, 0x0 }; + int64 smax[] = { 0x007f, 0x00007fff, 0x000000007fFFffFF }; + int64 umax[] = { 0x00ff, 0x0000ffff, 0x00000000ffFFffFF }; - int min_u_array[8] = { 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 }; - int max_u_array[8] = { 0x00ff, 0x00ff, 0x00ff, 0x00ff, - 0x00ff, 0x00ff, 0x00ff, 0x00ff }; + LLVMValueRef mask_element[] = { + LLVM_CONST(i32_zero), LLVM_CONST(i32_one), + LLVM_CONST(i32_two), LLVM_CONST(i32_three), + LLVM_CONST(i32_four), LLVM_CONST(i32_five), + LLVM_CONST(i32_six), LLVM_CONST(i32_seven), + LLVM_CONST(i32_eight), LLVM_CONST(i32_nine), + LLVM_CONST(i32_ten), LLVM_CONST(i32_eleven), + LLVM_CONST(i32_twelve), LLVM_CONST(i32_thirteen), + LLVM_CONST(i32_fourteen), LLVM_CONST(i32_fifteen), + }; - int shuffle_array[16] = { 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15 }; - - if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, - V128_i16x8_TYPE, "vec2"))) { - goto fail; + if (!(trunc_type[0] == LLVMVectorType(INT8_TYPE, 8)) + || !(trunc_type[1] == LLVMVectorType(INT16_TYPE, 4)) + || !(trunc_type[2] == LLVMVectorType(I32_TYPE, 2))) { + HANDLE_FAILURE("LLVMVectorType"); + return false; } - if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, - V128_i16x8_TYPE, "vec1"))) { - goto fail; + if (!(vec2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + in_vector_type[itype], "vec2")) + || !(vec1 = simd_pop_v128_and_bitcast( + comp_ctx, func_ctx, in_vector_type[itype], "vec1"))) { + return false; } - if (!(vector_min = build_intx8_vector( - comp_ctx, INT16_TYPE, is_signed ? min_s_array : min_u_array))) { - goto fail; - } - if (!(vector_max = build_intx8_vector( - comp_ctx, INT16_TYPE, is_signed ? max_s_array : max_u_array))) { - goto fail; - } - if (!(shuffle = build_intx16_vector(comp_ctx, I32_TYPE, shuffle_array))) { - goto fail; + if (!(max = simd_build_splat_const_integer_vector( + comp_ctx, min_max_type[itype], + is_signed ? smax[itype] : umax[itype], length[itype])) + || !(min = simd_build_splat_const_integer_vector( + comp_ctx, min_max_type[itype], + is_signed ? 
smin[itype] : umin[itype], length[itype]))) { + return false; } - if (!(v1_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector1, - vector_max, "v1_great_than_max"))) { - HANDLE_FAILURE("LLVMBuldICmp"); - goto fail; + /* sat */ + if (!(vec1 = simd_saturate(comp_ctx, func_ctx, e_sat_i16x8, vec1, min, max, + is_signed)) + || !(vec2 = simd_saturate(comp_ctx, func_ctx, e_sat_i16x8, vec2, min, + max, is_signed))) { + return false; } - if (!(v2_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector2, - vector_max, "v2_great_than_max"))) { - HANDLE_FAILURE("LLVMBuldICmp"); - goto fail; - } - - if (!(v1_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector1, - vector_min, "v1_less_than_min"))) { - HANDLE_FAILURE("LLVMBuldICmp"); - goto fail; - } - - if (!(v2_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector2, - vector_min, "v2_less_than_min"))) { - HANDLE_FAILURE("LLVMBuldICmp"); - goto fail; - } - - if (!(vector1_clamped = - LLVMBuildSelect(comp_ctx->builder, v1_gt_max, vector_max, vector1, - "vector1_clamped_max"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(vector1_clamped = - LLVMBuildSelect(comp_ctx->builder, v1_lt_min, vector_min, - vector1_clamped, "vector1_clamped_min"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(vector2_clamped = - LLVMBuildSelect(comp_ctx->builder, v2_gt_max, vector_max, vector2, - "vector2_clamped_max"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(vector2_clamped = - LLVMBuildSelect(comp_ctx->builder, v2_lt_min, vector_min, - vector2_clamped, "vector2_clamped_min"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(vector1_trunced = - LLVMBuildTrunc(comp_ctx->builder, vector1_clamped, - LLVMVectorType(INT8_TYPE, 8), "vector1_trunced"))) { + /* trunc */ + if (!(vec1 = LLVMBuildTrunc(comp_ctx->builder, vec1, trunc_type[itype], + "vec1_trunc")) + || !(vec2 = LLVMBuildTrunc(comp_ctx->builder, vec2, trunc_type[itype], + "vec2_trunc"))) { HANDLE_FAILURE("LLVMBuildTrunc"); - goto fail; + return false; } - if (!(vector2_trunced = - LLVMBuildTrunc(comp_ctx->builder, vector2_clamped, - LLVMVectorType(INT8_TYPE, 8), "vector2_trunced"))) { - HANDLE_FAILURE("LLVMBuildTrunc"); - goto fail; + /* combine */ + if (!(mask = LLVMConstVector(mask_element, (length[itype] << 1)))) { + HANDLE_FAILURE("LLVMConstInt"); + return false; } - if (!(shuffle_vector = LLVMBuildShuffleVector( - comp_ctx->builder, vector1_trunced, vector2_trunced, shuffle, - "shuffle_vector"))) { + if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, vec1, vec2, mask, + "vec_shuffle"))) { HANDLE_FAILURE("LLVMBuildShuffleVector"); - goto fail; + return false; } - if (!(result = LLVMBuildBitCast(comp_ctx->builder, shuffle_vector, - V128_i64x2_TYPE, "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - PUSH_V128(result); - return true; - -fail: - return false; + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); } bool @@ -290,339 +204,184 @@ aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx, bool is_signed) { if (is_target_x86(comp_ctx)) { - return aot_compile_simd_i8x16_narrow_i16x8_x86(comp_ctx, func_ctx, - is_signed); + return simd_integer_narrow_x86( + comp_ctx, func_ctx, V128_i16x8_TYPE, V128_i8x16_TYPE, + is_signed ? 
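/* The generic (non-x86) narrowing path above roughly does: saturate both
 * input vectors into the destination lane range via simd_saturate(),
 * truncate each to the half-width element type, concatenate the two halves
 * with a single shufflevector whose mask is <0, 1, ..., 2N-1>, and bitcast
 * the result back to v128. On x86 the same operation maps directly to the
 * SSE pack intrinsics selected here.
 */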
"llvm.x86.sse2.packsswb.128" + : "llvm.x86.sse2.packuswb.128"); } else { - return aot_compile_simd_i8x16_narrow_i16x8_common(comp_ctx, func_ctx, - is_signed); + return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i16x8, + is_signed); } } -static bool -aot_compile_simd_i16x8_narrow_i32x4_common(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - bool is_signed) -{ - LLVMValueRef vector1, vector2, result, vector_min, vector_max, shuffle, - vector1_clamped, vector2_clamped, vector1_trunced, vector2_trunced, - shuffle_vector; - LLVMValueRef v1_gt_max, v1_lt_min, v2_gt_max, v2_lt_min; - - int min_s_array[4] = { 0xffff8000, 0xffff8000, 0xffff8000, 0xffff8000 }; - int32 max_s_array[4] = { 0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff }; - - int min_u_array[4] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; - int max_u_array[4] = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff }; - - int shuffle_array[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; - - if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, - V128_i32x4_TYPE, "vec2"))) { - goto fail; - } - - if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, - V128_i32x4_TYPE, "vec1"))) { - goto fail; - } - - if (!(vector_min = build_intx4_vector( - comp_ctx, I32_TYPE, is_signed ? min_s_array : min_u_array))) { - goto fail; - } - if (!(vector_max = build_intx4_vector( - comp_ctx, I32_TYPE, is_signed ? max_s_array : max_u_array))) { - goto fail; - } - if (!(shuffle = build_intx8_vector(comp_ctx, I32_TYPE, shuffle_array))) { - goto fail; - } - - if (!(v1_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector1, - vector_max, "v1_great_than_max"))) { - HANDLE_FAILURE("LLVMBuldICmp"); - goto fail; - } - - if (!(v2_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector2, - vector_max, "v2_great_than_max"))) { - HANDLE_FAILURE("LLVMBuldICmp"); - goto fail; - } - - if (!(v1_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector1, - vector_min, "v1_less_than_min"))) { - HANDLE_FAILURE("LLVMBuldICmp"); - goto fail; - } - - if (!(v2_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector2, - vector_min, "v2_less_than_min"))) { - HANDLE_FAILURE("LLVMBuldICmp"); - goto fail; - } - - if (!(vector1_clamped = - LLVMBuildSelect(comp_ctx->builder, v1_gt_max, vector_max, vector1, - "vector1_clamped_max"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(vector1_clamped = - LLVMBuildSelect(comp_ctx->builder, v1_lt_min, vector_min, - vector1_clamped, "vector1_clamped_min"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(vector2_clamped = - LLVMBuildSelect(comp_ctx->builder, v2_gt_max, vector_max, vector2, - "vector2_clamped_max"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(vector2_clamped = - LLVMBuildSelect(comp_ctx->builder, v2_lt_min, vector_min, - vector2_clamped, "vector2_clamped_min"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(vector1_trunced = LLVMBuildTrunc(comp_ctx->builder, vector1_clamped, - LLVMVectorType(INT16_TYPE, 4), - "vector1_trunced"))) { - HANDLE_FAILURE("LLVMBuildTrunc"); - goto fail; - } - - if (!(vector2_trunced = LLVMBuildTrunc(comp_ctx->builder, vector2_clamped, - LLVMVectorType(INT16_TYPE, 4), - "vector2_trunced"))) { - HANDLE_FAILURE("LLVMBuildTrunc"); - goto fail; - } - - if (!(shuffle_vector = LLVMBuildShuffleVector( - comp_ctx->builder, vector1_trunced, vector2_trunced, shuffle, - "shuffle_vector"))) { - HANDLE_FAILURE("LLVMBuildShuffleVector"); - goto fail; - } - - if (!(result = 
LLVMBuildBitCast(comp_ctx->builder, shuffle_vector, - V128_i64x2_TYPE, "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - PUSH_V128(result); - return true; - -fail: - return false; -} - bool aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, bool is_signed) { if (is_target_x86(comp_ctx)) { - return aot_compile_simd_i16x8_narrow_i32x4_x86(comp_ctx, func_ctx, - is_signed); + return simd_integer_narrow_x86(comp_ctx, func_ctx, V128_i32x4_TYPE, + V128_i16x8_TYPE, + is_signed ? "llvm.x86.sse2.packssdw.128" + : "llvm.x86.sse41.packusdw"); } else { - return aot_compile_simd_i16x8_narrow_i32x4_common(comp_ctx, func_ctx, - is_signed); + return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i32x4, + is_signed); } } bool -aot_compile_simd_i16x8_widen_i8x16(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - bool is_low_half, - bool is_signed) +aot_compile_simd_i32x4_narrow_i64x2(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed) { - LLVMValueRef vector, undef, mask_high[8], mask_low[8], mask, shuffled, - result; - uint8 mask_high_value[8] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }, - mask_low_value[8] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }, i; - - if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, - V128_i8x16_TYPE, "vec"))) { - goto fail; - } - - if (!(undef = LLVMGetUndef(V128_i8x16_TYPE))) { - HANDLE_FAILURE("LLVMGetUndef"); - goto fail; - } - - /* create a mask */ - for (i = 0; i < 8; i++) { - mask_high[i] = LLVMConstInt(I32_TYPE, mask_high_value[i], true); - mask_low[i] = LLVMConstInt(I32_TYPE, mask_low_value[i], true); - } - - mask = is_low_half ? LLVMConstVector(mask_low, 8) - : LLVMConstVector(mask_high, 8); - if (!mask) { - HANDLE_FAILURE("LLVMConstVector"); - goto fail; - } - - /* retrive the low or high half */ - if (!(shuffled = LLVMBuildShuffleVector(comp_ctx->builder, vector, undef, - mask, "shuffled"))) { - HANDLE_FAILURE("LLVMBuildShuffleVector"); - goto fail; - } - - if (is_signed) { - if (!(result = LLVMBuildSExt(comp_ctx->builder, shuffled, - V128_i16x8_TYPE, "ext"))) { - HANDLE_FAILURE("LLVMBuildSExt"); - goto fail; - } - } - else { - if (!(result = LLVMBuildZExt(comp_ctx->builder, shuffled, - V128_i16x8_TYPE, "ext"))) { - HANDLE_FAILURE("LLVMBuildZExt"); - goto fail; - } - } - - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - PUSH_V128(result); - return true; -fail: - return false; + /* TODO: x86 intrinsics */ + return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i64x2, + is_signed); } -bool -aot_compile_simd_i32x4_widen_i16x8(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - bool is_low_half, - bool is_signed) -{ - LLVMValueRef vector, undef, mask_high[4], mask_low[4], mask, shuffled, - result; - uint8 mask_high_value[4] = { 0x4, 0x5, 0x6, 0x7 }, - mask_low_value[4] = { 0x0, 0x1, 0x2, 0x3 }, i; - - if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, - V128_i16x8_TYPE, "vec"))) { - goto fail; - } - - if (!(undef = LLVMGetUndef(V128_i16x8_TYPE))) { - HANDLE_FAILURE("LLVMGetUndef"); - goto fail; - } - - /* create a mask */ - for (i = 0; i < 4; i++) { - mask_high[i] = LLVMConstInt(I32_TYPE, mask_high_value[i], true); - mask_low[i] = LLVMConstInt(I32_TYPE, mask_low_value[i], true); - } - - mask = is_low_half ? 
LLVMConstVector(mask_low, 4) - : LLVMConstVector(mask_high, 4); - if (!mask) { - HANDLE_FAILURE("LLVMConstVector"); - goto fail; - } - - /* retrive the low or high half */ - if (!(shuffled = LLVMBuildShuffleVector(comp_ctx->builder, vector, undef, - mask, "shuffled"))) { - HANDLE_FAILURE("LLVMBuildShuffleVector"); - goto fail; - } - - if (is_signed) { - if (!(result = LLVMBuildSExt(comp_ctx->builder, shuffled, - V128_i32x4_TYPE, "ext"))) { - HANDLE_FAILURE("LLVMBuildSExt"); - goto fail; - } - } - else { - if (!(result = LLVMBuildZExt(comp_ctx->builder, shuffled, - V128_i32x4_TYPE, "ext"))) { - HANDLE_FAILURE("LLVMBuildZExt"); - goto fail; - } - } - - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - PUSH_V128(result); - return true; -fail: - return false; -} +enum integer_extend_type { + e_ext_i8x16, + e_ext_i16x8, + e_ext_i32x4, +}; static LLVMValueRef -simd_build_const_f32x4(AOTCompContext *comp_ctx, +simd_integer_extension(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - float f) -{ - LLVMValueRef elements[4], vector; - - if (!(elements[0] = LLVMConstReal(F32_TYPE, f))) { - HANDLE_FAILURE("LLVMConstInt"); - goto fail; - } - - elements[1] = elements[2] = elements[3] = elements[0]; - - if (!(vector = LLVMConstVector(elements, 4))) { - HANDLE_FAILURE("LLVMConstVector"); - goto fail; - } - - return vector; -fail: - return NULL; -} - -static LLVMValueRef -simd_build_const_i32x4(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - uint64 integer, + enum integer_extend_type itype, + LLVMValueRef vector, + bool lower_half, bool is_signed) { - LLVMValueRef elements[4], vector; + LLVMValueRef mask, sub_vector, result; + LLVMValueRef bits[] = { + LLVM_CONST(i32_zero), LLVM_CONST(i32_one), + LLVM_CONST(i32_two), LLVM_CONST(i32_three), + LLVM_CONST(i32_four), LLVM_CONST(i32_five), + LLVM_CONST(i32_six), LLVM_CONST(i32_seven), + LLVM_CONST(i32_eight), LLVM_CONST(i32_nine), + LLVM_CONST(i32_ten), LLVM_CONST(i32_eleven), + LLVM_CONST(i32_twelve), LLVM_CONST(i32_thirteen), + LLVM_CONST(i32_fourteen), LLVM_CONST(i32_fifteen), + }; + LLVMTypeRef out_vector_type[] = { V128_i16x8_TYPE, V128_i32x4_TYPE, + V128_i64x2_TYPE }; + LLVMValueRef undef[] = { LLVM_CONST(i8x16_undef), LLVM_CONST(i16x8_undef), + LLVM_CONST(i32x4_undef) }; + uint32 sub_vector_length[] = { 8, 4, 2 }; - if (!(elements[0] = LLVMConstInt(I32_TYPE, integer, is_signed))) { - HANDLE_FAILURE("LLVMConstInt"); - goto fail; - } - - elements[1] = elements[2] = elements[3] = elements[0]; - - if (!(vector = LLVMConstVector(elements, 4))) { + if (!(mask = lower_half ? 
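/* simd_integer_extension() implements the extend_low/extend_high family:
 * the mask built below selects either lanes 0..N-1 or lanes N..2N-1 of the
 * source vector via shufflevector, and the selected half is then sign- or
 * zero-extended to the wider lane type. Conceptually, i16x8.extend_low_i8x16_s
 * becomes roughly:
 *
 *   %half = shufflevector <16 x i8> %v, <16 x i8> undef,
 *                         <8 x i32> <i32 0, ..., i32 7>
 *   %ext  = sext <8 x i8> %half to <8 x i16>
 *
 * The extmul_* opcodes further below reuse this helper on both operands and
 * multiply the widened halves.
 */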
LLVMConstVector(bits, sub_vector_length[itype]) + : LLVMConstVector(bits + sub_vector_length[itype], + sub_vector_length[itype]))) { HANDLE_FAILURE("LLVMConstVector"); - goto fail; + return false; } - return vector; -fail: - return NULL; + /* retrive the low or high half */ + if (!(sub_vector = LLVMBuildShuffleVector(comp_ctx->builder, vector, + undef[itype], mask, "half"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + return false; + } + + if (is_signed) { + if (!(result = LLVMBuildSExt(comp_ctx->builder, sub_vector, + out_vector_type[itype], "sext"))) { + HANDLE_FAILURE("LLVMBuildSExt"); + return false; + } + } + else { + if (!(result = LLVMBuildZExt(comp_ctx->builder, sub_vector, + out_vector_type[itype], "zext"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + return false; + } + } + + return result; +} + +static bool +simd_integer_extension_wrapper(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + enum integer_extend_type itype, + bool lower_half, + bool is_signed) +{ + LLVMValueRef vector, result; + + LLVMTypeRef in_vector_type[] = { V128_i8x16_TYPE, V128_i16x8_TYPE, + V128_i32x4_TYPE }; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + in_vector_type[itype], "vec"))) { + return false; + } + + if (!(result = simd_integer_extension(comp_ctx, func_ctx, itype, vector, + lower_half, is_signed))) { + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +bool +aot_compile_simd_i16x8_extend_i8x16(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool lower_half, + bool is_signed) +{ + return simd_integer_extension_wrapper(comp_ctx, func_ctx, e_ext_i8x16, + lower_half, is_signed); +} + +bool +aot_compile_simd_i32x4_extend_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool lower_half, + bool is_signed) +{ + return simd_integer_extension_wrapper(comp_ctx, func_ctx, e_ext_i16x8, + lower_half, is_signed); +} + +bool +aot_compile_simd_i64x2_extend_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool lower_half, + bool is_signed) +{ + return simd_integer_extension_wrapper(comp_ctx, func_ctx, e_ext_i32x4, + lower_half, is_signed); +} + +static LLVMValueRef +simd_trunc_sat(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + const char *intrinsics, + LLVMTypeRef in_vector_type, + LLVMTypeRef out_vector_type) +{ + LLVMValueRef vector, result; + LLVMTypeRef param_types[] = { in_vector_type }; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + in_vector_type, "vector"))) { + return false; + } + + if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsics, + out_vector_type, param_types, 1, + vector))) { + return false; + } + + return result; } bool @@ -630,127 +389,81 @@ aot_compile_simd_i32x4_trunc_sat_f32x4(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, bool is_signed) { - LLVMValueRef vector, zeros, is_nan, max_float_v, min_float_v, is_ge_max, - is_le_min, result, max_int_v, min_int_v; - uint32 max_ui = 0xFFffFFff, min_ui = 0x0; - int32 max_si = 0x7FFFffff, min_si = 0x80000000; - float max_f_ui = 4294967296.0f, min_f_ui = 0.0f, max_f_si = 2147483647.0f, - min_f_si = -2147483648.0f; - - if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, - V128_f32x4_TYPE, "vec"))) { - goto fail; + LLVMValueRef result; + if (!(result = simd_trunc_sat(comp_ctx, func_ctx, + is_signed ? 
"llvm.fptosi.sat.v4i32.v4f32" + : "llvm.fptoui.sat.v4i32.v4f32", + V128_f32x4_TYPE, V128_i32x4_TYPE))) { + return false; } - if (!(zeros = LLVMConstNull(V128_f32x4_TYPE))) { + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +bool +aot_compile_simd_i32x4_trunc_sat_f64x2(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed) +{ + LLVMValueRef result, zero, mask; + LLVMTypeRef out_vector_type; + LLVMValueRef lanes[] = { + LLVM_CONST(i32_zero), + LLVM_CONST(i32_one), + LLVM_CONST(i32_two), + LLVM_CONST(i32_three), + }; + + if (!(out_vector_type = LLVMVectorType(I32_TYPE, 2))) { + HANDLE_FAILURE("LLVMVectorType"); + return false; + } + + if (!(result = simd_trunc_sat(comp_ctx, func_ctx, + is_signed ? "llvm.fptosi.sat.v2i32.v2f64" + : "llvm.fptoui.sat.v2i32.v2f64", + V128_f64x2_TYPE, out_vector_type))) { + return false; + } + + if (!(zero = LLVMConstNull(out_vector_type))) { HANDLE_FAILURE("LLVMConstNull"); - goto fail; + return false; } - if (is_signed) { - if (!(max_float_v = - simd_build_const_f32x4(comp_ctx, func_ctx, max_f_si))) { - goto fail; - } - - if (!(min_float_v = - simd_build_const_f32x4(comp_ctx, func_ctx, min_f_si))) { - goto fail; - } - - if (!(max_int_v = - simd_build_const_i32x4(comp_ctx, func_ctx, max_si, true))) { - goto fail; - } - - if (!(min_int_v = - simd_build_const_i32x4(comp_ctx, func_ctx, min_si, true))) { - goto fail; - } - } - else { - if (!(max_float_v = - simd_build_const_f32x4(comp_ctx, func_ctx, max_f_ui))) { - goto fail; - } - - if (!(min_float_v = - simd_build_const_f32x4(comp_ctx, func_ctx, min_f_ui))) { - goto fail; - } - - if (!(max_int_v = - simd_build_const_i32x4(comp_ctx, func_ctx, max_ui, false))) { - goto fail; - } - - if (!(min_int_v = - simd_build_const_i32x4(comp_ctx, func_ctx, min_ui, false))) { - goto fail; - } + /* v2i32 -> v4i32 */ + if (!(mask = LLVMConstVector(lanes, 4))) { + HANDLE_FAILURE("LLVMConstVector"); + return false; } - if (!(is_nan = LLVMBuildFCmp(comp_ctx->builder, LLVMRealORD, vector, zeros, - "is_nan"))) { - HANDLE_FAILURE("LLVMBuildFCmp"); - goto fail; + if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, result, zero, + mask, "extend"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + return false; } - if (!(is_le_min = LLVMBuildFCmp(comp_ctx->builder, LLVMRealOLE, vector, - min_float_v, "le_min"))) { - HANDLE_FAILURE("LLVMBuildFCmp"); - goto fail; + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +static LLVMValueRef +simd_integer_convert(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed, + LLVMValueRef vector, + LLVMTypeRef out_vector_type) + +{ + LLVMValueRef result; + result = is_signed ? 
LLVMBuildSIToFP(comp_ctx->builder, vector, + out_vector_type, "converted") + : LLVMBuildUIToFP(comp_ctx->builder, vector, + out_vector_type, "converted"); + if (!result) { + HANDLE_FAILURE("LLVMBuildSIToFP/LLVMBuildUIToFP"); } - if (!(is_ge_max = LLVMBuildFCmp(comp_ctx->builder, LLVMRealOGE, vector, - max_float_v, "ge_max"))) { - HANDLE_FAILURE("LLVMBuildFCmp"); - goto fail; - } - - if (is_signed) { - if (!(result = LLVMBuildFPToSI(comp_ctx->builder, vector, - V128_i32x4_TYPE, "truncated"))) { - HANDLE_FAILURE("LLVMBuildSIToFP"); - goto fail; - } - } - else { - if (!(result = LLVMBuildFPToUI(comp_ctx->builder, vector, - V128_i32x4_TYPE, "truncated"))) { - HANDLE_FAILURE("LLVMBuildUIToFP"); - goto fail; - } - } - - if (!(result = LLVMBuildSelect(comp_ctx->builder, is_ge_max, max_int_v, - result, "sat_w_max"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(result = LLVMBuildSelect(comp_ctx->builder, is_le_min, min_int_v, - result, "sat_w_min"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(result = LLVMBuildSelect(comp_ctx->builder, is_nan, result, - V128_i32x4_ZERO, "sat_w_nan"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - PUSH_V128(result); - return true; -fail: - return false; + return result; } bool @@ -762,32 +475,302 @@ aot_compile_simd_f32x4_convert_i32x4(AOTCompContext *comp_ctx, if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i32x4_TYPE, "vec"))) { - goto fail; + return false; } + if (!(result = simd_integer_convert(comp_ctx, func_ctx, is_signed, vector, + V128_f32x4_TYPE))) { + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +bool +aot_compile_simd_f64x2_convert_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed) +{ + LLVMValueRef vector, mask, result; + LLVMValueRef lanes[] = { + LLVM_CONST(i32_zero), + LLVM_CONST(i32_one), + }; + LLVMTypeRef out_vector_type; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_i32x4_TYPE, "vec"))) { + return false; + } + + if (!(out_vector_type = LLVMVectorType(F64_TYPE, 4))) { + HANDLE_FAILURE("LLVMVectorType"); + return false; + } + + if (!(result = simd_integer_convert(comp_ctx, func_ctx, is_signed, vector, + out_vector_type))) { + return false; + } + + /* v4f64 -> v2f64 */ + if (!(mask = LLVMConstVector(lanes, 2))) { + HANDLE_FAILURE("LLVMConstVector"); + return false; + } + + if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, result, result, + mask, "trunc"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +static bool +simd_extadd_pairwise(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef in_vector_type, + LLVMTypeRef out_vector_type, + bool is_signed) +{ + LLVMValueRef vector, even_mask, odd_mask, sub_vector_even, sub_vector_odd, + result; + + LLVMValueRef even_element[] = { + LLVM_CONST(i32_zero), LLVM_CONST(i32_two), LLVM_CONST(i32_four), + LLVM_CONST(i32_six), LLVM_CONST(i32_eight), LLVM_CONST(i32_ten), + LLVM_CONST(i32_twelve), LLVM_CONST(i32_fourteen), + }; + + LLVMValueRef odd_element[] = { + LLVM_CONST(i32_one), LLVM_CONST(i32_three), + LLVM_CONST(i32_five), LLVM_CONST(i32_seven), + LLVM_CONST(i32_nine), LLVM_CONST(i32_eleven), + LLVM_CONST(i32_thirteen), LLVM_CONST(i32_fifteen), + }; + + /* 
assumption about i16x8 from i8x16 and i32x4 from i16x8 */ + uint8 mask_length = V128_i16x8_TYPE == out_vector_type ? 8 : 4; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + in_vector_type, "vector"))) { + return false; + } + + if (!(even_mask = LLVMConstVector(even_element, mask_length)) + || !(odd_mask = LLVMConstVector(odd_element, mask_length))) { + HANDLE_FAILURE("LLVMConstVector"); + return false; + } + + /* shuffle a <16xi8> vector to two <8xi8> vectors */ + if (!(sub_vector_even = LLVMBuildShuffleVector( + comp_ctx->builder, vector, vector, even_mask, "pick_even")) + || !(sub_vector_odd = LLVMBuildShuffleVector( + comp_ctx->builder, vector, vector, odd_mask, "pick_odd"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + return false; + } + + /* sext/zext <8xi8> to <8xi16> */ if (is_signed) { - if (!(result = LLVMBuildSIToFP(comp_ctx->builder, vector, - V128_f32x4_TYPE, "converted"))) { - HANDLE_FAILURE("LLVMBuildSIToFP"); - goto fail; + if (!(sub_vector_even = + LLVMBuildSExt(comp_ctx->builder, sub_vector_even, + out_vector_type, "even_sext")) + || !(sub_vector_odd = + LLVMBuildSExt(comp_ctx->builder, sub_vector_odd, + out_vector_type, "odd_sext"))) { + HANDLE_FAILURE("LLVMBuildSExt"); + return false; } } else { - if (!(result = LLVMBuildUIToFP(comp_ctx->builder, vector, - V128_f32x4_TYPE, "converted"))) { - HANDLE_FAILURE("LLVMBuildSIToFP"); - goto fail; + if (!(sub_vector_even = + LLVMBuildZExt(comp_ctx->builder, sub_vector_even, + out_vector_type, "even_zext")) + || !(sub_vector_odd = + LLVMBuildZExt(comp_ctx->builder, sub_vector_odd, + out_vector_type, "odd_zext"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + return false; } } - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; + if (!(result = LLVMBuildAdd(comp_ctx->builder, sub_vector_even, + sub_vector_odd, "sum"))) { + HANDLE_FAILURE("LLVMBuildAdd"); + return false; } - PUSH_V128(result); - return true; -fail: - return false; + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); } + +bool +aot_compile_simd_i16x8_extadd_pairwise_i8x16(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed) +{ + return simd_extadd_pairwise(comp_ctx, func_ctx, V128_i8x16_TYPE, + V128_i16x8_TYPE, is_signed); +} + +bool +aot_compile_simd_i32x4_extadd_pairwise_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed) +{ + return simd_extadd_pairwise(comp_ctx, func_ctx, V128_i16x8_TYPE, + V128_i32x4_TYPE, is_signed); +} + +bool +aot_compile_simd_i16x8_q15mulr_sat(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + LLVMValueRef lhs, rhs, pad, offset, min, max, result; + LLVMTypeRef vector_ext_type; + + if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i16x8_TYPE, + "rhs")) + || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_i16x8_TYPE, "lhs"))) { + return false; + } + + if (!(vector_ext_type = LLVMVectorType(I32_TYPE, 8))) { + HANDLE_FAILURE("LLVMVectorType"); + return false; + } + + if (!(lhs = LLVMBuildSExt(comp_ctx->builder, lhs, vector_ext_type, + "lhs_v8i32")) + || !(rhs = LLVMBuildSExt(comp_ctx->builder, rhs, vector_ext_type, + "rhs_v8i32"))) { + HANDLE_FAILURE("LLVMBuildSExt"); + return false; + } + + /* 0x4000 and 15*/ + if (!(pad = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE, + 0x4000, 8)) + || !(offset = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE, + 15, 8))) { + return false; + } + + /* TODO: looking for x86 intrinsics 
about integer"fused multiply-and-add" */ + /* S.SignedSaturate((x * y + 0x4000) >> 15) */ + if (!(result = LLVMBuildMul(comp_ctx->builder, lhs, rhs, "mul"))) { + HANDLE_FAILURE("LLVMBuildMul"); + return false; + } + + if (!(result = LLVMBuildAdd(comp_ctx->builder, result, pad, "add"))) { + HANDLE_FAILURE("LLVMBuildAdd"); + return false; + } + + if (!(result = LLVMBuildAShr(comp_ctx->builder, result, offset, "ashr"))) { + HANDLE_FAILURE("LLVMBuildAShr"); + return false; + } + + if (!(min = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE, + 0xffff8000, 8)) + || !(max = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE, + 0x00007fff, 8))) { + return false; + } + + /* sat after trunc will let *sat* part be optimized */ + if (!(result = simd_saturate(comp_ctx, func_ctx, e_sat_i32x8, result, min, + max, true))) { + return false; + } + + if (!(result = LLVMBuildTrunc(comp_ctx->builder, result, V128_i16x8_TYPE, + "down_to_v8i16"))) { + HANDLE_FAILURE("LLVMBuidlTrunc"); + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +enum integer_extmul_type { + e_i16x8_extmul_i8x16, + e_i32x4_extmul_i16x8, + e_i64x2_extmul_i32x4, +}; + +static bool +simd_integer_extmul(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool lower_half, + bool is_signed, + enum integer_extmul_type itype) +{ + LLVMValueRef vec1, vec2, result; + enum integer_extend_type ext_type[] = { + e_ext_i8x16, + e_ext_i16x8, + e_ext_i32x4, + }; + LLVMTypeRef in_vector_type[] = { + V128_i8x16_TYPE, + V128_i16x8_TYPE, + V128_i32x4_TYPE, + }; + + if (!(vec1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + in_vector_type[itype], "vec1")) + || !(vec2 = simd_pop_v128_and_bitcast( + comp_ctx, func_ctx, in_vector_type[itype], "vec2"))) { + return false; + } + + if (!(vec1 = simd_integer_extension(comp_ctx, func_ctx, ext_type[itype], + vec1, lower_half, is_signed)) + || !(vec2 = simd_integer_extension(comp_ctx, func_ctx, ext_type[itype], + vec2, lower_half, is_signed))) { + return false; + } + + if (!(result = LLVMBuildMul(comp_ctx->builder, vec1, vec2, "product"))) { + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +bool +aot_compile_simd_i16x8_extmul_i8x16(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool lower_half, + bool is_signed) +{ + return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed, + e_i16x8_extmul_i8x16); +} + +bool +aot_compile_simd_i32x4_extmul_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool lower_half, + bool is_signed) +{ + return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed, + e_i32x4_extmul_i16x8); +} + +bool +aot_compile_simd_i64x2_extmul_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool lower_half, + bool is_signed) +{ + return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed, + e_i64x2_extmul_i32x4); +} \ No newline at end of file diff --git a/core/iwasm/compilation/simd/simd_conversions.h b/core/iwasm/compilation/simd/simd_conversions.h index 823b5dc3a..64143edf7 100644 --- a/core/iwasm/compilation/simd/simd_conversions.h +++ b/core/iwasm/compilation/simd/simd_conversions.h @@ -23,27 +23,77 @@ aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx, bool is_signed); bool -aot_compile_simd_i16x8_widen_i8x16(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - bool is_low, - bool is_signed); +aot_compile_simd_i32x4_narrow_i64x2(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed); bool 
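/* i16x8.q15mulr_sat above follows the scalar definition
 * sat16((a * b + 0x4000) >> 15): both operands are sign-extended to
 * <8 x i32>, multiplied, rounded by adding 0x4000, arithmetically shifted
 * right by 15, clamped into [-0x8000, 0x7fff] with the e_sat_i32x8 variant
 * of simd_saturate(), and finally truncated back to <8 x i16>. The
 * extmul_low/extmul_high opcodes are extend-then-multiply: each operand is
 * run through simd_integer_extension() and the widened halves are
 * multiplied.
 */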
-aot_compile_simd_i32x4_widen_i16x8(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - bool is_low, - bool is_signed); +aot_compile_simd_i16x8_extend_i8x16(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_low, + bool is_signed); + +bool +aot_compile_simd_i32x4_extend_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_low, + bool is_signed); + +bool +aot_compile_simd_i64x2_extend_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool lower_half, + bool is_signed); bool aot_compile_simd_i32x4_trunc_sat_f32x4(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, bool is_signed); +bool +aot_compile_simd_i32x4_trunc_sat_f64x2(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed); + bool aot_compile_simd_f32x4_convert_i32x4(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, bool is_signed); +bool +aot_compile_simd_f64x2_convert_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed); +bool +aot_compile_simd_i16x8_extadd_pairwise_i8x16(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed); + +bool +aot_compile_simd_i32x4_extadd_pairwise_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_signed); +bool +aot_compile_simd_i16x8_q15mulr_sat(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i16x8_extmul_i8x16(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_low, + bool is_signed); + +bool +aot_compile_simd_i32x4_extmul_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool is_low, + bool is_signed); + +bool +aot_compile_simd_i64x2_extmul_i32x4(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool lower_half, + bool is_signed); #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/compilation/simd/simd_floating_point.c b/core/iwasm/compilation/simd/simd_floating_point.c index 1a819c9fd..f5ec15df3 100644 --- a/core/iwasm/compilation/simd/simd_floating_point.c +++ b/core/iwasm/compilation/simd/simd_floating_point.c @@ -9,111 +9,45 @@ #include "../aot_emit_numberic.h" #include "../../aot/aot_runtime.h" -static LLVMValueRef -simd_v128_float_cmp(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - FloatArithmetic arith_op, - LLVMValueRef lhs, - LLVMValueRef rhs) -{ - LLVMValueRef result; - LLVMRealPredicate op; - - op = FLOAT_MIN == arith_op ? 
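/* In the reworked simd_v128_float_arith() below, the switch only picks the
 * LLVM builder call (FAdd/FSub/FMul/FDiv) and a single NULL check reports
 * the failure; the bitcast-to-v128-and-push tail is shared through
 * simd_bitcast_and_push_v128(). FLOAT_MIN/FLOAT_MAX are handled separately
 * by simd_float_cmp() near the end of the file, as an unordered fcmp
 * (ULT/UGT) followed by a select.
 */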
LLVMRealULT : LLVMRealUGT; - - if (!(result = LLVMBuildFCmp(comp_ctx->builder, op, lhs, rhs, "cmp"))) { - HANDLE_FAILURE("LLVMBuildFCmp"); - goto fail; - } - - if (!(result = - LLVMBuildSelect(comp_ctx->builder, result, lhs, rhs, "select"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - return result; -fail: - return NULL; -} - static bool simd_v128_float_arith(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, FloatArithmetic arith_op, LLVMTypeRef vector_type) { - LLVMValueRef lhs, rhs, result; + LLVMValueRef lhs, rhs, result = NULL; - if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "rhs"))) { - goto fail; - } - - if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "lhs"))) { - goto fail; + if (!(rhs = + simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "rhs")) + || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "lhs"))) { + return false; } switch (arith_op) { case FLOAT_ADD: - if (!(result = - LLVMBuildFAdd(comp_ctx->builder, lhs, rhs, "sum"))) { - HANDLE_FAILURE("LLVMBuildFAdd"); - goto fail; - } + result = LLVMBuildFAdd(comp_ctx->builder, lhs, rhs, "sum"); break; case FLOAT_SUB: - if (!(result = LLVMBuildFSub(comp_ctx->builder, lhs, rhs, - "difference"))) { - HANDLE_FAILURE("LLVMBuildFSub"); - goto fail; - } + result = LLVMBuildFSub(comp_ctx->builder, lhs, rhs, "difference"); break; case FLOAT_MUL: - if (!(result = - LLVMBuildFMul(comp_ctx->builder, lhs, rhs, "product"))) { - HANDLE_FAILURE("LLVMBuildFMul"); - goto fail; - } + result = LLVMBuildFMul(comp_ctx->builder, lhs, rhs, "product"); break; case FLOAT_DIV: - if (!(result = - LLVMBuildFDiv(comp_ctx->builder, lhs, rhs, "quotient"))) { - HANDLE_FAILURE("LLVMBuildFDiv"); - goto fail; - } - break; - case FLOAT_MIN: - if (!(result = simd_v128_float_cmp(comp_ctx, func_ctx, FLOAT_MIN, - lhs, rhs))) { - goto fail; - } - break; - case FLOAT_MAX: - if (!(result = simd_v128_float_cmp(comp_ctx, func_ctx, FLOAT_MAX, - lhs, rhs))) { - goto fail; - } + result = LLVMBuildFDiv(comp_ctx->builder, lhs, rhs, "quotient"); break; default: - result = NULL; - bh_assert(0); - break; + return false; } - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; + if (!result) { + HANDLE_FAILURE( + "LLVMBuildFAdd/LLVMBuildFSub/LLVMBuildFMul/LLVMBuildFDiv"); + return false; } - /* push result into the stack */ - PUSH_V128(result); - - return true; -fail: - return false; + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); } bool @@ -139,30 +73,19 @@ simd_v128_float_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMTypeRef vector_type) { - LLVMValueRef number, result; + LLVMValueRef vector, result; - if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "number"))) { - goto fail; + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vector"))) { + return false; } - if (!(result = LLVMBuildFNeg(comp_ctx->builder, number, "neg"))) { + if (!(result = LLVMBuildFNeg(comp_ctx->builder, vector, "neg"))) { HANDLE_FAILURE("LLVMBuildFNeg"); - goto fail; + return false; } - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - /* push result into the stack */ - PUSH_V128(result); - - return true; -fail: - return false; + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); } bool @@ -178,119 +101,310 @@ 
aot_compile_simd_f64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) } static bool -simd_v128_float_intrinsic(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - LLVMTypeRef vector_type, - const char *intrinsic) +simd_float_intrinsic(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type, + const char *intrinsic) { - LLVMValueRef number, result; + LLVMValueRef vector, result; LLVMTypeRef param_types[1] = { vector_type }; - if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "number"))) { - goto fail; + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vector"))) { + return false; } - if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, vector_type, - param_types, 1, number))) { + if (!(result = + aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, vector_type, + param_types, 1, vector))) { HANDLE_FAILURE("LLVMBuildCall"); - goto fail; + return false; } - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - /* push result into the stack */ - PUSH_V128(result); - - return true; -fail: - return false; + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); } bool aot_compile_simd_f32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, - "llvm.fabs.v4f32"); + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.fabs.v4f32"); } bool aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, - "llvm.fabs.v2f64"); + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.fabs.v2f64"); +} + +bool +aot_compile_simd_f32x4_round(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.round.v4f32"); +} + +bool +aot_compile_simd_f64x2_round(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.round.v2f64"); } bool aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, - "llvm.sqrt.v4f32"); + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.sqrt.v4f32"); } bool aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, - "llvm.sqrt.v2f64"); + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.sqrt.v2f64"); } bool aot_compile_simd_f32x4_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, - "llvm.ceil.v4f32"); + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.ceil.v4f32"); } bool aot_compile_simd_f64x2_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, - "llvm.ceil.v2f64"); + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.ceil.v2f64"); } bool -aot_compile_simd_f32x4_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +aot_compile_simd_f32x4_floor(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) { - return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, - 
"llvm.floor.v4f32"); + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.floor.v4f32"); } bool -aot_compile_simd_f64x2_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +aot_compile_simd_f64x2_floor(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) { - return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, - "llvm.floor.v2f64"); + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.floor.v2f64"); } bool -aot_compile_simd_f32x4_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +aot_compile_simd_f32x4_trunc(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) { - return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, - "llvm.trunc.v4f32"); + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.trunc.v4f32"); } bool -aot_compile_simd_f64x2_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +aot_compile_simd_f64x2_trunc(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) { - return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, - "llvm.trunc.v2f64"); + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.trunc.v2f64"); } bool -aot_compile_simd_f32x4_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +aot_compile_simd_f32x4_nearest(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) { - return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, - "llvm.rint.v4f32"); + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE, + "llvm.rint.v4f32"); } bool -aot_compile_simd_f64x2_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +aot_compile_simd_f64x2_nearest(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) { - return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, - "llvm.rint.v2f64"); + return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE, + "llvm.rint.v2f64"); +} + +static bool +simd_float_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + FloatArithmetic arith_op, + LLVMTypeRef vector_type) +{ + LLVMValueRef lhs, rhs, result; + LLVMRealPredicate op = FLOAT_MIN == arith_op ? LLVMRealULT : LLVMRealUGT; + + if (!(rhs = + simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "rhs")) + || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "lhs"))) { + return false; + } + + if (!(result = LLVMBuildFCmp(comp_ctx->builder, op, lhs, rhs, "cmp"))) { + HANDLE_FAILURE("LLVMBuildFCmp"); + return false; + } + + if (!(result = + LLVMBuildSelect(comp_ctx->builder, result, lhs, rhs, "select"))) { + HANDLE_FAILURE("LLVMBuildSelect"); + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +/*TODO: sugggest non-IA platforms check with "llvm.minimum.*" and "llvm.maximum.*" firstly */ +bool +aot_compile_simd_f32x4_min_max(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool run_min) +{ + return simd_float_cmp(comp_ctx, func_ctx, run_min ? FLOAT_MIN : FLOAT_MAX, + V128_f32x4_TYPE); +} + +bool +aot_compile_simd_f64x2_min_max(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool run_min) +{ + return simd_float_cmp(comp_ctx, func_ctx, run_min ? 
FLOAT_MIN : FLOAT_MAX, + V128_f64x2_TYPE); +} + +static bool +simd_float_pmin_max(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type, + const char *intrinsic) +{ + LLVMValueRef lhs, rhs, result; + LLVMTypeRef param_types[2]; + + param_types[0] = vector_type; + param_types[1] = vector_type; + + if (!(rhs = + simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "rhs")) + || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "lhs"))) { + return false; + } + + if (!(result = + aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, vector_type, + param_types, 2, lhs, rhs))) { + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +bool +aot_compile_simd_f32x4_pmin_pmax(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool run_min) +{ + return simd_float_pmin_max(comp_ctx, func_ctx, V128_f32x4_TYPE, + run_min ? "llvm.minnum.v4f32" + : "llvm.maxnum.v4f32"); +} + +bool +aot_compile_simd_f64x2_pmin_pmax(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool run_min) +{ + return simd_float_pmin_max(comp_ctx, func_ctx, V128_f64x2_TYPE, + run_min ? "llvm.minnum.v2f64" + : "llvm.maxnum.v2f64"); +} + +bool +aot_compile_simd_f64x2_demote(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + LLVMValueRef vector, elem_0, elem_1, result; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_f64x2_TYPE, "vector"))) { + return false; + } + + if (!(elem_0 = LLVMBuildExtractElement(comp_ctx->builder, vector, + LLVM_CONST(i32_zero), "elem_0")) + || !(elem_1 = LLVMBuildExtractElement( + comp_ctx->builder, vector, LLVM_CONST(i32_one), "elem_1"))) { + HANDLE_FAILURE("LLVMBuildExtractElement"); + return false; + } + + /* fptrunc elem to */ + if (!(elem_0 = LLVMBuildFPTrunc(comp_ctx->builder, elem_0, F32_TYPE, + "elem_0_trunc")) + || !(elem_1 = LLVMBuildFPTrunc(comp_ctx->builder, elem_1, F32_TYPE, + "elem_1_trunc"))) { + HANDLE_FAILURE("LLVMBuildFPTrunc"); + return false; + } + + if (!(result = LLVMBuildInsertElement( + comp_ctx->builder, LLVM_CONST(f32x4_vec_zero), elem_0, + LLVM_CONST(i32_zero), "new_vector_0")) + || !(result = + LLVMBuildInsertElement(comp_ctx->builder, result, elem_1, + LLVM_CONST(i32_one), "new_vector_1"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +bool +aot_compile_simd_f32x4_promote(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + LLVMValueRef vector, elem_0, elem_1, result; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_f32x4_TYPE, "vector"))) { + return false; + } + + if (!(elem_0 = LLVMBuildExtractElement(comp_ctx->builder, vector, + LLVM_CONST(i32_zero), "elem_0")) + || !(elem_1 = LLVMBuildExtractElement( + comp_ctx->builder, vector, LLVM_CONST(i32_one), "elem_1"))) { + HANDLE_FAILURE("LLVMBuildExtractElement"); + return false; + } + + /* fpext elem to */ + if (!(elem_0 = + LLVMBuildFPExt(comp_ctx->builder, elem_0, F64_TYPE, "elem_0_ext")) + || !(elem_1 = LLVMBuildFPExt(comp_ctx->builder, elem_1, F64_TYPE, + "elem_1_ext"))) { + HANDLE_FAILURE("LLVMBuildFPExt"); + return false; + } + + if (!(result = LLVMBuildInsertElement( + comp_ctx->builder, LLVM_CONST(f64x2_vec_zero), elem_0, + LLVM_CONST(i32_zero), "new_vector_0")) + || !(result = + LLVMBuildInsertElement(comp_ctx->builder, result, elem_1, + LLVM_CONST(i32_one), "new_vector_1"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + return false; + } + + return 
simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); } diff --git a/core/iwasm/compilation/simd/simd_floating_point.h b/core/iwasm/compilation/simd/simd_floating_point.h index e95cab6ee..314f1494d 100644 --- a/core/iwasm/compilation/simd/simd_floating_point.h +++ b/core/iwasm/compilation/simd/simd_floating_point.h @@ -35,34 +35,80 @@ bool aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); bool -aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_f32x4_round(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); bool -aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_f64x2_round(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); bool -aot_compile_simd_f32x4_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); bool -aot_compile_simd_f64x2_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); bool -aot_compile_simd_f32x4_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_f32x4_ceil(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); bool -aot_compile_simd_f64x2_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_f64x2_ceil(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); bool -aot_compile_simd_f32x4_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_f32x4_floor(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); bool -aot_compile_simd_f64x2_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_f64x2_floor(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); bool -aot_compile_simd_f32x4_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_f32x4_trunc(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); bool -aot_compile_simd_f64x2_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +aot_compile_simd_f64x2_trunc(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f32x4_nearest(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f64x2_nearest(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f32x4_min_max(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool run_min); + +bool +aot_compile_simd_f64x2_min_max(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool run_min); + +bool +aot_compile_simd_f32x4_pmin_pmax(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool run_min); + +bool +aot_compile_simd_f64x2_pmin_pmax(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + bool run_min); + +bool +aot_compile_simd_f64x2_demote(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_f32x4_promote(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); #ifdef __cplusplus } /* end of extern "C" */ diff --git a/core/iwasm/compilation/simd/simd_int_arith.c b/core/iwasm/compilation/simd/simd_int_arith.c index f61b67bcd..804d56ea5 100644 --- a/core/iwasm/compilation/simd/simd_int_arith.c +++ b/core/iwasm/compilation/simd/simd_int_arith.c @@ -9,59 +9,41 @@ #include "../../aot/aot_runtime.h" static bool -simd_v128_integer_arith(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - V128Arithmetic arith_op, - LLVMValueRef lhs, - LLVMValueRef rhs) +simd_integer_arith(AOTCompContext *comp_ctx, + AOTFuncContext 
*func_ctx, + V128Arithmetic arith_op, + LLVMTypeRef vector_type) { - LLVMValueRef result; + LLVMValueRef lhs, rhs, result = NULL; + + if (!(rhs = + simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "rhs")) + || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "lhs"))) { + return false; + } switch (arith_op) { case V128_ADD: - if (!(result = LLVMBuildAdd(comp_ctx->builder, lhs, rhs, "sum"))) { - HANDLE_FAILURE("LLVMBuildAdd"); - goto fail; - } + result = LLVMBuildAdd(comp_ctx->builder, lhs, rhs, "sum"); break; case V128_SUB: - if (!(result = - LLVMBuildSub(comp_ctx->builder, lhs, rhs, "difference"))) { - HANDLE_FAILURE("LLVMBuildSub"); - goto fail; - } + result = LLVMBuildSub(comp_ctx->builder, lhs, rhs, "difference"); break; case V128_MUL: - if (!(result = - LLVMBuildMul(comp_ctx->builder, lhs, rhs, "product"))) { - HANDLE_FAILURE("LLVMBuildMul"); - goto fail; - } - break; - case V128_NEG: - if (!(result = LLVMBuildNeg(comp_ctx->builder, lhs, "neg"))) { - HANDLE_FAILURE("LLVMBuildNeg"); - goto fail; - } + result = LLVMBuildMul(comp_ctx->builder, lhs, rhs, "product"); break; default: - result = NULL; - bh_assert(0); + HANDLE_FAILURE("Unsupport arith_op"); break; } - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; + if (!result) { + HANDLE_FAILURE("LLVMBuildAdd/LLVMBuildSub/LLVMBuildMul"); + return false; } - /* push result into the stack */ - PUSH_V128(result); - - return true; -fail: - return false; + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); } bool @@ -69,21 +51,7 @@ aot_compile_simd_i8x16_arith(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, V128Arithmetic arith_op) { - LLVMValueRef lhs, rhs; - - if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE, - "rhs"))) { - goto fail; - } - - if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE, - "lhs"))) { - goto fail; - } - - return simd_v128_integer_arith(comp_ctx, func_ctx, arith_op, lhs, rhs); -fail: - return false; + return simd_integer_arith(comp_ctx, func_ctx, arith_op, V128_i8x16_TYPE); } bool @@ -91,21 +59,7 @@ aot_compile_simd_i16x8_arith(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, V128Arithmetic arith_op) { - LLVMValueRef lhs, rhs; - - if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i16x8_TYPE, - "rhs"))) { - goto fail; - } - - if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i16x8_TYPE, - "lhs"))) { - goto fail; - } - - return simd_v128_integer_arith(comp_ctx, func_ctx, arith_op, lhs, rhs); -fail: - return false; + return simd_integer_arith(comp_ctx, func_ctx, arith_op, V128_i16x8_TYPE); } bool @@ -113,21 +67,7 @@ aot_compile_simd_i32x4_arith(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, V128Arithmetic arith_op) { - LLVMValueRef lhs, rhs; - - if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i32x4_TYPE, - "rhs"))) { - goto fail; - } - - if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i32x4_TYPE, - "lhs"))) { - goto fail; - } - - return simd_v128_integer_arith(comp_ctx, func_ctx, arith_op, lhs, rhs); -fail: - return false; + return simd_integer_arith(comp_ctx, func_ctx, arith_op, V128_i32x4_TYPE); } bool @@ -135,73 +75,354 @@ aot_compile_simd_i64x2_arith(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, V128Arithmetic arith_op) { - LLVMValueRef lhs, rhs; + return simd_integer_arith(comp_ctx, func_ctx, arith_op, V128_i64x2_TYPE); +} - POP_V128(rhs); - POP_V128(lhs); 
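/*
 * Reviewer sketch, not part of the patch: the other shared helper relied on
 * throughout these files, simd_pop_v128_and_bitcast(), pre-dates this change.
 * Judging from the POP_V128 + bitcast sequence it replaces for the i64x2 case
 * above, it is assumed to behave roughly as follows:
 */
LLVMValueRef
simd_pop_v128_and_bitcast(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                          LLVMTypeRef vec_type, const char *name)
{
    LLVMValueRef number;

    /* pop the operand in its <2 x i64> stack representation */
    POP_V128(number);

    /* reinterpret it as the lane type requested by the caller,
       e.g. <16 x i8> or <4 x float> */
    if (!(number =
              LLVMBuildBitCast(comp_ctx->builder, number, vec_type, name))) {
        HANDLE_FAILURE("LLVMBuildBitCast");
        goto fail;
    }

    return number;
fail:
    return NULL;
}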
+static bool +simd_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMTypeRef type) +{ + LLVMValueRef vector, result; - return simd_v128_integer_arith(comp_ctx, func_ctx, arith_op, lhs, rhs); -fail: - return false; + if (!(vector = + simd_pop_v128_and_bitcast(comp_ctx, func_ctx, type, "vector"))) { + return false; + } + + if (!(result = LLVMBuildNeg(comp_ctx->builder, vector, "neg"))) { + HANDLE_FAILURE("LLVMBuildNeg"); + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); } bool aot_compile_simd_i8x16_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - LLVMValueRef number; - - if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, - V128_i8x16_TYPE, "number"))) { - goto fail; - } - - return simd_v128_integer_arith(comp_ctx, func_ctx, V128_NEG, number, NULL); - -fail: - return false; + return simd_neg(comp_ctx, func_ctx, V128_i8x16_TYPE); } bool aot_compile_simd_i16x8_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - LLVMValueRef number; - - if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, - V128_i16x8_TYPE, "number"))) { - goto fail; - } - - return simd_v128_integer_arith(comp_ctx, func_ctx, V128_NEG, number, NULL); - -fail: - return false; + return simd_neg(comp_ctx, func_ctx, V128_i16x8_TYPE); } bool aot_compile_simd_i32x4_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - LLVMValueRef number; - - if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, - V128_i32x4_TYPE, "number"))) { - goto fail; - } - - return simd_v128_integer_arith(comp_ctx, func_ctx, V128_NEG, number, NULL); - -fail: - return false; + return simd_neg(comp_ctx, func_ctx, V128_i32x4_TYPE); } bool aot_compile_simd_i64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { - LLVMValueRef number; - - POP_V128(number); - - return simd_v128_integer_arith(comp_ctx, func_ctx, V128_NEG, number, NULL); - -fail: - return false; + return simd_neg(comp_ctx, func_ctx, V128_i64x2_TYPE); +} + +bool +aot_compile_simd_i8x16_popcnt(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + LLVMValueRef vector, result; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_i8x16_TYPE, "vector"))) { + return false; + } + + if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, + "llvm.ctpop.v16i8", V128_i8x16_TYPE, + &V128_i8x16_TYPE, 1, vector))) { + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +static bool +simd_v128_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type, + V128Arithmetic arith_op, + bool is_signed) +{ + LLVMValueRef lhs, rhs, result; + LLVMIntPredicate op; + + if (!(rhs = + simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "rhs")) + || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "lhs"))) { + return false; + } + + if (V128_MIN == arith_op) { + op = is_signed ? LLVMIntSLT : LLVMIntULT; + } + else { + op = is_signed ? 
LLVMIntSGT : LLVMIntUGT; + } + + if (!(result = LLVMBuildICmp(comp_ctx->builder, op, lhs, rhs, "cmp"))) { + HANDLE_FAILURE("LLVMBuildICmp"); + return false; + } + + if (!(result = + LLVMBuildSelect(comp_ctx->builder, result, lhs, rhs, "select"))) { + HANDLE_FAILURE("LLVMBuildSelect"); + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +bool +aot_compile_simd_i8x16_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed) +{ + return simd_v128_cmp(comp_ctx, func_ctx, V128_i8x16_TYPE, arith_op, + is_signed); +} + +bool +aot_compile_simd_i16x8_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed) +{ + return simd_v128_cmp(comp_ctx, func_ctx, V128_i16x8_TYPE, arith_op, + is_signed); +} + +bool +aot_compile_simd_i32x4_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed) +{ + return simd_v128_cmp(comp_ctx, func_ctx, V128_i32x4_TYPE, arith_op, + is_signed); +} + +/* llvm.abs.* */ +static bool +simd_v128_abs(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + char *intrinsic, + LLVMTypeRef vector_type) +{ + LLVMValueRef vector, result; + LLVMTypeRef param_types[] = { vector_type, INT1_TYPE }; + + if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "vec"))) { + return false; + } + + if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, + vector_type, param_types, 2, vector, + /* is_int_min_poison */ + LLVM_CONST(i1_zero)))) { + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +bool +aot_compile_simd_i8x16_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_abs(comp_ctx, func_ctx, "llvm.abs.v16i8", + V128_i8x16_TYPE); +} + +bool +aot_compile_simd_i16x8_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_abs(comp_ctx, func_ctx, "llvm.abs.v8i16", + V128_i16x8_TYPE); +} + +bool +aot_compile_simd_i32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_abs(comp_ctx, func_ctx, "llvm.abs.v4i32", + V128_i32x4_TYPE); +} + +bool +aot_compile_simd_i64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return simd_v128_abs(comp_ctx, func_ctx, "llvm.abs.v2i64", + V128_i64x2_TYPE); +} + +enum integer_avgr_u { + e_avgr_u_i8x16, + e_avgr_u_i16x8, + e_avgr_u_i32x4, +}; + +/* TODO: try int_x86_mmx_pavg_b and int_x86_mmx_pavg_w */ +/* (v1 + v2 + 1) / 2 */ +static bool +simd_v128_avg(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + enum integer_avgr_u itype) +{ + LLVMValueRef lhs, rhs, ones, result; + LLVMTypeRef vector_ext_type; + LLVMTypeRef vector_type[] = { + V128_i8x16_TYPE, + V128_i16x8_TYPE, + V128_i32x4_TYPE, + }; + unsigned lanes[] = { 16, 8, 4 }; + + if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + vector_type[itype], "rhs")) + || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + vector_type[itype], "lhs"))) { + return false; + } + + if (!(vector_ext_type = LLVMVectorType(I64_TYPE, lanes[itype]))) { + HANDLE_FAILURE("LLVMVectorType"); + return false; + } + + if (!(lhs = LLVMBuildZExt(comp_ctx->builder, lhs, vector_ext_type, + "zext_to_i64")) + || !(rhs = LLVMBuildZExt(comp_ctx->builder, rhs, vector_ext_type, + "zext_to_i64"))) { + HANDLE_FAILURE("LLVMBuildZExt"); + return false; + } + + /* by default, add will do signed/unsigned overflow */ + if (!(result = LLVMBuildAdd(comp_ctx->builder, lhs, rhs, "l_add_r"))) 
{ + HANDLE_FAILURE("LLVMBuildAdd"); + return false; + } + + if (!(ones = simd_build_splat_const_integer_vector(comp_ctx, I64_TYPE, 1, + lanes[itype]))) { + return false; + } + + if (!(result = LLVMBuildAdd(comp_ctx->builder, result, ones, "plus_1"))) { + HANDLE_FAILURE("LLVMBuildAdd"); + return false; + } + + if (!(result = LLVMBuildLShr(comp_ctx->builder, result, ones, "avg"))) { + HANDLE_FAILURE("LLVMBuildLShr"); + return false; + } + + if (!(result = LLVMBuildTrunc(comp_ctx->builder, result, + vector_type[itype], "to_orig_type"))) { + HANDLE_FAILURE("LLVMBuildTrunc"); + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +bool +aot_compile_simd_i8x16_avgr_u(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_v128_avg(comp_ctx, func_ctx, e_avgr_u_i8x16); +} + +bool +aot_compile_simd_i16x8_avgr_u(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_v128_avg(comp_ctx, func_ctx, e_avgr_u_i16x8); +} + +bool +aot_compile_simd_i32x4_avgr_u(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + return simd_v128_avg(comp_ctx, func_ctx, e_avgr_u_i32x4); +} + +bool +aot_compile_simd_i32x4_dot_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx) +{ + LLVMValueRef vec1, vec2, even_mask, odd_mask, zero, result; + LLVMTypeRef vector_ext_type; + LLVMValueRef even_element[] = { + LLVM_CONST(i32_zero), + LLVM_CONST(i32_two), + LLVM_CONST(i32_four), + LLVM_CONST(i32_six), + }; + LLVMValueRef odd_element[] = { + LLVM_CONST(i32_one), + LLVM_CONST(i32_three), + LLVM_CONST(i32_five), + LLVM_CONST(i32_seven), + }; + + if (!(vec1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i16x8_TYPE, + "vec1")) + || !(vec2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, + V128_i16x8_TYPE, "vec2"))) { + return false; + } + + if (!(vector_ext_type = LLVMVectorType(I32_TYPE, 8))) { + HANDLE_FAILURE("LLVMVectorType"); + return false; + } + + /* sext to */ + if (!(vec1 = LLVMBuildSExt(comp_ctx->builder, vec1, vector_ext_type, + "vec1_v8i32")) + || !(vec2 = LLVMBuildSExt(comp_ctx->builder, vec2, vector_ext_type, + "vec2_v8i32"))) { + HANDLE_FAILURE("LLVMBuildSExt"); + return false; + } + + if (!(result = LLVMBuildMul(comp_ctx->builder, vec1, vec2, "product"))) { + HANDLE_FAILURE("LLVMBuildMul"); + return false; + } + + /* pick elements with even indexes and odd indexes */ + if (!(even_mask = LLVMConstVector(even_element, 4)) + || !(odd_mask = LLVMConstVector(odd_element, 4))) { + HANDLE_FAILURE("LLVMConstVector"); + return false; + } + + if (!(zero = + simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE, 0, 8))) { + return false; + } + + if (!(vec1 = LLVMBuildShuffleVector(comp_ctx->builder, result, zero, + even_mask, "even_result")) + || !(vec2 = LLVMBuildShuffleVector(comp_ctx->builder, result, zero, + odd_mask, "odd_result"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + return false; + } + + if (!(result = LLVMBuildAdd(comp_ctx->builder, vec1, vec2, "new_vec"))) { + HANDLE_FAILURE("LLVMBuildAdd"); + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); } diff --git a/core/iwasm/compilation/simd/simd_int_arith.h b/core/iwasm/compilation/simd/simd_int_arith.h index 5cd77899d..0bd26ea05 100644 --- a/core/iwasm/compilation/simd/simd_int_arith.h +++ b/core/iwasm/compilation/simd/simd_int_arith.h @@ -44,6 +44,56 @@ aot_compile_simd_i32x4_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); bool aot_compile_simd_i64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); +bool 
+aot_compile_simd_i8x16_popcnt(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i8x16_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed); + +bool +aot_compile_simd_i16x8_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed); + +bool +aot_compile_simd_i32x4_cmp(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed); + +bool +aot_compile_simd_i8x16_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i16x8_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i8x16_avgr_u(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i16x8_avgr_u(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i32x4_avgr_u(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + +bool +aot_compile_simd_i32x4_dot_i16x8(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); + #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/compilation/simd/simd_load_store.c b/core/iwasm/compilation/simd/simd_load_store.c index 6a15ff98b..097974b47 100644 --- a/core/iwasm/compilation/simd/simd_load_store.c +++ b/core/iwasm/compilation/simd/simd_load_store.c @@ -3,6 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ +#include "simd_common.h" #include "simd_load_store.h" #include "../aot_emit_exception.h" #include "../aot_emit_memory.h" @@ -23,68 +24,23 @@ simd_load(AOTCompContext *comp_ctx, if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, data_length))) { HANDLE_FAILURE("aot_check_memory_overflow"); - goto fail; + return NULL; } if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, ptr_type, "data_ptr"))) { HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; + return NULL; } if (!(data = LLVMBuildLoad(comp_ctx->builder, maddr, "data"))) { HANDLE_FAILURE("LLVMBuildLoad"); - goto fail; + return NULL; } LLVMSetAlignment(data, 1); return data; -fail: - return NULL; -} - -/* data_length in bytes */ -static LLVMValueRef -simd_splat(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - LLVMValueRef element, - LLVMTypeRef vectory_type, - unsigned lane_count) -{ - LLVMValueRef undef, zeros, vector; - LLVMTypeRef zeros_type; - - if (!(undef = LLVMGetUndef(vectory_type))) { - HANDLE_FAILURE("LLVMGetUndef"); - goto fail; - } - - if (!(zeros_type = LLVMVectorType(I32_TYPE, lane_count))) { - HANDLE_FAILURE("LVMVectorType"); - goto fail; - } - - if (!(zeros = LLVMConstNull(zeros_type))) { - HANDLE_FAILURE("LLVMConstNull"); - goto fail; - } - - if (!(vector = LLVMBuildInsertElement(comp_ctx->builder, undef, element, - I32_ZERO, "base"))) { - HANDLE_FAILURE("LLVMBuildInsertElement"); - goto fail; - } - - if (!(vector = LLVMBuildShuffleVector(comp_ctx->builder, vector, undef, - zeros, "vector"))) { - HANDLE_FAILURE("LLVMBuildShuffleVector"); - goto fail; - } - - return vector; -fail: - return NULL; } bool @@ -97,40 +53,10 @@ aot_compile_simd_v128_load(AOTCompContext *comp_ctx, if (!(result = simd_load(comp_ctx, func_ctx, align, offset, 16, V128_PTR_TYPE))) { - goto fail; + return false; } PUSH_V128(result); - return true; -fail: - return false; -} - -bool -aot_compile_simd_v128_store(AOTCompContext *comp_ctx, - 
AOTFuncContext *func_ctx, - uint32 align, - uint32 offset) -{ - LLVMValueRef maddr, value, result; - - POP_V128(value); - - if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 16))) - return false; - - if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, V128_PTR_TYPE, - "data_ptr"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - if (!(result = LLVMBuildStore(comp_ctx->builder, value, maddr))) { - HANDLE_FAILURE("LLVMBuildStore"); - goto fail; - } - - LLVMSetAlignment(result, 1); return true; fail: @@ -140,162 +66,272 @@ fail: bool aot_compile_simd_load_extend(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint8 load_opcode, + uint8 opcode, uint32 align, uint32 offset) { LLVMValueRef sub_vector, result; - LLVMTypeRef sub_vector_type, vector_type; - bool is_signed; - uint32 data_length; - - switch (load_opcode) { - case SIMD_i16x8_load8x8_s: - case SIMD_i16x8_load8x8_u: - { - data_length = 8; - vector_type = V128_i16x8_TYPE; - is_signed = (load_opcode == SIMD_i16x8_load8x8_s); - - if (!(sub_vector_type = LLVMVectorType(INT8_TYPE, 8))) { - HANDLE_FAILURE("LLVMVectorType"); - goto fail; - } - - break; - } - case SIMD_i32x4_load16x4_s: - case SIMD_i32x4_load16x4_u: - { - data_length = 8; - vector_type = V128_i32x4_TYPE; - is_signed = (load_opcode == SIMD_i32x4_load16x4_s); - - if (!(sub_vector_type = LLVMVectorType(INT16_TYPE, 4))) { - HANDLE_FAILURE("LLVMVectorType"); - goto fail; - } - - break; - } - case SIMD_i64x2_load32x2_s: - case SIMD_i64x2_load32x2_u: - { - data_length = 8; - vector_type = V128_i64x2_TYPE; - is_signed = (load_opcode == SIMD_i64x2_load32x2_s); - - if (!(sub_vector_type = LLVMVectorType(I32_TYPE, 2))) { - HANDLE_FAILURE("LLVMVectorType"); - goto fail; - } - - break; - } - default: - { - bh_assert(0); - goto fail; - } - } + uint32 opcode_index = opcode - SIMD_v128_load8x8_s; + bool signeds[] = { true, false, true, false, true, false }; + LLVMTypeRef vector_types[] = { + V128_i16x8_TYPE, V128_i16x8_TYPE, V128_i32x4_TYPE, + V128_i32x4_TYPE, V128_i64x2_TYPE, V128_i64x2_TYPE, + }; + LLVMTypeRef sub_vector_types[] = { + LLVMVectorType(INT8_TYPE, 8), LLVMVectorType(INT8_TYPE, 8), + LLVMVectorType(INT16_TYPE, 4), LLVMVectorType(INT16_TYPE, 4), + LLVMVectorType(I32_TYPE, 2), LLVMVectorType(I32_TYPE, 2), + }; + LLVMTypeRef sub_vector_type = sub_vector_types[opcode_index]; /* to vector ptr type */ - if (!(sub_vector_type = LLVMPointerType(sub_vector_type, 0))) { + if (!sub_vector_type + || !(sub_vector_type = LLVMPointerType(sub_vector_type, 0))) { HANDLE_FAILURE("LLVMPointerType"); - goto fail; + return false; } - if (!(sub_vector = simd_load(comp_ctx, func_ctx, align, offset, - data_length, sub_vector_type))) { - goto fail; + if (!(sub_vector = simd_load(comp_ctx, func_ctx, align, offset, 8, + sub_vector_type))) { + return false; } - if (is_signed) { + if (signeds[opcode_index]) { if (!(result = LLVMBuildSExt(comp_ctx->builder, sub_vector, - vector_type, "vector"))) { + vector_types[opcode_index], "vector"))) { HANDLE_FAILURE("LLVMBuildSExt"); - goto fail; + return false; } } else { if (!(result = LLVMBuildZExt(comp_ctx->builder, sub_vector, - vector_type, "vector"))) { + vector_types[opcode_index], "vector"))) { HANDLE_FAILURE("LLVMBuildZExt"); - goto fail; + return false; } } - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "result"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - PUSH_V128(result); - return true; -fail: - return false; + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, 
result, "result"); } bool aot_compile_simd_load_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint8 load_opcode, + uint8 opcode, uint32 align, uint32 offset) { + uint32 opcode_index = opcode - SIMD_v128_load8_splat; LLVMValueRef element, result; - LLVMTypeRef element_ptr_type, vector_type; - unsigned data_length, lane_count; + LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE, + INT32_PTR_TYPE, INT64_PTR_TYPE }; + uint32 data_lengths[] = { 1, 2, 4, 8 }; + LLVMValueRef undefs[] = { + LLVM_CONST(i8x16_undef), + LLVM_CONST(i16x8_undef), + LLVM_CONST(i32x4_undef), + LLVM_CONST(i64x2_undef), + }; + LLVMValueRef masks[] = { + LLVM_CONST(i32x16_zero), + LLVM_CONST(i32x8_zero), + LLVM_CONST(i32x4_zero), + LLVM_CONST(i32x2_zero), + }; - switch (load_opcode) { - case SIMD_v8x16_load_splat: - data_length = 1; - lane_count = 16; - element_ptr_type = INT8_PTR_TYPE; - vector_type = V128_i8x16_TYPE; - break; - case SIMD_v16x8_load_splat: - data_length = 2; - lane_count = 8; - element_ptr_type = INT16_PTR_TYPE; - vector_type = V128_i16x8_TYPE; - break; - case SIMD_v32x4_load_splat: - data_length = 4; - lane_count = 4; - element_ptr_type = INT32_PTR_TYPE; - vector_type = V128_i32x4_TYPE; - break; - case SIMD_v64x2_load_splat: - data_length = 8; - lane_count = 2; - element_ptr_type = INT64_PTR_TYPE; - vector_type = V128_i64x2_TYPE; - break; - default: - bh_assert(0); - goto fail; + if (!(element = simd_load(comp_ctx, func_ctx, align, offset, + data_lengths[opcode_index], + element_ptr_types[opcode_index]))) { + return false; } - if (!(element = simd_load(comp_ctx, func_ctx, align, offset, data_length, - element_ptr_type))) { - goto fail; + if (!(result = + LLVMBuildInsertElement(comp_ctx->builder, undefs[opcode_index], + element, I32_ZERO, "base"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + return false; } - if (!(result = simd_splat(comp_ctx, func_ctx, element, vector_type, - lane_count))) { - goto fail; + if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, result, + undefs[opcode_index], + masks[opcode_index], "vector"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + return false; } - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "result"))) { + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +bool +aot_compile_simd_load_lane(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 opcode, + uint32 align, + uint32 offset, + uint8 lane_id) +{ + LLVMValueRef element, vector; + uint32 opcode_index = opcode - SIMD_v128_load8_lane; + uint32 data_lengths[] = { 1, 2, 4, 8 }; + LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE, + INT32_PTR_TYPE, INT64_PTR_TYPE }; + LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE, + V128_i32x4_TYPE, V128_i64x2_TYPE }; + LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id); + + if (!(vector = simd_pop_v128_and_bitcast( + comp_ctx, func_ctx, vector_types[opcode_index], "src"))) { + return false; + } + + if (!(element = simd_load(comp_ctx, func_ctx, align, offset, + data_lengths[opcode_index], + element_ptr_types[opcode_index]))) { + return false; + } + + if (!(vector = LLVMBuildInsertElement(comp_ctx->builder, vector, element, + lane, "dst"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, vector, "result"); +} + +bool +aot_compile_simd_load_zero(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 opcode, + uint32 align, + uint32 offset) +{ + 
LLVMValueRef element, result, mask; + uint32 opcode_index = opcode - SIMD_v128_load32_zero; + uint32 data_lengths[] = { 4, 8 }; + LLVMTypeRef element_ptr_types[] = { INT32_PTR_TYPE, INT64_PTR_TYPE }; + LLVMValueRef zero[] = { + LLVM_CONST(i32x4_vec_zero), + LLVM_CONST(i64x2_vec_zero), + }; + LLVMValueRef undef[] = { + LLVM_CONST(i32x4_undef), + LLVM_CONST(i64x2_undef), + }; + uint32 mask_length[] = { 4, 2 }; + LLVMValueRef mask_element[][4] = { + { LLVM_CONST(i32_zero), LLVM_CONST(i32_four), LLVM_CONST(i32_five), + LLVM_CONST(i32_six) }, + { LLVM_CONST(i32_zero), LLVM_CONST(i32_two) }, + }; + + if (!(element = simd_load(comp_ctx, func_ctx, align, offset, + data_lengths[opcode_index], + element_ptr_types[opcode_index]))) { + return false; + } + + if (!(result = + LLVMBuildInsertElement(comp_ctx->builder, undef[opcode_index], + element, I32_ZERO, "vector"))) { + HANDLE_FAILURE("LLVMBuildInsertElement"); + return false; + } + + /* fill in other lanes with zero */ + if (!(mask = LLVMConstVector(mask_element[opcode_index], + mask_length[opcode_index]))) { + HANDLE_FAILURE("LLConstVector"); + return false; + } + + if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, result, + zero[opcode_index], mask, + "fill_in_zero"))) { + HANDLE_FAILURE("LLVMBuildShuffleVector"); + return false; + } + + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); +} + +/* data_length in bytes */ +static bool +simd_store(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint32 align, + uint32 offset, + uint32 data_length, + LLVMValueRef value, + LLVMTypeRef value_ptr_type) +{ + LLVMValueRef maddr, result; + + if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, + data_length))) + return false; + + if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, value_ptr_type, + "data_ptr"))) { HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; + return false; } - PUSH_V128(result); + if (!(result = LLVMBuildStore(comp_ctx->builder, value, maddr))) { + HANDLE_FAILURE("LLVMBuildStore"); + return false; + } + + LLVMSetAlignment(result, 1); + return true; +} + +bool +aot_compile_simd_v128_store(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint32 align, + uint32 offset) +{ + LLVMValueRef value; + + POP_V128(value); + + return simd_store(comp_ctx, func_ctx, align, offset, 16, value, + V128_PTR_TYPE); fail: return false; } + +bool +aot_compile_simd_store_lane(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 opcode, + uint32 align, + uint32 offset, + uint8 lane_id) +{ + LLVMValueRef element, vector; + uint32 data_lengths[] = { 1, 2, 4, 8 }; + LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE, + INT32_PTR_TYPE, INT64_PTR_TYPE }; + uint32 opcode_index = opcode - SIMD_v128_store8_lane; + LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE, + V128_i32x4_TYPE, V128_i64x2_TYPE }; + LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id); + + if (!(vector = simd_pop_v128_and_bitcast( + comp_ctx, func_ctx, vector_types[opcode_index], "src"))) { + return false; + } + + if (!(element = LLVMBuildExtractElement(comp_ctx->builder, vector, lane, + "element"))) { + HANDLE_FAILURE("LLVMBuildExtractElement"); + return false; + } + + return simd_store(comp_ctx, func_ctx, align, offset, + data_lengths[opcode_index], element, + element_ptr_types[opcode_index]); +} diff --git a/core/iwasm/compilation/simd/simd_load_store.h b/core/iwasm/compilation/simd/simd_load_store.h index dbf662ad1..bd3baf2ce 100644 --- 
a/core/iwasm/compilation/simd/simd_load_store.h +++ b/core/iwasm/compilation/simd/simd_load_store.h @@ -18,26 +18,49 @@ aot_compile_simd_v128_load(AOTCompContext *comp_ctx, uint32 align, uint32 offset); -bool -aot_compile_simd_v128_store(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - uint32 align, - uint32 offset); - bool aot_compile_simd_load_extend(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint8 load_opcode, + uint8 opcode, uint32 align, uint32 offset); bool aot_compile_simd_load_splat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, - uint8 load_opcode, + uint8 opcode, uint32 align, uint32 offset); +bool +aot_compile_simd_load_lane(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 opcode, + uint32 align, + uint32 offset, + uint8 lane_id); + +bool +aot_compile_simd_load_zero(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 opcode, + uint32 align, + uint32 offset); + +bool +aot_compile_simd_v128_store(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint32 align, + uint32 offset); + +bool +aot_compile_simd_store_lane(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + uint8 opcode, + uint32 align, + uint32 offset, + uint8 lane_id); + #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/compilation/simd/simd_sat_int_arith.c b/core/iwasm/compilation/simd/simd_sat_int_arith.c index 1f2b5b353..131dace17 100644 --- a/core/iwasm/compilation/simd/simd_sat_int_arith.c +++ b/core/iwasm/compilation/simd/simd_sat_int_arith.c @@ -9,46 +9,32 @@ #include "../../aot/aot_runtime.h" static bool -simd_v128_integer_arith(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - LLVMTypeRef vector_type, - char *intrinsics_s_u[2], - bool is_signed) +simd_sat_int_arith(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + LLVMTypeRef vector_type, + const char *intrinsics) { LLVMValueRef lhs, rhs, result; LLVMTypeRef param_types[2]; - if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "rhs"))) { - goto fail; - } - - if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "lhs"))) { - goto fail; + if (!(rhs = + simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "rhs")) + || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, + "lhs"))) { + return false; } param_types[0] = vector_type; param_types[1] = vector_type; - if (!(result = aot_call_llvm_intrinsic( - comp_ctx, func_ctx, is_signed ? 
intrinsics_s_u[0] : intrinsics_s_u[1], - vector_type, param_types, 2, lhs, rhs))) { + if (!(result = + aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsics, + vector_type, param_types, 2, lhs, rhs))) { HANDLE_FAILURE("LLVMBuildCall"); - goto fail; + return false; } - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - /* push result into the stack */ - PUSH_V128(result); - return true; -fail: - return false; + return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result"); } bool @@ -57,27 +43,14 @@ aot_compile_simd_i8x16_saturate(AOTCompContext *comp_ctx, V128Arithmetic arith_op, bool is_signed) { - char *intrinsics[2] = { 0 }; - bool result = false; - switch (arith_op) { - case V128_ADD: - intrinsics[0] = "llvm.sadd.sat.v16i8"; - intrinsics[1] = "llvm.uadd.sat.v16i8"; - result = simd_v128_integer_arith( - comp_ctx, func_ctx, V128_i8x16_TYPE, intrinsics, is_signed); - break; - case V128_SUB: - intrinsics[0] = "llvm.ssub.sat.v16i8"; - intrinsics[1] = "llvm.usub.sat.v16i8"; - result = simd_v128_integer_arith( - comp_ctx, func_ctx, V128_i8x16_TYPE, intrinsics, is_signed); - break; - default: - bh_assert(0); - break; - } + char *intrinsics[][2] = { + { "llvm.sadd.sat.v16i8", "llvm.uadd.sat.v16i8" }, + { "llvm.ssub.sat.v16i8", "llvm.usub.sat.v16i8" }, + }; - return result; + return simd_sat_int_arith(comp_ctx, func_ctx, V128_i8x16_TYPE, + is_signed ? intrinsics[arith_op][0] + : intrinsics[arith_op][1]); } bool @@ -86,282 +59,28 @@ aot_compile_simd_i16x8_saturate(AOTCompContext *comp_ctx, V128Arithmetic arith_op, bool is_signed) { - char *intrinsics[2] = { 0 }; - bool result = false; - switch (arith_op) { - case V128_ADD: - intrinsics[0] = "llvm.sadd.sat.v8i16"; - intrinsics[1] = "llvm.uadd.sat.v8i16"; - result = simd_v128_integer_arith( - comp_ctx, func_ctx, V128_i16x8_TYPE, intrinsics, is_signed); - break; - case V128_SUB: - intrinsics[0] = "llvm.ssub.sat.v8i16"; - intrinsics[1] = "llvm.usub.sat.v8i16"; - result = simd_v128_integer_arith( - comp_ctx, func_ctx, V128_i16x8_TYPE, intrinsics, is_signed); - break; - default: - bh_assert(0); - break; - } + char *intrinsics[][2] = { + { "llvm.sadd.sat.v8i16", "llvm.uadd.sat.v8i16" }, + { "llvm.ssub.sat.v8i16", "llvm.usub.sat.v8i16" }, + }; - return result; -} - -static bool -simd_v128_cmp(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - LLVMTypeRef vector_type, - V128Arithmetic arith_op, - bool is_signed) -{ - LLVMValueRef lhs, rhs, result; - LLVMIntPredicate op; - - if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "rhs"))) { - goto fail; - } - - if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "lhs"))) { - goto fail; - } - - if (V128_MIN == arith_op) { - op = is_signed ? LLVMIntSLT : LLVMIntULT; - } - else { - op = is_signed ? LLVMIntSGT : LLVMIntUGT; - } - - if (!(result = LLVMBuildICmp(comp_ctx->builder, op, lhs, rhs, "cmp"))) { - HANDLE_FAILURE("LLVMBuildICmp"); - goto fail; - } - - if (!(result = - LLVMBuildSelect(comp_ctx->builder, result, lhs, rhs, "select"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - /* push result into the stack */ - PUSH_V128(result); - return true; -fail: - return false; + return simd_sat_int_arith(comp_ctx, func_ctx, V128_i16x8_TYPE, + is_signed ? 
intrinsics[arith_op][0] + : intrinsics[arith_op][1]); } bool -aot_compile_simd_i8x16_cmp(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - V128Arithmetic arith_op, - bool is_signed) +aot_compile_simd_i32x4_saturate(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed) { - return simd_v128_cmp(comp_ctx, func_ctx, V128_i8x16_TYPE, arith_op, - is_signed); + char *intrinsics[][2] = { + { "llvm.sadd.sat.v4i32", "llvm.uadd.sat.v4i32" }, + { "llvm.ssub.sat.v4i32", "llvm.usub.sat.v4i32" }, + }; + + return simd_sat_int_arith(comp_ctx, func_ctx, V128_i16x8_TYPE, + is_signed ? intrinsics[arith_op][0] + : intrinsics[arith_op][1]); } - -bool -aot_compile_simd_i16x8_cmp(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - V128Arithmetic arith_op, - bool is_signed) -{ - return simd_v128_cmp(comp_ctx, func_ctx, V128_i16x8_TYPE, arith_op, - is_signed); -} - -bool -aot_compile_simd_i32x4_cmp(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - V128Arithmetic arith_op, - bool is_signed) -{ - return simd_v128_cmp(comp_ctx, func_ctx, V128_i32x4_TYPE, arith_op, - is_signed); -} - -static bool -simd_v128_abs(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - LLVMTypeRef vector_type) -{ - LLVMValueRef vector, negs, zeros, cond, result; - - if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "vec"))) { - goto fail; - } - - if (!(negs = LLVMBuildNeg(comp_ctx->builder, vector, "neg"))) { - HANDLE_FAILURE("LLVMBuildNeg"); - goto fail; - } - - if (!(zeros = LLVMConstNull(vector_type))) { - HANDLE_FAILURE("LLVMConstNull"); - goto fail; - } - - if (!(cond = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGE, vector, zeros, - "ge_zero"))) { - HANDLE_FAILURE("LLVMBuildICmp"); - goto fail; - } - - if (!(result = LLVMBuildSelect(comp_ctx->builder, cond, vector, negs, - "select"))) { - HANDLE_FAILURE("LLVMBuildSelect"); - goto fail; - } - - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - /* push result into the stack */ - PUSH_V128(result); - return true; -fail: - return false; -} - -bool -aot_compile_simd_i8x16_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) -{ - return simd_v128_abs(comp_ctx, func_ctx, V128_i8x16_TYPE); -} - -bool -aot_compile_simd_i16x8_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) -{ - return simd_v128_abs(comp_ctx, func_ctx, V128_i16x8_TYPE); -} - -bool -aot_compile_simd_i32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) -{ - return simd_v128_abs(comp_ctx, func_ctx, V128_i32x4_TYPE); -} - -/* (v1 + v2 + 1) / 2 */ -static bool -simd_v128_avg(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - LLVMTypeRef vector_type, - LLVMTypeRef element_type, - unsigned lane_width) -{ - LLVMValueRef lhs, rhs, undef, zeros, ones, result; - LLVMTypeRef ext_type; - - if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "rhs"))) { - goto fail; - } - - if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, - "lhs"))) { - goto fail; - } - - if (!(ext_type = LLVMVectorType(I32_TYPE, lane_width))) { - HANDLE_FAILURE("LLVMVectorType"); - goto fail; - } - - if (!(lhs = LLVMBuildZExt(comp_ctx->builder, lhs, ext_type, "left_ext"))) { - HANDLE_FAILURE("LLVMBuildZExt"); - goto fail; - } - - if (!(rhs = - LLVMBuildZExt(comp_ctx->builder, rhs, ext_type, "right_ext"))) { - HANDLE_FAILURE("LLVMBuildZExt"); - goto fail; - } - - if (!(undef = LLVMGetUndef(ext_type))) { - 
HANDLE_FAILURE("LLVMGetUndef"); - goto fail; - } - - if (!(zeros = LLVMConstNull(ext_type))) { - HANDLE_FAILURE("LLVMConstNull"); - goto fail; - } - - if (!(ones = LLVMConstInt(I32_TYPE, 1, true))) { - HANDLE_FAILURE("LLVMConstInt"); - goto fail; - } - - if (!(ones = LLVMBuildInsertElement(comp_ctx->builder, undef, ones, - I32_ZERO, "base_ones"))) { - HANDLE_FAILURE("LLVMBuildInsertElement"); - goto fail; - } - - if (!(ones = LLVMBuildShuffleVector(comp_ctx->builder, ones, undef, zeros, - "ones"))) { - HANDLE_FAILURE("LLVMBuildShuffleVector"); - goto fail; - } - - if (!(result = LLVMBuildAdd(comp_ctx->builder, lhs, rhs, "a_add_b"))) { - HANDLE_FAILURE("LLVMBuildAdd"); - goto fail; - } - - if (!(result = LLVMBuildAdd(comp_ctx->builder, result, ones, "plus_1"))) { - HANDLE_FAILURE("LLVMBuildAdd"); - goto fail; - } - - if (!(result = LLVMBuildLShr(comp_ctx->builder, result, ones, "avg"))) { - HANDLE_FAILURE("LLVMBuildLShr"); - goto fail; - } - - if (!(result = LLVMBuildTrunc(comp_ctx->builder, result, vector_type, - "avg_trunc"))) { - HANDLE_FAILURE("LLVMBuildTrunc"); - goto fail; - } - - if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE, - "ret"))) { - HANDLE_FAILURE("LLVMBuildBitCast"); - goto fail; - } - - /* push result into the stack */ - PUSH_V128(result); - return true; -fail: - return false; -} -bool -aot_compile_simd_i8x16_avgr_u(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx) -{ - return simd_v128_avg(comp_ctx, func_ctx, V128_i8x16_TYPE, INT8_TYPE, 16); -} - -bool -aot_compile_simd_i16x8_avgr_u(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx) -{ - return simd_v128_avg(comp_ctx, func_ctx, V128_i16x8_TYPE, INT16_TYPE, 8); -} \ No newline at end of file diff --git a/core/iwasm/compilation/simd/simd_sat_int_arith.h b/core/iwasm/compilation/simd/simd_sat_int_arith.h index 57669878e..9b10472a7 100644 --- a/core/iwasm/compilation/simd/simd_sat_int_arith.h +++ b/core/iwasm/compilation/simd/simd_sat_int_arith.h @@ -25,40 +25,10 @@ aot_compile_simd_i16x8_saturate(AOTCompContext *comp_ctx, bool is_signed); bool -aot_compile_simd_i8x16_cmp(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - V128Arithmetic arith_op, - bool is_signed); - -bool -aot_compile_simd_i16x8_cmp(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - V128Arithmetic arith_op, - bool is_signed); - -bool -aot_compile_simd_i32x4_cmp(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx, - V128Arithmetic arith_op, - bool is_signed); - -bool -aot_compile_simd_i8x16_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); - -bool -aot_compile_simd_i16x8_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); - -bool -aot_compile_simd_i32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx); - -bool -aot_compile_simd_i8x16_avgr_u(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx); - -bool -aot_compile_simd_i16x8_avgr_u(AOTCompContext *comp_ctx, - AOTFuncContext *func_ctx); - +aot_compile_simd_i32x4_saturate(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx, + V128Arithmetic arith_op, + bool is_signed); #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 434f11a3c..06909a1ca 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -4053,44 +4053,35 @@ wasm_loader_find_block_addr(BlockAddr *block_addr_cache, #if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) case WASM_OP_SIMD_PREFIX: { + /* TODO: shall we ceate a table to be friendly to 
branch prediction */ opcode = read_uint8(p); - if (SIMD_i8x16_eq <= opcode - && opcode <= SIMD_f32x4_convert_i32x4_u) { - break; - } - + /* follow the order of enum WASMSimdEXTOpcode in wasm_opcode.h */ switch (opcode) { case SIMD_v128_load: - case SIMD_i16x8_load8x8_s: - case SIMD_i16x8_load8x8_u: - case SIMD_i32x4_load16x4_s: - case SIMD_i32x4_load16x4_u: - case SIMD_i64x2_load32x2_s: - case SIMD_i64x2_load32x2_u: - case SIMD_v8x16_load_splat: - case SIMD_v16x8_load_splat: - case SIMD_v32x4_load_splat: - case SIMD_v64x2_load_splat: + case SIMD_v128_load8x8_s: + case SIMD_v128_load8x8_u: + case SIMD_v128_load16x4_s: + case SIMD_v128_load16x4_u: + case SIMD_v128_load32x2_s: + case SIMD_v128_load32x2_u: + case SIMD_v128_load8_splat: + case SIMD_v128_load16_splat: + case SIMD_v128_load32_splat: + case SIMD_v128_load64_splat: case SIMD_v128_store: - skip_leb_uint32(p, p_end); /* align */ - skip_leb_uint32(p, p_end); /* offset */ + /* memarg align */ + skip_leb_uint32(p, p_end); + /* memarg offset*/ + skip_leb_uint32(p, p_end); break; case SIMD_v128_const: case SIMD_v8x16_shuffle: + /* immByte[16] immLaneId[16] */ CHECK_BUF1(p, p_end, 16); p += 16; break; - case SIMD_v8x16_swizzle: - case SIMD_i8x16_splat: - case SIMD_i16x8_splat: - case SIMD_i32x4_splat: - case SIMD_i64x2_splat: - case SIMD_f32x4_splat: - case SIMD_f64x2_splat: - break; - case SIMD_i8x16_extract_lane_s: case SIMD_i8x16_extract_lane_u: case SIMD_i8x16_replace_lane: @@ -4105,14 +4096,44 @@ wasm_loader_find_block_addr(BlockAddr *block_addr_cache, case SIMD_f32x4_replace_lane: case SIMD_f64x2_extract_lane: case SIMD_f64x2_replace_lane: + /* ImmLaneId */ CHECK_BUF(p, p_end, 1); p++; break; + case SIMD_v128_load8_lane: + case SIMD_v128_load16_lane: + case SIMD_v128_load32_lane: + case SIMD_v128_load64_lane: + case SIMD_v128_store8_lane: + case SIMD_v128_store16_lane: + case SIMD_v128_store32_lane: + case SIMD_v128_store64_lane: + /* memarg align */ + skip_leb_uint32(p, p_end); + /* memarg offset*/ + skip_leb_uint32(p, p_end); + /* ImmLaneId */ + CHECK_BUF(p, p_end, 1); + p++; + break; + + case SIMD_v128_load32_zero: + case SIMD_v128_load64_zero: + /* memarg align */ + skip_leb_uint32(p, p_end); + /* memarg offset*/ + skip_leb_uint32(p, p_end); + break; + default: - LOG_WARNING("WASM loader find block addr failed: " - "invalid opcode fd 0x%02x.", opcode); - return false; + /* + * since latest SIMD specific used almost every value + * from 0x00 to 0xff, the default branch will present all + * opcodes without imm + * https://github.com/WebAssembly/simd/blob/main/proposals/simd/NewOpcodes.md + */ + break; } break; } @@ -5685,9 +5706,25 @@ check_simd_memory_access_align(uint8 opcode, uint32 align, 4, /* store */ }; - bh_assert(opcode <= SIMD_v128_store); + uint8 mem_access_aligns_load_lane[] = { + 0, 1, 2, 3, /* load lane */ + 0, 1, 2, 3, /* store lane */ + 2, 3 /* store zero */ + }; - if (align > mem_access_aligns[opcode - SIMD_v128_load]) { + if (!((opcode <= SIMD_v128_store) + || (SIMD_v128_load8_lane <= opcode + && opcode <= SIMD_v128_load64_zero))) { + set_error_buf(error_buf, error_buf_size, + "the opcode doesn't include memarg"); + return false; + } + + if ((opcode <= SIMD_v128_store + && align > mem_access_aligns[opcode - SIMD_v128_load]) + || (SIMD_v128_load8_lane <= opcode && opcode <= SIMD_v128_load64_zero + && align > mem_access_aligns_load_lane[opcode + - SIMD_v128_load8_lane])) { set_error_buf(error_buf, error_buf_size, "alignment must not be larger than natural"); return false; @@ -5731,6 +5768,24 @@ 
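/*
 * Reviewer note, not part of the patch: the alignment tables above encode the
 * "alignment must not be larger than natural" rule, where the memarg align
 * field is a log2 exponent of the access size in bytes. A stand-alone model
 * of the check for the newly added lane/zero accesses, assuming the same
 * opcode order as mem_access_aligns_load_lane:
 */
#include <stdbool.h>
#include <stdint.h>

/* natural alignment exponents: load8..64_lane, store8..64_lane,
   load32_zero, load64_zero */
static const uint8_t simd_lane_zero_aligns[] = { 0, 1, 2, 3,
                                                 0, 1, 2, 3,
                                                 2, 3 };

/* opcode_offset is opcode - SIMD_v128_load8_lane; e.g. v128.load64_lane
   (offset 3, an 8-byte access) accepts align exponents 0..3 only */
static bool
simd_lane_zero_align_is_valid(uint32_t opcode_offset, uint32_t align)
{
    return align <= simd_lane_zero_aligns[opcode_offset];
}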
check_simd_access_lane(uint8 opcode, uint8 lane, goto fail; } break; + + case SIMD_v128_load8_lane: + case SIMD_v128_load16_lane: + case SIMD_v128_load32_lane: + case SIMD_v128_load64_lane: + case SIMD_v128_store8_lane: + case SIMD_v128_store16_lane: + case SIMD_v128_store32_lane: + case SIMD_v128_store64_lane: + case SIMD_v128_load32_zero: + case SIMD_v128_load64_zero: + { + uint8 max_lanes[] = { 16, 8, 4, 2, 16, 8, 4, 2, 4, 2 }; + if (lane >= max_lanes[opcode - SIMD_v128_load8_lane]) { + goto fail; + } + break; + } default: goto fail; } @@ -8038,21 +8093,21 @@ fail_data_cnt_sec_require: #if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0) case WASM_OP_SIMD_PREFIX: { - uint8 lane; - opcode = read_uint8(p); + /* follow the order of enum WASMSimdEXTOpcode in wasm_opcode.h */ switch (opcode) { + /* memory instruction */ case SIMD_v128_load: - case SIMD_i16x8_load8x8_s: - case SIMD_i16x8_load8x8_u: - case SIMD_i32x4_load16x4_s: - case SIMD_i32x4_load16x4_u: - case SIMD_i64x2_load32x2_s: - case SIMD_i64x2_load32x2_u: - case SIMD_v8x16_load_splat: - case SIMD_v16x8_load_splat: - case SIMD_v32x4_load_splat: - case SIMD_v64x2_load_splat: + case SIMD_v128_load8x8_s: + case SIMD_v128_load8x8_u: + case SIMD_v128_load16x4_s: + case SIMD_v128_load16x4_u: + case SIMD_v128_load32x2_s: + case SIMD_v128_load32x2_u: + case SIMD_v128_load8_splat: + case SIMD_v128_load16_splat: + case SIMD_v128_load32_splat: + case SIMD_v128_load64_splat: { CHECK_MEMORY(); @@ -8064,7 +8119,6 @@ fail_data_cnt_sec_require: read_leb_uint32(p, p_end, mem_offset); /* offset */ - /* pop(i32 %i), push(v128 *result) */ POP_AND_PUSH(VALUE_TYPE_I32, VALUE_TYPE_V128); break; } @@ -8081,18 +8135,19 @@ fail_data_cnt_sec_require: read_leb_uint32(p, p_end, mem_offset); /* offset */ - /* pop(v128 %value) */ POP_V128(); - /* pop(i32 %i) */ POP_I32(); break; } + /* basic operation */ case SIMD_v128_const: + { CHECK_BUF1(p, p_end, 16); p += 16; PUSH_V128(); break; + } case SIMD_v8x16_shuffle: { @@ -8111,122 +8166,87 @@ fail_data_cnt_sec_require: } case SIMD_v8x16_swizzle: + { POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); break; + } + /* splat operation */ case SIMD_i8x16_splat: case SIMD_i16x8_splat: case SIMD_i32x4_splat: - POP_AND_PUSH(VALUE_TYPE_I32, VALUE_TYPE_V128); - break; case SIMD_i64x2_splat: - POP_AND_PUSH(VALUE_TYPE_I64, VALUE_TYPE_V128); - break; case SIMD_f32x4_splat: - POP_AND_PUSH(VALUE_TYPE_F32, VALUE_TYPE_V128); - break; case SIMD_f64x2_splat: - POP_AND_PUSH(VALUE_TYPE_F64, VALUE_TYPE_V128); + { + uint8 pop_type[] = { VALUE_TYPE_I32, VALUE_TYPE_I32, + VALUE_TYPE_I32, VALUE_TYPE_I64, + VALUE_TYPE_F32, VALUE_TYPE_F64 }; + POP_AND_PUSH(pop_type[opcode - SIMD_i8x16_splat], + VALUE_TYPE_V128); break; + } + /* lane operation */ case SIMD_i8x16_extract_lane_s: case SIMD_i8x16_extract_lane_u: + case SIMD_i8x16_replace_lane: case SIMD_i16x8_extract_lane_s: case SIMD_i16x8_extract_lane_u: - case SIMD_i32x4_extract_lane: - CHECK_BUF(p, p_end, 1); - lane = read_uint8(p); - - if (!check_simd_access_lane(opcode, lane, error_buf, - error_buf_size)) { - goto fail; - } - - POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32); - break; - case SIMD_i64x2_extract_lane: - CHECK_BUF(p, p_end, 1); - lane = read_uint8(p); - - if (!check_simd_access_lane(opcode, lane, error_buf, - error_buf_size)) { - goto fail; - } - - POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I64); - break; - case SIMD_f32x4_extract_lane: - CHECK_BUF(p, p_end, 1); - lane = read_uint8(p); - - if (!check_simd_access_lane(opcode, lane, error_buf, - error_buf_size)) { - goto fail; 
- } - - POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_F32); - break; - case SIMD_f64x2_extract_lane: - CHECK_BUF(p, p_end, 1); - lane = read_uint8(p); - - if (!check_simd_access_lane(opcode, lane, error_buf, - error_buf_size)) { - goto fail; - } - - POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_F64); - break; - case SIMD_i8x16_replace_lane: case SIMD_i16x8_replace_lane: + case SIMD_i32x4_extract_lane: case SIMD_i32x4_replace_lane: - CHECK_BUF(p, p_end, 1); - lane = read_uint8(p); - - if (!check_simd_access_lane(opcode, lane, error_buf, - error_buf_size)) { - goto fail; - } - - POP_I32(); - POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); - break; + case SIMD_i64x2_extract_lane: case SIMD_i64x2_replace_lane: - CHECK_BUF(p, p_end, 1); - lane = read_uint8(p); - - if (!check_simd_access_lane(opcode, lane, error_buf, - error_buf_size)) { - goto fail; - } - - POP_I64(); - POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); - break; + case SIMD_f32x4_extract_lane: case SIMD_f32x4_replace_lane: - CHECK_BUF(p, p_end, 1); - lane = read_uint8(p); - - if (!check_simd_access_lane(opcode, lane, error_buf, - error_buf_size)) { - goto fail; - } - - POP_F32(); - POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); - break; + case SIMD_f64x2_extract_lane: case SIMD_f64x2_replace_lane: + { + uint8 lane; + /* clang-format off */ + uint8 replace[] = { + /*i8x16*/ 0x0, 0x0, VALUE_TYPE_I32, + /*i16x8*/ 0x0, 0x0, VALUE_TYPE_I32, + /*i32x4*/ 0x0, VALUE_TYPE_I32, + /*i64x2*/ 0x0, VALUE_TYPE_I64, + /*f32x4*/ 0x0, VALUE_TYPE_F32, + /*f64x2*/ 0x0, VALUE_TYPE_F64, + }; + uint8 push_type[] = { + /*i8x16*/ VALUE_TYPE_I32, VALUE_TYPE_I32, + VALUE_TYPE_V128, + /*i16x8*/ VALUE_TYPE_I32, VALUE_TYPE_I32, + VALUE_TYPE_V128, + /*i32x4*/ VALUE_TYPE_I32, VALUE_TYPE_V128, + /*i64x2*/ VALUE_TYPE_I64, VALUE_TYPE_V128, + /*f32x4*/ VALUE_TYPE_F32, VALUE_TYPE_V128, + /*f64x2*/ VALUE_TYPE_F64, VALUE_TYPE_V128, + }; + /* clang-format on */ + CHECK_BUF(p, p_end, 1); lane = read_uint8(p); - if (!check_simd_access_lane(opcode, lane, error_buf, error_buf_size)) { goto fail; } - POP_F64(); - POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + if (replace[opcode - SIMD_i8x16_extract_lane_s]) { + if (!(wasm_loader_pop_frame_ref( + loader_ctx, + replace[opcode - SIMD_i8x16_extract_lane_s], + error_buf, error_buf_size))) + goto fail; + } + + POP_AND_PUSH( + VALUE_TYPE_V128, + push_type[opcode - SIMD_i8x16_extract_lane_s]); break; + } + + /* i8x16 compare operation */ case SIMD_i8x16_eq: case SIMD_i8x16_ne: case SIMD_i8x16_lt_s: @@ -8237,6 +8257,7 @@ fail_data_cnt_sec_require: case SIMD_i8x16_le_u: case SIMD_i8x16_ge_s: case SIMD_i8x16_ge_u: + /* i16x8 compare operation */ case SIMD_i16x8_eq: case SIMD_i16x8_ne: case SIMD_i16x8_lt_s: @@ -8247,6 +8268,7 @@ fail_data_cnt_sec_require: case SIMD_i16x8_le_u: case SIMD_i16x8_ge_s: case SIMD_i16x8_ge_u: + /* i32x4 compare operation */ case SIMD_i32x4_eq: case SIMD_i32x4_ne: case SIMD_i32x4_lt_s: @@ -8257,122 +8279,318 @@ fail_data_cnt_sec_require: case SIMD_i32x4_le_u: case SIMD_i32x4_ge_s: case SIMD_i32x4_ge_u: + /* f32x4 compare operation */ case SIMD_f32x4_eq: case SIMD_f32x4_ne: case SIMD_f32x4_lt: case SIMD_f32x4_gt: case SIMD_f32x4_le: case SIMD_f32x4_ge: + /* f64x2 compare operation */ case SIMD_f64x2_eq: case SIMD_f64x2_ne: case SIMD_f64x2_lt: case SIMD_f64x2_gt: case SIMD_f64x2_le: case SIMD_f64x2_ge: + { POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); break; + } + + /* v128 operation */ + case SIMD_v128_not: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_v128_and: + case 
SIMD_v128_andnot: + case SIMD_v128_or: + case SIMD_v128_xor: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_v128_bitselect: + { + POP_V128(); + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_v128_any_true: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32); + break; + } + + /* Load Lane Operation */ + case SIMD_v128_load8_lane: + case SIMD_v128_load16_lane: + case SIMD_v128_load32_lane: + case SIMD_v128_load64_lane: + case SIMD_v128_store8_lane: + case SIMD_v128_store16_lane: + case SIMD_v128_store32_lane: + case SIMD_v128_store64_lane: + { + uint8 lane; + + CHECK_MEMORY(); + + read_leb_uint32(p, p_end, align); /* align */ + if (!check_simd_memory_access_align( + opcode, align, error_buf, error_buf_size)) { + goto fail; + } + + read_leb_uint32(p, p_end, mem_offset); /* offset */ + + CHECK_BUF(p, p_end, 1); + lane = read_uint8(p); + if (!check_simd_access_lane(opcode, lane, error_buf, + error_buf_size)) { + goto fail; + } + + POP_V128(); + POP_I32(); + if (opcode < SIMD_v128_store8_lane) { + PUSH_V128(); + } + break; + } + + case SIMD_v128_load32_zero: + case SIMD_v128_load64_zero: + { + CHECK_MEMORY(); + + read_leb_uint32(p, p_end, align); /* align */ + if (!check_simd_memory_access_align( + opcode, align, error_buf, error_buf_size)) { + goto fail; + } + + read_leb_uint32(p, p_end, mem_offset); /* offset */ + + POP_AND_PUSH(VALUE_TYPE_I32, VALUE_TYPE_V128); + break; + } + + /* Float conversion */ + case SIMD_f32x4_demote_f64x2_zero: + case SIMD_f64x2_promote_low_f32x4_zero: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + /* i8x16 Operation */ + case SIMD_i8x16_abs: + case SIMD_i8x16_neg: + case SIMD_i8x16_popcnt: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_i8x16_all_true: + case SIMD_i8x16_bitmask: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32); + break; + } + + case SIMD_i8x16_narrow_i16x8_s: + case SIMD_i8x16_narrow_i16x8_u: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } case SIMD_f32x4_ceil: case SIMD_f32x4_floor: case SIMD_f32x4_trunc: case SIMD_f32x4_nearest: - case SIMD_f64x2_ceil: - case SIMD_f64x2_floor: - case SIMD_f64x2_trunc: - case SIMD_f64x2_nearest: - case SIMD_v128_not: - case SIMD_i8x16_abs: - case SIMD_i8x16_neg: - case SIMD_i16x8_abs: - case SIMD_i16x8_neg: - case SIMD_i32x4_abs: - case SIMD_i32x4_neg: - case SIMD_i64x2_neg: - case SIMD_f32x4_abs: - case SIMD_f32x4_neg: - case SIMD_f32x4_sqrt: - case SIMD_f64x2_abs: - case SIMD_f64x2_neg: - case SIMD_f64x2_sqrt: - case SIMD_i16x8_widen_low_i8x16_s: - case SIMD_i16x8_widen_high_i8x16_s: - case SIMD_i16x8_widen_low_i8x16_u: - case SIMD_i16x8_widen_high_i8x16_u: - case SIMD_i32x4_widen_low_i16x8_s: - case SIMD_i32x4_widen_high_i16x8_s: - case SIMD_i32x4_widen_low_i16x8_u: - case SIMD_i32x4_widen_high_i16x8_u: - case SIMD_i32x4_trunc_sat_f32x4_s: - case SIMD_i32x4_trunc_sat_f32x4_u: - case SIMD_f32x4_convert_i32x4_s: - case SIMD_f32x4_convert_i32x4_u: + { POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); break; - - case SIMD_v128_bitselect: - POP_V128(); - POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); - break; - - case SIMD_i8x16_any_true: - case SIMD_i8x16_all_true: - case SIMD_i8x16_bitmask: - case SIMD_i16x8_any_true: - case SIMD_i16x8_all_true: - case SIMD_i16x8_bitmask: - case SIMD_i32x4_any_true: - case SIMD_i32x4_all_true: - case SIMD_i32x4_bitmask: - POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32); - break; + } case SIMD_i8x16_shl: case SIMD_i8x16_shr_s: case 
SIMD_i8x16_shr_u: - case SIMD_i16x8_shl: - case SIMD_i16x8_shr_s: - case SIMD_i16x8_shr_u: - case SIMD_i32x4_shl: - case SIMD_i32x4_shr_s: - case SIMD_i32x4_shr_u: - case SIMD_i64x2_shl: - case SIMD_i64x2_shr_s: - case SIMD_i64x2_shr_u: + { POP_I32(); POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); break; + } - case SIMD_i8x16_narrow_i16x8_s: - case SIMD_i8x16_narrow_i16x8_u: - case SIMD_i16x8_narrow_i32x4_s: - case SIMD_i16x8_narrow_i32x4_u: - case SIMD_v128_and: - case SIMD_v128_andnot: - case SIMD_v128_or: - case SIMD_v128_xor: case SIMD_i8x16_add: - case SIMD_i8x16_add_saturate_s: - case SIMD_i8x16_add_saturate_u: + case SIMD_i8x16_add_sat_s: + case SIMD_i8x16_add_sat_u: case SIMD_i8x16_sub: - case SIMD_i8x16_sub_saturate_s: - case SIMD_i8x16_sub_saturate_u: + case SIMD_i8x16_sub_sat_s: + case SIMD_i8x16_sub_sat_u: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_f64x2_ceil: + case SIMD_f64x2_floor: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + case SIMD_i8x16_min_s: case SIMD_i8x16_min_u: case SIMD_i8x16_max_s: case SIMD_i8x16_max_u: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_f64x2_trunc: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + case SIMD_i8x16_avgr_u: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_i16x8_extadd_pairwise_i8x16_s: + case SIMD_i16x8_extadd_pairwise_i8x16_u: + case SIMD_i32x4_extadd_pairwise_i16x8_s: + case SIMD_i32x4_extadd_pairwise_i16x8_u: + /* i16x8 operation */ + case SIMD_i16x8_abs: + case SIMD_i16x8_neg: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_i16x8_q15mulr_sat_s: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_i16x8_all_true: + case SIMD_i16x8_bitmask: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32); + break; + } + + case SIMD_i16x8_narrow_i32x4_s: + case SIMD_i16x8_narrow_i32x4_u: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_i16x8_extend_low_i8x16_s: + case SIMD_i16x8_extend_high_i8x16_s: + case SIMD_i16x8_extend_low_i8x16_u: + case SIMD_i16x8_extend_high_i8x16_u: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_i16x8_shl: + case SIMD_i16x8_shr_s: + case SIMD_i16x8_shr_u: + { + POP_I32(); + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + case SIMD_i16x8_add: - case SIMD_i16x8_add_saturate_s: - case SIMD_i16x8_add_saturate_u: + case SIMD_i16x8_add_sat_s: + case SIMD_i16x8_add_sat_u: case SIMD_i16x8_sub: - case SIMD_i16x8_sub_saturate_s: - case SIMD_i16x8_sub_saturate_u: + case SIMD_i16x8_sub_sat_s: + case SIMD_i16x8_sub_sat_u: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_f64x2_nearest: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + case SIMD_i16x8_mul: case SIMD_i16x8_min_s: case SIMD_i16x8_min_u: case SIMD_i16x8_max_s: case SIMD_i16x8_max_u: case SIMD_i16x8_avgr_u: + case SIMD_i16x8_extmul_low_i8x16_s: + case SIMD_i16x8_extmul_high_i8x16_s: + case SIMD_i16x8_extmul_low_i8x16_u: + case SIMD_i16x8_extmul_high_i8x16_u: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + /* i32x4 operation */ + case SIMD_i32x4_abs: + case SIMD_i32x4_neg: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_i32x4_all_true: + case SIMD_i32x4_bitmask: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32); + break; + } + + case SIMD_i32x4_narrow_i64x2_s: + case 
SIMD_i32x4_narrow_i64x2_u: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_i32x4_extend_low_i16x8_s: + case SIMD_i32x4_extend_high_i16x8_s: + case SIMD_i32x4_extend_low_i16x8_u: + case SIMD_i32x4_extend_high_i16x8_u: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_i32x4_shl: + case SIMD_i32x4_shr_s: + case SIMD_i32x4_shr_u: + { + POP_I32(); + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + case SIMD_i32x4_add: case SIMD_i32x4_sub: case SIMD_i32x4_mul: @@ -8380,31 +8598,137 @@ fail_data_cnt_sec_require: case SIMD_i32x4_min_u: case SIMD_i32x4_max_s: case SIMD_i32x4_max_u: + case SIMD_i32x4_dot_i16x8_s: + case SIMD_i32x4_avgr_u: + case SIMD_i32x4_extmul_low_i16x8_s: + case SIMD_i32x4_extmul_high_i16x8_s: + case SIMD_i32x4_extmul_low_i16x8_u: + case SIMD_i32x4_extmul_high_i16x8_u: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + /* i64x2 operation */ + case SIMD_i64x2_abs: + case SIMD_i64x2_neg: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_i64x2_all_true: + case SIMD_i64x2_bitmask: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32); + break; + } + + case SIMD_i64x2_extend_low_i32x4_s: + case SIMD_i64x2_extend_high_i32x4_s: + case SIMD_i64x2_extend_low_i32x4_u: + case SIMD_i64x2_extend_high_i32x4_u: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + case SIMD_i64x2_shl: + case SIMD_i64x2_shr_s: + case SIMD_i64x2_shr_u: + { + POP_I32(); + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + case SIMD_i64x2_add: case SIMD_i64x2_sub: case SIMD_i64x2_mul: + case SIMD_i64x2_eq: + case SIMD_i64x2_ne: + case SIMD_i64x2_lt_s: + case SIMD_i64x2_gt_s: + case SIMD_i64x2_le_s: + case SIMD_i64x2_ge_s: + case SIMD_i64x2_extmul_low_i32x4_s: + case SIMD_i64x2_extmul_high_i32x4_s: + case SIMD_i64x2_extmul_low_i32x4_u: + case SIMD_i64x2_extmul_high_i32x4_u: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + /* f32x4 operation */ + case SIMD_f32x4_abs: + case SIMD_f32x4_neg: + case SIMD_f32x4_round: + case SIMD_f32x4_sqrt: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + case SIMD_f32x4_add: case SIMD_f32x4_sub: case SIMD_f32x4_mul: case SIMD_f32x4_div: case SIMD_f32x4_min: case SIMD_f32x4_max: + case SIMD_f32x4_pmin: + case SIMD_f32x4_pmax: + { + POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + + /* f64x2 operation */ + case SIMD_f64x2_abs: + case SIMD_f64x2_neg: + case SIMD_f64x2_round: + case SIMD_f64x2_sqrt: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } + case SIMD_f64x2_add: case SIMD_f64x2_sub: case SIMD_f64x2_mul: case SIMD_f64x2_div: case SIMD_f64x2_min: case SIMD_f64x2_max: + case SIMD_f64x2_pmin: + case SIMD_f64x2_pmax: + { POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); break; + } + + case SIMD_i32x4_trunc_sat_f32x4_s: + case SIMD_i32x4_trunc_sat_f32x4_u: + case SIMD_f32x4_convert_i32x4_s: + case SIMD_f32x4_convert_i32x4_u: + case SIMD_i32x4_trunc_sat_f64x2_s_zero: + case SIMD_i32x4_trunc_sat_f64x2_u_zero: + case SIMD_f64x2_convert_low_i32x4_s: + case SIMD_f64x2_convert_low_i32x4_u: + { + POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); + break; + } default: + { if (error_buf != NULL) { snprintf(error_buf, error_buf_size, - "WASM module load failed: " - "invalid opcode 0xfd %02x.", opcode); + "WASM module load failed: " + "invalid opcode 0xfd %02x.", + opcode); } goto fail; + } } break; } diff --git a/core/iwasm/interpreter/wasm_opcode.h 
b/core/iwasm/interpreter/wasm_opcode.h index 5f5508230..46fe5da81 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -296,18 +296,18 @@ typedef enum WASMMiscEXTOpcode { typedef enum WASMSimdEXTOpcode { /* memory instruction */ - SIMD_v128_load = 0x00, - SIMD_i16x8_load8x8_s = 0x01, - SIMD_i16x8_load8x8_u = 0x02, - SIMD_i32x4_load16x4_s = 0x03, - SIMD_i32x4_load16x4_u = 0x04, - SIMD_i64x2_load32x2_s = 0x05, - SIMD_i64x2_load32x2_u = 0x06, - SIMD_v8x16_load_splat = 0x07, - SIMD_v16x8_load_splat = 0x08, - SIMD_v32x4_load_splat = 0x09, - SIMD_v64x2_load_splat = 0x0a, - SIMD_v128_store = 0x0b, + SIMD_v128_load = 0x00, + SIMD_v128_load8x8_s = 0x01, + SIMD_v128_load8x8_u = 0x02, + SIMD_v128_load16x4_s = 0x03, + SIMD_v128_load16x4_u = 0x04, + SIMD_v128_load32x2_s = 0x05, + SIMD_v128_load32x2_u = 0x06, + SIMD_v128_load8_splat = 0x07, + SIMD_v128_load16_splat = 0x08, + SIMD_v128_load32_splat = 0x09, + SIMD_v128_load64_splat = 0x0a, + SIMD_v128_store = 0x0b, /* basic operation */ SIMD_v128_const = 0x0c, @@ -391,107 +391,170 @@ typedef enum WASMSimdEXTOpcode { SIMD_f64x2_ge = 0x4c, /* v128 operation */ - SIMD_v128_not = 0x4d, - SIMD_v128_and = 0x4e, - SIMD_v128_andnot = 0x4f, - SIMD_v128_or = 0x50, - SIMD_v128_xor = 0x51, + SIMD_v128_not = 0x4d, + SIMD_v128_and = 0x4e, + SIMD_v128_andnot = 0x4f, + SIMD_v128_or = 0x50, + SIMD_v128_xor = 0x51, SIMD_v128_bitselect = 0x52, + SIMD_v128_any_true = 0x53, + + /* Load Lane Operation */ + SIMD_v128_load8_lane = 0x54, + SIMD_v128_load16_lane = 0x55, + SIMD_v128_load32_lane = 0x56, + SIMD_v128_load64_lane = 0x57, + SIMD_v128_store8_lane = 0x58, + SIMD_v128_store16_lane = 0x59, + SIMD_v128_store32_lane = 0x5a, + SIMD_v128_store64_lane = 0x5b, + SIMD_v128_load32_zero = 0x5c, + SIMD_v128_load64_zero = 0x5d, + + /* Float conversion */ + SIMD_f32x4_demote_f64x2_zero = 0x5e, + SIMD_f64x2_promote_low_f32x4_zero = 0x5f, /* i8x16 Operation */ SIMD_i8x16_abs = 0x60, SIMD_i8x16_neg = 0x61, - SIMD_i8x16_any_true = 0x62, + SIMD_i8x16_popcnt = 0x62, SIMD_i8x16_all_true = 0x63, SIMD_i8x16_bitmask = 0x64, SIMD_i8x16_narrow_i16x8_s = 0x65, SIMD_i8x16_narrow_i16x8_u = 0x66, + SIMD_f32x4_ceil = 0x67, + SIMD_f32x4_floor = 0x68, + SIMD_f32x4_trunc = 0x69, + SIMD_f32x4_nearest = 0x6a, SIMD_i8x16_shl = 0x6b, SIMD_i8x16_shr_s = 0x6c, SIMD_i8x16_shr_u = 0x6d, SIMD_i8x16_add = 0x6e, - SIMD_i8x16_add_saturate_s = 0x6f, - SIMD_i8x16_add_saturate_u = 0x70, + SIMD_i8x16_add_sat_s = 0x6f, + SIMD_i8x16_add_sat_u = 0x70, SIMD_i8x16_sub = 0x71, - SIMD_i8x16_sub_saturate_s = 0x72, - SIMD_i8x16_sub_saturate_u = 0x73, + SIMD_i8x16_sub_sat_s = 0x72, + SIMD_i8x16_sub_sat_u = 0x73, + SIMD_f64x2_ceil = 0x74, + SIMD_f64x2_floor = 0x75, SIMD_i8x16_min_s = 0x76, SIMD_i8x16_min_u = 0x77, SIMD_i8x16_max_s = 0x78, SIMD_i8x16_max_u = 0x79, + SIMD_f64x2_trunc = 0x7a, SIMD_i8x16_avgr_u = 0x7b, + SIMD_i16x8_extadd_pairwise_i8x16_s = 0x7c, + SIMD_i16x8_extadd_pairwise_i8x16_u = 0x7d, + SIMD_i32x4_extadd_pairwise_i16x8_s = 0x7e, + SIMD_i32x4_extadd_pairwise_i16x8_u = 0x7f, /* i16x8 operation */ SIMD_i16x8_abs = 0x80, SIMD_i16x8_neg = 0x81, - SIMD_i16x8_any_true = 0x82, + SIMD_i16x8_q15mulr_sat_s = 0x82, SIMD_i16x8_all_true = 0x83, SIMD_i16x8_bitmask = 0x84, SIMD_i16x8_narrow_i32x4_s = 0x85, SIMD_i16x8_narrow_i32x4_u = 0x86, - SIMD_i16x8_widen_low_i8x16_s = 0x87, - SIMD_i16x8_widen_high_i8x16_s = 0x88, - SIMD_i16x8_widen_low_i8x16_u = 0x89, - SIMD_i16x8_widen_high_i8x16_u = 0x8a, + SIMD_i16x8_extend_low_i8x16_s = 0x87, + SIMD_i16x8_extend_high_i8x16_s = 0x88, + 
SIMD_i16x8_extend_low_i8x16_u = 0x89, + SIMD_i16x8_extend_high_i8x16_u = 0x8a, SIMD_i16x8_shl = 0x8b, SIMD_i16x8_shr_s = 0x8c, SIMD_i16x8_shr_u = 0x8d, SIMD_i16x8_add = 0x8e, - SIMD_i16x8_add_saturate_s = 0x8f, - SIMD_i16x8_add_saturate_u = 0x90, + SIMD_i16x8_add_sat_s = 0x8f, + SIMD_i16x8_add_sat_u = 0x90, SIMD_i16x8_sub = 0x91, - SIMD_i16x8_sub_saturate_s = 0x92, - SIMD_i16x8_sub_saturate_u = 0x93, + SIMD_i16x8_sub_sat_s = 0x92, + SIMD_i16x8_sub_sat_u = 0x93, + SIMD_f64x2_nearest = 0x94, SIMD_i16x8_mul = 0x95, SIMD_i16x8_min_s = 0x96, SIMD_i16x8_min_u = 0x97, SIMD_i16x8_max_s = 0x98, SIMD_i16x8_max_u = 0x99, + /* placeholder = 0x9a */ SIMD_i16x8_avgr_u = 0x9b, + SIMD_i16x8_extmul_low_i8x16_s = 0x9c, + SIMD_i16x8_extmul_high_i8x16_s = 0x9d, + SIMD_i16x8_extmul_low_i8x16_u = 0x9e, + SIMD_i16x8_extmul_high_i8x16_u = 0x9f, /* i32x4 operation */ SIMD_i32x4_abs = 0xa0, SIMD_i32x4_neg = 0xa1, - SIMD_i32x4_any_true = 0xa2, + /* placeholder = 0xa2 */ SIMD_i32x4_all_true = 0xa3, SIMD_i32x4_bitmask = 0xa4, - SIMD_i32x4_widen_low_i16x8_s = 0xa7, - SIMD_i32x4_widen_high_i16x8_s = 0xa8, - SIMD_i32x4_widen_low_i16x8_u = 0xa9, - SIMD_i32x4_widen_high_i16x8_u = 0xaa, + SIMD_i32x4_narrow_i64x2_s = 0xa5, + SIMD_i32x4_narrow_i64x2_u = 0xa6, + SIMD_i32x4_extend_low_i16x8_s = 0xa7, + SIMD_i32x4_extend_high_i16x8_s = 0xa8, + SIMD_i32x4_extend_low_i16x8_u = 0xa9, + SIMD_i32x4_extend_high_i16x8_u = 0xaa, SIMD_i32x4_shl = 0xab, SIMD_i32x4_shr_s = 0xac, SIMD_i32x4_shr_u = 0xad, SIMD_i32x4_add = 0xae, + SIMD_i32x4_add_sat_s = 0xaf, + SIMD_i32x4_add_sat_u = 0xb0, SIMD_i32x4_sub = 0xb1, + SIMD_i32x4_sub_sat_s = 0xb2, + SIMD_i32x4_sub_sat_u = 0xb3, + /* placeholder = 0xb4 */ SIMD_i32x4_mul = 0xb5, SIMD_i32x4_min_s = 0xb6, SIMD_i32x4_min_u = 0xb7, SIMD_i32x4_max_s = 0xb8, SIMD_i32x4_max_u = 0xb9, + SIMD_i32x4_dot_i16x8_s = 0xba, + SIMD_i32x4_avgr_u = 0xbb, + SIMD_i32x4_extmul_low_i16x8_s = 0xbc, + SIMD_i32x4_extmul_high_i16x8_s = 0xbd, + SIMD_i32x4_extmul_low_i16x8_u = 0xbe, + SIMD_i32x4_extmul_high_i16x8_u = 0xbf, /* i64x2 operation */ - SIMD_i64x2_neg = 0xc1, - SIMD_i64x2_shl = 0xcb, - SIMD_i64x2_shr_s = 0xcc, - SIMD_i64x2_shr_u = 0xcd, - SIMD_i64x2_add = 0xce, - SIMD_i64x2_sub = 0xd1, - SIMD_i64x2_mul = 0xd5, - - /* float ceil/floor/trunc/nearest */ - SIMD_f32x4_ceil = 0xd8, - SIMD_f32x4_floor = 0xd9, - SIMD_f32x4_trunc = 0xda, - SIMD_f32x4_nearest = 0xdb, - SIMD_f64x2_ceil = 0xdc, - SIMD_f64x2_floor = 0xdd, - SIMD_f64x2_trunc = 0xde, - SIMD_f64x2_nearest = 0xdf, + SIMD_i64x2_abs = 0xc0, + SIMD_i64x2_neg = 0xc1, + /* placeholder = 0xc2 */ + SIMD_i64x2_all_true = 0xc3, + SIMD_i64x2_bitmask = 0xc4, + /* placeholder = 0xc5 */ + /* placeholder = 0xc6 */ + SIMD_i64x2_extend_low_i32x4_s = 0xc7, + SIMD_i64x2_extend_high_i32x4_s = 0xc8, + SIMD_i64x2_extend_low_i32x4_u = 0xc9, + SIMD_i64x2_extend_high_i32x4_u = 0xca, + SIMD_i64x2_shl = 0xcb, + SIMD_i64x2_shr_s = 0xcc, + SIMD_i64x2_shr_u = 0xcd, + SIMD_i64x2_add = 0xce, + /* placeholder = 0xcf */ + /* placeholder = 0xd0 */ + SIMD_i64x2_sub = 0xd1, + /* placeholder = 0xd2 */ + /* placeholder = 0xd3 */ + /* placeholder = 0xd4 */ + SIMD_i64x2_mul = 0xd5, + SIMD_i64x2_eq = 0xd6, + SIMD_i64x2_ne = 0xd7, + SIMD_i64x2_lt_s = 0xd8, + SIMD_i64x2_gt_s = 0xd9, + SIMD_i64x2_le_s = 0xda, + SIMD_i64x2_ge_s = 0xdb, + SIMD_i64x2_extmul_low_i32x4_s = 0xdc, + SIMD_i64x2_extmul_high_i32x4_s = 0xdd, + SIMD_i64x2_extmul_low_i32x4_u = 0xde, + SIMD_i64x2_extmul_high_i32x4_u = 0xdf, /* f32x4 operation */ SIMD_f32x4_abs = 0xe0, SIMD_f32x4_neg = 0xe1, + SIMD_f32x4_round = 0xe2, SIMD_f32x4_sqrt = 0xe3, 
SIMD_f32x4_add = 0xe4, SIMD_f32x4_sub = 0xe5, @@ -499,10 +562,13 @@ typedef enum WASMSimdEXTOpcode { SIMD_f32x4_div = 0xe7, SIMD_f32x4_min = 0xe8, SIMD_f32x4_max = 0xe9, + SIMD_f32x4_pmin = 0xea, + SIMD_f32x4_pmax = 0xeb, /* f64x2 operation */ SIMD_f64x2_abs = 0xec, SIMD_f64x2_neg = 0xed, + SIMD_f64x2_round = 0xee, SIMD_f64x2_sqrt = 0xef, SIMD_f64x2_add = 0xf0, SIMD_f64x2_sub = 0xf1, @@ -510,12 +576,18 @@ typedef enum WASMSimdEXTOpcode { SIMD_f64x2_div = 0xf3, SIMD_f64x2_min = 0xf4, SIMD_f64x2_max = 0xf5, + SIMD_f64x2_pmin = 0xf6, + SIMD_f64x2_pmax = 0xf7, /* conversion operation */ - SIMD_i32x4_trunc_sat_f32x4_s = 0xf8, - SIMD_i32x4_trunc_sat_f32x4_u = 0xf9, - SIMD_f32x4_convert_i32x4_s = 0xfa, - SIMD_f32x4_convert_i32x4_u = 0xfb, + SIMD_i32x4_trunc_sat_f32x4_s = 0xf8, + SIMD_i32x4_trunc_sat_f32x4_u = 0xf9, + SIMD_f32x4_convert_i32x4_s = 0xfa, + SIMD_f32x4_convert_i32x4_u = 0xfb, + SIMD_i32x4_trunc_sat_f64x2_s_zero = 0xfc, + SIMD_i32x4_trunc_sat_f64x2_u_zero = 0xfd, + SIMD_f64x2_convert_low_i32x4_s = 0xfe, + SIMD_f64x2_convert_low_i32x4_u = 0xff, } WASMSimdEXTOpcode; typedef enum WASMAtomicEXTOpcode { diff --git a/doc/build_wamr.md b/doc/build_wamr.md index 9587b439b..f11c9e640 100644 --- a/doc/build_wamr.md +++ b/doc/build_wamr.md @@ -258,6 +258,7 @@ Then build the source codes: ``` Bash cd core/deps/ git clone https://github.com/nodejs/uvwasi.git + cd product-mini/platforms/windows/ mkdir build cd build diff --git a/doc/build_wasm_app.md b/doc/build_wasm_app.md index 75345a384..2ceb51f9c 100644 --- a/doc/build_wasm_app.md +++ b/doc/build_wasm_app.md @@ -6,6 +6,11 @@ For C and C++, WASI-SDK version 12.0+ is the major tool supported by WAMR to bui To install WASI SDK, please download the [wasi-sdk release](https://github.com/CraneStation/wasi-sdk/releases) and extract the archive to default path `/opt/wasi-sdk`. +The official *wasi-sdk release* doesn't fully support the *latest 128-bit SIMD spec* yet. WAMR provides a script in [build-wasi-sdk](../test-tools/build-wasi-sdk/) to build +another wasi-sdk with *llvm-13* from source and install it at *../test-tools/wasi-sdk*. If you plan to build WASM applications with the *latest 128-bit SIMD*, please use it instead of the official release. + +The [sample workloads](../samples/workload) use this self-compiled wasi-sdk. + For [AssemblyScript](https://github.com/AssemblyScript/assemblyscript), please refer to [AssemblyScript quick start](https://www.assemblyscript.org/quick-start.html) and [AssemblyScript compiler](https://www.assemblyscript.org/compiler.html#command-line-options) for how to install `asc` compiler and build WASM applications. For Rust, please firstly ref to [Install Rust and Cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html) to install cargo, rustc and rustup, by default they are installed under ~/.cargo/bin, and then run `rustup target add wasm32-wasi` to install wasm32-wasi target for Rust toolchain. To build WASM applications, we can run `cargo build --target wasm32-wasi`, the output files are under `target/wasm32-wasi`. diff --git a/product-mini/platforms/android/build_llvm.sh b/product-mini/platforms/android/build_llvm.sh index d58579264..dd4fc0907 100755 --- a/product-mini/platforms/android/build_llvm.sh +++ b/product-mini/platforms/android/build_llvm.sh @@ -1,43 +1,6 @@ #!/bin/sh -# Copyright (C) 2019 Intel Corporation. All rights reserved. +# Copyright (C) 2020 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -DEPS_DIR=${PWD}/../../../core/deps - -cd ${DEPS_DIR} -if [ ! -d "llvm" ]; then - echo "Clone llvm to core/deps/ .." - git clone https://github.com/llvm-mirror/llvm.git -fi - -cd llvm -mkdir -p build -cd build - -if [ ! -f bin/llvm-lto ]; then - - CORE_NUM=$(nproc --all) - if [ -z "${CORE_NUM}" ]; then - CORE_NUM=1 - fi - - echo "Build llvm with" ${CORE_NUM} "cores" - - cmake .. \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -DCMAKE_BUILD_TYPE:STRING="Release" \ - -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \ - -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \ - -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \ - -DLLVM_INCLUDE_TESTS:BOOL=OFF \ - -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \ - -DLLVM_APPEND_VC_REV:BOOL=OFF - make -j ${CORE_NUM} - -else - echo "llvm has already been built" -fi - -cd ${PWD} - +/usr/bin/env python3 ../../../build-scripts/build_llvm.py --platform android diff --git a/product-mini/platforms/darwin/build_jit.sh b/product-mini/platforms/darwin/build_jit.sh new file mode 100755 index 000000000..908d1560c --- /dev/null +++ b/product-mini/platforms/darwin/build_jit.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +rm -fr build && mkdir build +cd build +cmake .. -DWAMR_BUILD_JIT=1 +make +cd .. diff --git a/product-mini/platforms/darwin/build_llvm.sh b/product-mini/platforms/darwin/build_llvm.sh new file mode 100755 index 000000000..f037b4ff6 --- /dev/null +++ b/product-mini/platforms/darwin/build_llvm.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +# Copyright (C) 2020 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +/usr/bin/env python3 ../../../build-scripts/build_llvm.py --platform darwin diff --git a/product-mini/platforms/linux/build_llvm.sh b/product-mini/platforms/linux/build_llvm.sh index f4e4ee533..70371529f 100755 --- a/product-mini/platforms/linux/build_llvm.sh +++ b/product-mini/platforms/linux/build_llvm.sh @@ -3,44 +3,4 @@ # Copyright (C) 2020 Intel Corporation. All rights reserved. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -DEPS_DIR=${PWD}/../../../core/deps - -cd ${DEPS_DIR} -if [ ! -d "llvm" ]; then - echo "Clone llvm to core/deps/ .." - git clone --depth 1 --branch release/11.x https://github.com/llvm/llvm-project.git llvm -fi - -cd llvm -mkdir -p build -cd build - -if [ ! -f bin/llvm-lto ]; then - - CORE_NUM=$(nproc --all) - if [ -z "${CORE_NUM}" ]; then - CORE_NUM=1 - fi - - echo "Build llvm with" ${CORE_NUM} "cores" - - cmake ../llvm \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -DCMAKE_BUILD_TYPE:STRING="Release" \ - -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips" \ - -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \ - -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \ - -DLLVM_ENABLE_ZLIB:BOOL=OFF \ - -DLLVM_INCLUDE_DOCS:BOOL=OFF \ - -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \ - -DLLVM_INCLUDE_TESTS:BOOL=OFF \ - -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \ - -DLLVM_APPEND_VC_REV:BOOL=OFF - make -j ${CORE_NUM} - -else - echo "llvm has already been built" -fi - -cd ${PWD} - +/usr/bin/env python3 ../../../build-scripts/build_llvm.py diff --git a/product-mini/platforms/windows/build_llvm.py b/product-mini/platforms/windows/build_llvm.py index f476e9e6c..9325a0209 100644 --- a/product-mini/platforms/windows/build_llvm.py +++ b/product-mini/platforms/windows/build_llvm.py @@ -1,69 +1,16 @@ +#!/usr/bin/env python3 # # Copyright (C) 2019 Intel Corporation. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -#!/usr/bin/env python3 -import os +import pathlib +import subprocess import sys -from pathlib import Path -def clone_llvm(): - llvm_dir = Path("llvm") - if(llvm_dir.exists() == False): - print("Clone llvm to core/deps/ ..") - for line in os.popen("git clone --branch release/11.x https://github.com/llvm/llvm-project.git llvm"): - print(line) - else: - print("llvm source codes already existed") - return llvm_dir - -def main(): - current_os = sys.platform - print("current OS is ", current_os) - - current_dir = Path.cwd() - deps_dir = current_dir.joinpath( "../../../core/deps") - - os.chdir(deps_dir) - llvm_dir = clone_llvm() - os.chdir(llvm_dir) - - build_dir_name = "win32build" - llvm_file = "LLVM.sln" - - Path(build_dir_name).mkdir(exist_ok = True) - build_dir = Path(build_dir_name) - os.chdir(build_dir) - - if ( not Path(llvm_file).exists()): - core_number = os.cpu_count() - print("Build llvm with", core_number, " cores") - cmd = 'cmake ../llvm \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -DCMAKE_BUILD_TYPE:STRING="Release" \ - -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips" \ - -DLLVM_INCLUDE_GO_TESTS=OFF \ - -DLLVM_INCLUDE_TOOLS=OFF \ - -DLLVM_INCLUDE_UTILS=OFF \ - -DLLVM_ENABLE_TERMINFO=OFF \ - -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \ - -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \ - -DLLVM_ENABLE_ZLIB:BOOL=OFF \ - -DLLVM_INCLUDE_DOCS:BOOL=OFF \ - -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \ - -DLLVM_INCLUDE_TESTS:BOOL=OFF \ - -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \ - -DLLVM_APPEND_VC_REV:BOOL=OFF' - print(cmd) - for line in os.popen(cmd): - print(line) - else: - print("llvm has already been Cmaked") - - print("Please open LLVM.sln in {} to build *Release* version".format(build_dir.absolute())) - - os.chdir(current_dir) - -if __name__ == "__main__": - main() +script = ( + pathlib.Path(__file__) + .parent.joinpath("../../../build-scripts/build_llvm.py") + .resolve() +) +subprocess.check_call([sys.executable, script]) diff --git a/samples/workload/README.md b/samples/workload/README.md index d93ef805e..dd94e05ca 100644 --- a/samples/workload/README.md +++ b/samples/workload/README.md @@ -1,5 +1,5 @@ All workloads have similar requirment of software dependencies, including -**wasi-sdk**, **emsdk**, **wabt** and **binaryen** +**emsdk**, **wabt** and **binaryen** > There might be slight differences when using MacOS and other Linux distro than Ubuntu. This document only target Ubuntu 18.04 as example. @@ -10,16 +10,6 @@ use [preparation.sh](./preparation.sh) to install all dependencies before compil for details, the script includes below steps: -- **wasi-sdk**. Install - [latest release](https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-12/wasi-sdk-12.0-linux.tar.gz) - to */opt/wasi-sdk* - -``` bash -$ wget https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VER}/${WASI_SDK_FILE} -$ tar zxf ${WASI_SDK_FILE} -C /opt -$ ln -sf /opt/wasi-sdk-${WASI_SDK_VER}.0 /opt/wasi-sdk -``` - - **wabt**. Install [latest release](https://github.com/WebAssembly/wabt/releases/download/1.0.23/wabt-1.0.23-ubuntu.tar.gz) to */opt/wabt* @@ -32,15 +22,15 @@ $ ln -sf /opt/wabt-${WABT_VER} /opt/wabt - **emsdk**. Refer to [the guide](https://emscripten.org/docs/getting_started/downloads.html). Don't forget to activate emsdk and set up environment variables. Verify it with `echo ${EMSDK}`. 
Please be sure to install and activate the building - of 2.0.12 + of 2.0.26 ``` bash $ cd /opt $ git clone https://github.com/emscripten-core/emsdk.git $ cd emsdk $ git pull -$ ./emsdk install 2.0.12 -$ ./emsdk activate 2.0.12 +$ ./emsdk install 2.0.26 +$ ./emsdk activate 2.0.26 $ echo "source /opt/emsdk/emsdk_env.sh" >> "${HOME}"/.bashrc ``` diff --git a/samples/workload/XNNPACK/CMakeLists.txt b/samples/workload/XNNPACK/CMakeLists.txt index 4a71e6127..054d32699 100644 --- a/samples/workload/XNNPACK/CMakeLists.txt +++ b/samples/workload/XNNPACK/CMakeLists.txt @@ -6,85 +6,87 @@ cmake_minimum_required (VERSION 3.0) project(xnnpack_wasm) ################ EMCC ################ -if(NOT DEFINED ENV{EMSDK}) - message(SEND_ERROR - "can not find emsdk. " - "please refer to https://emscripten.org/docs/getting_started/downloads.html " - "and install it, " - "or active emsdk by 'source ./emsdk_env.sh'" - ) -endif() - include(ExternalProject) ExternalProject_Add(xnnpack PREFIX xnnpack GIT_REPOSITORY https://github.com/google/XNNPACK.git - GIT_TAG 90f520b6482bb99ac1bbfb71be1382f6c9b83241 + GIT_TAG master GIT_PROGRESS ON SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack UPDATE_COMMAND git checkout . + && cmake -E copy ${CMAKE_CURRENT_SOURCE_DIR}/benchmark.patch ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack/third_party && git apply ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack.patch CONFIGURE_COMMAND "" + # grep xnnpack_benchmark -A 1 BUILD.bazel \ + # | grep "name =" \ + # | awk '{print $3}' \ + # | sed -e 's/\"//g' -e 's/,//g' -e 's/^/\/\/:/g' BUILD_COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack && bazel --output_user_root=build_user_output build -c opt --config=wasm - //:qs8_gemm_bench.wasm - //:qs8_requantization_bench.wasm - //:qu8_gemm_bench.wasm - //:qu8_requantization_bench.wasm - //:f16_igemm_bench.wasm - //:f16_gemm_bench.wasm - //:f16_spmm_bench.wasm - //:f32_igemm_bench.wasm - //:f16_relu_bench.wasm - //:f32_conv_hwc_bench.wasm - //:f32_conv_hwc2chw_bench.wasm - //:f16_dwconv_bench.wasm - //:f32_dwconv_bench.wasm - //:f32_dwconv2d_chw_bench.wasm - //:f32_gemm_bench.wasm - //:f32_hswish_bench.wasm - //:f32_raddexpminusmax_bench.wasm - //:f32_raddextexp_bench.wasm - //:f32_raddstoreexpminusmax_bench.wasm - //:f32_relu_bench.wasm - //:f32_rmax_bench.wasm - //:f32_sigmoid_bench.wasm - //:f32_spmm_bench.wasm - //:f32_softmax_bench.wasm - //:f32_velu_bench.wasm - //:f32_vscaleexpminusmax_bench.wasm - //:f32_vscaleextexp_bench.wasm - //:f32_vsqrt_bench.wasm - //:f32_im2col_gemm_bench.wasm - //:rounding_bench.wasm - //:average_pooling_bench.wasm - //:bankers_rounding_bench.wasm - //:ceiling_bench.wasm - //:channel_shuffle_bench.wasm - //:convolution_bench.wasm - //:deconvolution_bench.wasm - //:elu_bench.wasm - //:floor_bench.wasm - //:global_average_pooling_bench.wasm - //:hardswish_bench.wasm - //:max_pooling_bench.wasm - //:sigmoid_bench.wasm - //:prelu_bench.wasm - //:softmax_bench.wasm - //:square_root_bench.wasm - //:truncation_bench.wasm - //:f32_dwconv_e2e_bench.wasm - //:f32_gemm_e2e_bench.wasm - //:qs8_gemm_e2e_bench.wasm - //:end2end_bench.wasm - //:f32_exp_ulp_eval.wasm - //:f32_expminus_ulp_eval.wasm - //:f32_expm1minus_ulp_eval.wasm - //:f32_extexp_ulp_eval.wasm - //:f32_sigmoid_ulp_eval.wasm - //:f32_sqrt_ulp_eval.wasm - #--sandbox_debug + //:qs8_dwconv_bench.wasm + //:qs8_gemm_bench.wasm + //:qs8_requantization_bench.wasm + //:qs8_vadd_bench.wasm + //:qs8_vaddc_bench.wasm + //:qu8_gemm_bench.wasm + //:qu8_requantization_bench.wasm + //:qu8_vadd_bench.wasm + //:qu8_vaddc_bench.wasm + //:f16_igemm_bench.wasm + 
//:f16_gemm_bench.wasm + //:f16_spmm_bench.wasm + //:f16_vrelu_bench.wasm + //:f32_igemm_bench.wasm + //:f32_conv_hwc_bench.wasm + //:f32_conv_hwc2chw_bench.wasm + //:f16_dwconv_bench.wasm + //:f32_dwconv_bench.wasm + //:f32_dwconv2d_chw_bench.wasm + //:f32_gemm_bench.wasm + //:f32_raddexpminusmax_bench.wasm + //:f32_raddextexp_bench.wasm + //:f32_raddstoreexpminusmax_bench.wasm + //:f32_rmax_bench.wasm + //:f32_spmm_bench.wasm + //:f32_softmax_bench.wasm + //:f32_velu_bench.wasm + //:f32_vhswish_bench.wasm + //:f32_vrelu_bench.wasm + //:f32_vscaleexpminusmax_bench.wasm + //:f32_vscaleextexp_bench.wasm + //:f32_vsigmoid_bench.wasm + //:f32_vsqrt_bench.wasm + //:f32_im2col_gemm_bench.wasm + //:rounding_bench.wasm + //:average_pooling_bench.wasm + //:bankers_rounding_bench.wasm + //:ceiling_bench.wasm + //:channel_shuffle_bench.wasm + //:convolution_bench.wasm + //:deconvolution_bench.wasm + //:elu_bench.wasm + //:floor_bench.wasm + //:global_average_pooling_bench.wasm + //:hardswish_bench.wasm + //:max_pooling_bench.wasm + //:sigmoid_bench.wasm + //:prelu_bench.wasm + //:softmax_bench.wasm + //:square_root_bench.wasm + //:truncation_bench.wasm + //:f32_dwconv_e2e_bench.wasm + //:f32_gemm_e2e_bench.wasm + //:qs8_dwconv_e2e_bench.wasm + //:qs8_gemm_e2e_bench.wasm + //:qu8_dwconv_e2e_bench.wasm + //:end2end_bench.wasm + //:f32_exp_ulp_eval.wasm + //:f32_expminus_ulp_eval.wasm + //:f32_expm1minus_ulp_eval.wasm + //:f32_extexp_ulp_eval.wasm + //:f32_sigmoid_ulp_eval.wasm + //:f32_sqrt_ulp_eval.wasm INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack/bazel-out/wasm-opt/bin/ ${CMAKE_CURRENT_SOURCE_DIR}/build/wasm-opt diff --git a/samples/workload/XNNPACK/README.md b/samples/workload/XNNPACK/README.md index ffbe23610..f6a207293 100644 --- a/samples/workload/XNNPACK/README.md +++ b/samples/workload/XNNPACK/README.md @@ -5,20 +5,7 @@ This sample demonstrates how to build [XNNPACK](https://github.com/google/XNNPAC ## Installation toolchains -- **bazel**. Please install bazel from [latest release](https://github.com/bazelbuild/bazel/releases) - -- **emsdk**. Please install [emsdk](https://github.com/emscripten-core/emsdk) to /opt/emsdk: -```bash -cd /opt -git clone https://github.com/emscripten-core/emsdk.git -cd emsdk -./emsdk install latest -./emsdk activate latest -``` -And set up ensdk environment: -```bash -source /opt/emsdk/emsdk_env.sh -``` +please refer to [installation instructions](../README.md). ## Build XNNPACK diff --git a/samples/workload/XNNPACK/benchmark.patch b/samples/workload/XNNPACK/benchmark.patch new file mode 100644 index 000000000..713b476d2 --- /dev/null +++ b/samples/workload/XNNPACK/benchmark.patch @@ -0,0 +1,14 @@ +diff --git include/benchmark/benchmark.h include/benchmark/benchmark.h +index 9b54802..baa5938 100755 +--- include/benchmark/benchmark.h ++++ include/benchmark/benchmark.h +@@ -364,7 +364,9 @@ template + inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + internal::UseCharPointer(&reinterpret_cast(value)); + } ++ + // FIXME Add ClobberMemory() for non-gnu and non-msvc compilers ++inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { } + #endif + + // This class is used for user-defined counters. 
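// Editor's note -- hedged example, not from XNNPACK or this patch: the
// benchmark.patch above stubs out ClobberMemory() for compilers without the
// gnu/msvc inline-asm path, so the wasm benchmark builds still compile.
// A typical google/benchmark micro-benchmark relies on DoNotOptimize() and
// ClobberMemory() like this (BM_fill is a hypothetical benchmark):
#include <algorithm>
#include <vector>
#include <benchmark/benchmark.h>

static void BM_fill(benchmark::State &state)
{
    std::vector<int> v(1024);
    for (auto _ : state) {
        std::fill(v.begin(), v.end(), 42);
        benchmark::DoNotOptimize(v.data()); // keep the stores observable
        benchmark::ClobberMemory();         // a no-op under the wasm stub
    }
}
BENCHMARK(BM_fill);
BENCHMARK_MAIN();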
diff --git a/samples/workload/XNNPACK/build_workload.sh b/samples/workload/XNNPACK/build_workload.sh new file mode 120000 index 000000000..a31afa928 --- /dev/null +++ b/samples/workload/XNNPACK/build_workload.sh @@ -0,0 +1 @@ +../docker/build_workload.sh \ No newline at end of file diff --git a/samples/workload/XNNPACK/docker_build.sh b/samples/workload/XNNPACK/docker_build.sh deleted file mode 120000 index 3c6de9bca..000000000 --- a/samples/workload/XNNPACK/docker_build.sh +++ /dev/null @@ -1 +0,0 @@ -../docker/docker_build.sh \ No newline at end of file diff --git a/samples/workload/XNNPACK/xnnpack.patch b/samples/workload/XNNPACK/xnnpack.patch index 0f36acd41..cab32b7ff 100644 --- a/samples/workload/XNNPACK/xnnpack.patch +++ b/samples/workload/XNNPACK/xnnpack.patch @@ -1,8 +1,8 @@ diff --git a/.bazelrc b/.bazelrc -index ec740f38..2c193244 100644 +index ec740f38..29f9d56e 100644 --- a/.bazelrc +++ b/.bazelrc -@@ -49,4 +49,10 @@ build:ios_fat --watchos_cpus=armv7k +@@ -49,4 +49,9 @@ build:ios_fat --watchos_cpus=armv7k build:macos --apple_platform_type=macos build:macos_arm64 --config=macos @@ -10,558 +10,31 @@ index ec740f38..2c193244 100644 \ No newline at end of file +build:macos_arm64 --cpu=darwin_arm64 + -+build:wasm --copt=-msimd128 +build:wasm --cpu=wasm ++build:wasm --copt=-msimd128 +build:wasm --crosstool_top=@emsdk//emscripten_toolchain:everything +build:wasm --host_crosstool_top=@bazel_tools//tools/cpp:toolchain -+ -diff --git a/BUILD.bazel b/BUILD.bazel -index 1f2b15a8..e7abf838 100644 ---- a/BUILD.bazel -+++ b/BUILD.bazel -@@ -4996,7 +4996,7 @@ xnnpack_cc_library( - ######################### Benchmarks for micro-kernels ######################### - - xnnpack_benchmark( -- name = "qs8_gemm_bench", -+ name = "qs8_gemm_bench.wasm", - srcs = [ - "bench/gemm.h", - "bench/qs8-gemm.cc", -@@ -5007,7 +5007,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "qs8_requantization_bench", -+ name = "qs8_requantization_bench.wasm", - srcs = [ - "bench/qs8-requantization.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5017,7 +5017,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "qu8_gemm_bench", -+ name = "qu8_gemm_bench.wasm", - srcs = [ - "bench/gemm.h", - "bench/qu8-gemm.cc", -@@ -5028,7 +5028,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "qu8_requantization_bench", -+ name = "qu8_requantization_bench.wasm", - srcs = [ - "bench/qu8-requantization.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5038,11 +5038,10 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f16_igemm_bench", -+ name = "f16_igemm_bench.wasm", - srcs = [ - "bench/f16-igemm.cc", - "bench/conv.h", -- "bench/google/conv.h", - "src/xnnpack/AlignedAllocator.h", - ] + MICROKERNEL_BENCHMARK_HDRS, - deps = MICROKERNEL_BENCHMARK_DEPS + [ -@@ -5052,7 +5051,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f16_gemm_bench", -+ name = "f16_gemm_bench.wasm", - srcs = [ - "bench/f16-gemm.cc", - "bench/gemm.h", -@@ -5064,7 +5063,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f16_spmm_bench", -+ name = "f16_spmm_bench.wasm", - srcs = [ - "bench/f16-spmm.cc", - "bench/spmm.h", -@@ -5074,7 +5073,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_igemm_bench", -+ name = "f32_igemm_bench.wasm", - srcs = [ - "bench/f32-igemm.cc", - "bench/conv.h", -@@ -5087,7 +5086,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f16_relu_bench", -+ name = "f16_relu_bench.wasm", - srcs = [ - "bench/f16-relu.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5096,7 
+5095,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_conv_hwc_bench", -+ name = "f32_conv_hwc_bench.wasm", - srcs = [ - "bench/f32-conv-hwc.cc", - "bench/dconv.h", -@@ -5108,7 +5107,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_conv_hwc2chw_bench", -+ name = "f32_conv_hwc2chw_bench.wasm", - srcs = [ - "bench/f32-conv-hwc2chw.cc", - "bench/dconv.h", -@@ -5120,11 +5119,10 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f16_dwconv_bench", -+ name = "f16_dwconv_bench.wasm", - srcs = [ - "bench/f16-dwconv.cc", - "bench/dwconv.h", -- "bench/google/dwconv.h", - "src/xnnpack/AlignedAllocator.h", - ] + MICROKERNEL_BENCHMARK_HDRS, - deps = MICROKERNEL_BENCHMARK_DEPS + [ -@@ -5134,7 +5132,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_dwconv_bench", -+ name = "f32_dwconv_bench.wasm", - srcs = [ - "bench/f32-dwconv.cc", - "bench/dwconv.h", -@@ -5147,7 +5145,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_dwconv2d_chw_bench", -+ name = "f32_dwconv2d_chw_bench.wasm", - srcs = [ - "bench/f32-dwconv2d-chw.cc", - "bench/dwconv.h", -@@ -5160,7 +5158,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_gemm_bench", -+ name = "f32_gemm_bench.wasm", - srcs = [ - "bench/f32-gemm.cc", - "bench/gemm.h", -@@ -5171,7 +5169,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_hswish_bench", -+ name = "f32_hswish_bench.wasm", - srcs = [ - "bench/f32-hswish.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5180,7 +5178,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_raddexpminusmax_bench", -+ name = "f32_raddexpminusmax_bench.wasm", - srcs = [ - "bench/f32-raddexpminusmax.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5189,7 +5187,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_raddextexp_bench", -+ name = "f32_raddextexp_bench.wasm", - srcs = [ - "bench/f32-raddextexp.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5198,7 +5196,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_raddstoreexpminusmax_bench", -+ name = "f32_raddstoreexpminusmax_bench.wasm", - srcs = [ - "bench/f32-raddstoreexpminusmax.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5207,7 +5205,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_relu_bench", -+ name = "f32_relu_bench.wasm", - srcs = [ - "bench/f32-relu.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5216,7 +5214,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_rmax_bench", -+ name = "f32_rmax_bench.wasm", - srcs = [ - "bench/f32-rmax.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5225,7 +5223,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_sigmoid_bench", -+ name = "f32_sigmoid_bench.wasm", - srcs = [ - "bench/f32-sigmoid.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5234,7 +5232,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_spmm_bench", -+ name = "f32_spmm_bench.wasm", - srcs = [ - "bench/f32-spmm.cc", - "bench/spmm.h", -@@ -5244,7 +5242,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_softmax_bench", -+ name = "f32_softmax_bench.wasm", - srcs = [ - "bench/f32-softmax.cc", - ] + MICROKERNEL_BENCHMARK_HDRS, -@@ -5253,7 +5251,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_velu_bench", -+ name = "f32_velu_bench.wasm", - srcs = [ - "bench/f32-velu.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5262,7 +5260,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_vscaleexpminusmax_bench", -+ name = 
"f32_vscaleexpminusmax_bench.wasm", - srcs = [ - "bench/f32-vscaleexpminusmax.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5271,7 +5269,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_vscaleextexp_bench", -+ name = "f32_vscaleextexp_bench.wasm", - srcs = [ - "bench/f32-vscaleextexp.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5280,7 +5278,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_vsqrt_bench", -+ name = "f32_vsqrt_bench.wasm", - srcs = [ - "bench/f32-vsqrt.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5289,7 +5287,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_im2col_gemm_bench", -+ name = "f32_im2col_gemm_bench.wasm", - srcs = [ - "bench/f32-im2col-gemm.cc", - "bench/conv.h", -@@ -5302,7 +5300,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "rounding_bench", -+ name = "rounding_bench.wasm", - srcs = [ - "bench/rounding.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5314,7 +5312,7 @@ xnnpack_benchmark( - ########################### Benchmarks for operators ########################### - - xnnpack_benchmark( -- name = "average_pooling_bench", -+ name = "average_pooling_bench.wasm", - srcs = ["bench/average-pooling.cc"], - copts = xnnpack_optional_tflite_copts(), - tags = ["nowin32"], -@@ -5322,7 +5320,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "bankers_rounding_bench", -+ name = "bankers_rounding_bench.wasm", - srcs = ["bench/bankers-rounding.cc"], - copts = xnnpack_optional_tflite_copts(), - tags = ["nowin32"], -@@ -5330,7 +5328,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "ceiling_bench", -+ name = "ceiling_bench.wasm", - srcs = ["bench/ceiling.cc"], - copts = xnnpack_optional_tflite_copts(), - tags = ["nowin32"], -@@ -5338,13 +5336,13 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "channel_shuffle_bench", -+ name = "channel_shuffle_bench.wasm", - srcs = ["bench/channel-shuffle.cc"], - deps = OPERATOR_BENCHMARK_DEPS, - ) - - xnnpack_benchmark( -- name = "convolution_bench", -+ name = "convolution_bench.wasm", - srcs = ["bench/convolution.cc"], - copts = xnnpack_optional_tflite_copts() + xnnpack_optional_armcl_copts(), - tags = ["nowin32"], -@@ -5352,7 +5350,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "deconvolution_bench", -+ name = "deconvolution_bench.wasm", - srcs = ["bench/deconvolution.cc"], - copts = xnnpack_optional_tflite_copts(), - tags = ["nowin32"], -@@ -5360,7 +5358,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "elu_bench", -+ name = "elu_bench.wasm", - srcs = ["bench/elu.cc"], - copts = xnnpack_optional_tflite_copts(), - tags = ["nowin32"], -@@ -5368,7 +5366,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "floor_bench", -+ name = "floor_bench.wasm", - srcs = ["bench/floor.cc"], - copts = xnnpack_optional_tflite_copts(), - tags = ["nowin32"], -@@ -5376,13 +5374,13 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "global_average_pooling_bench", -+ name = "global_average_pooling_bench.wasm", - srcs = ["bench/global-average-pooling.cc"], - deps = OPERATOR_BENCHMARK_DEPS, - ) - - xnnpack_benchmark( -- name = "hardswish_bench", -+ name = "hardswish_bench.wasm", - srcs = ["bench/hardswish.cc"], - copts = xnnpack_optional_tflite_copts(), - tags = ["nowin32"], -@@ -5390,13 +5388,13 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "max_pooling_bench", -+ name = "max_pooling_bench.wasm", - srcs = ["bench/max-pooling.cc"], - deps = OPERATOR_BENCHMARK_DEPS, - ) - - xnnpack_benchmark( -- name = 
"sigmoid_bench", -+ name = "sigmoid_bench.wasm", - srcs = ["bench/sigmoid.cc"], - copts = xnnpack_optional_tflite_copts(), - tags = ["nowin32"], -@@ -5404,7 +5402,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "prelu_bench", -+ name = "prelu_bench.wasm", - srcs = ["bench/prelu.cc"], - copts = xnnpack_optional_tflite_copts(), - tags = ["nowin32"], -@@ -5412,7 +5410,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "softmax_bench", -+ name = "softmax_bench.wasm", - srcs = ["bench/softmax.cc"], - copts = xnnpack_optional_tflite_copts(), - tags = ["nowin32"], -@@ -5420,7 +5418,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "square_root_bench", -+ name = "square_root_bench.wasm", - srcs = ["bench/square-root.cc"], - copts = xnnpack_optional_tflite_copts(), - tags = ["nowin32"], -@@ -5428,7 +5426,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "truncation_bench", -+ name = "truncation_bench.wasm", - srcs = ["bench/truncation.cc"], - deps = OPERATOR_BENCHMARK_DEPS, - ) -@@ -5620,7 +5618,7 @@ cc_library( - ) - - xnnpack_benchmark( -- name = "f32_dwconv_e2e_bench", -+ name = "f32_dwconv_e2e_bench.wasm", - srcs = [ - "bench/f32-dwconv-e2e.cc", - "bench/end2end.h", -@@ -5635,7 +5633,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_gemm_e2e_bench", -+ name = "f32_gemm_e2e_bench.wasm", - srcs = [ - "bench/f32-gemm-e2e.cc", - "bench/end2end.h", -@@ -5650,7 +5648,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "qs8_gemm_e2e_bench", -+ name = "qs8_gemm_e2e_bench.wasm", - srcs = [ - "bench/qs8-gemm-e2e.cc", - "bench/end2end.h", -@@ -5663,7 +5661,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "end2end_bench", -+ name = "end2end_bench.wasm", - srcs = ["bench/end2end.cc"], - deps = [ - ":XNNPACK", -@@ -5690,7 +5688,7 @@ xnnpack_benchmark( - #################### Accuracy evaluation for math functions #################### - - xnnpack_benchmark( -- name = "f32_exp_ulp_eval", -+ name = "f32_exp_ulp_eval.wasm", - srcs = [ - "eval/f32-exp-ulp.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5702,7 +5700,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_expminus_ulp_eval", -+ name = "f32_expminus_ulp_eval.wasm", - srcs = [ - "eval/f32-expminus-ulp.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5714,7 +5712,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_expm1minus_ulp_eval", -+ name = "f32_expm1minus_ulp_eval.wasm", - srcs = [ - "eval/f32-expm1minus-ulp.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5726,7 +5724,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_extexp_ulp_eval", -+ name = "f32_extexp_ulp_eval.wasm", - srcs = [ - "eval/f32-extexp-ulp.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5738,7 +5736,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_sigmoid_ulp_eval", -+ name = "f32_sigmoid_ulp_eval.wasm", - srcs = [ - "eval/f32-sigmoid-ulp.cc", - "src/xnnpack/AlignedAllocator.h", -@@ -5750,7 +5748,7 @@ xnnpack_benchmark( - ) - - xnnpack_benchmark( -- name = "f32_sqrt_ulp_eval", -+ name = "f32_sqrt_ulp_eval.wasm", - srcs = [ - "eval/f32-sqrt-ulp.cc", - "src/xnnpack/AlignedAllocator.h", diff --git a/WORKSPACE b/WORKSPACE -index 4fa1aa2f..6181aab2 100644 +index c58e76b6..30934678 100644 --- a/WORKSPACE +++ b/WORKSPACE -@@ -5,8 +5,8 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - # Bazel rule definitions - http_archive( - name = "rules_cc", -- strip_prefix = "rules_cc-master", -- urls = 
["https://github.com/bazelbuild/rules_cc/archive/master.zip"], -+ strip_prefix = "rules_cc-main", -+ urls = ["https://github.com/bazelbuild/rules_cc/archive/main.zip"], - ) - - # Google Test framework, used by most unit-tests. -@@ -19,8 +19,8 @@ http_archive( - # Google Benchmark library, used in micro-benchmarks. - http_archive( +@@ -21,6 +21,7 @@ http_archive( name = "com_google_benchmark", -- strip_prefix = "benchmark-master", -- urls = ["https://github.com/google/benchmark/archive/master.zip"], -+ strip_prefix = "benchmark-1.5.3", -+ urls = ["https://github.com/google/benchmark/archive/refs/tags/v1.5.3.zip"], + strip_prefix = "benchmark-master", + urls = ["https://github.com/google/benchmark/archive/master.zip"], ++ patches = ["@//third_party:benchmark.patch"], ) # FP16 library, used for half-precision conversions -@@ -89,3 +89,18 @@ android_ndk_repository(name = "androidndk") +@@ -84,6 +85,19 @@ http_archive( + ], + ) - # Android SDK location and API is auto-detected from $ANDROID_HOME environment variable - android_sdk_repository(name = "androidsdk") -+ -+# emscripten library +http_archive( -+ name = "emsdk", -+ strip_prefix = "emsdk-c1589b55641787d55d53e883852035beea9aec3f/bazel", -+ url = "https://github.com/emscripten-core/emsdk/archive/c1589b55641787d55d53e883852035beea9aec3f.tar.gz", -+ sha256 = "7a58a9996b113d3e0675df30b5f17e28aa47de2e684a844f05394fe2f6f12e8e", ++ name = "emsdk", ++ strip_prefix = "emsdk-2.0.26/bazel", ++ url = "https://github.com/emscripten-core/emsdk/archive/refs/tags/2.0.26.tar.gz", ++ sha256 = "79e7166aa8eaae6e52cef1363b2d8db795d03684846066bc51f9dcf905dd58ad", +) + +load("@emsdk//:deps.bzl", emsdk_deps = "deps") @@ -570,67 +43,42 @@ index 4fa1aa2f..6181aab2 100644 +load("@emsdk//:emscripten_deps.bzl", emsdk_emscripten_deps = "emscripten_deps") +emsdk_emscripten_deps() + + # Android NDK location and version is auto-detected from $ANDROID_NDK_HOME environment variable + android_ndk_repository(name = "androidndk") + diff --git a/build_defs.bzl b/build_defs.bzl -index 10345032..0e926fca 100644 +index 2442bed1..b860dfef 100644 --- a/build_defs.bzl +++ b/build_defs.bzl -@@ -1,6 +1,6 @@ - """Build definitions and rules for XNNPACK.""" - --load(":emscripten.bzl", "xnnpack_emscripten_benchmark_linkopts", "xnnpack_emscripten_deps", "xnnpack_emscripten_minimal_linkopts", "xnnpack_emscripten_test_linkopts") -+load(":emscripten.bzl", "xnnpack_emscripten_benchmark_linkopts", "xnnpack_emscripten_deps", "xnnpack_emscripten_minimal_linkopts", "xnnpack_emscripten_test_linkopts", "xnnpack_emscripten_benchmark_copts") - - def xnnpack_visibility(): - """Visibility of :XNNPACK target. -@@ -424,10 +424,15 @@ def xnnpack_benchmark(name, srcs, copts = [], deps = [], tags = []): - ":windows_x86_64_mingw": ["-Wno-unused-function"], - ":windows_x86_64_msys": ["-Wno-unused-function"], - ":windows_x86_64": [], -+ ":emscripten": xnnpack_emscripten_benchmark_copts(), -+ ":emscripten_wasm": xnnpack_emscripten_benchmark_copts(), -+ ":emscripten_wasmsimd": xnnpack_emscripten_benchmark_copts(), - "//conditions:default": ["-Wno-unused-function"], - }) + copts, - linkopts = select({ - ":emscripten": xnnpack_emscripten_benchmark_linkopts(), -+ ":emscripten_wasm": xnnpack_emscripten_benchmark_linkopts(), -+ ":emscripten_wasmsimd": xnnpack_emscripten_benchmark_linkopts(), - ":windows_x86_64_mingw": ["-lshlwapi"], - ":windows_x86_64_msys": ["-lshlwapi"], - "//conditions:default": [], +@@ -414,7 +414,7 @@ def xnnpack_benchmark(name, srcs, copts = [], deps = [], tags = []): + explicitly specified. 
+ """ + native.cc_binary( +- name = name, ++ name = name + ".wasm", + srcs = srcs, + copts = xnnpack_std_cxxopts() + [ + "-Iinclude", diff --git a/emscripten.bzl b/emscripten.bzl -index 0a0caedf..aafe3199 100644 +index 130d5f16..2696ad54 100644 --- a/emscripten.bzl +++ b/emscripten.bzl -@@ -6,6 +6,7 @@ def xnnpack_emscripten_minimal_linkopts(): - "-s ASSERTIONS=0", - "-s ERROR_ON_UNDEFINED_SYMBOLS=1", - "-s EXIT_RUNTIME=1", -+ "--oformat=wasm", - ] - - def xnnpack_emscripten_test_linkopts(): -@@ -17,21 +18,36 @@ def xnnpack_emscripten_test_linkopts(): - "-s EXIT_RUNTIME=1", - "-s ALLOW_MEMORY_GROWTH=1", - "--pre-js $(location :preamble.js.lds)", -+ "--oformat=wasm", - ] - - def xnnpack_emscripten_benchmark_linkopts(): +@@ -25,12 +25,19 @@ def xnnpack_emscripten_benchmark_linkopts(): """Emscripten-specific linkopts for benchmarks.""" return [ "-s ASSERTIONS=1", +- "-s ENVIRONMENT=node,shell,web", - "-s ERROR_ON_UNDEFINED_SYMBOLS=1", - "-s EXIT_RUNTIME=1", -- "-s ALLOW_MEMORY_GROWTH=1", + "-s ERROR_ON_UNDEFINED_SYMBOLS=0", -+ "-s ALLOW_MEMORY_GROWTH=0", - "-s TOTAL_MEMORY=436207616", # 416M + "-s ALLOW_MEMORY_GROWTH=1", + "-s TOTAL_MEMORY=445644800", # 425M - "--pre-js $(location :preamble.js.lds)", + "-s USE_PTHREADS=0", + "-s STANDALONE_WASM=1", + "-Wno-unused", ++ "-Wno-unused-variable", ++ "-Wno-unused-command-line-argument", + "-Wl,--export=__heap_base", + "-Wl,--export=__data_end", + "-Wl,--export=malloc", @@ -639,19 +87,6 @@ index 0a0caedf..aafe3199 100644 ] def xnnpack_emscripten_deps(): - """Emscripten-specific dependencies for unit tests and benchmarks.""" -+ return [] -+ -+def xnnpack_emscripten_benchmark_copts(): - return [ -- ":preamble.js.lds", -+ "-s ASSERTIONS=1", -+ "-s ERROR_ON_UNDEFINED_SYMBOLS=0", -+ "-s ALLOW_MEMORY_GROWTH=0", -+ "-s USE_PTHREADS=0", -+ "-s STANDALONE_WASM=1", -+ "-Wno-unused", - ] diff --git a/third_party/cpuinfo.BUILD b/third_party/cpuinfo.BUILD index 128d683e..f6c287c4 100644 --- a/third_party/cpuinfo.BUILD diff --git a/samples/workload/bwa/CMakeLists.bwa_wasm.txt b/samples/workload/bwa/CMakeLists.bwa_wasm.txt index f315bf9c2..c68b942f1 100644 --- a/samples/workload/bwa/CMakeLists.bwa_wasm.txt +++ b/samples/workload/bwa/CMakeLists.bwa_wasm.txt @@ -5,6 +5,8 @@ cmake_minimum_required (VERSION 3.0) project(bwa_wasm C) +include(${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/preparation.cmake) + ################ LIBZ ################ set(LIBZ_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../libz) add_library(z_wasm STATIC @@ -86,16 +88,15 @@ set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME bwa.wasm) target_include_directories(${PROJECT_NAME} PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/../include - ${CMAKE_CURRENT_SOURCE_DIR}/../include/SSE - ${CMAKE_CURRENT_SOURCE_DIR}/../include/pthread + ${WASI_SDK_HOME}/share/wasi-sysroot/include/libc/musl + ${WASI_SDK_HOME}/share/wasi-sysroot/include/sse ) target_compile_definitions(${PROJECT_NAME} PRIVATE USE_MALLOC_WRAPPERS __SSE__ __SSE2__ __SSE4_1__ - _WASI_EMULATED_MMAN _WASI_EMULATED_SIGNAL + _WASI_EMULATED_MMAN _WASI_EMULATED_SIGNAL _WASI_EMULATED_PROCESS_CLOCKS ) target_compile_options(${PROJECT_NAME} @@ -112,16 +113,7 @@ target_link_options(${PROJECT_NAME} LINKER:-z,stack-size=1048576 ) -target_link_libraries(${PROJECT_NAME} z_wasm) - -find_program(WASM_OPT - NAMES wasm-opt - PATHS /opt/binaryen-version_97/bin /opt/binaryen/bin -) - -if (NOT WASM_OPT) - message(WARNING "can not find wasm-opt and will not optimize any wasm module") -endif() +target_link_libraries(${PROJECT_NAME} z_wasm wasi-emulated-process-clocks) 
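// Editor's note -- hedged aside, not from the patch: the bwa CMake hunk above
// defines _WASI_EMULATED_PROCESS_CLOCKS and links wasi-emulated-process-clocks
// because wasi-libc only provides the process-clock APIs (clock(), etc.)
// through that emulation layer. A minimal check that the emulation is wired
// up (hypothetical file, built with the same wasi-sdk toolchain):
#include <stdio.h>
#include <time.h>

int main(void)
{
    clock_t t0 = clock(); /* resolves via wasi-emulated-process-clocks */
    /* ... workload would run here ... */
    printf("cpu ticks: %ld\n", (long)(clock() - t0));
    return 0;
}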
add_custom_target(bwa_wasm_opt ALL COMMAND diff --git a/samples/workload/bwa/CMakeLists.txt b/samples/workload/bwa/CMakeLists.txt index 8331a51a2..9ea6704b3 100644 --- a/samples/workload/bwa/CMakeLists.txt +++ b/samples/workload/bwa/CMakeLists.txt @@ -10,67 +10,37 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/preparation.cmake) ####################################### include(ExternalProject) -################ HEADERS ################ -set(EMSDK_SYSTEM_HEADERS "$ENV{EMSDK}/upstream/emscripten/system/include") -set(EMSDK_SSE_HEADERS "${EMSDK_SYSTEM_HEADERS}/SSE") -set(EMSDK_LIBC_HEADERS "${EMSDK_SYSTEM_HEADERS}/libc") -ExternalProject_Add(headers_from_emcc - PREFIX headers - SOURCE_DIR ${EMSDK_SYSTEM_HEADERS} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND mkdir -p ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE - && ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys - && ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/emscripten - # copy emscripten SSE header files - && ${CMAKE_COMMAND} -E copy ${EMSDK_SYSTEM_HEADERS}/wasm_simd128.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/ - && ${CMAKE_COMMAND} -E copy ${EMSDK_SSE_HEADERS}/immintrin.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/ - # SSE - && ${CMAKE_COMMAND} -E copy ${EMSDK_SSE_HEADERS}/xmmintrin.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/ - # SSE2 - && ${CMAKE_COMMAND} -E copy ${EMSDK_SSE_HEADERS}/emmintrin.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/ - # SSE4.1 - && ${CMAKE_COMMAND} -E copy ${EMSDK_SSE_HEADERS}/smmintrin.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/ - # a fake empty header to aovid further depenency - && ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_SOURCE_DIR}/include/emscripten/emscripten.h - # copy emscripten pthread related header files - && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/pthread.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/ - && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/signal.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/ - && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/netdb.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/ - && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/sys/wait.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys/ - && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/sys/socket.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys/ -) - ################ libz ################ ExternalProject_Add(libz_src - PREFIX libz GIT_REPOSITORY https://github.com/madler/zlib.git GIT_TAG master GIT_PROGRESS ON GIT_SHALLOW ON SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/libz + UPDATE_COMMAND "" + PATCH_COMMAND "" CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" ) ################ bwa ################ ExternalProject_Add(bwa - PREFIX bwa GIT_REPOSITORY https://github.com/lh3/bwa.git GIT_TAG master GIT_PROGRESS ON GIT_SHALLOW ON SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/bwa - DEPENDS libz_src headers_from_emcc - UPDATE_COMMAND git clean -fd && git checkout -- * + DEPENDS libz_src + UPDATE_COMMAND git clean -ffdx && git checkout -- * && ${CMAKE_COMMAND} -E echo "Copying pre-installed CMakeLists.txt" && ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.bwa_wasm.txt CMakeLists.txt && git apply ../bwa.patch CONFIGURE_COMMAND ${CMAKE_COMMAND} - -DWASI_SDK_PREFIX=${WASI_SDK_HOME}/wasi-sdk - -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_HOME}/wasi-sdk/share/cmake/wasi-sdk.cmake - ${CMAKE_CURRENT_SOURCE_DIR}/bwa + -DWASI_SDK_PREFIX=${WASI_SDK_HOME} + 
-DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_HOME}/share/cmake/wasi-sdk.cmake + -DCMAKE_SYSROOT=${WASI_SDK_HOME}/share/wasi-sysroot + ${CMAKE_CURRENT_SOURCE_DIR}/bwa BUILD_COMMAND make bwa_wasm_opt INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ./bwa.opt.wasm ${CMAKE_CURRENT_SOURCE_DIR}/build/bwa.wasm ) diff --git a/samples/workload/bwa/build_workload.sh b/samples/workload/bwa/build_workload.sh new file mode 120000 index 000000000..a31afa928 --- /dev/null +++ b/samples/workload/bwa/build_workload.sh @@ -0,0 +1 @@ +../docker/build_workload.sh \ No newline at end of file diff --git a/samples/workload/bwa/docker_build.sh b/samples/workload/bwa/docker_build.sh deleted file mode 120000 index 3c6de9bca..000000000 --- a/samples/workload/bwa/docker_build.sh +++ /dev/null @@ -1 +0,0 @@ -../docker/docker_build.sh \ No newline at end of file diff --git a/samples/workload/cmake/preparation.cmake b/samples/workload/cmake/preparation.cmake index 8530a1417..326943c83 100644 --- a/samples/workload/cmake/preparation.cmake +++ b/samples/workload/cmake/preparation.cmake @@ -1,76 +1,49 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -################ WASI-SDK ################ -find_path(WASI_SDK_HOME - NAMES wasi-sdk - PATHS /opt/ - REQUIRED +####################################### +include(ExternalProject) + +file(REAL_PATH ../../.. WAMR_ROOT + BASE_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} ) -if (NOT WASI_SDK_HOME) +find_path(WASI_SDK_PARENT + name wasi-sdk + PATHS ${WAMR_ROOT}/test-tools/ + NO_DEFAULT_PATH + NO_CMAKE_FIND_ROOT_PATH +) + +if(NOT WASI_SDK_PARENT) message(FATAL_ERROR - "can not find wasi-sdk. " - "please download it from " - "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-12/wasi-sdk-12.0-linux.tar.gz " - "and install it under /opt/wasi-sdk" + "can not find 'wasi-sdk' under ${WAMR_ROOT}/test-tools, " + "please run ${WAMR_ROOT}/test-tools/build-wasi-sdk/build_wasi_sdk.py " + "to build wasi-sdk and try again" ) +endif() + +set(WASI_SDK_HOME ${WASI_SDK_PARENT}/wasi-sdk) +message(CHECK_START "Detecting WASI-SDK at ${WASI_SDK_HOME}") +if(EXISTS "${WASI_SDK_HOME}/share/cmake/wasi-sdk.cmake") + message(CHECK_PASS "found") else() - message(STATUS - "Detecting wasi-sdk info: ${WASI_SDK_HOME}/wasi-sdk" - ) -endif() - -# -# check clang version -execute_process(COMMAND - ${WASI_SDK_HOME}/wasi-sdk/bin/clang --version - OUTPUT_VARIABLE clang_full_version_string -) -string(REGEX REPLACE ".*clang version ([0-9]+\\.[0-9]+).*" "\\1" - CLANG_VERSION_STRING ${clang_full_version_string} -) -message(STATUS "Detecting clang versoin: ${CLANG_VERSION_STRING}") -if(CLANG_VERSION_STRING VERSION_LESS 11.0) - message(FATAL_ERROR - "please install latest wai-sdk to get a clang-11 at least" - ) -endif() - -################ EMCC ################ -if(NOT DEFINED ENV{EMSDK}) - message(FATAL_ERROR - "can not find emsdk. 
" - "please refer to https://emscripten.org/docs/getting_started/downloads.html " - "and install it, " - "or active emsdk by 'source ./emsdk_env.sh'" - ) -endif() - -message(STATUS "Detecting EMSDK info: $ENV{EMSDK}") - -### check if the emsdk is 2.0.12 -### upstream/.emsdk_version should be releases-upstream-dcf819a7821f8db0c8f15ac336fea8960ec204f5-64bit -file(STRINGS "$ENV{EMSDK}/upstream/.emsdk_version" EMSDK_VERSION) -if(NOT (${EMSDK_VERSION} STREQUAL "releases-upstream-dcf819a7821f8db0c8f15ac336fea8960ec204f5-64bit")) - message(FATAL_ERROR "please install emsdk 2.0.12") + message(CHECK_FAIL "not found") endif() ################ BINARYEN ################ find_program(WASM_OPT - NAMES wasm-opt - PATHS /opt/binaryen-version_101/bin /opt/binaryen/bin + NAMES wasm-opt + PATHS /opt/binaryen-version_101/bin /opt/binaryen/bin + NO_DEFAULT_PATH + NO_CMAKE_FIND_ROOT_PATH ) -if (NOT WASM_OPT) +if(NOT WASM_OPT) message(FATAL_ERROR "can not find wasm-opt. " "please download it from " "https://github.com/WebAssembly/binaryen/releases/download/version_101/binaryen-version_101-x86_64-linux.tar.gz " "and install it under /opt" ) -else() - message(STATUS - "Detecting EMSDK info: $ENV{EMSDK}" - ) endif() diff --git a/samples/workload/docker/.gitignore b/samples/workload/docker/.gitignore deleted file mode 100644 index 9db0a2807..000000000 --- a/samples/workload/docker/.gitignore +++ /dev/null @@ -1 +0,0 @@ -build_scripts diff --git a/samples/workload/docker/Dockerfile b/samples/workload/docker/Dockerfile deleted file mode 100644 index ecc8bd79d..000000000 --- a/samples/workload/docker/Dockerfile +++ /dev/null @@ -1,93 +0,0 @@ -FROM ubuntu:18.04 as builder - -RUN apt update \ - && apt install -y lsb-release software-properties-common build-essential \ - wget curl git tree zip unzip - -ARG WASI_SDK_VER=12 -ARG WABT_VER=1.0.23 -ARG CMAKE_VER=3.16.2 -ARG BINARYEN_VER=version_101 - -# -# install wasi-sdk -ARG WASI_SDK_FILE="wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz" -COPY ${WASI_SDK_FILE} /opt -RUN cd /opt \ - && tar zxf ${WASI_SDK_FILE} \ - && rm ${WASI_SDK_FILE} \ - && ln -sf /opt/wasi-sdk-${WASI_SDK_VER}.0 /opt/wasi-sdk - -# -# install wabt -ARG WABT_FILE="wabt-${WABT_VER}-ubuntu.tar.gz" -COPY ${WABT_FILE} /opt -RUN cd /opt \ - && tar zxf ${WABT_FILE} \ - && rm ${WABT_FILE} \ - && ln -sf /opt/wabt-${WABT_VER} /opt/wabt - -# -# install cmake -ARG CMAKE_FILE="cmake-${CMAKE_VER}-Linux-x86_64.sh" -COPY ${CMAKE_FILE} /tmp -RUN cd /tmp \ - && chmod a+x ${CMAKE_FILE} \ - && mkdir /opt/cmake \ - && ./${CMAKE_FILE} --prefix=/opt/cmake --skip-license \ - && ln -sf /opt/cmake/bin/cmake /usr/local/bin/cmake - -# -# install emsdk -RUN cd /opt \ - && git clone https://github.com/emscripten-core/emsdk.git \ - && cd emsdk \ - && git pull \ - && ./emsdk install 2.0.12 \ - && ./emsdk activate 2.0.12 \ - && echo "source /opt/emsdk/emsdk_env.sh" >> /root/.bashrc - -# -# install binaryen -ARG BINARYEN_FILE="binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz" -COPY ${BINARYEN_FILE} /opt -RUN cd /opt \ - && tar zxf ${BINARYEN_FILE} \ - && rm ${BINARYEN_FILE} \ - && ln -sf /opt/binaryen-${BINARYEN_VER} /opt/binaryen - -# -# install bazelisk -ARG BAZEL_FILE="bazelisk-linux-amd64" -COPY ${BAZEL_FILE} /opt/bazelisk/bin/bazelisk -RUN cd /opt/bazelisk/bin/ \ - && chmod a+x bazelisk \ - && ln -sf /opt/bazelisk/bin/bazelisk /usr/local/bin/bazel - -RUN apt update \ - && apt install -y python2.7-minimal - -# -# Clean up -RUN apt-get autoremove -y \ - && apt-get clean -y \ - && rm -rf /var/lib/apt/lists/* \ - && rm -rf /tmp/* - -VOLUME /data - -# 
-# -RUN touch /build.sh \ - && echo "\ -#!/bin/bash \n\ -if [[ -d /data/project/build ]]; then \n\ - rm -r /data/project/build \n\ -fi \n\ -mkdir /data/project/build \n\ -cd /data/project/build \n\ -source /opt/emsdk/emsdk_env.sh \n\ -cmake .. \n\ -make \n\ -cd - > /dev/null" > /build.sh \ - && chmod a+x /build.sh diff --git a/samples/workload/docker/build_workload.sh b/samples/workload/docker/build_workload.sh new file mode 100755 index 000000000..62fe69a93 --- /dev/null +++ b/samples/workload/docker/build_workload.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + +readonly SCRIPT_PATH=$(dirname "$(realpath "$0")") +readonly ROOT=$(realpath "${SCRIPT_PATH}"/../../../) +readonly CURRENT_PATH=$(pwd) +readonly CURRENT_RELATIVE_ROOT=$(realpath --relative-base ${ROOT} ${CURRENT_PATH}) +readonly VARIANT=$(lsb_release -c | awk '{print $2}') + +docker build \ + --build-arg VARIANT=${VARIANT} \ + --memory 4G --cpu-quota 50000 \ + -t wamr_dev_${VARIANT}:0.1 -f "${ROOT}"/ci/Dockerfile "${ROOT}"/ci && + docker run --rm -it \ + --memory 4G \ + --cpus ".5" \ + --name workload_build_env \ + --mount type=bind,source="${ROOT}",target=/workspace \ + wamr_dev_${VARIANT}:0.1 \ + /bin/bash -c "\ + pwd \ + && pushd ${CURRENT_RELATIVE_ROOT} \ + && rm -rf build \ + && mkdir build \ + && pushd build \ + && cmake .. \ + && cmake --build . --config Release \ + && popd \ + && popd \ + && echo 'Go and find out results under ${CURRENT_RELATIVE_ROOT}/build' " diff --git a/samples/workload/docker/docker_build.sh b/samples/workload/docker/docker_build.sh deleted file mode 100755 index 7c3d95546..000000000 --- a/samples/workload/docker/docker_build.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (C) 2019 Intel Corporation. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# - -readonly BUILD_CONTENT="/tmp/build_content" -if [[ ! -d ${BUILD_CONTENT} ]]; then - mkdir ${BUILD_CONTENT} -fi - -readonly WASI_SDK_VER=12 -readonly WABT_VER=1.0.23 -readonly CMAKE_VER=3.16.2 -readonly BINARYEN_VER=version_101 -readonly BAZELISK_VER=1.7.5 - -cd ${BUILD_CONTENT} || exit -if [[ ! -f wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz ]]; then - wget https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VER}/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -fi - -if [[ ! -f wabt-${WABT_VER}-ubuntu.tar.gz ]]; then - wget https://github.com/WebAssembly/wabt/releases/download/${WABT_VER}/wabt-${WABT_VER}-ubuntu.tar.gz -fi - -if [[ ! -f cmake-${CMAKE_VER}-Linux-x86_64.sh ]]; then - wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VER}/cmake-${CMAKE_VER}-Linux-x86_64.sh -fi - -if [[ ! -f binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz ]]; then - wget https://github.com/WebAssembly/binaryen/releases/download/${BINARYEN_VER}/binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz -fi - -if [[ ! 
-f bazelisk-linux-amd64 ]]; then - wget https://github.com/bazelbuild/bazelisk/releases/download/v${BAZELISK_VER}/bazelisk-linux-amd64 -fi -cd - > /dev/null || exit - -DOCKERFILE_PATH=$(dirname "$(realpath "$0")") - -docker build \ - --build-arg WASI_SDK_VER=${WASI_SDK_VER} \ - --build-arg WABT_VER=${WABT_VER} \ - --build-arg CMAKE_VER=${CMAKE_VER} \ - --build-arg BINARYEN_VER=${BINARYEN_VER} \ - -t wamr_workload_env:0.1 -f "${DOCKERFILE_PATH}"/Dockerfile ${BUILD_CONTENT} \ - && docker run --rm \ - --name workload_w_clang \ - --mount type=bind,source="$(pwd)",target=/data/project \ - -w /data/project \ - wamr_workload_env:0.1 \ - /bin/bash -c /build.sh diff --git a/samples/workload/meshoptimizer/CMakeLists.txt b/samples/workload/meshoptimizer/CMakeLists.txt index a7f488099..97f98fa3a 100644 --- a/samples/workload/meshoptimizer/CMakeLists.txt +++ b/samples/workload/meshoptimizer/CMakeLists.txt @@ -21,9 +21,10 @@ ExternalProject_Add(codecbench && ${CMAKE_COMMAND} -E echo "Applying patch" && git apply ${CMAKE_CURRENT_SOURCE_DIR}/codecbench.patch CONFIGURE_COMMAND ${CMAKE_COMMAND} - -DWASI_SDK_PREFIX=${WASI_SDK_HOME}/wasi-sdk - -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_HOME}/wasi-sdk/share/cmake/wasi-sdk.cmake - ${CMAKE_CURRENT_SOURCE_DIR}/meshoptimizer - BUILD_COMMAND make codecbench.opt - INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ./codecbench.opt.wasm ${CMAKE_CURRENT_SOURCE_DIR}/build/codecbench.wasm + -DWASI_SDK_PREFIX=${WASI_SDK_HOME} + -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_HOME}/share/cmake/wasi-sdk.cmake + -DCMAKE_SYSROOT=${WASI_SDK_HOME}/share/wasi-sysroot + ${CMAKE_CURRENT_SOURCE_DIR}/meshoptimizer + BUILD_COMMAND make codecbench + INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ./codecbench.wasm ${CMAKE_CURRENT_SOURCE_DIR}/build/codecbench.wasm ) diff --git a/samples/workload/meshoptimizer/build_workload.sh b/samples/workload/meshoptimizer/build_workload.sh new file mode 120000 index 000000000..a31afa928 --- /dev/null +++ b/samples/workload/meshoptimizer/build_workload.sh @@ -0,0 +1 @@ +../docker/build_workload.sh \ No newline at end of file diff --git a/samples/workload/meshoptimizer/codecbench.patch b/samples/workload/meshoptimizer/codecbench.patch index d739558a2..4c9063aa1 100644 --- a/samples/workload/meshoptimizer/codecbench.patch +++ b/samples/workload/meshoptimizer/codecbench.patch @@ -1,8 +1,8 @@ diff --git a/CMakeLists.txt b/CMakeLists.txt -index ffdb4da..a397427 100644 +index f4378ce..9bc104b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -127,3 +127,43 @@ install(FILES +@@ -129,3 +129,43 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/meshoptimizerConfig.cmake ${CMAKE_CURRENT_BINARY_DIR}/meshoptimizerConfigVersion.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/meshoptimizer) @@ -46,3 +46,74 @@ index ffdb4da..a397427 100644 +) + +add_dependencies(codecbench.opt codecbench) +diff --git a/src/vertexcodec.cpp b/src/vertexcodec.cpp +index 5f3ec20..b79bfad 100644 +--- a/src/vertexcodec.cpp ++++ b/src/vertexcodec.cpp +@@ -81,13 +81,13 @@ + #endif + + #ifdef SIMD_WASM +-#define wasmx_splat_v32x4(v, i) wasm_v32x4_shuffle(v, v, i, i, i, i) +-#define wasmx_unpacklo_v8x16(a, b) wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23) +-#define wasmx_unpackhi_v8x16(a, b) wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31) +-#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11) +-#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15) +-#define wasmx_unpacklo_v64x2(a, b) 
wasm_v64x2_shuffle(a, b, 0, 2) +-#define wasmx_unpackhi_v64x2(a, b) wasm_v64x2_shuffle(a, b, 1, 3) ++#define wasmx_splat_v32x4(v, i) wasm_i32x4_shuffle(v, v, i, i, i, i) ++#define wasmx_unpacklo_v8x16(a, b) wasm_i8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23) ++#define wasmx_unpackhi_v8x16(a, b) wasm_i8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31) ++#define wasmx_unpacklo_v16x8(a, b) wasm_i16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11) ++#define wasmx_unpackhi_v16x8(a, b) wasm_i16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15) ++#define wasmx_unpacklo_v64x2(a, b) wasm_i64x2_shuffle(a, b, 0, 2) ++#define wasmx_unpackhi_v64x2(a, b) wasm_i64x2_shuffle(a, b, 1, 3) + #endif + + namespace meshopt +@@ -700,7 +700,7 @@ static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1) + v128_t sm1 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask1]); + + v128_t sm1off = wasm_v128_load(&kDecodeBytesGroupCount[mask0]); +- sm1off = wasm_v8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++ sm1off = wasm_i8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + + v128_t sm1r = wasm_i8x16_add(sm1, sm1off); + +@@ -751,7 +751,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi + + v128_t shuf = decodeShuffleMask(mask0, mask1); + +- v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask); ++ v128_t result = wasm_v128_bitselect(wasm_i8x16_swizzle(rest, shuf), sel, mask); + + wasm_v128_store(buffer, result); + +@@ -773,7 +773,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi + + v128_t shuf = decodeShuffleMask(mask0, mask1); + +- v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask); ++ v128_t result = wasm_v128_bitselect(wasm_i8x16_swizzle(rest, shuf), sel, mask); + + wasm_v128_store(buffer, result); + +diff --git a/src/vertexfilter.cpp b/src/vertexfilter.cpp +index 023452c..2374cf7 100644 +--- a/src/vertexfilter.cpp ++++ b/src/vertexfilter.cpp +@@ -56,10 +56,10 @@ + #endif + + #ifdef SIMD_WASM +-#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11) +-#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15) +-#define wasmx_unziplo_v32x4(a, b) wasm_v32x4_shuffle(a, b, 0, 2, 4, 6) +-#define wasmx_unziphi_v32x4(a, b) wasm_v32x4_shuffle(a, b, 1, 3, 5, 7) ++#define wasmx_unpacklo_v16x8(a, b) wasm_i16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11) ++#define wasmx_unpackhi_v16x8(a, b) wasm_i16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15) ++#define wasmx_unziplo_v32x4(a, b) wasm_i32x4_shuffle(a, b, 0, 2, 4, 6) ++#define wasmx_unziphi_v32x4(a, b) wasm_i32x4_shuffle(a, b, 1, 3, 5, 7) + #endif + + namespace meshopt diff --git a/samples/workload/meshoptimizer/docker_build.sh b/samples/workload/meshoptimizer/docker_build.sh deleted file mode 120000 index 3c6de9bca..000000000 --- a/samples/workload/meshoptimizer/docker_build.sh +++ /dev/null @@ -1 +0,0 @@ -../docker/docker_build.sh \ No newline at end of file diff --git a/samples/workload/preparation.sh b/samples/workload/preparation.sh index d14e0dd57..3f49f0cf6 100755 --- a/samples/workload/preparation.sh +++ b/samples/workload/preparation.sh @@ -5,8 +5,6 @@ # readonly BUILD_CONTENT="/tmp/build_content" -readonly WASI_SDK_VER=12 -readonly WASI_SDK_FILE="wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz" readonly WABT_VER=1.0.23 readonly WABT_FILE="wabt-${WABT_VER}-ubuntu.tar.gz" readonly 
CMAKE_VER=3.16.2 @@ -28,17 +26,6 @@ function install_deps() { build-essential git tree zip unzip } -# -# install wasi-sdk -function install_wasi-sdk() { - if [[ ! -f ${WASI_SDK_FILE} ]]; then - wget https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VER}/${WASI_SDK_FILE} - fi - - tar zxf ${WASI_SDK_FILE} -C /opt - ln -sf /opt/wasi-sdk-${WASI_SDK_VER}.0 /opt/wasi-sdk -} - # # install wabt function install_wabt() { @@ -70,8 +57,8 @@ function install_emsdk() { git clone https://github.com/emscripten-core/emsdk.git cd emsdk git pull - ./emsdk install 2.0.12 - ./emsdk activate 2.0.12 + ./emsdk install 2.0.26 + ./emsdk activate 2.0.26 echo "source /opt/emsdk/emsdk_env.sh" >> "${HOME}"/.bashrc } diff --git a/samples/workload/tensorflow/README.md b/samples/workload/tensorflow/README.md index 4f35b8ef7..164b6bb09 100644 --- a/samples/workload/tensorflow/README.md +++ b/samples/workload/tensorflow/README.md @@ -5,8 +5,8 @@ This sample demonstrates how to build [tensorflow](https://github.com/tensorflow ```bash git clone https://github.com/emscripten-core/emsdk.git cd emsdk -./emsdk install 2.0.12 -./emsdk activate 2.0.12 +./emsdk install 2.0.26 +./emsdk activate 2.0.26 ``` And set up ensdk environment: ```bash diff --git a/samples/workload/tensorflow/build.sh b/samples/workload/tensorflow/build.sh index 591b1d668..7289e617b 100755 --- a/samples/workload/tensorflow/build.sh +++ b/samples/workload/tensorflow/build.sh @@ -17,7 +17,7 @@ fi set -xe -EMSDK_WASM_DIR="${EMSDK}/upstream/emscripten/cache/wasm" +EMSDK_WASM_DIR="${EMSDK}/upstream/emscripten/cache/sysroot/lib/wasm32-emscripten" BUILD_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" OUT_DIR="${BUILD_SCRIPT_DIR}/out" TENSORFLOW_DIR="${BUILD_SCRIPT_DIR}/tensorflow" diff --git a/samples/workload/wasm-av1/CMakeLists.avx_wasm.txt b/samples/workload/wasm-av1/CMakeLists.avx_wasm.txt index 08e26f960..d11496123 100644 --- a/samples/workload/wasm-av1/CMakeLists.avx_wasm.txt +++ b/samples/workload/wasm-av1/CMakeLists.avx_wasm.txt @@ -5,8 +5,10 @@ cmake_minimum_required (VERSION 2.8...3.16) project(testavx) +include(${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/preparation.cmake) + # a workaround to let aom find our non-public headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include/libc) +include_directories(${WASI_SDK_HOME}/share/wasi-sysroot/include/libc/musl) ################ AOM ################ set(ENABLE_CCACHE ON) @@ -58,15 +60,6 @@ target_link_libraries(${PROJECT_NAME} add_dependencies(${PROJECT_NAME} aom) -find_program(WASM_OPT - NAMES wasm-opt - PATHS /opt/binaryen-version_97/bin /opt/binaryen/bin -) - -if (NOT WASM_OPT) - message(WARNING "can not find wasm-opt and will not optimize any wasm module") -endif() - add_custom_target(${PROJECT_NAME}_opt ALL COMMAND ${WASM_OPT} -Oz --enable-simd -o ${PROJECT_NAME}.opt.wasm ${PROJECT_NAME}.wasm diff --git a/samples/workload/wasm-av1/CMakeLists.txt b/samples/workload/wasm-av1/CMakeLists.txt index 2b555ade1..8b1f0df0c 100644 --- a/samples/workload/wasm-av1/CMakeLists.txt +++ b/samples/workload/wasm-av1/CMakeLists.txt @@ -10,27 +10,6 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/preparation.cmake) ####################################### include(ExternalProject) -################ HEADERS ################ -set(EMSDK_SYSTEM_HEADERS "$ENV{EMSDK}/upstream/emscripten/system/include") -set(EMSDK_LIBC_HEADERS "${EMSDK_SYSTEM_HEADERS}/libc") -ExternalProject_Add(headers_from_emcc - PREFIX headers - SOURCE_DIR "${EMSDK_SYSTEM_HEADERS}" - CONFIGURE_COMMAND "" - BUILD_COMMAND 
"" - INSTALL_COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys - && ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/libc/bits - # copy emscripten pthread related header files - && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/pthread.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/ - && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/signal.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/ - && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/netdb.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/ - && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/sys/wait.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys/ - && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/sys/socket.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys/ - # copy emscripten setjmp headers - && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/setjmp.h ${CMAKE_CURRENT_SOURCE_DIR}/include/libc/setjmp.h - && ${CMAKE_COMMAND} -E copy ${EMSDK_SYSTEM_HEADERS}/../lib/libc/musl/arch/emscripten/bits/setjmp.h ${CMAKE_CURRENT_SOURCE_DIR}/include/libc/bits/setjmp.h -) - ################ av1 ################ ExternalProject_Add(av1 PREFIX av1 @@ -39,15 +18,15 @@ ExternalProject_Add(av1 GIT_PROGRESS ON GIT_SHALLOW ON SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/av1 - DEPENDS headers_from_emcc UPDATE_COMMAND git clean -fd && git checkout -- * && ${CMAKE_COMMAND} -E echo "Copying pre-installed CMakeLists.txt" && ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.avx_wasm.txt CMakeLists.txt && git apply ../av1-clang.patch CONFIGURE_COMMAND ${CMAKE_COMMAND} - -DWASI_SDK_PREFIX=${WASI_SDK_HOME}/wasi-sdk - -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_HOME}/wasi-sdk/share/cmake/wasi-sdk.cmake - ${CMAKE_CURRENT_SOURCE_DIR}/av1 + -DWASI_SDK_PREFIX=${WASI_SDK_HOME} + -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_HOME}/share/cmake/wasi-sdk.cmake + -DCMAKE_SYSROOT=${WASI_SDK_HOME}/share/wasi-sysroot + ${CMAKE_CURRENT_SOURCE_DIR}/av1 BUILD_COMMAND make testavx_opt INSTALL_COMMAND ${CMAKE_COMMAND} -E copy testavx.opt.wasm ${CMAKE_CURRENT_BINARY_DIR}/testavx.wasm ) diff --git a/samples/workload/wasm-av1/build.sh b/samples/workload/wasm-av1/build.sh index 15b6c7deb..7f82c6c52 100755 --- a/samples/workload/wasm-av1/build.sh +++ b/samples/workload/wasm-av1/build.sh @@ -17,7 +17,7 @@ fi set -xe -EMSDK_WASM_DIR="${EMSDK}/upstream/emscripten/cache/wasm" +EMSDK_WASM_DIR="${EMSDK}/upstream/emscripten/cache/sysroot/lib/wasm32-emscripten" BUILD_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" OUT_DIR="${BUILD_SCRIPT_DIR}/out" WASM_AV1_DIR="${BUILD_SCRIPT_DIR}/wasm-av1" diff --git a/samples/workload/wasm-av1/build_workload.sh b/samples/workload/wasm-av1/build_workload.sh new file mode 120000 index 000000000..a31afa928 --- /dev/null +++ b/samples/workload/wasm-av1/build_workload.sh @@ -0,0 +1 @@ +../docker/build_workload.sh \ No newline at end of file diff --git a/samples/workload/wasm-av1/docker_build.sh b/samples/workload/wasm-av1/docker_build.sh deleted file mode 120000 index 3c6de9bca..000000000 --- a/samples/workload/wasm-av1/docker_build.sh +++ /dev/null @@ -1 +0,0 @@ -../docker/docker_build.sh \ No newline at end of file diff --git a/test-tools/.gitignore b/test-tools/.gitignore new file mode 100644 index 000000000..6aa8dc0ee --- /dev/null +++ b/test-tools/.gitignore @@ -0,0 +1 @@ +/wasi-sdk diff --git a/test-tools/build-wasi-sdk/build_wasi_sdk.py b/test-tools/build-wasi-sdk/build_wasi_sdk.py new file mode 100755 index 000000000..4c6789116 --- /dev/null +++ b/test-tools/build-wasi-sdk/build_wasi_sdk.py @@ -0,0 
+1,299 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# + +""" +The script operates on such directories and files +|-- core +| `-- deps +| |-- emscripten +| `-- wasi-sdk +| `-- src +| |-- llvm-project +| `-- wasi-libc +`-- test-tools + |-- build-wasi-sdk + | |-- build_wasi_sdk.py + | |-- include + | `-- patches + `-- wasi-sdk + |-- bin + |-- lib + `-- share + `-- wasi-sysroot +""" + +import hashlib +import logging +import os +import pathlib +import shlex +import shutil +import subprocess +import sys +import tarfile +import tempfile +import urllib +import urllib.request + +logger = logging.getLogger("build_wasi_sdk") + +external_repos = { + "config": { + "sha256": "302e5e7f3c4996976c58efde8b2f28f71d51357e784330eeed738e129300dc33", + "store_dir": "core/deps/wasi-sdk/src/config", + "strip_prefix": "config-191bcb948f7191c36eefe634336f5fc5c0c4c2be", + "url": "https://git.savannah.gnu.org/cgit/config.git/snapshot/config-191bcb948f7191c36eefe634336f5fc5c0c4c2be.tar.gz", + }, + "emscripten": { + "sha256": "0904a65379aea3ea94087b8c12985b2fee48599b473e3bef914fec2e3941532d", + "store_dir": "core/deps/emscripten", + "strip_prefix": "emscripten-2.0.28", + "url": "https://github.com/emscripten-core/emscripten/archive/refs/tags/2.0.28.tar.gz", + }, + "llvm-project": { + "sha256": "dc5169e51919f2817d06615285e9da6a804f0f881dc55d6247baa25aed3cc143", + "store_dir": "core/deps/wasi-sdk/src/llvm-project", + "strip_prefix": "llvm-project-34ff6a75f58377f32a5046a29f55c4c0e58bee9e", + "url": "https://github.com/llvm/llvm-project/archive/34ff6a75f58377f32a5046a29f55c4c0e58bee9e.tar.gz", + }, + "wasi-sdk": { + "sha256": "fc4fdb0e97b915241f32209492a7d0fab42c24216f87c1d5d75f46f7c70a553d", + "store_dir": "core/deps/wasi-sdk", + "strip_prefix": "wasi-sdk-1a953299860bbcc198ad8c12a21d1b2e2f738355", + "url": "https://github.com/WebAssembly/wasi-sdk/archive/1a953299860bbcc198ad8c12a21d1b2e2f738355.tar.gz", + }, + "wasi-libc": { + "sha256": "f6316ca9479d3463eb1c4f6a1d1f659bf15f67cb3c1e2e83d9d11f188dccd864", + "store_dir": "core/deps/wasi-sdk/src/wasi-libc", + "strip_prefix": "wasi-libc-a78cd329aec717f149934d7362f57050c9401f60", + "url": "https://github.com/WebAssembly/wasi-libc/archive/a78cd329aec717f149934d7362f57050c9401f60.tar.gz", + }, +} + +# TOOD: can we use headers from wasi-libc and clang directly ? 
+emscripten_headers_src_dst = [ + ("include/compat/emmintrin.h", "sse/emmintrin.h"), + ("include/compat/immintrin.h", "sse/immintrin.h"), + ("include/compat/smmintrin.h", "sse/smmintrin.h"), + ("include/compat/xmmintrin.h", "sse/xmmintrin.h"), + ("lib/libc/musl/include/pthread.h", "libc/musl/pthread.h"), + ("lib/libc/musl/include/signal.h", "libc/musl/signal.h"), + ("lib/libc/musl/include/netdb.h", "libc/musl/netdb.h"), + ("lib/libc/musl/include/sys/wait.h", "libc/musl/sys/wait.h"), + ("lib/libc/musl/include/sys/socket.h", "libc/musl/sys/socket.h"), + ("lib/libc/musl/include/setjmp.h", "libc/musl/setjmp.h"), + ("lib/libc/musl/arch/emscripten/bits/setjmp.h", "libc/musl/bits/setjmp.h"), +] + + +def checksum(name, local_file): + sha256 = hashlib.sha256() + with open(local_file, "rb") as f: + bytes = f.read(4096) + while bytes: + sha256.update(bytes) + bytes = f.read(4096) + + return sha256.hexdigest() == external_repos[name]["sha256"] + + +def download(url, local_file): + logger.debug(f"download from {url}") + urllib.request.urlretrieve(url, local_file) + return local_file.exists() + + +def unpack(tar_file, strip_prefix, dest_dir): + # extract .tar.gz to /tmp, then move back without strippred prefix directories + with tempfile.TemporaryDirectory() as tmp: + with tarfile.open(tar_file) as tar: + logger.debug(f"extract to {tmp}") + tar.extractall(tmp) + + strip_prefix_dir = ( + pathlib.Path(tmp).joinpath(strip_prefix + os.path.sep).resolve() + ) + if not strip_prefix_dir.exists(): + logger.error(f"extract {tar_file.name} failed") + return False + + # mv /tmp/${strip_prefix} dest_dir/* + logger.debug(f"move {strip_prefix_dir} to {dest_dir}") + shutil.copytree( + str(strip_prefix_dir), + str(dest_dir), + copy_function=shutil.move, + dirs_exist_ok=True, + ) + + return True + + +def download_repo(name, root): + if not name in external_repos: + logger.error(f"{name} is not a known repository") + return False + + store_dir = root.joinpath(f'{external_repos[name]["store_dir"]}').resolve() + download_flag = store_dir.joinpath("DOWNLOADED") + if store_dir.exists() and download_flag.exists(): + logger.info( + f"keep using '{store_dir.relative_to(root)}'. 
Or to remove it and try again" + ) + return True + + # download only when the target is neither existed nor broken + download_dir = pathlib.Path("/tmp/build_wasi_sdk/") + download_dir.mkdir(exist_ok=True) + + tar_name = pathlib.Path(external_repos[name]["url"]).name + tar_file = download_dir.joinpath(tar_name) + if tar_file.exists(): + if checksum(name, tar_file): + logger.debug(f"use pre-downloaded {tar_file}") + else: + logger.debug(f"{tar_file} is broken, remove it") + tar_file.unlink() + + if not tar_file.exists(): + if not download(external_repos[name]["url"], tar_file) or not checksum( + name, tar_file + ): + logger.error(f"download {name} failed") + return False + + # unpack and removing *strip_prefix* + if not unpack(tar_file, external_repos[name]["strip_prefix"], store_dir): + return False + + # leave a FLAG + download_flag.touch() + + # leave download files in /tmp + return True + + +def run_patch(patch_file, cwd): + if not patch_file.exists(): + logger.error(f"{patch_file} not found") + return False + + with open(patch_file, "r") as f: + try: + PATCH_DRY_RUN_CMD = "patch -f -p1 --dry-run" + if subprocess.check_call(shlex.split(PATCH_DRY_RUN_CMD), stdin=f, cwd=cwd): + logger.error(f"patch dry-run {cwd} failed") + return False + + PATCH_CMD = "patch -f -p1" + f.seek(0) + if subprocess.check_call(shlex.split(PATCH_CMD), stdin=f, cwd=cwd): + logger.error(f"patch {cwd} failed") + return False + except subprocess.CalledProcessError: + logger.error(f"patch {cwd} failed") + return False + return True + + +def build_and_install_wasi_sdk(root): + store_dir = root.joinpath(f'{external_repos["wasi-sdk"]["store_dir"]}').resolve() + if not store_dir.exists(): + logger.error(f"{store_dir} does not found") + return False + + # patch wasi-libc and wasi-sdk + patch_flag = store_dir.joinpath("PATCHED") + if not patch_flag.exists(): + if not run_patch( + root.joinpath("test-tools/build-wasi-sdk/patches/wasi_libc.patch"), + store_dir.joinpath("src/wasi-libc"), + ): + return False + + if not run_patch( + root.joinpath("test-tools/build-wasi-sdk/patches/wasi_sdk.patch"), store_dir + ): + return False + + patch_flag.touch() + else: + logger.info("bypass the patch phase") + + # build + build_flag = store_dir.joinpath("BUILDED") + if not build_flag.exists(): + BUILD_CMD = "make build" + if subprocess.check_call(shlex.split(BUILD_CMD), cwd=store_dir): + logger.error(f"build wasi-sdk failed") + return False + + build_flag.touch() + else: + logger.info("bypass the build phase") + + # install + install_flag = store_dir.joinpath("INSTALLED") + binary_path = root.joinpath("test-tools").resolve() + if not install_flag.exists(): + shutil.copytree( + str(store_dir.joinpath("build/install/opt").resolve()), + str(binary_path), + dirs_exist_ok=True, + ) + + # install headers + emscripten_headers = ( + root.joinpath(external_repos["emscripten"]["store_dir"]) + .joinpath("system") + .resolve() + ) + wasi_sysroot_headers = binary_path.joinpath( + "wasi-sdk/share/wasi-sysroot/include" + ).resolve() + for (src, dst) in emscripten_headers_src_dst: + src = emscripten_headers.joinpath(src) + dst = wasi_sysroot_headers.joinpath(dst) + dst.parent.mkdir(parents=True, exist_ok=True) + shutil.copy(src, dst) + + install_flag.touch() + else: + logger.info("bypass the install phase") + + return True + + +def main(): + console = logging.StreamHandler() + console.setFormatter(logging.Formatter("%(asctime)s - %(message)s")) + logger.setLevel(logging.INFO) + logger.addHandler(console) + logger.propagate = False + + # locate the root of 
WAMR + current_file = pathlib.Path(__file__) + if current_file.is_symlink(): + current_file = pathlib.Path(os.readlink(current_file)) + root = current_file.parent.joinpath("../..").resolve() + logger.info(f"The root of WAMR is {root}") + + # download repos + for repo in external_repos.keys(): + if not download_repo(repo, root): + return False + + # build wasi_sdk and install + if not build_and_install_wasi_sdk(root): + return False + + # TODO install headers from emscripten + + return True + + +if __name__ == "__main__": + sys.exit(0 if main() else 1) diff --git a/test-tools/build-wasi-sdk/include/.gitkeep b/test-tools/build-wasi-sdk/include/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/test-tools/build-wasi-sdk/patches/wasi_libc.patch b/test-tools/build-wasi-sdk/patches/wasi_libc.patch new file mode 100644 index 000000000..e236735b4 --- /dev/null +++ b/test-tools/build-wasi-sdk/patches/wasi_libc.patch @@ -0,0 +1,13 @@ +diff --git a/expected/wasm32-wasi/predefined-macros.txt b/expected/wasm32-wasi/predefined-macros.txt +index c1bb19e..954f3b5 100644 +--- a/expected/wasm32-wasi/predefined-macros.txt ++++ b/expected/wasm32-wasi/predefined-macros.txt +@@ -3002,6 +3002,8 @@ + #define __alignof_is_defined 1 + #define __bitop(x,i,o) ((x)[(i)/8] o (1<<(i)%8)) + #define __bool_true_false_are_defined 1 ++#define __clang_literal_encoding__ "UTF-8" ++#define __clang_wide_literal_encoding__ "UTF-32" + #define __inline inline + #define __restrict restrict + #define __tg_complex(fun,x) (__RETCAST_CX(x)( __FLTCX((x)+I) && __IS_FP(x) ? fun ## f (x) : __LDBLCX((x)+I) ? fun ## l (x) : fun(x) )) diff --git a/test-tools/build-wasi-sdk/patches/wasi_sdk.patch b/test-tools/build-wasi-sdk/patches/wasi_sdk.patch new file mode 100644 index 000000000..0fc4caee7 --- /dev/null +++ b/test-tools/build-wasi-sdk/patches/wasi_sdk.patch @@ -0,0 +1,15 @@ +diff --git a/version.sh b/version.sh +index 8e7c44c..ff0d3ba 100755 +--- a/version.sh ++++ b/version.sh +@@ -1,5 +1,6 @@ + #!/usr/bin/env bash +-set -e +-GIT_DESCR=$(git describe --long --candidates=999 --match='wasi-sdk-*' --dirty='+m' --abbrev=12) +-GIT_PACKAGE_VERSION=$(echo $GIT_DESCR | perl -ne 'if(/^wasi-sdk-(\d+)-(\d+)-g([0-9a-f]{7,12})([+]m)?$/) { if($2 == 0) { print "$1.$2$4" } else { print "$1.$2g$3$4" } exit } else { print "could not parse git description"; exit 1 }';) +-echo $GIT_PACKAGE_VERSION ++#set -e ++#GIT_DESCR=$(git describe --long --candidates=999 --match='wasi-sdk-*' --dirty='+m' --abbrev=12) ++#GIT_PACKAGE_VERSION=$(echo $GIT_DESCR | perl -ne 'if(/^wasi-sdk-(\d+)-(\d+)-g([0-9a-f]{7,12})([+]m)?$/) { if($2 == 0) { print "$1.$2$4" } else { print "$1.$2g$3$4" } exit } else { print "could not parse git description"; exit 1 }';) ++#echo $GIT_PACKAGE_VERSION ++echo wasi-sdk-13-eng diff --git a/wamr-compiler/build_llvm.py b/wamr-compiler/build_llvm.py index 71f788631..6597f61a8 100644 --- a/wamr-compiler/build_llvm.py +++ b/wamr-compiler/build_llvm.py @@ -1,98 +1,14 @@ +#!/usr/bin/env python3 # # Copyright (C) 2019 Intel Corporation. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -#!/usr/bin/env python3 -import os +import pathlib +import subprocess import sys -from pathlib import Path -def clone_llvm(): - llvm_dir = Path("llvm") - if(llvm_dir.exists() == False): - print("Clone llvm to core/deps/ ..") - for line in os.popen("git clone --branch release/11.x https://github.com/llvm/llvm-project.git llvm"): - print(line) - else: - print("llvm source codes already existed") - return llvm_dir - -""" def detect_VS_version(): - program_dirs = [os.environ['ProgramFiles(x86)'], os.environ['ProgramFiles']] - for dir in program_dirs: - vswhere = Path("{}\\Microsoft Visual Studio\\Installer\\vswhere.exe".format(dir)) - if (vswhere.exists()): - print('"{}" -version 14.0,16.0'.format(vswhere)) - for line in os.popen('"{}" -version 14.0,16.0'.format(vswhere)): - keyvalue = line.split(':', maxsplit=1) - if(keyvalue[0] == "installationPath"): - value = keyvalue[1].strip() - for line in os.popen('"{}\\VC\\Auxiliary\\Build\\vcvars32.bat"'.format(value)): - print(line) - break """ - - -def main(): - current_os = sys.platform - print("current OS is ", current_os) - - current_dir = Path.cwd() - deps_dir = current_dir.joinpath( "../core/deps") - - os.chdir(deps_dir) - llvm_dir = clone_llvm() - os.chdir(llvm_dir) - - if(current_os == "linux"): - build_dir_name = "build" - llvm_file = "bin/llvm-lto" - # generator = '"Unix Makefiles"' - elif(current_os == "win32"): - build_dir_name = "win32build" - llvm_file = "LLVM.sln" - # generator = '"Visual Studio 15 2017"' - else: - build_dir_name = "build" - # generator = '""' - - Path(build_dir_name).mkdir(exist_ok = True) - build_dir = Path(build_dir_name) - os.chdir(build_dir) - - if ( not Path(llvm_file).exists()): - core_number = os.cpu_count() - print("Build llvm with", core_number, " cores") - cmd = 'cmake ../llvm \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -DCMAKE_BUILD_TYPE:STRING="Release" \ - -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips;RISCV" \ - -DLLVM_INCLUDE_GO_TESTS=OFF \ - -DLLVM_INCLUDE_TOOLS=OFF \ - -DLLVM_INCLUDE_UTILS=OFF \ - -DLLVM_ENABLE_TERMINFO=OFF \ - -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \ - -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \ - -DLLVM_ENABLE_ZLIB:BOOL=OFF \ - -DLLVM_INCLUDE_DOCS:BOOL=OFF \ - -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \ - -DLLVM_INCLUDE_TESTS:BOOL=OFF \ - -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \ - -DLLVM_APPEND_VC_REV:BOOL=OFF' - print(cmd) - for line in os.popen(cmd): - print(line) - else: - print("llvm has already been Cmaked") - - if(current_os == "linux"): - for line in os.popen("make -j {}".format(core_number)): - print(line) - elif(current_os == "win32"): - print("Please open LLVM.sln in {} to build *Release* version".format(build_dir.absolute())) - - os.chdir(current_dir) - - -if __name__ == "__main__": - main() +script = ( + pathlib.Path(__file__).parent.joinpath("../build-scripts/build_llvm.py").resolve() +) +subprocess.check_call([sys.executable, script]) diff --git a/wamr-compiler/build_llvm.sh b/wamr-compiler/build_llvm.sh index 045b599ad..089e48b2b 100755 --- a/wamr-compiler/build_llvm.sh +++ b/wamr-compiler/build_llvm.sh @@ -3,44 +3,4 @@ # Copyright (C) 2020 Intel Corporation. All rights reserved. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -DEPS_DIR=${PWD}/../core/deps - -cd ${DEPS_DIR} -if [ ! -d "llvm" ]; then - echo "Clone llvm to core/deps/ .." - git clone --depth 1 --branch release/11.x https://github.com/llvm/llvm-project.git llvm -fi - -cd llvm -mkdir -p build -cd build - -if [ ! 
-f bin/llvm-lto ]; then - - CORE_NUM=$(nproc --all) - if [ -z "${CORE_NUM}" ]; then - CORE_NUM=1 - fi - - echo "Build llvm with" ${CORE_NUM} "cores" - - cmake ../llvm \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -DCMAKE_BUILD_TYPE:STRING="Release" \ - -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips;RISCV" \ - -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \ - -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \ - -DLLVM_ENABLE_ZLIB:BOOL=OFF \ - -DLLVM_INCLUDE_DOCS:BOOL=OFF \ - -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \ - -DLLVM_INCLUDE_TESTS:BOOL=OFF \ - -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \ - -DLLVM_APPEND_VC_REV:BOOL=OFF - make -j ${CORE_NUM} - -else - echo "llvm has already been built" -fi - -cd ${PWD} - +/usr/bin/env python3 ../build-scripts/build_llvm.py diff --git a/wamr-compiler/build_llvm_arc.sh b/wamr-compiler/build_llvm_arc.sh index ddd933cd5..c48c30959 100755 --- a/wamr-compiler/build_llvm_arc.sh +++ b/wamr-compiler/build_llvm_arc.sh @@ -3,45 +3,4 @@ # Copyright (C) 2020 Intel Corporation. All rights reserved. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -DEPS_DIR=${PWD}/../core/deps - -cd ${DEPS_DIR} -if [ ! -d "llvm" ]; then - echo "Clone llvm to core/deps/ .." - git clone https://github.com/llvm/llvm-project.git llvm -fi - -cd llvm -mkdir -p build -cd build - -if [ ! -f bin/llvm-lto ]; then - - CORE_NUM=$(nproc --all) - if [ -z "${CORE_NUM}" ]; then - CORE_NUM=1 - fi - - echo "Build llvm with" ${CORE_NUM} "cores" - - cmake ../llvm \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -DCMAKE_BUILD_TYPE:STRING="Release" \ - -DLLVM_TARGETS_TO_BUILD:STRING="X86" \ - -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="ARC" \ - -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \ - -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \ - -DLLVM_ENABLE_ZLIB:BOOL=OFF \ - -DLLVM_INCLUDE_DOCS:BOOL=OFF \ - -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \ - -DLLVM_INCLUDE_TESTS:BOOL=OFF \ - -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \ - -DLLVM_APPEND_VC_REV:BOOL=OFF - make -j ${CORE_NUM} - -else - echo "llvm has already been built" -fi - -cd ${PWD} - +/usr/bin/env python3 ../build-scripts/build_llvm.py --platform arc diff --git a/wamr-compiler/build_llvm_xtensa.sh b/wamr-compiler/build_llvm_xtensa.sh index 97e3404de..8277f9ca0 100755 --- a/wamr-compiler/build_llvm_xtensa.sh +++ b/wamr-compiler/build_llvm_xtensa.sh @@ -1,47 +1,6 @@ #!/bin/sh -# Copyright (C) 2019 Intel Corporation. All rights reserved. +# Copyright (C) 2020 Intel Corporation. All rights reserved. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -DEPS_DIR=${PWD}/../core/deps - -cd ${DEPS_DIR} -if [ ! -d "llvm" ]; then - echo "Clone llvm Xtensa to core/deps/ .." - git clone --depth 1 --branch xtensa_release_10.0.1 https://github.com/espressif/llvm-project.git llvm -fi - -cd llvm -mkdir -p build -cd build - -if [ ! -f bin/llvm-lto ]; then - - CORE_NUM=$(nproc --all) - if [ -z "${CORE_NUM}" ]; then - CORE_NUM=1 - fi - - echo "Build llvm with" ${CORE_NUM} "cores" - - cmake ../llvm \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -DCMAKE_BUILD_TYPE:STRING="Release" \ - -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips;RISCV" \ - -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="Xtensa" \ - -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \ - -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \ - -DLLVM_ENABLE_ZLIB:BOOL=OFF \ - -DLLVM_INCLUDE_DOCS:BOOL=OFF \ - -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \ - -DLLVM_INCLUDE_TESTS:BOOL=OFF \ - -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \ - -DLLVM_APPEND_VC_REV:BOOL=OFF - make -j ${CORE_NUM} - -else - echo "llvm has already been built" -fi - -cd ${PWD} - +/usr/bin/env python3 ../build-scripts/build_llvm.py --platform xtensa
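
Editor's note: taken together, the changes above move the workload samples off an emsdk/wasi-sdk pair installed under /opt and onto a wasi-sdk built in-tree (consumed via `samples/workload/cmake/preparation.cmake`) plus a dockerized per-workload build driven by `samples/workload/docker/build_workload.sh`. The sketch below is a non-authoritative illustration of that flow, assuming the paths introduced in this patch and that the commands are run from the repository root; verify against the in-tree READMEs before relying on it.

```bash
#!/usr/bin/env bash
# Sketch of the workload build flow introduced by this patch (paths assumed
# from the diff; not an official recipe).

# 1. Build wasi-sdk from the pinned sources. The script installs the result
#    under test-tools/wasi-sdk, which is where preparation.cmake now looks
#    for it (it aborts with a hint to run this script if the SDK is missing).
./test-tools/build-wasi-sdk/build_wasi_sdk.py

# 2. Build one workload (bwa is used as the example here) inside the CI
#    docker image. build_workload.sh in each workload directory is a symlink
#    to samples/workload/docker/build_workload.sh, which builds the image
#    from ci/Dockerfile and runs cmake/cmake --build in a bind-mounted tree.
cd samples/workload/bwa
./build_workload.sh

# 3. Per the script's final message, the produced .wasm artifacts end up
#    under the workload's build/ directory (samples/workload/bwa/build here).
```

The same two steps should apply to the other converted workloads (meshoptimizer, wasm-av1), since they gained identical `build_workload.sh` symlinks and the same WASI_SDK_HOME/CMAKE_SYSROOT configure arguments in this patch.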