diff --git a/.github/scripts/install_qemu_xtensa.sh b/.github/scripts/install_qemu_xtensa.sh
new file mode 100755
index 000000000..4a0e0fe5c
--- /dev/null
+++ b/.github/scripts/install_qemu_xtensa.sh
@@ -0,0 +1,10 @@
+#! /bin/sh
+
+set -e
+
+URL=https://github.com/espressif/qemu/releases/download/esp-develop-9.0.0-20240606/qemu-xtensa-softmmu-esp_develop_9.0.0_20240606-x86_64-linux-gnu.tar.xz
+
+DIR=$(mktemp -d)
+cd ${DIR}
+curl -fLsS "${URL}" | xzcat | tar -x
+ln -s ${DIR}/qemu/bin/qemu-system-xtensa /usr/local/bin/qemu-system-xtensa
diff --git a/.github/workflows/build_llvm_libraries.yml b/.github/workflows/build_llvm_libraries.yml
index 80abf57ec..5cde57209 100644
--- a/.github/workflows/build_llvm_libraries.yml
+++ b/.github/workflows/build_llvm_libraries.yml
@@ -14,6 +14,14 @@ on:
       container_image:
         required: false
         type: string
+      extra_build_llvm_options:
+        required: false
+        type: string
+        default: ""
+      cache_key_suffix:
+        required: false
+        type: string
+        default: ""
     outputs:
       cache_key:
         description: "A cached key of LLVM libraries"
@@ -45,7 +53,7 @@ jobs:
 
       - name: retrieve the last commit ID
         id: get_last_commit
-        run: echo "last_commit=$(GH_TOKEN=${{ secrets.GITHUB_TOKEN }} /usr/bin/env python3 ./build_llvm.py --llvm-ver)" >> $GITHUB_OUTPUT
+        run: echo "last_commit=$(GH_TOKEN=${{ secrets.GITHUB_TOKEN }} /usr/bin/env python3 ./build_llvm.py ${{ inputs.extra_build_llvm_options }} --llvm-ver)" >> $GITHUB_OUTPUT
         working-directory: build-scripts
 
       # Bump the prefix number to evict all previous caches and
@@ -54,7 +62,7 @@ jobs:
       # suspect.
       - name: form the cache key of libraries
         id: create_lib_cache_key
-        run: echo "key=0-llvm-libraries-${{ inputs.os }}-${{ inputs.arch }}-${{ steps.get_last_commit.outputs.last_commit }}" >> $GITHUB_OUTPUT
+        run: echo "key=0-llvm-libraries-${{ inputs.os }}-${{ inputs.arch }}-${{ steps.get_last_commit.outputs.last_commit }}${{ inputs.cache_key_suffix }}" >> $GITHUB_OUTPUT
 
       - name: Cache LLVM libraries
         id: retrieve_llvm_libs
@@ -101,5 +109,5 @@ jobs:
 
       - name: Build LLVM libraries
         if: steps.retrieve_llvm_libs.outputs.cache-hit != 'true'
-        run: /usr/bin/env python3 ./build_llvm.py --arch ${{ inputs.arch }}
+        run: /usr/bin/env python3 ./build_llvm.py ${{ inputs.extra_build_llvm_options }} --arch ${{ inputs.arch }}
         working-directory: build-scripts
diff --git a/.github/workflows/build_wamr_vscode_ext.yml b/.github/workflows/build_wamr_vscode_ext.yml
index 322ba1c06..756215e60 100644
--- a/.github/workflows/build_wamr_vscode_ext.yml
+++ b/.github/workflows/build_wamr_vscode_ext.yml
@@ -20,10 +20,10 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
-      - name: Use Node.js 16.x
+      - name: Use Node.js 18.x
         uses: actions/setup-node@v4
         with:
-          node-version: 16.x
+          node-version: 18.x
 
       - name: set vscode extension to correct version
         run: |
@@ -33,7 +33,7 @@ jobs:
 
       - name: generate wamr ide vscode extension
         run: |
-          npm install -g vsce
+          npm install -g @vscode/vsce
           rm -rf node_modules
           npm install
           vsce package
diff --git a/.github/workflows/compilation_on_android_ubuntu.yml b/.github/workflows/compilation_on_android_ubuntu.yml
index d4d329081..249a6f276 100644
--- a/.github/workflows/compilation_on_android_ubuntu.yml
+++ b/.github/workflows/compilation_on_android_ubuntu.yml
@@ -68,6 +68,7 @@ env:
   WAMR_COMPILER_TEST_OPTIONS: "-s wamr_compiler -S -b -P"
   GC_TEST_OPTIONS: "-s spec -G -b -P"
   MEMORY64_TEST_OPTIONS: "-s spec -W -b -P"
+  MULTI_MEMORY_TEST_OPTIONS: "-s spec -E -b -P"
 
 jobs:
   build_llvm_libraries_on_ubuntu_2204:
@@ -148,6 +149,7 @@ jobs:
             "-DWAMR_BUILD_TAIL_CALL=1",
             "-DWAMR_DISABLE_HW_BOUND_CHECK=1",
             "-DWAMR_BUILD_MEMORY64=1",
+            "-DWAMR_BUILD_MULTI_MEMORY=1",
           ]
         os: [ubuntu-22.04]
         platform: [android, linux]
@@ -206,11 +208,9 @@ jobs:
             make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1"
           - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1"
-          # Memory64 only on CLASSIC INTERP mode, and only on 64-bit platform
+          # Memory64 only on CLASSIC INTERP and AOT mode, and only on 64-bit platform
           - make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
             platform: android
-          - make_options_run_mode: $AOT_BUILD_OPTIONS
-            make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
           - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
           - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS
@@ -221,6 +221,21 @@ jobs:
             make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
           - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
+          # Multi memory only on CLASSIC INTERP mode, and only on 64-bit platform
+          - make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
+            platform: android
+          - make_options_run_mode: $AOT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
+          - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
+          - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
+          - make_options_run_mode: $LLVM_LAZY_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
+          - make_options_run_mode: $LLVM_EAGER_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
+          - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
           # Fast-JIT and Multi-Tier-JIT mode don't support android
           - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS
             platform: android
@@ -593,6 +608,7 @@ jobs:
             $WASI_TEST_OPTIONS,
             $GC_TEST_OPTIONS,
             $MEMORY64_TEST_OPTIONS,
+            $MULTI_MEMORY_TEST_OPTIONS,
           ]
         wasi_sdk_release:
           [
@@ -640,18 +656,30 @@ jobs:
             test_option: $MEMORY64_TEST_OPTIONS
           - running_mode: "multi-tier-jit"
             test_option: $MEMORY64_TEST_OPTIONS
+          # aot, fast-interp, fast-jit, llvm-jit, multi-tier-jit don't support Multi Memory 
+          - running_mode: "aot"
+            test_option: $MULTI_MEMORY_TEST_OPTIONS
+          - running_mode: "fast-interp"
+            test_option: $MULTI_MEMORY_TEST_OPTIONS
+          - running_mode: "fast-jit"
+            test_option: $MULTI_MEMORY_TEST_OPTIONS
+          - running_mode: "jit"
+            test_option: $MULTI_MEMORY_TEST_OPTIONS
+          - running_mode: "multi-tier-jit"
+            test_option: $MULTI_MEMORY_TEST_OPTIONS
+
     steps:
       - name: checkout
         uses: actions/checkout@v4
 
       - name: Set-up OCaml
         uses: ocaml/setup-ocaml@v3
-        if: matrix.test_option == '$GC_TEST_OPTIONS' || matrix.test_option == '$MEMORY64_TEST_OPTIONS'
+        if: matrix.test_option == '$GC_TEST_OPTIONS'
         with:
           ocaml-compiler: 4.13
 
       - name: Set-up Ocamlbuild
-        if: matrix.test_option == '$GC_TEST_OPTIONS' || matrix.test_option == '$MEMORY64_TEST_OPTIONS'
+        if: matrix.test_option == '$GC_TEST_OPTIONS'
         run: opam install ocamlbuild dune menhir
 
       - name: download and install wasi-sdk
@@ -717,13 +745,13 @@ jobs:
 
       - name: run tests
         timeout-minutes: 30
-        if: matrix.test_option != '$GC_TEST_OPTIONS' && matrix.test_option != '$MEMORY64_TEST_OPTIONS'
+        if: matrix.test_option != '$GC_TEST_OPTIONS'
         run: ./test_wamr.sh ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites
 
-      - name: run gc or memory64 tests
+      - name: run gc tests
         timeout-minutes: 20
-        if: matrix.test_option == '$GC_TEST_OPTIONS' || matrix.test_option == '$MEMORY64_TEST_OPTIONS'
+        if: matrix.test_option == '$GC_TEST_OPTIONS'
         run: |
           eval $(opam env)
           ./test_wamr.sh ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
diff --git a/.github/workflows/compilation_on_nuttx.yml b/.github/workflows/compilation_on_nuttx.yml
index 09bdfd324..2f8014fac 100644
--- a/.github/workflows/compilation_on_nuttx.yml
+++ b/.github/workflows/compilation_on_nuttx.yml
@@ -68,32 +68,31 @@ jobs:
           # arm64
           "boards/arm64/qemu/qemu-armv8a/configs/nsh",
         ]
-        wamr_config_option: [
-          "CONFIG_INTERPRETERS_WAMR=y\\nCONFIG_INTERPRETERS_WAMR_AOT=y\\nCONFIG_INTERPRETERS_WAMR_FAST=y\\n",
-          "CONFIG_INTERPRETERS_WAMR=y\\nCONFIG_INTERPRETERS_WAMR_AOT=y\\nCONFIG_INTERPRETERS_WAMR_FAST=y\\nCONFIG_INTERPRETERS_WAMR_LIBC_WASI=y\\n",
-          "CONFIG_INTERPRETERS_WAMR=y\\nCONFIG_INTERPRETERS_WAMR_AOT=y\\nCONFIG_INTERPRETERS_WAMR_FAST=y\\nCONFIG_INTERPRETERS_WAMR_LIBC_BUILTIN=y\\n",
-          "CONFIG_INTERPRETERS_WAMR=y\\nCONFIG_INTERPRETERS_WAMR_AOT=y\\nCONFIG_INTERPRETERS_WAMR_CLASSIC=y\\n",
-          "CONFIG_INTERPRETERS_WAMR=y\\nCONFIG_INTERPRETERS_WAMR_AOT=y\\nCONFIG_INTERPRETERS_WAMR_CLASSIC=y\\nCONFIG_INTERPRETERS_WAMR_LIBC_WASI=y\\n",
-          "CONFIG_INTERPRETERS_WAMR=y\\nCONFIG_INTERPRETERS_WAMR_AOT=y\\nCONFIG_INTERPRETERS_WAMR_CLASSIC=y\\nCONFIG_INTERPRETERS_WAMR_LIBC_BUILTIN=y\\n",
-          "CONFIG_INTERPRETERS_WAMR=y\\nCONFIG_INTERPRETERS_WAMR_AOT=y\\nCONFIG_INTERPRETERS_WAMR_LIBC_BUILTIN=y\\n",
-          "CONFIG_INTERPRETERS_WAMR=y\\nCONFIG_INTERPRETERS_WAMR_AOT=y\\n",
-          "CONFIG_INTERPRETERS_WAMR=y\\nCONFIG_INTERPRETERS_WAMR_FAST=y\\n",
-          "CONFIG_INTERPRETERS_WAMR=y\\nCONFIG_INTERPRETERS_WAMR_CLASSIC=y\\n",
-        ]
+
+        wamr_config_option:
+          - "CONFIG_INTERPRETERS_WAMR_AOT"
+          - "CONFIG_INTERPRETERS_WAMR_FAST"
+          - "CONFIG_INTERPRETERS_WAMR_CLASSIC"
+          - "CONFIG_INTERPRETERS_WAMR_AOT CONFIG_INTERPRETERS_WAMR_FAST"
+          - "CONFIG_INTERPRETERS_WAMR_AOT CONFIG_INTERPRETERS_WAMR_FAST CONFIG_INTERPRETERS_WAMR_LIBC_WASI"
+          - "CONFIG_INTERPRETERS_WAMR_AOT CONFIG_INTERPRETERS_WAMR_FAST CONFIG_INTERPRETERS_WAMR_LIBC_BUILTIN"
+          - "CONFIG_INTERPRETERS_WAMR_AOT CONFIG_INTERPRETERS_WAMR_CLASSIC"
+          - "CONFIG_INTERPRETERS_WAMR_AOT CONFIG_INTERPRETERS_WAMR_CLASSIC CONFIG_INTERPRETERS_WAMR_LIBC_WASI"
+          - "CONFIG_INTERPRETERS_WAMR_AOT CONFIG_INTERPRETERS_WAMR_CLASSIC CONFIG_INTERPRETERS_WAMR_LIBC_WASI"
 
     steps:
       - name: Checkout NuttX
         uses: actions/checkout@v4
         with:
           repository: apache/nuttx
-          ref: releases/12.4
+          ref: releases/12.6
           path: nuttx
 
       - name: Checkout NuttX Apps
         uses: actions/checkout@v4
         with:
           repository: apache/nuttx-apps
-          ref: releases/12.4
+          ref: releases/12.6
           path: apps
 
       - name: Checkout WAMR
@@ -102,16 +101,22 @@ jobs:
           repository: ${{ github.repository }}
           path: apps/interpreters/wamr/wamr
 
-      - name: Enable WAMR for NuttX
+      - name: Configure WAMR
+        working-directory: nuttx
         run: |
-          find nuttx/boards -name defconfig | xargs sed -i '$a\CONFIG_EOL_IS_LF=y\n${{ matrix.wamr_config_option }}'
-          find nuttx/boards/sim -name defconfig | xargs sed -i '$a\CONFIG_LIBM=y\n'
+          tools/configure.sh ${{ matrix.nuttx_board_config }}
+          kconfig-tweak --enable CONFIG_PSEUDOFS_SOFTLINKS
+          kconfig-tweak --enable CONFIG_INTERPRETERS_WAMR
+          kconfig-tweak --enable CONFIG_INTERPRETERS_IWASM_TASK
+          kconfig-tweak --set-val CONFIG_INTERPRETERS_WAMR_PRIORITY 100
+          kconfig-tweak --set-val CONFIG_INTERPRETERS_WAMR_STACKSIZE 8192
+          for x in ${{ matrix.wamr_config_option }}; do
+            kconfig-tweak --enable $x
+          done
 
       - name: Build
-        run: |
-          cd nuttx
-          tools/configure.sh ${{ matrix.nuttx_board_config }}
-          make -j$(nproc) EXTRAFLAGS=-Werror
+        working-directory: nuttx
+        run: make -j$(nproc) EXTRAFLAGS=-Werror
 
       - name: Checkout Bloaty
         uses: actions/checkout@v3
diff --git a/.github/workflows/nightly_run.yml b/.github/workflows/nightly_run.yml
index 1124df629..f39085d09 100644
--- a/.github/workflows/nightly_run.yml
+++ b/.github/workflows/nightly_run.yml
@@ -132,6 +132,7 @@ jobs:
             "-DWAMR_BUILD_TAIL_CALL=1",
             "-DWAMR_DISABLE_HW_BOUND_CHECK=1",
             "-DWAMR_BUILD_MEMORY64=1",
+            "-DWAMR_BUILD_MULTI_MEMORY=1",
           ]
         os: [ubuntu-20.04]
         platform: [android, linux]
@@ -190,11 +191,9 @@ jobs:
             make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1"
           - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1"
-          # Memory64 only on CLASSIC INTERP mode, and only on 64-bit platform
+          # Memory64 only on CLASSIC INTERP and AOT mode, and only on 64-bit platform
           - make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
             platform: android
-          - make_options_run_mode: $AOT_BUILD_OPTIONS
-            make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
           - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
           - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS
@@ -205,6 +204,21 @@ jobs:
             make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
           - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
+          # Multi memory only on CLASSIC INTERP mode, and only on 64-bit platform
+          - make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
+            platform: android
+          - make_options_run_mode: $AOT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
+          - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
+          - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
+          - make_options_run_mode: $LLVM_LAZY_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
+          - make_options_run_mode: $LLVM_EAGER_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
+          - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
           # Fast-JIT and Multi-Tier-JIT mode don't support android
           - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS
             platform: android
@@ -289,6 +303,7 @@ jobs:
             "-DWAMR_BUILD_TAIL_CALL=1",
             "-DWAMR_DISABLE_HW_BOUND_CHECK=1",
             "-DWAMR_BUILD_MEMORY64=1",
+            "-DWAMR_BUILD_MULTI_MEMORY=1",
           ]
         exclude:
           # incompatible feature and platform
@@ -322,17 +337,19 @@ jobs:
             make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
           - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_MEMORY64=1"
+          # Memory64 only on CLASSIC INTERP mode
+          - make_options_run_mode: $FAST_INTERP_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
+          - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MEMORY=1"
     steps:
-      - name: checkout
-        uses: actions/checkout@v3
-
       - name: Install dependencies
-        uses: nick-fields/retry@v2
-        with:
-          timeout_minutes: 10
-          max_attempts: 3
-          command: apt update && apt install -y make g++-4.8 gcc-4.8 wget git
-          on_retry_command: sudo rm -r /var/lib/apt/lists/*
+        run: |
+          apt update && apt install -y make g++-4.8 gcc-4.8 wget git
+
+      - name: checkout
+        run: |
+          git clone https://github.com/${{ github.repository }} wamr
 
       - name: Install cmake
         run: |
@@ -345,7 +362,7 @@ jobs:
           mkdir build && cd build
           cmake .. ${{ matrix.make_options_run_mode }} ${{ matrix.make_options_feature }} -DCMAKE_C_COMPILER=gcc-4.8 -DCMAKE_CXX_COMPILER=g++-4.8
           cmake --build . --config Release --parallel 4
-        working-directory: product-mini/platforms/linux
+        working-directory: wamr/product-mini/platforms/linux
 
   build_samples_wasm_c_api:
     needs:
diff --git a/.github/workflows/spec_test_on_nuttx.yml b/.github/workflows/spec_test_on_nuttx.yml
index 4976cab86..f2e59ba68 100644
--- a/.github/workflows/spec_test_on_nuttx.yml
+++ b/.github/workflows/spec_test_on_nuttx.yml
@@ -28,8 +28,6 @@ on:
 env:
   LLVM_CACHE_SUFFIX: "build-llvm_libraries_ex"
   WASI_SDK_PATH: "/opt/wasi-sdk"
-  WAMR_COMMON_OPTION:
-    "CONFIG_INTERPRETERS_WAMR=y\\nCONFIG_INTERPRETERS_WAMR_STACKSIZE=327680\\nCONFIG_INTERPRETERS_WAMR_LOG=y\\nCONFIG_INTERPRETERS_WAMR_LIBC_BUILTIN=y\\nCONFIG_INTERPRETERS_WAMR_REF_TYPES=y\\nCONFIG_INTERPRETERS_WAMR_ENABLE_SPEC_TEST=y\\nCONFIG_INTERPRETERS_WAMR_SHARED_MEMORY=y\\nCONFIG_INTERPRETERS_WAMR_BULK_MEMORY=y\\nCONFIG_EOL_IS_LF=y\\nCONFIG_ARM_SEMIHOSTING_HOSTFS=y\\nCONFIG_ARM_SEMIHOSTING_HOSTFS_CACHE_COHERENCE=y\\nCONFIG_RISCV_SEMIHOSTING_HOSTFS=y\\nCONFIG_FS_HOSTFS=y\\nCONFIG_LIBC_FLOATINGPOINT=y\\nCONFIG_INTERPRETERS_WAMR_STACK_GUARD_SIZE=1024\\n"
 
 jobs:
   build_llvm_libraries:
@@ -39,9 +37,18 @@ jobs:
       arch: "ARM RISCV AArch64"
       container_image: ghcr.io/no1wudi/nuttx/apache-nuttx-ci-linux@sha256:8c4e00b607d4d6d66ba8f51c4544819a616eac69d3a2ac669e2af2150e2eb0f9
 
+  build_llvm_libraries_xtensa:
+    uses: ./.github/workflows/build_llvm_libraries.yml
+    with:
+      os: "ubuntu-22.04"
+      arch: "Xtensa"
+      extra_build_llvm_options: "--platform xtensa"
+      cache_key_suffix: "-xtensa"
+      container_image: ghcr.io/no1wudi/nuttx/apache-nuttx-ci-linux@sha256:8c4e00b607d4d6d66ba8f51c4544819a616eac69d3a2ac669e2af2150e2eb0f9
+
   spec_test_on_qemu:
     runs-on: ubuntu-latest
-    needs: [build_llvm_libraries]
+    needs: [build_llvm_libraries, build_llvm_libraries_xtensa]
     container:
       image: ghcr.io/no1wudi/nuttx/apache-nuttx-ci-linux@sha256:8c4e00b607d4d6d66ba8f51c4544819a616eac69d3a2ac669e2af2150e2eb0f9
     strategy:
@@ -67,6 +74,11 @@ jobs:
             target: "riscv32",
             fpu_type: "none"
           },
+          {
+            config: "boards/risc-v/qemu-rv/rv-virt/configs/nsh",
+            target: "riscv32_ilp32f",
+            fpu_type: "fp"
+          },
           # {
           #   config: "boards/risc-v/qemu-rv/rv-virt/configs/nsh",
           #   target: "riscv32_ilp32d",
@@ -77,50 +89,61 @@ jobs:
             target: "riscv64",
             fpu_type: "none"
           },
+          {
+            config: "boards/xtensa/esp32s3/esp32s3-devkit/configs/qemu_debug",
+            target: "xtensa",
+            fpu_type: "none"
+          },
         ]
 
         wamr_test_option: [
           {
             mode: "-t aot",
-            option: "CONFIG_INTERPRETERS_WAMR_AOT=y\\n"
+            option: "CONFIG_INTERPRETERS_WAMR_AOT"
+          },
+          {
+            mode: "-t aot -X",
+            option: "CONFIG_INTERPRETERS_WAMR_AOT"
           },
           # {
-          #   mode: "-t aot -X",
-          #   option: "CONFIG_INTERPRETERS_WAMR_AOT=y\\n"
-          # },
-          # {
           #   mode: "-t classic-interp",
-          #   option: "CONFIG_INTERPRETERS_WAMR_CLASSIC=y\\n"
+          #   option: "CONFIG_INTERPRETERS_WAMR_CLASSIC"
           # },
           # {
           #   mode: "-t fast-interp",
-          #   option: "CONFIG_INTERPRETERS_WAMR_FAST=y\\n"
+          #   option: "CONFIG_INTERPRETERS_WAMR_FAST"
           # },
         ]
 
         wamr_feature_option:
           # Empty option for default
           - { option: "", mode: "" }
-          - { option: "CONFIG_INTERPRETERS_WAMR_GC=y\\nCONFIG_INTERPRETERS_WAMR_AOT_STACK_FRAME=y\\n", mode: "-G" }
+          - { option: "CONFIG_INTERPRETERS_WAMR_GC CONFIG_INTERPRETERS_WAMR_AOT_STACK_FRAME", mode: "-G" }
 
         exclude:
           # XIP is not fully supported yet on RISCV64, some relocations can not be resolved
           - target_config: { config: "boards/risc-v/qemu-rv/rv-virt/configs/nsh64" }
             wamr_test_option: { mode: "-t aot -X" }
 
+          # Our xtensa environment doesn't have enough memory
+          - target_config: { target: "xtensa" }
+            wamr_feature_option: { mode: "-G" }
+
     steps:
+      # Note: we use an unreleased version nuttx for xtensa because
+      # 12.4 doesn't contain necessary esp32s3 changes.
       - name: Checkout NuttX
         uses: actions/checkout@v4
         with:
           repository: apache/nuttx
-          ref: releases/12.4
+          ref: ${{ matrix.target_config.target == 'xtensa' && '985d395b025cf2012b22f6bb4461959fa6d87645' || 'releases/12.6' }}
           path: nuttx
 
       - name: Checkout NuttX Apps
         uses: actions/checkout@v4
         with:
           repository: apache/nuttx-apps
-          ref: releases/12.4
+          ref: ${{ matrix.target_config.target == 'xtensa' && '2ef3eb25c0cec944b13792185f7e5d5a05990d5f' || 'releases/12.6' }}
           path: apps
 
       - name: Checkout WAMR
@@ -140,7 +163,7 @@ jobs:
             ./core/deps/llvm/build/lib
             ./core/deps/llvm/build/libexec
             ./core/deps/llvm/build/share
-          key: ${{ needs.build_llvm_libraries.outputs.cache_key }}
+          key: ${{ matrix.target_config.target == 'xtensa' && needs.build_llvm_libraries_xtensa.outputs.cache_key || needs.build_llvm_libraries.outputs.cache_key }}
 
       - name: Quit if cache miss
         if: contains(matrix.wamr_test_option.mode, 'aot') && steps.retrieve_llvm_libs.outputs.cache-hit != 'true'
@@ -152,7 +175,12 @@ jobs:
 
       # Inject the config option to NuttX
       # TODO: Merge this into NuttX once GC is generally available
+      #
+      # Note: the version of nuttx-apps we use for xtensa does have
+      # an equivalent. (the default of INTERPRETERS_WAMR_TAIL_CALL is
+      # different though.)
       - name: Modify Kconfig
+        if: matrix.target_config.target != 'xtensa'
         run: |
           echo "\n" >> apps/interpreters/wamr/Kconfig
           echo "config INTERPRETERS_WAMR_GC" >> apps/interpreters/wamr/Kconfig
@@ -167,29 +195,6 @@ jobs:
           echo "\tbool \"Enable Tail Call\"" >> apps/interpreters/wamr/Kconfig
           echo "\tdefault y" >> apps/interpreters/wamr/Kconfig
 
-      - name: Enable WAMR for NuttX
-        run: |
-          find nuttx/boards -name defconfig | xargs sed -i '$a\${{ env.WAMR_COMMON_OPTION }}'
-      
-      - name: Enable WAMR Interpreter for NuttX
-        run: |
-          find nuttx/boards -name defconfig | xargs sed -i '$a\${{ matrix.wamr_test_option.option }}'
-
-      - name: Enable WAMR Feature for NuttX
-        if: matrix.wamr_feature_option.option != ''
-        run: |
-          find nuttx/boards -name defconfig | xargs sed -i '$a\${{ matrix.wamr_feature_option.option }}'
-
-      - name: Disable FPU for NuttX
-        if: matrix.target_config.fpu_type == 'none'
-        run: |
-          find nuttx/boards -name defconfig | xargs sed -i '$a\# CONFIG_ARCH_FPU is not set\n'
-
-      - name: Disable DPFPU for NuttX
-        if: matrix.target_config.fpu_type == 'fp'
-        run: |
-          find nuttx/boards -name defconfig | xargs sed -i '$a\# CONFIG_ARCH_DPFPU is not set\n'
-
       - name: Build wamrc
         if: contains(matrix.wamr_test_option.mode, 'aot')
         working-directory: apps/interpreters/wamr/wamr/wamr-compiler
@@ -197,15 +202,159 @@ jobs:
           cmake -Bbuild .
           cmake --build build
 
-      - name: Build
-        id: build_firmware
+      # the nuttx version we use for xtensa requires esptool.py newer than
+      # what we have in our version of the apache-nuttx-ci-linux image.
+      - name: Install the latest esptool.py (xtensa)
+        if: matrix.target_config.target == 'xtensa'
+        run: |
+          pip3 install esptool==4.7.0
+          esptool.py version
+
+      - name: Configure NuttX
+        run: |
+          tools/configure.sh ${{ matrix.target_config.config }}
+        working-directory: nuttx
+
+      # depending on configurations, the iwasm command line generated
+      # by spec-test-script can be longer than the default NSH_LINELEN,
+      # which is 64 or 80.
+      - name: Enable WAMR for NuttX
+        run: |
+          kconfig-tweak --enable CONFIG_INTERPRETERS_WAMR
+          kconfig-tweak --enable CONFIG_INTERPRETERS_WAMR_LOG
+          kconfig-tweak --enable CONFIG_INTERPRETERS_WAMR_LIBC_BUILTIN
+          kconfig-tweak --enable CONFIG_INTERPRETERS_WAMR_REF_TYPES
+          kconfig-tweak --enable CONFIG_INTERPRETERS_WAMR_ENABLE_SPEC_TEST
+          kconfig-tweak --enable CONFIG_INTERPRETERS_WAMR_SHARED_MEMORY
+          kconfig-tweak --enable CONFIG_INTERPRETERS_WAMR_BULK_MEMORY
+          kconfig-tweak --set-val CONFIG_INTERPRETERS_WAMR_STACK_GUARD_SIZE 1024
+          kconfig-tweak --enable CONFIG_FS_HOSTFS
+          kconfig-tweak --enable CONFIG_ARM_SEMIHOSTING_HOSTFS
+          kconfig-tweak --enable CONFIG_ARM_SEMIHOSTING_HOSTFS_CACHE_COHERENCE
+          kconfig-tweak --enable CONFIG_RISCV_SEMIHOSTING_HOSTFS
+          kconfig-tweak --enable CONFIG_RISCV_SEMIHOSTING_HOSTFS_CACHE_COHERENCE
+          kconfig-tweak --enable CONFIG_XTENSA_SEMIHOSTING_HOSTFS
+          kconfig-tweak --enable CONFIG_XTENSA_SEMIHOSTING_HOSTFS_CACHE_COHERENCE
+          kconfig-tweak --enable CONFIG_LIBC_FLOATINGPOINT
+          kconfig-tweak --set-val CONFIG_NSH_LINELEN 255
+        working-directory: nuttx
+
+      - name: Set WAMR stack size for NuttX
+        if: matrix.target_config.target != 'xtensa'
+        run: |
+          kconfig-tweak --set-val CONFIG_INTERPRETERS_WAMR_STACKSIZE 327680
+        working-directory: nuttx
+
+      # because qemu doesn't have a proper emulation of esp32s3 psram,
+      # we are limited to the internal ram, which is about 400KB.
+      - name: Set WAMR stack size for NuttX (xtensa)
+        if: matrix.target_config.target == 'xtensa'
+        run: |
+          kconfig-tweak --set-val CONFIG_INTERPRETERS_WAMR_STACKSIZE 25600
+        working-directory: nuttx
+
+      - name: Enable WAMR interpreter/aot runtime for NuttX
+        if: matrix.wamr_test_option.option != ''
+        run: |
+          for x in ${{ matrix.wamr_test_option.option }}; do
+            kconfig-tweak --enable $x
+          done
+        working-directory: nuttx
+
+      - name: Enable WAMR Features for NuttX
+        if: matrix.wamr_feature_option.option != ''
+        run: |
+          for x in ${{ matrix.wamr_feature_option.option }}; do
+            kconfig-tweak --enable $x
+          done
+        working-directory: nuttx
+
+      - name: Disable FPU for NuttX
+        if: matrix.target_config.fpu_type == 'none'
+        run: |
+          kconfig-tweak --disable CONFIG_ARCH_FPU
+        working-directory: nuttx
+
+      - name: Disable DPFPU for NuttX
+        if: matrix.target_config.fpu_type == 'fp'
+        run: |
+          kconfig-tweak --disable CONFIG_ARCH_DPFPU
+        working-directory: nuttx
+
+      # Note: while a real hardware would need
+      # INTERPRETERS_WAMR_MEM_DUAL_BUS_MIRROR=y,
+      # it doesn't work with xtensa qemu which we use on the CI because it
+      # doesn't have a proper emulation of I/D separate mappings.
+      # we work it around by using INTERPRETERS_WAMR_MEM_DUAL_BUS_MIRROR=n.
+      # this configuration won't work on a real hardware.
+      - name: Tweak NuttX config (xtensa)
+        if: matrix.target_config.target == 'xtensa'
+        run: |
+          kconfig-tweak --enable CONFIG_INTERPRETERS_WAMR_AOT_WORD_ALIGN_READ
+          kconfig-tweak --disable CONFIG_INTERPRETERS_WAMR_MEM_DUAL_BUS_MIRROR
+        working-directory: nuttx
+
+      - name: Build NuttX
+        run: |
+          make olddefconfig
+          make -j$(nproc)
+        working-directory: nuttx
+
+      # for xtensa, build a 8MB firmware image.
+      # simple boot is assumed. (thus the nuttx.bin offset in the image is 0)
+      # qemu will infer the flash size from the file size.
+      - name: Post build processing (xtensa)
+        if: matrix.target_config.target == 'xtensa'
         run: |
           cd nuttx
-          tools/configure.sh ${{ matrix.target_config.config }}
-          make -j$(nproc)
-          echo "firmware=$PWD/nuttx" >> $GITHUB_OUTPUT
-      
+          dd if=/dev/zero of=flash.img bs=1024 count=8192
+          dd if=nuttx.bin of=flash.img conv=notrunc
+          mv flash.img nuttx
+
+      - name: Build firmware path
+        id: build_firmware_path
+        run: |
+          echo "firmware=$PWD/nuttx/nuttx" >> $GITHUB_OUTPUT
+
+      # for xtensa, use the espressif fork of qemu, which has esp32s3 support.
+      - name: Install QEMU (xtensa)
+        if: matrix.target_config.target == 'xtensa'
+        run: |
+          apt-get remove -y qemu-system-misc
+          apt-get update && apt-get install -y libsdl2-2.0-0
+          ./.github/scripts/install_qemu_xtensa.sh
+          qemu-system-xtensa --version
+        working-directory: apps/interpreters/wamr/wamr
+
       - name: Test
+        if: matrix.target_config.target != 'xtensa'
         run: |
           cd apps/interpreters/wamr/wamr/tests/wamr-test-suites
-          ./test_wamr.sh -s spec ${{ matrix.wamr_test_option.mode }} -m ${{ matrix.target_config.target }} -b -Q -P -F ${{ steps.build_firmware.outputs.firmware }} ${{ matrix.wamr_feature_option.mode}}
+          ./test_wamr.sh -s spec ${{ matrix.wamr_test_option.mode }} -m ${{ matrix.target_config.target }} -b -Q -P -F ${{ steps.build_firmware_path.outputs.firmware }} ${{ matrix.wamr_feature_option.mode}}
+
+      # for xtensa, for some reasons, when running the tests
+      # with test_wamr.sh -P, nuttx occasionally hangs after
+      # "total segments stored 6" on the CI.
+      # i (yamamoto) couldn't reproduce it locally (macOS) even
+      # with the identical flash image.
+      # for now, run the tests without -P.
+      - name: Test
+        if: matrix.target_config.target == 'xtensa'
+        run: |
+          cd apps/interpreters/wamr/wamr/tests/wamr-test-suites
+          ./test_wamr.sh -s spec ${{ matrix.wamr_test_option.mode }} -m ${{ matrix.target_config.target }} -b -Q -F ${{ steps.build_firmware_path.outputs.firmware }} ${{ matrix.wamr_feature_option.mode}}
+
+      - name: pack the log
+        if: always()
+        run: |
+          mkdir log
+          cp $PWD/nuttx/.config log/dot-config
+          cp ${{ steps.build_firmware_path.outputs.firmware }} log
+          tar -C apps/interpreters/wamr/wamr/tests/wamr-test-suites/workspace -cvzf log/report.tgz report
+
+      - name: upload the log
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: spec-test-log-${{ github.run_id }}-${{ strategy.job-index }}-${{ matrix.target_config.target }}
+          path: log
diff --git a/.gitignore b/.gitignore
index b85dd392c..355d391f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,12 +5,14 @@
 .venv
 /.idea
 **/cmake-build-*/
-**/*build/
+**/*build*/
+!/build-scripts
 *.obj
 *.a
 *.so
 .clangd
 .DS_Store
+*.o
 
 core/deps/**
 core/shared/mem-alloc/tlsf
@@ -36,4 +38,4 @@ tests/benchmarks/coremark/coremark*
 samples/workload/include/**
 !samples/workload/include/.gitkeep
 
-# core/iwasm/libraries/wasi-threads
\ No newline at end of file
+# core/iwasm/libraries/wasi-threads
diff --git a/ATTRIBUTIONS.md b/ATTRIBUTIONS.md
index b6e556d2b..cba0e5c4b 100644
--- a/ATTRIBUTIONS.md
+++ b/ATTRIBUTIONS.md
@@ -16,7 +16,7 @@ WAMR project reused some components from other open source project:
 - **asmjit**: for the Fast JIT x86-64 codegen implementation
 - **zydis**: for the Fast JIT x86-64 codegen implementation
 - **NuttX ELF headers**: used in core/iwasm/aot/debug/elf_parser.c
-- **Dhrystone**: for the test benchmakr dhrystone
+- **Dhrystone**: for the test benchmark dhrystone
 
 The WAMR fast interpreter is a clean room development. We would acknowledge the inspirations by [WASM3](https://github.com/wasm3/wasm3) open source project for the approach of pre-calculated operand stack location.
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bedf0af6c..c7f766689 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -121,7 +121,7 @@ set (WAMR_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
 include (${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake)
 
-set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wformat -Wformat-security -Wshadow -Wno-unused-parameter")
+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wformat -Wformat-security -Wshadow -Wno-unused-parameter -fvisibility=hidden")
 # set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wconversion -Wsign-conversion")
 
 set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wformat -Wformat-security -Wno-unused")
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 988315ef2..e296cdf9e 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -1,3 +1,77 @@
+## WAMR-2.1.2
+
+### Breaking Changes
+ - wasi-nn: Apply new architecture (#3692)
+
+### New Features
+ - [wasi-nn] Add a new wasi-nn backend openvino (#3603)
+ - Add APIs into wasm_c_api.h to summary wasm function execution duration (#3639)
+ - Add support for RISCV32 ILP32F (#3708)
+
+### Bug Fixes
+ - libc-builtin: Fix function prototype for wasm_runtime_module_realloc (#3702)
+ - Fix potential memory leak in insert_native_symbol (#3712)
+ - aot compiler: Fix NaN handling for opcode f32/f64.const in XIP mode (#3721)
+ - Fix table idx resolving in op call_indirect/return_call_indirect (#3726)
+
+### Enhancements
+ - Remove a few hardcoded spec test knowledge from the core library (#3648)
+ - Change log of import function to be consistent (#3656)
+ - libc-builtin: Fix a printf format (#3652)
+ - Set compile symbol visibility to hidden in cmake (#3655)
+ - wamrc: Add --mllvm= option (#3658)
+ - wamr-compiler: Avoid size-level tweak if target is specified (#3659)
+ - aot runtime: Add missing arm/thumb relocations (#3660)
+ - aot compiler: Enlarge AOTNativeSymbol->symbol (#3662)
+ - aot compiler: Bail out on too long native symbol names (#3663)
+ - Support more features for rt-thread (#3661)
+ - Zephyr User Mode Support (#3650)
+ - Set posix thread name for debug build (#3657)
+ - Add emscripten_sleep() wrapper to libc-emcc (#3669)
+ - Fix a compilation warning (#3682)
+ - wamrc: Add some help text for --size-level (#3689)
+ - Restore linux iwasm default visibility (#3691)
+ - posix_thread.c: Restore old signal alternate stack before thread exit (#3693)
+ - libc-wasi: Make rights of STDIN/STDOUT/STDERR fixed and overlook their access modes (#3694)
+ - [refactoring] Extract read leb to a separate file, share the code between loader and mini loader (#3701)
+ - debug-interp: Only add lock when signal_flag is SIG_SINGSTEP (#3704)
+ - Fix compilation warnings (#3707)
+ - Add missing headers in bh_atomic.h and aot_llvm_extra.cpp (#3715)
+ - Update std atomic check and simd compatibility check for arc compiler (#3716)
+ - aot compiler: Track non-0x00 tableindex as ref types use (#3695)
+ - compilation: Use the dedicated stack-sizes section only for AOT (#3732)
+ - riscv: Add missing relocation intrinsics for __fixdfsi/__ltdf2 (#3733)
+
+### Others
+ - Fix night run CI (#3640)
+ - spec-test-script/runtest.py: Don't assume the tmp dir path (#3632)
+ - wamr-test-suites: Remove dead code (wasi_test) (#3634)
+ - wamr-test-suites/test_wamr.sh: Add an option to specify wamrc binary (#3635)
+ - CI: Build llvm for xtensa (#3637)
+ - spec-test-script/runtest.py: Avoid specifying -v=0 unnecessarily (#3642)
+ - spec-test-script: Add xtensa case (#3643)
+ - spec-test-script/runtest.py: Move "--size-level=1" to common place for RISCV64 (#3644)
+ - spec-test-script/runtest.py: Use a shorter timeout when expected to fail (#3647)
+ - spec-test-script: Make case_last_words larger (#3651)
+ - spec-test-script/runtest.py: Reduce stack size for aot w/o gc (#3653)
+ - spec-test-script: Skip a few tests for xtensa qemu (#3664)
+ - spec-test-script: Use -mtext-section-literals for xtensa xip (#3666)
+ - spec_test_on_nuttx.yml: Add xtensa (#3665)
+ - spec_test_on_nuttx.yml: Enable xip (#3671)
+ - spec_test_on_nuttx.yml: Record more logs (#3670)
+ - spec_test_on_nuttx.yml: Replace sed with kconfig-tweak (#3672)
+ - spec_test_on_nuttx.yml: Retire CONFIG_EOL_IS_LF (#3676)
+ - spec-test-script/runtest.py: Use wamrc --xip option for xip (#3683)
+ - CI: Bump NuttX version to 12.6 (#3684)
+ - wamr-test-suites: Clean up generated tmp files after spec test (#3700)
+ - test_wamr.sh: Fix build wabt tool (#3703)
+ - NuttX: Retire CONFIG_ARCH_RV32IM and CONFIG_ARCH_RV64GC (#3717)
+ - runtest.py: Normallize option handling for XIP mode (#3722)
+ - CI: Enable XIP spectest for RISCV32 ILP32F (#3727)
+ - CI: Unify configuration stage for NuttX (#3725)
+
+---
+
 ## WAMR-2.1.1
 
 ### Breaking Changes
diff --git a/build-scripts/SConscript b/build-scripts/SConscript
index 648373b38..ccb1b19f4 100644
--- a/build-scripts/SConscript
+++ b/build-scripts/SConscript
@@ -31,9 +31,11 @@ if GetDepend(['WAMR_BUILD_LIBC_BUILTIN']):
 
 if GetDepend(['WAMR_BUILD_LIBC_WASI']):
     objs += SConscript(os.path.join(IWASM_DIR, 'libraries', 'libc-wasi', 'SConscript'))
+    objs += SConscript(os.path.join(SHARED_DIR, 'platform', 'common', 'posix', 'SConscript'))
+    objs += SConscript(os.path.join(SHARED_DIR, 'platform', 'common', 'libc-util', 'SConscript'))
 
 if GetDepend(['WAMR_BUILD_LIB_PTHREAD']):
-    objs += SConscript(os.path.join(IWASM_DIR, 'libraries', 'libc-pthread', 'SConscript'))
+    objs += SConscript(os.path.join(IWASM_DIR, 'libraries', 'lib-pthread', 'SConscript'))
 
 if GetDepend(['WAMR_BUILD_THREAD_MGR']):
     objs += SConscript(os.path.join(IWASM_DIR, 'libraries', 'thread-mgr', 'SConscript'))
@@ -41,6 +43,9 @@ if GetDepend(['WAMR_BUILD_THREAD_MGR']):
 if GetDepend(['WAMR_BUILD_LIBC_EMCC']):
     objs += SConscript(os.path.join(IWASM_DIR, 'libraries', 'libc-emmc', 'SConscript'))
 
+if GetDepend(['WAMR_BUILD_LIB_WASI_THREADS']):
+    objs += SConscript(os.path.join(IWASM_DIR, 'libraries', 'lib-wasi-threads', 'SConscript'))
+
 objs += SConscript(os.path.join(cwd, 'SConscript_config'));
 
 objs += SConscript(os.path.join(SHARED_DIR, 'platform', 'rt-thread', 'SConscript'))
diff --git a/build-scripts/SConscript_config b/build-scripts/SConscript_config
index 2401f3aa3..246bd0aaa 100644
--- a/build-scripts/SConscript_config
+++ b/build-scripts/SConscript_config
@@ -109,7 +109,27 @@ if GetDepend(['WAMR_BUILD_CUSTOM_NAME_SECTION']):
 
 if GetDepend(['WAMR_BUILD_TAIL_CALL']):
     CPPDEFINES += ['WASM_ENABLE_TAIL_CALL=1']
-    print('[WAMR] Tail call enabledd')
+    print('[WAMR] Tail call enabled')
+
+if GetDepend(['WAMR_BUILD_THREAD_MGR']):
+    CPPDEFINES += ['WASM_ENABLE_THREAD_MGR=1']
+    print('[WAMR] Thread manager enabled')
+
+if GetDepend(['WAMR_BUILD_LIBC_WASI']):
+    CPPDEFINES += ['WASM_ENABLE_LIBC_WASI=1']
+    CPPDEFINES += ['WASM_ENABLE_MODULE_INST_CONTEXT=1']
+    print('[WAMR] Libc wasi enabled')
+
+if GetDepend(['WAMR_BUILD_LIB_WASI_THREADS']):
+    CPPDEFINES += ['WASM_ENABLE_LIB_WASI_THREADS=1']
+    print('[WAMR] Lib wasi threads enabled')
+
+if GetDepend(['WAMR_BUILD_REF_TYPES']):
+    CPPDEFINES += ['WASM_ENABLE_REF_TYPES=1']
+    print('[WAMR] enable ref types')
+
+CPPDEFINES += ['BH_MALLOC=wasm_runtime_malloc']
+CPPDEFINES += ['BH_FREE=wasm_runtime_free']
 
 LIBS = ['m']
 
diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake
index 28ace9835..252ba3a84 100644
--- a/build-scripts/config_common.cmake
+++ b/build-scripts/config_common.cmake
@@ -39,6 +39,8 @@ elseif (WAMR_BUILD_TARGET STREQUAL "RISCV64_LP64")
   add_definitions(-DBUILD_TARGET_RISCV64_LP64)
 elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32" OR WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32D")
   add_definitions(-DBUILD_TARGET_RISCV32_ILP32D)
+elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32F")
+  add_definitions(-DBUILD_TARGET_RISCV32_ILP32F)
 elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32")
   add_definitions(-DBUILD_TARGET_RISCV32_ILP32)
 elseif (WAMR_BUILD_TARGET STREQUAL "ARC")
@@ -263,6 +265,11 @@ if (WAMR_BUILD_MEMORY64 EQUAL 1)
   set (WAMR_DISABLE_HW_BOUND_CHECK 1)
   message ("     Memory64 memory enabled")
 endif ()
+if (WAMR_BUILD_MULTI_MEMORY EQUAL 1)
+  add_definitions (-DWASM_ENABLE_MULTI_MEMORY=1)
+  message ("     Multi memory enabled")
+  set (WAMR_BUILD_DEBUG_INTERP 0)
+endif ()
 if (WAMR_BUILD_THREAD_MGR EQUAL 1)
   message ("     Thread manager enabled")
 endif ()
@@ -434,6 +441,20 @@ endif ()
 if (WAMR_BUILD_WASI_NN EQUAL 1)
   message ("     WASI-NN enabled")
   add_definitions (-DWASM_ENABLE_WASI_NN=1)
+  # Variant backends
+  if (NOT WAMR_BUILD_WASI_NN_TFLITE EQUAL 1 AND NOT WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1)
+    message (FATAL_ERROR "   Need to select a backend for WASI-NN")
+  endif ()
+
+  if (WAMR_BUILD_WASI_NN_TFLITE EQUAL 1)
+    message ("     WASI-NN: backend tflite enabled")
+    add_definitions (-DWASM_ENABLE_WASI_NN_TFLITE)
+  endif ()
+  if (WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1)
+    message ("     WASI-NN: backend openvino enabled")
+    add_definitions (-DWASM_ENABLE_WASI_NN_OPENVINO)
+  endif ()
+  # Variant devices
   if (WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1)
       message ("     WASI-NN: GPU enabled")
       add_definitions (-DWASM_ENABLE_WASI_NN_GPU=1)
@@ -446,7 +467,7 @@ if (WAMR_BUILD_WASI_NN EQUAL 1)
       add_definitions (-DWASM_WASI_NN_EXTERNAL_DELEGATE_PATH="${WAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH}")
   endif ()
   if (WAMR_BUILD_WASI_EPHEMERAL_NN EQUAL 1)
-      message ("     WASI-NN: WASI-Ephemeral-NN enabled")
+      message ("     WASI-NN: use 'wasi_ephemeral_nn' instead of 'wasi-nn'")
       add_definitions (-DWASM_ENABLE_WASI_EPHEMERAL_NN=1)
   endif()
 endif ()
diff --git a/core/config.h b/core/config.h
index 9fd540f32..a25eb543e 100644
--- a/core/config.h
+++ b/core/config.h
@@ -20,6 +20,7 @@
     && !defined(BUILD_TARGET_RISCV64_LP64D) \
     && !defined(BUILD_TARGET_RISCV64_LP64) \
     && !defined(BUILD_TARGET_RISCV32_ILP32D) \
+    && !defined(BUILD_TARGET_RISCV32_ILP32F) \
     && !defined(BUILD_TARGET_RISCV32_ILP32) \
     && !defined(BUILD_TARGET_ARC)
 /* clang-format on */
@@ -43,7 +44,11 @@
 #define BUILD_TARGET_XTENSA
 #elif defined(__riscv) && (__riscv_xlen == 64)
 #define BUILD_TARGET_RISCV64_LP64D
-#elif defined(__riscv) && (__riscv_xlen == 32)
+#elif defined(__riscv) && (__riscv_xlen == 32) && !defined(__riscv_flen)
+#define BUILD_TARGET_RISCV32_ILP32
+#elif defined(__riscv) && (__riscv_xlen == 32) && (__riscv_flen == 32)
+#define BUILD_TARGET_RISCV32_ILP32F
+#elif defined(__riscv) && (__riscv_xlen == 32) && (__riscv_flen == 64)
 #define BUILD_TARGET_RISCV32_ILP32D
 #elif defined(__arc__)
 #define BUILD_TARGET_ARC
@@ -659,6 +664,11 @@
 #define WASM_ENABLE_MEMORY64 0
 #endif
 
+/* Disable multi-memory by default */
+#ifndef WASM_ENABLE_MULTI_MEMORY
+#define WASM_ENABLE_MULTI_MEMORY 0
+#endif
+
 #ifndef WASM_TABLE_MAX_SIZE
 #define WASM_TABLE_MAX_SIZE 1024
 #endif
diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c
index 645f68b1d..b96079d3b 100644
--- a/core/iwasm/aot/aot_loader.c
+++ b/core/iwasm/aot/aot_loader.c
@@ -294,6 +294,39 @@ loader_malloc(uint64 size, char *error_buf, uint32 error_buf_size)
     return mem;
 }
 
+static void *
+loader_mmap(uint32 size, bool prot_exec, char *error_buf, uint32 error_buf_size)
+{
+    int map_prot =
+        MMAP_PROT_READ | MMAP_PROT_WRITE | (prot_exec ? MMAP_PROT_EXEC : 0);
+    int map_flags;
+    void *mem;
+
+#if UINTPTR_MAX == UINT64_MAX
+    /* The mmapped AOT data and code in 64-bit targets had better be in
+       range 0 to 2G, or aot loader may fail to apply some relocations,
+       e.g., R_X86_64_32/R_X86_64_32S/R_X86_64_PC32/R_RISCV_32.
+       We try to mmap with MMAP_MAP_32BIT flag first, and if fails, mmap
+       again without the flag. */
+    map_flags = MMAP_MAP_32BIT;
+    if ((mem = os_mmap(NULL, size, map_prot, map_flags,
+                       os_get_invalid_handle()))) {
+        /* The mmapped memory must be in the first 2 Gigabytes of the
+           process address space */
+        bh_assert((uintptr_t)mem < INT32_MAX);
+        return mem;
+    }
+#endif
+
+    map_flags = MMAP_MAP_NONE;
+    if (!(mem = os_mmap(NULL, size, map_prot, map_flags,
+                        os_get_invalid_handle()))) {
+        set_error_buf(error_buf, error_buf_size, "allocate memory failed");
+        return NULL;
+    }
+    return mem;
+}
+
 static char *
 load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
             bool is_load_from_file_buf,
@@ -2378,7 +2411,6 @@ destroy_object_data_sections(AOTObjectDataSection *data_sections,
                 }
             }
 #endif
-            os_munmap(data_section->data, data_section->size);
         }
     wasm_runtime_free(data_sections);
 }
@@ -2392,6 +2424,9 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end,
     AOTObjectDataSection *data_sections;
     uint64 size;
     uint32 i;
+    uint64 total_size = 0;
+    uint32 page_size = os_getpagesize();
+    uint8 *merged_sections = NULL;
 
     /* Allocate memory */
     size = sizeof(AOTObjectDataSection) * (uint64)module->data_section_count;
@@ -2400,41 +2435,40 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end,
         return false;
     }
 
-    /* Create each data section */
+    /* First iteration: read data from buf, and calculate total memory needed */
     for (i = 0; i < module->data_section_count; i++) {
-        int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE;
-#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
-    || defined(BUILD_TARGET_RISCV64_LP64D)                       \
-    || defined(BUILD_TARGET_RISCV64_LP64)
-        /* aot code and data in x86_64 must be in range 0 to 2G due to
-           relocation for R_X86_64_32/32S/PC32 */
-        int map_flags = MMAP_MAP_32BIT;
-#else
-        int map_flags = MMAP_MAP_NONE;
-#endif
-
         read_string(buf, buf_end, data_sections[i].name);
         read_uint32(buf, buf_end, data_sections[i].size);
-
+        CHECK_BUF(buf, buf_end, data_sections[i].size);
+        /* Temporary record data ptr for merge, will be replaced after the
+           merged_data_sections is mmapped */
+        if (data_sections[i].size > 0)
+            data_sections[i].data = (uint8 *)buf;
+        buf += data_sections[i].size;
+        total_size += align_uint64((uint64)data_sections[i].size, page_size);
+    }
+    if (total_size > UINT32_MAX) {
+        set_error_buf(error_buf, error_buf_size, "data sections too large");
+        return false;
+    }
+    if (total_size > 0) {
         /* Allocate memory for data */
-        if (data_sections[i].size > 0
-            && !(data_sections[i].data =
-                     os_mmap(NULL, data_sections[i].size, map_prot, map_flags,
-                             os_get_invalid_handle()))) {
-            set_error_buf(error_buf, error_buf_size, "allocate memory failed");
+        merged_sections = module->merged_data_sections =
+            loader_mmap((uint32)total_size, false, error_buf, error_buf_size);
+        if (!merged_sections) {
             return false;
         }
-#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
-#if !defined(BH_PLATFORM_LINUX_SGX) && !defined(BH_PLATFORM_WINDOWS) \
-    && !defined(BH_PLATFORM_DARWIN)
-        /* address must be in the first 2 Gigabytes of
-           the process address space */
-        bh_assert((uintptr_t)data_sections[i].data < INT32_MAX);
-#endif
-#endif
+        module->merged_data_sections_size = (uint32)total_size;
+    }
 
-        read_byte_array(buf, buf_end, data_sections[i].data,
-                        data_sections[i].size);
+    /* Second iteration: Create each data section */
+    for (i = 0; i < module->data_section_count; i++) {
+        if (data_sections[i].size > 0) {
+            bh_memcpy_s(merged_sections, data_sections[i].size,
+                        data_sections[i].data, data_sections[i].size);
+            data_sections[i].data = merged_sections;
+            merged_sections += align_uint(data_sections[i].size, page_size);
+        }
     }
 
     *p_buf = buf;
@@ -2532,6 +2566,82 @@ fail:
     return false;
 }
 
+#if !defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF)
+static bool
+try_merge_data_and_text(const uint8 **buf, const uint8 **buf_end,
+                        AOTModule *module, char *error_buf,
+                        uint32 error_buf_size)
+{
+    uint8 *old_buf = (uint8 *)*buf;
+    uint8 *old_end = (uint8 *)*buf_end;
+    size_t code_size = (size_t)(old_end - old_buf);
+    uint32 page_size = os_getpagesize();
+    uint64 total_size = 0;
+    uint32 i;
+    uint8 *sections;
+
+    if (code_size == 0) {
+        return true;
+    }
+
+    /* calculate the total memory needed */
+    total_size += align_uint64((uint64)code_size, page_size);
+    for (i = 0; i < module->data_section_count; ++i) {
+        total_size +=
+            align_uint64((uint64)module->data_sections[i].size, page_size);
+    }
+    /* distance between .data and .text should not be greater than 4GB
+       for some targets (e.g. arm64 reloc need < 4G distance) */
+    if (total_size > UINT32_MAX) {
+        return false;
+    }
+    /* code_size was checked and must be larger than 0 here */
+    bh_assert(total_size > 0);
+
+    sections = loader_mmap((uint32)total_size, false, NULL, 0);
+    if (!sections) {
+        /* merge failed but may be not critical for some targets */
+        return false;
+    }
+    /* change the code part to be executable */
+    if (os_mprotect(sections, code_size,
+                    MMAP_PROT_READ | MMAP_PROT_WRITE | MMAP_PROT_EXEC)
+        != 0) {
+        os_munmap(sections, (uint32)total_size);
+        return false;
+    }
+
+    module->merged_data_text_sections = sections;
+    module->merged_data_text_sections_size = (uint32)total_size;
+
+    /* order not essential just as compiler does: .text section first */
+    *buf = sections;
+    *buf_end = sections + code_size;
+    bh_memcpy_s(sections, code_size, old_buf, code_size);
+    os_munmap(old_buf, code_size);
+    sections += align_uint((uint32)code_size, page_size);
+
+    /* then migrate .data sections */
+    for (i = 0; i < module->data_section_count; ++i) {
+        AOTObjectDataSection *data_section = module->data_sections + i;
+        uint8 *old_data = data_section->data;
+        data_section->data = sections;
+        bh_memcpy_s(data_section->data, data_section->size, old_data,
+                    data_section->size);
+        sections += align_uint(data_section->size, page_size);
+    }
+    /* free the original data sections */
+    if (module->merged_data_sections) {
+        os_munmap(module->merged_data_sections,
+                  module->merged_data_sections_size);
+        module->merged_data_sections = NULL;
+        module->merged_data_sections_size = 0;
+    }
+
+    return true;
+}
+#endif /* ! defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF) */
+
 static bool
 load_text_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
                   char *error_buf, uint32 error_buf_size)
@@ -3391,16 +3501,9 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
            + sizeof(uint64) * module->real_plt_count
            + sizeof(uint32) * module->float_plt_count;
     if (size > 0) {
-        map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE | MMAP_PROT_EXEC;
-        /* aot code and data in x86_64 must be in range 0 to 2G due to
-           relocation for R_X86_64_32/32S/PC32 */
-        map_flags = MMAP_MAP_32BIT;
-
         if (size > UINT32_MAX
-            || !(module->extra_plt_data =
-                     os_mmap(NULL, (uint32)size, map_prot, map_flags,
-                             os_get_invalid_handle()))) {
-            set_error_buf(error_buf, error_buf_size, "mmap memory failed");
+            || !(module->extra_plt_data = loader_mmap(
+                     (uint32)size, true, error_buf, error_buf_size))) {
             goto fail;
         }
         module->extra_plt_data_size = (uint32)size;
@@ -3512,19 +3615,12 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
         GOTItem *got_item = module->got_item_list;
         uint32 got_item_idx = 0;
 
-        map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE;
-        /* aot code and data in x86_64 must be in range 0 to 2G due to
-           relocation for R_X86_64_32/32S/PC32 */
-        map_flags = MMAP_MAP_32BIT;
-
         /* Create the GOT for func_ptrs, note that it is different from
            the .got section of a dynamic object file */
         size = (uint64)sizeof(void *) * got_item_count;
         if (size > UINT32_MAX
-            || !(module->got_func_ptrs =
-                     os_mmap(NULL, (uint32)size, map_prot, map_flags,
-                             os_get_invalid_handle()))) {
-            set_error_buf(error_buf, error_buf_size, "mmap memory failed");
+            || !(module->got_func_ptrs = loader_mmap(
+                     (uint32)size, false, error_buf, error_buf_size))) {
             goto fail;
         }
 
@@ -3749,6 +3845,17 @@ load_from_sections(AOTModule *module, AOTSection *sections,
                     return false;
                 break;
             case AOT_SECTION_TYPE_TEXT:
+#if !defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF)
+                /* try to merge .data and .text, with exceptions:
+                 * 1. XIP mode
+                 * 2. pre-mmapped module load from aot_load_from_sections()
+                 * 3. nuttx & esp-idf: have separate region for MMAP_PROT_EXEC
+                 */
+                if (!module->is_indirect_mode && is_load_from_file_buf)
+                    if (!try_merge_data_and_text(&buf, &buf_end, module,
+                                                 error_buf, error_buf_size))
+                        LOG_WARNING("merge .data and .text sections failed");
+#endif /* ! defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF) */
                 if (!load_text_section(buf, buf_end, module, error_buf,
                                        error_buf_size))
                     return false;
@@ -4065,37 +4172,16 @@ create_sections(AOTModule *module, const uint8 *buf, uint32 size,
 
             if (section_type == AOT_SECTION_TYPE_TEXT) {
                 if ((section_size > 0) && !module->is_indirect_mode) {
-                    int map_prot =
-                        MMAP_PROT_READ | MMAP_PROT_WRITE | MMAP_PROT_EXEC;
-#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
-    || defined(BUILD_TARGET_RISCV64_LP64D)                       \
-    || defined(BUILD_TARGET_RISCV64_LP64)
-                    /* aot code and data in x86_64 must be in range 0 to 2G due
-                       to relocation for R_X86_64_32/32S/PC32 */
-                    int map_flags = MMAP_MAP_32BIT;
-#else
-                    int map_flags = MMAP_MAP_NONE;
-#endif
                     total_size =
                         (uint64)section_size + aot_get_plt_table_size();
                     total_size = (total_size + 3) & ~((uint64)3);
                     if (total_size >= UINT32_MAX
                         || !(aot_text =
-                                 os_mmap(NULL, (uint32)total_size, map_prot,
-                                         map_flags, os_get_invalid_handle()))) {
+                                 loader_mmap((uint32)total_size, true,
+                                             error_buf, error_buf_size))) {
                         wasm_runtime_free(section);
-                        set_error_buf(error_buf, error_buf_size,
-                                      "mmap memory failed");
                         goto fail;
                     }
-#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
-#if !defined(BH_PLATFORM_LINUX_SGX) && !defined(BH_PLATFORM_WINDOWS) \
-    && !defined(BH_PLATFORM_DARWIN)
-                    /* address must be in the first 2 Gigabytes of
-                       the process address space */
-                    bh_assert((uintptr_t)aot_text < INT32_MAX);
-#endif
-#endif
 
 #if (WASM_MEM_DUAL_BUS_MIRROR != 0)
                     mirrored_text = os_get_dbus_mirror(aot_text);
@@ -4179,7 +4265,11 @@ load(const uint8 *buf, uint32 size, AOTModule *module,
     if (!ret) {
         /* If load_from_sections() fails, then aot text is destroyed
            in destroy_sections() */
-        destroy_sections(section_list, module->is_indirect_mode ? false : true);
+        destroy_sections(section_list,
+                         module->is_indirect_mode
+                                 || module->merged_data_text_sections
+                             ? false
+                             : true);
         /* aot_unload() won't destroy aot text again */
         module->code = NULL;
     }
@@ -4329,7 +4419,8 @@ aot_unload(AOTModule *module)
     }
 #endif
 
-    if (module->code && !module->is_indirect_mode) {
+    if (module->code && !module->is_indirect_mode
+        && !module->merged_data_text_sections) {
         /* The layout is: literal size + literal + code (with plt table) */
         uint8 *mmap_addr = module->literal - sizeof(uint32);
         uint32 total_size =
@@ -4364,6 +4455,14 @@ aot_unload(AOTModule *module)
         destroy_object_data_sections(module->data_sections,
                                      module->data_section_count);
 
+    if (module->merged_data_sections)
+        os_munmap(module->merged_data_sections,
+                  module->merged_data_sections_size);
+
+    if (module->merged_data_text_sections)
+        os_munmap(module->merged_data_text_sections,
+                  module->merged_data_text_sections_size);
+
 #if WASM_ENABLE_DEBUG_AOT != 0
     jit_code_entry_destroy(module->elf_hdr);
 #endif
diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c
index bfe691ea2..3ca26114f 100644
--- a/core/iwasm/aot/aot_runtime.c
+++ b/core/iwasm/aot/aot_runtime.c
@@ -1748,16 +1748,7 @@ aot_instantiate(AOTModule *module, AOTModuleInstance *parent,
     /* Initialize the thread related data */
     if (stack_size == 0)
         stack_size = DEFAULT_WASM_STACK_SIZE;
-#if WASM_ENABLE_SPEC_TEST != 0
-#if WASM_ENABLE_TAIL_CALL == 0
-    if (stack_size < 128 * 1024)
-        stack_size = 128 * 1024;
-#else
-    /* Some tail-call cases require large operand stack */
-    if (stack_size < 10 * 1024 * 1024)
-        stack_size = 10 * 1024 * 1024;
-#endif
-#endif
+
     module_inst->default_wasm_stack_size = stack_size;
 
     extra->stack_sizes =
@@ -2826,6 +2817,13 @@ aot_enlarge_memory(AOTModuleInstance *module_inst, uint32 inc_page_count)
     return wasm_enlarge_memory(module_inst, inc_page_count);
 }
 
+bool
+aot_enlarge_memory_with_idx(AOTModuleInstance *module_inst,
+                            uint32 inc_page_count, uint32 memidx)
+{
+    return wasm_enlarge_memory_with_idx(module_inst, inc_page_count, memidx);
+}
+
 bool
 aot_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc,
                   uint32 *argv)
diff --git a/core/iwasm/aot/aot_runtime.h b/core/iwasm/aot/aot_runtime.h
index e3704f827..0eb647987 100644
--- a/core/iwasm/aot/aot_runtime.h
+++ b/core/iwasm/aot/aot_runtime.h
@@ -315,6 +315,13 @@ typedef struct AOTModule {
 
     /* Whether the underlying wasm binary buffer can be freed */
     bool is_binary_freeable;
+
+    /* `.data` sections merged into one mmaped to reduce the tlb cache miss */
+    uint8 *merged_data_sections;
+    uint32 merged_data_sections_size;
+    /* `.data` and `.text` sections merged into one large mmaped section */
+    uint8 *merged_data_text_sections;
+    uint32 merged_data_text_sections_size;
 } AOTModule;
 
 #define AOTMemoryInstance WASMMemoryInstance
@@ -605,6 +612,10 @@ aot_module_dup_data(AOTModuleInstance *module_inst, const char *src,
 bool
 aot_enlarge_memory(AOTModuleInstance *module_inst, uint32 inc_page_count);
 
+bool
+aot_enlarge_memory_with_idx(AOTModuleInstance *module_inst,
+                            uint32 inc_page_count, uint32 memidx);
+
 /**
  * Invoke native function from aot code
  */
diff --git a/core/iwasm/aot/arch/aot_reloc_arm.c b/core/iwasm/aot/arch/aot_reloc_arm.c
index 808af89f5..bb492bfeb 100644
--- a/core/iwasm/aot/arch/aot_reloc_arm.c
+++ b/core/iwasm/aot/arch/aot_reloc_arm.c
@@ -67,6 +67,7 @@ void __divdi3();
 void __divsf3();
 void __divsi3();
 void __eqdf2();
+void __eqsf2();
 void __extendsfdf2();
 void __fixdfdi();
 void __fixdfsi();
@@ -169,6 +170,7 @@ static SymbolMap target_sym_map[] = {
     REG_SYM(__divsf3),
     REG_SYM(__divsi3),
     REG_SYM(__eqdf2),
+    REG_SYM(__eqsf2),
     REG_SYM(__extendsfdf2),
     REG_SYM(__fixdfdi),
     REG_SYM(__fixdfsi),
diff --git a/core/iwasm/aot/arch/aot_reloc_riscv.c b/core/iwasm/aot/arch/aot_reloc_riscv.c
index 31830f780..b87bb2000 100644
--- a/core/iwasm/aot/arch/aot_reloc_riscv.c
+++ b/core/iwasm/aot/arch/aot_reloc_riscv.c
@@ -134,6 +134,7 @@ static SymbolMap target_sym_map[] = {
     REG_SYM(__eqdf2),
     REG_SYM(__extendsfdf2),
     REG_SYM(__fixdfdi),
+    REG_SYM(__fixdfsi),
     REG_SYM(__fixunsdfdi),
     REG_SYM(__fixunsdfsi),
     REG_SYM(__floatdidf),
@@ -143,6 +144,7 @@ static SymbolMap target_sym_map[] = {
     REG_SYM(__gedf2),
     REG_SYM(__gtdf2),
     REG_SYM(__ledf2),
+    REG_SYM(__ltdf2),
     REG_SYM(__muldf3),
     REG_SYM(__nedf2),
     REG_SYM(__negdf2),
diff --git a/core/iwasm/aot/arch/aot_reloc_thumb.c b/core/iwasm/aot/arch/aot_reloc_thumb.c
index 26614863b..f90507dec 100644
--- a/core/iwasm/aot/arch/aot_reloc_thumb.c
+++ b/core/iwasm/aot/arch/aot_reloc_thumb.c
@@ -16,32 +16,47 @@
 /* clang-format off */
 void __adddf3();
 void __addsf3();
+void __aeabi_d2f();
 void __aeabi_d2iz();
 void __aeabi_d2lz();
 void __aeabi_d2uiz();
 void __aeabi_d2ulz();
 void __aeabi_dadd();
+void __aeabi_dcmpeq();
 void __aeabi_dcmpge();
 void __aeabi_dcmpgt();
 void __aeabi_dcmple();
 void __aeabi_dcmplt();
 void __aeabi_dcmpun();
 void __aeabi_ddiv();
+void __aeabi_dmul();
+void __aeabi_dsub();
 void __aeabi_f2d();
 void __aeabi_f2iz();
 void __aeabi_f2lz();
 void __aeabi_f2ulz();
+void __aeabi_fadd();
+void __aeabi_fcmpeq();
 void __aeabi_fcmpge();
+void __aeabi_fcmpgt();
 void __aeabi_fcmple();
 void __aeabi_fcmplt();
 void __aeabi_fcmpun();
+void __aeabi_fdiv();
+void __aeabi_fmul();
+void __aeabi_fsub();
 void __aeabi_i2d();
+void __aeabi_i2f();
 void __aeabi_idiv();
 void __aeabi_idivmod();
 void __aeabi_l2d();
 void __aeabi_l2f();
 void __aeabi_ldivmod();
+void __aeabi_llsl();
+void __aeabi_llsr();
+void __aeabi_lmul();
 void __aeabi_ui2d();
+void __aeabi_ui2f();
 void __aeabi_uidiv();
 void __aeabi_uidivmod();
 void __aeabi_ul2d();
@@ -120,32 +135,47 @@ static SymbolMap target_sym_map[] = {
     REG_SYM(__unordsf2),
 #endif
     /* clang-format on */
+    REG_SYM(__aeabi_d2f),
     REG_SYM(__aeabi_d2iz),
     REG_SYM(__aeabi_d2lz),
     REG_SYM(__aeabi_d2uiz),
     REG_SYM(__aeabi_d2ulz),
     REG_SYM(__aeabi_dadd),
+    REG_SYM(__aeabi_dcmpeq),
     REG_SYM(__aeabi_dcmpge),
     REG_SYM(__aeabi_dcmpgt),
     REG_SYM(__aeabi_dcmple),
     REG_SYM(__aeabi_dcmplt),
     REG_SYM(__aeabi_dcmpun),
     REG_SYM(__aeabi_ddiv),
+    REG_SYM(__aeabi_dmul),
+    REG_SYM(__aeabi_dsub),
     REG_SYM(__aeabi_f2d),
     REG_SYM(__aeabi_f2iz),
     REG_SYM(__aeabi_f2lz),
     REG_SYM(__aeabi_f2ulz),
+    REG_SYM(__aeabi_fadd),
+    REG_SYM(__aeabi_fcmpeq),
     REG_SYM(__aeabi_fcmpge),
+    REG_SYM(__aeabi_fcmpgt),
     REG_SYM(__aeabi_fcmple),
     REG_SYM(__aeabi_fcmplt),
     REG_SYM(__aeabi_fcmpun),
+    REG_SYM(__aeabi_fdiv),
+    REG_SYM(__aeabi_fmul),
+    REG_SYM(__aeabi_fsub),
     REG_SYM(__aeabi_i2d),
+    REG_SYM(__aeabi_i2f),
     REG_SYM(__aeabi_idiv),
     REG_SYM(__aeabi_idivmod),
     REG_SYM(__aeabi_l2d),
     REG_SYM(__aeabi_l2f),
     REG_SYM(__aeabi_ldivmod),
+    REG_SYM(__aeabi_llsl),
+    REG_SYM(__aeabi_llsr),
+    REG_SYM(__aeabi_lmul),
     REG_SYM(__aeabi_ui2d),
+    REG_SYM(__aeabi_ui2f),
     REG_SYM(__aeabi_uidiv),
     REG_SYM(__aeabi_uidivmod),
     REG_SYM(__aeabi_ul2d),
diff --git a/core/iwasm/common/wasm_c_api.c b/core/iwasm/common/wasm_c_api.c
index 4fd662653..01109658f 100644
--- a/core/iwasm/common/wasm_c_api.c
+++ b/core/iwasm/common/wasm_c_api.c
@@ -5379,3 +5379,24 @@ wasm_extern_new_empty(wasm_store_t *store, wasm_externkind_t extern_kind)
     LOG_ERROR("Don't support linking table and memory for now");
     return NULL;
 }
+
+double
+wasm_instance_sum_wasm_exec_time(const wasm_instance_t *instance)
+{
+#if WASM_ENABLE_PERF_PROFILING != 0
+    return wasm_runtime_sum_wasm_exec_time(instance->inst_comm_rt);
+#else
+    return -1.0;
+#endif
+}
+
+double
+wasm_instance_get_wasm_func_exec_time(const wasm_instance_t *instance,
+                                      const char *name)
+{
+#if WASM_ENABLE_PERF_PROFILING != 0
+    return wasm_runtime_get_wasm_func_exec_time(instance->inst_comm_rt, name);
+#else
+    return -1.0;
+#endif
+}
diff --git a/core/iwasm/common/wasm_loader_common.c b/core/iwasm/common/wasm_loader_common.c
index 179639fca..6dd31be2c 100644
--- a/core/iwasm/common/wasm_loader_common.c
+++ b/core/iwasm/common/wasm_loader_common.c
@@ -3,14 +3,15 @@
  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  */
 #include "wasm_loader_common.h"
+#include "bh_leb128.h"
 #include "bh_log.h"
 #if WASM_ENABLE_GC != 0
 #include "../common/gc/gc_type.h"
 #endif
 
-static void
-set_error_buf(char *error_buf, uint32 error_buf_size, const char *string,
-              bool is_aot)
+void
+wasm_loader_set_error_buf(char *error_buf, uint32 error_buf_size,
+                          const char *string, bool is_aot)
 {
     if (error_buf != NULL) {
         snprintf(error_buf, error_buf_size, "%s module load failed: %s",
@@ -29,30 +30,30 @@ wasm_memory_check_flags(const uint8 mem_flag, char *error_buf,
         if (mem_flag & SHARED_MEMORY_FLAG) {
             LOG_VERBOSE("shared memory flag was found, please enable shared "
                         "memory, lib-pthread or lib-wasi-threads");
-            set_error_buf(error_buf, error_buf_size, "invalid limits flags",
-                          is_aot);
+            wasm_loader_set_error_buf(error_buf, error_buf_size,
+                                      "invalid limits flags", is_aot);
             return false;
         }
 #endif
 #if WASM_ENABLE_MEMORY64 == 0
         if (mem_flag & MEMORY64_FLAG) {
             LOG_VERBOSE("memory64 flag was found, please enable memory64");
-            set_error_buf(error_buf, error_buf_size, "invalid limits flags",
-                          is_aot);
+            wasm_loader_set_error_buf(error_buf, error_buf_size,
+                                      "invalid limits flags", is_aot);
             return false;
         }
 #endif
     }
 
     if (mem_flag > MAX_PAGE_COUNT_FLAG + SHARED_MEMORY_FLAG + MEMORY64_FLAG) {
-        set_error_buf(error_buf, error_buf_size, "invalid limits flags",
-                      is_aot);
+        wasm_loader_set_error_buf(error_buf, error_buf_size,
+                                  "invalid limits flags", is_aot);
         return false;
     }
     else if ((mem_flag & SHARED_MEMORY_FLAG)
              && !(mem_flag & MAX_PAGE_COUNT_FLAG)) {
-        set_error_buf(error_buf, error_buf_size,
-                      "shared memory must have maximum", is_aot);
+        wasm_loader_set_error_buf(error_buf, error_buf_size,
+                                  "shared memory must have maximum", is_aot);
         return false;
     }
 
@@ -130,3 +131,33 @@ is_indices_overflow(uint32 import, uint32 other, char *error_buf,
 
     return false;
 }
+
+bool
+read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign,
+         uint64 *p_result, char *error_buf, uint32 error_buf_size)
+{
+    size_t offset = 0;
+    bh_leb_read_status_t status =
+        bh_leb_read(*p_buf, buf_end, maxbits, sign, p_result, &offset);
+
+    switch (status) {
+        case BH_LEB_READ_SUCCESS:
+            *p_buf += offset;
+            return true;
+        case BH_LEB_READ_TOO_LONG:
+            wasm_loader_set_error_buf(error_buf, error_buf_size,
+                                      "integer representation too long", false);
+            return false;
+        case BH_LEB_READ_OVERFLOW:
+            wasm_loader_set_error_buf(error_buf, error_buf_size,
+                                      "integer too large", false);
+            return false;
+        case BH_LEB_READ_UNEXPECTED_END:
+            wasm_loader_set_error_buf(error_buf, error_buf_size,
+                                      "unexpected end", false);
+            return false;
+        default:
+            bh_assert(false);
+            return false;
+    }
+}
diff --git a/core/iwasm/common/wasm_loader_common.h b/core/iwasm/common/wasm_loader_common.h
index 6bd0cf6c2..d574110ba 100644
--- a/core/iwasm/common/wasm_loader_common.h
+++ b/core/iwasm/common/wasm_loader_common.h
@@ -30,6 +30,14 @@ bool
 is_indices_overflow(uint32 import, uint32 other, char *error_buf,
                     uint32 error_buf_size);
 
+bool
+read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign,
+         uint64 *p_result, char *error_buf, uint32 error_buf_size);
+
+void
+wasm_loader_set_error_buf(char *error_buf, uint32 error_buf_size,
+                          const char *string, bool is_aot);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/core/iwasm/common/wasm_memory.c b/core/iwasm/common/wasm_memory.c
index 187b4de03..71d337549 100644
--- a/core/iwasm/common/wasm_memory.c
+++ b/core/iwasm/common/wasm_memory.c
@@ -670,6 +670,16 @@ wasm_get_default_memory(WASMModuleInstance *module_inst)
         return NULL;
 }
 
+WASMMemoryInstance *
+wasm_get_memory_with_idx(WASMModuleInstance *module_inst, uint32 index)
+{
+    bh_assert(index < module_inst->memory_count);
+    if (module_inst->memories)
+        return module_inst->memories[index];
+    else
+        return NULL;
+}
+
 void
 wasm_runtime_set_mem_bound_check_bytes(WASMMemoryInstance *memory,
                                        uint64 memory_data_size)
@@ -747,9 +757,14 @@ wasm_mmap_linear_memory(uint64_t map_size, uint64 commit_size)
 }
 
 bool
-wasm_enlarge_memory_internal(WASMModuleInstance *module, uint32 inc_page_count)
+wasm_enlarge_memory_internal(WASMModuleInstance *module, uint32 inc_page_count,
+                             uint32 memidx)
 {
+#if WASM_ENABLE_MULTI_MEMORY != 0
+    WASMMemoryInstance *memory = wasm_get_memory_with_idx(module, memidx);
+#else
     WASMMemoryInstance *memory = wasm_get_default_memory(module);
+#endif
     uint8 *memory_data_old, *memory_data_new, *heap_data_old;
     uint32 num_bytes_per_page, heap_size;
     uint32 cur_page_count, max_page_count, total_page_count;
@@ -960,7 +975,7 @@ wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
     if (module->memory_count > 0)
         shared_memory_lock(module->memories[0]);
 #endif
-    ret = wasm_enlarge_memory_internal(module, inc_page_count);
+    ret = wasm_enlarge_memory_internal(module, inc_page_count, 0);
 #if WASM_ENABLE_SHARED_MEMORY != 0
     if (module->memory_count > 0)
         shared_memory_unlock(module->memories[0]);
@@ -969,6 +984,25 @@ wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
     return ret;
 }
 
+bool
+wasm_enlarge_memory_with_idx(WASMModuleInstance *module, uint32 inc_page_count,
+                             uint32 memidx)
+{
+    bool ret = false;
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    if (memidx < module->memory_count)
+        shared_memory_lock(module->memories[memidx]);
+#endif
+    ret = wasm_enlarge_memory_internal(module, inc_page_count, memidx);
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    if (memidx < module->memory_count)
+        shared_memory_unlock(module->memories[memidx]);
+#endif
+
+    return ret;
+}
+
 void
 wasm_deallocate_linear_memory(WASMMemoryInstance *memory_inst)
 {
diff --git a/core/iwasm/common/wasm_native.c b/core/iwasm/common/wasm_native.c
index 40152dfa5..9e8764a22 100644
--- a/core/iwasm/common/wasm_native.c
+++ b/core/iwasm/common/wasm_native.c
@@ -15,6 +15,9 @@
 #if WASM_ENABLE_THREAD_MGR != 0
 #include "../libraries/thread-mgr/thread_manager.h"
 #endif
+#if WASM_ENABLE_WASI_NN != 0 || WASM_ENABLE_WASI_EPHEMERAL_NN != 0
+#include "wasi_nn_host.h"
+#endif
 
 static NativeSymbolsList g_native_symbols_list = NULL;
 
@@ -230,7 +233,7 @@ wasm_native_resolve_symbol(const char *module_name, const char *field_name,
 #if WASM_ENABLE_WAMR_COMPILER == 0
                 /* Output warning except running aot compiler */
                 LOG_WARNING("failed to check signature '%s' and resolve "
-                            "pointer params for import function (%s %s)\n",
+                            "pointer params for import function (%s, %s)\n",
                             signature, module_name, field_name);
 #endif
                 return NULL;
@@ -472,11 +475,12 @@ quick_aot_entry_init();
 bool
 wasm_native_init()
 {
-#if WASM_ENABLE_SPEC_TEST != 0 || WASM_ENABLE_LIBC_BUILTIN != 0     \
-    || WASM_ENABLE_BASE_LIB != 0 || WASM_ENABLE_LIBC_EMCC != 0      \
-    || WASM_ENABLE_LIB_RATS != 0 || WASM_ENABLE_WASI_NN != 0        \
-    || WASM_ENABLE_APP_FRAMEWORK != 0 || WASM_ENABLE_LIBC_WASI != 0 \
-    || WASM_ENABLE_LIB_PTHREAD != 0 || WASM_ENABLE_LIB_WASI_THREADS != 0
+#if WASM_ENABLE_SPEC_TEST != 0 || WASM_ENABLE_LIBC_BUILTIN != 0          \
+    || WASM_ENABLE_BASE_LIB != 0 || WASM_ENABLE_LIBC_EMCC != 0           \
+    || WASM_ENABLE_LIB_RATS != 0 || WASM_ENABLE_WASI_NN != 0             \
+    || WASM_ENABLE_APP_FRAMEWORK != 0 || WASM_ENABLE_LIBC_WASI != 0      \
+    || WASM_ENABLE_LIB_PTHREAD != 0 || WASM_ENABLE_LIB_WASI_THREADS != 0 \
+    || WASM_ENABLE_WASI_NN != 0 || WASM_ENABLE_WASI_EPHEMERAL_NN != 0
     NativeSymbol *native_symbols;
     uint32 n_native_symbols;
 #endif
@@ -562,13 +566,30 @@ wasm_native_init()
         goto fail;
 #endif /* WASM_ENABLE_LIB_RATS */
 
+#if WASM_ENABLE_WASI_NN != 0 || WASM_ENABLE_WASI_EPHEMERAL_NN != 0
+    if (!wasi_nn_initialize())
+        goto fail;
+
+    n_native_symbols = get_wasi_nn_export_apis(&native_symbols);
+    if (n_native_symbols > 0
+        && !wasm_native_register_natives(
+#if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
+            "wasi_ephemeral_nn",
+#else
+            "wasi_nn",
+#endif /* WASM_ENABLE_WASI_EPHEMERAL_NN != 0 */
+            native_symbols, n_native_symbols))
+        goto fail;
+#endif /* WASM_ENABLE_WASI_NN != 0 || WASM_ENABLE_WASI_EPHEMERAL_NN != 0 */
+
 #if WASM_ENABLE_QUICK_AOT_ENTRY != 0
     if (!quick_aot_entry_init()) {
-#if WASM_ENABLE_SPEC_TEST != 0 || WASM_ENABLE_LIBC_BUILTIN != 0     \
-    || WASM_ENABLE_BASE_LIB != 0 || WASM_ENABLE_LIBC_EMCC != 0      \
-    || WASM_ENABLE_LIB_RATS != 0 || WASM_ENABLE_WASI_NN != 0        \
-    || WASM_ENABLE_APP_FRAMEWORK != 0 || WASM_ENABLE_LIBC_WASI != 0 \
-    || WASM_ENABLE_LIB_PTHREAD != 0 || WASM_ENABLE_LIB_WASI_THREADS != 0
+#if WASM_ENABLE_SPEC_TEST != 0 || WASM_ENABLE_LIBC_BUILTIN != 0          \
+    || WASM_ENABLE_BASE_LIB != 0 || WASM_ENABLE_LIBC_EMCC != 0           \
+    || WASM_ENABLE_LIB_RATS != 0 || WASM_ENABLE_WASI_NN != 0             \
+    || WASM_ENABLE_APP_FRAMEWORK != 0 || WASM_ENABLE_LIBC_WASI != 0      \
+    || WASM_ENABLE_LIB_PTHREAD != 0 || WASM_ENABLE_LIB_WASI_THREADS != 0 \
+    || WASM_ENABLE_WASI_NN != 0 || WASM_ENABLE_WASI_EPHEMERAL_NN != 0
         goto fail;
 #else
         return false;
@@ -577,11 +598,12 @@ wasm_native_init()
 #endif
 
     return true;
-#if WASM_ENABLE_SPEC_TEST != 0 || WASM_ENABLE_LIBC_BUILTIN != 0     \
-    || WASM_ENABLE_BASE_LIB != 0 || WASM_ENABLE_LIBC_EMCC != 0      \
-    || WASM_ENABLE_LIB_RATS != 0 || WASM_ENABLE_WASI_NN != 0        \
-    || WASM_ENABLE_APP_FRAMEWORK != 0 || WASM_ENABLE_LIBC_WASI != 0 \
-    || WASM_ENABLE_LIB_PTHREAD != 0 || WASM_ENABLE_LIB_WASI_THREADS != 0
+#if WASM_ENABLE_SPEC_TEST != 0 || WASM_ENABLE_LIBC_BUILTIN != 0          \
+    || WASM_ENABLE_BASE_LIB != 0 || WASM_ENABLE_LIBC_EMCC != 0           \
+    || WASM_ENABLE_LIB_RATS != 0 || WASM_ENABLE_WASI_NN != 0             \
+    || WASM_ENABLE_APP_FRAMEWORK != 0 || WASM_ENABLE_LIBC_WASI != 0      \
+    || WASM_ENABLE_LIB_PTHREAD != 0 || WASM_ENABLE_LIB_WASI_THREADS != 0 \
+    || WASM_ENABLE_WASI_NN != 0 || WASM_ENABLE_WASI_EPHEMERAL_NN != 0
 fail:
     wasm_native_destroy();
     return false;
@@ -599,6 +621,7 @@ wasm_native_destroy()
         g_wasi_context_key = NULL;
     }
 #endif
+
 #if WASM_ENABLE_LIB_PTHREAD != 0
     lib_pthread_destroy();
 #endif
@@ -607,6 +630,10 @@ wasm_native_destroy()
     lib_wasi_threads_destroy();
 #endif
 
+#if WASM_ENABLE_WASI_NN != 0 || WASM_ENABLE_WASI_EPHEMERAL_NN != 0
+    wasi_nn_destroy();
+#endif
+
     node = g_native_symbols_list;
     while (node) {
         node_next = node->next;
diff --git a/core/iwasm/common/wasm_runtime_common.c b/core/iwasm/common/wasm_runtime_common.c
index 8ed14ec40..5dd2957de 100644
--- a/core/iwasm/common/wasm_runtime_common.c
+++ b/core/iwasm/common/wasm_runtime_common.c
@@ -181,15 +181,36 @@ static RunningMode runtime_running_mode = Mode_Default;
    of signal handler */
 static os_thread_local_attribute WASMExecEnv *exec_env_tls = NULL;
 
+static bool
+is_sig_addr_in_guard_pages(void *sig_addr, WASMModuleInstance *module_inst)
+{
+    WASMMemoryInstance *memory_inst;
+    uint8 *mapped_mem_start_addr = NULL;
+    uint8 *mapped_mem_end_addr = NULL;
+    uint32 i;
+
+    for (i = 0; i < module_inst->memory_count; ++i) {
+        /* To be compatible with multi memory, get the ith memory instance */
+        memory_inst = wasm_get_memory_with_idx(module_inst, i);
+        mapped_mem_start_addr = memory_inst->memory_data;
+        mapped_mem_end_addr = memory_inst->memory_data + 8 * (uint64)BH_GB;
+        if (mapped_mem_start_addr <= (uint8 *)sig_addr
+            && (uint8 *)sig_addr < mapped_mem_end_addr) {
+            /* The address which causes segmentation fault is inside
+               the memory instance's guard regions */
+            return true;
+        }
+    }
+
+    return false;
+}
+
 #ifndef BH_PLATFORM_WINDOWS
 static void
 runtime_signal_handler(void *sig_addr)
 {
     WASMModuleInstance *module_inst;
-    WASMMemoryInstance *memory_inst;
     WASMJmpBuf *jmpbuf_node;
-    uint8 *mapped_mem_start_addr = NULL;
-    uint8 *mapped_mem_end_addr = NULL;
     uint32 page_size = os_getpagesize();
 #if WASM_DISABLE_STACK_HW_BOUND_CHECK == 0
     uint8 *stack_min_addr;
@@ -201,23 +222,13 @@ runtime_signal_handler(void *sig_addr)
         && (jmpbuf_node = exec_env_tls->jmpbuf_stack_top)) {
         /* Get mapped mem info of current instance */
         module_inst = (WASMModuleInstance *)exec_env_tls->module_inst;
-        /* Get the default memory instance */
-        memory_inst = wasm_get_default_memory(module_inst);
-        if (memory_inst) {
-            mapped_mem_start_addr = memory_inst->memory_data;
-            mapped_mem_end_addr = memory_inst->memory_data + 8 * (uint64)BH_GB;
-        }
 
 #if WASM_DISABLE_STACK_HW_BOUND_CHECK == 0
         /* Get stack info of current thread */
         stack_min_addr = os_thread_get_stack_boundary();
 #endif
 
-        if (memory_inst
-            && (mapped_mem_start_addr <= (uint8 *)sig_addr
-                && (uint8 *)sig_addr < mapped_mem_end_addr)) {
-            /* The address which causes segmentation fault is inside
-               the memory instance's guard regions */
+        if (is_sig_addr_in_guard_pages(sig_addr, module_inst)) {
             wasm_set_exception(module_inst, "out of bounds memory access");
             os_longjmp(jmpbuf_node->jmpbuf, 1);
         }
@@ -340,16 +351,7 @@ runtime_exception_handler(EXCEPTION_POINTERS *exce_info)
         && (jmpbuf_node = exec_env_tls->jmpbuf_stack_top)) {
         module_inst = (WASMModuleInstance *)exec_env_tls->module_inst;
         if (ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) {
-            /* Get the default memory instance */
-            memory_inst = wasm_get_default_memory(module_inst);
-            if (memory_inst) {
-                mapped_mem_start_addr = memory_inst->memory_data;
-                mapped_mem_end_addr =
-                    memory_inst->memory_data + 8 * (uint64)BH_GB;
-            }
-
-            if (memory_inst && mapped_mem_start_addr <= (uint8 *)sig_addr
-                && (uint8 *)sig_addr < mapped_mem_end_addr) {
+            if (is_sig_addr_in_guard_pages(sig_addr, module_inst)) {
                 /* The address which causes segmentation fault is inside
                    the memory instance's guard regions.
                    Set exception and let the wasm func continue to run, when
@@ -1417,12 +1419,39 @@ wasm_runtime_load_ex(uint8 *buf, uint32 size, const LoadArgs *args,
                      char *error_buf, uint32 error_buf_size)
 {
     WASMModuleCommon *module_common = NULL;
+    uint32 package_type;
+    bool magic_header_detected = false;
 
     if (!args) {
+        set_error_buf(error_buf, error_buf_size,
+                      "WASM module load failed: null load arguments");
         return NULL;
     }
 
-    if (get_package_type(buf, size) == Wasm_Module_Bytecode) {
+    if (size < 4) {
+        set_error_buf(error_buf, error_buf_size,
+                      "WASM module load failed: unexpected end");
+        return NULL;
+    }
+
+    package_type = get_package_type(buf, size);
+    if (package_type == Wasm_Module_Bytecode) {
+#if WASM_ENABLE_INTERP != 0
+        magic_header_detected = true;
+#endif
+    }
+    else if (package_type == Wasm_Module_AoT) {
+#if WASM_ENABLE_AOT != 0
+        magic_header_detected = true;
+#endif
+    }
+    if (!magic_header_detected) {
+        set_error_buf(error_buf, error_buf_size,
+                      "WASM module load failed: magic header not detected");
+        return NULL;
+    }
+
+    if (package_type == Wasm_Module_Bytecode) {
 #if WASM_ENABLE_INTERP != 0
         module_common =
             (WASMModuleCommon *)wasm_load(buf, size,
@@ -1435,7 +1464,7 @@ wasm_runtime_load_ex(uint8 *buf, uint32 size, const LoadArgs *args,
                 args->wasm_binary_freeable;
 #endif
     }
-    else if (get_package_type(buf, size) == Wasm_Module_AoT) {
+    else if (package_type == Wasm_Module_AoT) {
 #if WASM_ENABLE_AOT != 0
         module_common = (WASMModuleCommon *)aot_load_from_aot_file(
             buf, size, args, error_buf, error_buf_size);
@@ -1444,15 +1473,7 @@ wasm_runtime_load_ex(uint8 *buf, uint32 size, const LoadArgs *args,
                 args->wasm_binary_freeable;
 #endif
     }
-    else {
-        if (size < 4)
-            set_error_buf(error_buf, error_buf_size,
-                          "WASM module load failed: unexpected end");
-        else
-            set_error_buf(error_buf, error_buf_size,
-                          "WASM module load failed: magic header not detected");
-        return NULL;
-    }
+
     if (!module_common) {
         LOG_DEBUG("WASM module load failed");
         return NULL;
@@ -4718,9 +4739,13 @@ fail:
  * Implementation of wasm_runtime_invoke_native()
  */
 
-/* The invoke native implementation on ARM platform with VFP co-processor */
+/**
+ * The invoke native implementation on ARM platform with VFP co-processor,
+ * RISCV32 platform with/without FPU/DPFPU and ARC platform.
+ */
 #if defined(BUILD_TARGET_ARM_VFP) || defined(BUILD_TARGET_THUMB_VFP) \
     || defined(BUILD_TARGET_RISCV32_ILP32D)                          \
+    || defined(BUILD_TARGET_RISCV32_ILP32F)                          \
     || defined(BUILD_TARGET_RISCV32_ILP32) || defined(BUILD_TARGET_ARC)
 typedef void (*GenericFunctionPointer)();
 void
@@ -4821,7 +4846,8 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr,
 #endif
                     n_ints += 2;
                 }
-#if defined(BUILD_TARGET_RISCV32_ILP32) \
+#if defined(BUILD_TARGET_RISCV32_ILP32)     \
+    || defined(BUILD_TARGET_RISCV32_ILP32F) \
     || defined(BUILD_TARGET_RISCV32_ILP32D) || defined(BUILD_TARGET_ARC)
                 /* part in register, part in stack */
                 else if (n_ints == MAX_REG_INTS - 1) {
@@ -4843,19 +4869,32 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr,
             case VALUE_TYPE_F32:
                 if (n_fps < MAX_REG_FLOATS)
                     n_fps++;
+#if defined(BUILD_TARGET_RISCV32_ILP32F)
+                else if (n_ints < MAX_REG_INTS) {
+                    n_ints++;
+                }
+#endif
                 else
                     n_stacks++;
                 break;
             case VALUE_TYPE_F64:
+#if defined(BUILD_TARGET_RISCV32_ILP32) \
+    || defined(BUILD_TARGET_RISCV32_ILP32F) || defined(BUILD_TARGET_ARC)
+                if (n_ints < MAX_REG_INTS - 1) {
+                    n_ints += 2;
+                }
+                else if (n_ints == MAX_REG_INTS - 1) {
+                    n_ints++;
+                    n_stacks++;
+                }
+#endif
+#if defined(BUILD_TARGET_ARM_VFP) || defined(BUILD_TARGET_THUMB_VFP)
                 if (n_fps < MAX_REG_FLOATS - 1) {
-#if !defined(BUILD_TARGET_RISCV32_ILP32) && !defined(BUILD_TARGET_ARC)
                     /* 64-bit data must be 8 bytes aligned in arm */
                     if (n_fps & 1)
                         n_fps++;
-#endif
                     n_fps += 2;
                 }
-#if defined(BUILD_TARGET_RISCV32_ILP32) || defined(BUILD_TARGET_ARC)
                 else if (n_fps == MAX_REG_FLOATS - 1) {
                     n_fps++;
                     n_stacks++;
@@ -4887,7 +4926,7 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr,
                     /* use int regs firstly if available */
                     if (n_ints & 1)
                         n_ints++;
-                    ints += 2;
+                    n_ints += 2;
                 }
                 else {
                     /* 64-bit data in stack must be 8 bytes aligned in riscv32
@@ -4911,7 +4950,8 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr,
             n_stacks++;
     }
 
-#if defined(BUILD_TARGET_ARM_VFP) || defined(BUILD_TARGET_THUMB_VFP)
+#if defined(BUILD_TARGET_ARM_VFP) || defined(BUILD_TARGET_THUMB_VFP) \
+    || defined(BUILD_TARGET_RISCV32_ILP32F)
     argc1 = MAX_REG_INTS + MAX_REG_FLOATS + n_stacks;
 #elif defined(BUILD_TARGET_RISCV32_ILP32) || defined(BUILD_TARGET_ARC)
     argc1 = MAX_REG_INTS + n_stacks;
@@ -4928,7 +4968,8 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr,
     }
 
     ints = argv1;
-#if defined(BUILD_TARGET_ARM_VFP) || defined(BUILD_TARGET_THUMB_VFP)
+#if defined(BUILD_TARGET_ARM_VFP) || defined(BUILD_TARGET_THUMB_VFP) \
+    || defined(BUILD_TARGET_RISCV32_ILP32F)
     fps = ints + MAX_REG_INTS;
     stacks = fps + MAX_REG_FLOATS;
 #elif defined(BUILD_TARGET_RISCV32_ILP32) || defined(BUILD_TARGET_ARC)
@@ -5018,7 +5059,8 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr,
                     ints[n_ints++] = *argv_src++;
                     ints[n_ints++] = *argv_src++;
                 }
-#if defined(BUILD_TARGET_RISCV32_ILP32) \
+#if defined(BUILD_TARGET_RISCV32_ILP32)     \
+    || defined(BUILD_TARGET_RISCV32_ILP32F) \
     || defined(BUILD_TARGET_RISCV32_ILP32D) || defined(BUILD_TARGET_ARC)
                 else if (n_ints == MAX_REG_INTS - 1) {
                     ints[n_ints++] = *argv_src++;
@@ -5042,22 +5084,36 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr,
             {
                 if (n_fps < MAX_REG_FLOATS)
                     *(float32 *)&fps[n_fps++] = *(float32 *)argv_src++;
+#if defined(BUILD_TARGET_RISCV32_ILP32F)
+                else if (n_ints < MAX_REG_INTS) {
+                    ints[n_ints++] = *argv_src++;
+                }
+#endif
                 else
                     *(float32 *)&stacks[n_stacks++] = *(float32 *)argv_src++;
                 break;
             }
             case VALUE_TYPE_F64:
             {
+#if defined(BUILD_TARGET_RISCV32_ILP32) \
+    || defined(BUILD_TARGET_RISCV32_ILP32F) || defined(BUILD_TARGET_ARC)
+                if (n_ints < MAX_REG_INTS - 1) {
+                    ints[n_ints++] = *argv_src++;
+                    ints[n_ints++] = *argv_src++;
+                }
+                else if (n_ints == MAX_REG_INTS - 1) {
+                    ints[n_ints++] = *argv_src++;
+                    stacks[n_stacks++] = *argv_src++;
+                }
+#endif
+#if defined(BUILD_TARGET_ARM_VFP) || defined(BUILD_TARGET_THUMB_VFP)
                 if (n_fps < MAX_REG_FLOATS - 1) {
-#if !defined(BUILD_TARGET_RISCV32_ILP32) && !defined(BUILD_TARGET_ARC)
                     /* 64-bit data must be 8 bytes aligned in arm */
                     if (n_fps & 1)
                         n_fps++;
-#endif
                     fps[n_fps++] = *argv_src++;
                     fps[n_fps++] = *argv_src++;
                 }
-#if defined(BUILD_TARGET_RISCV32_ILP32) || defined(BUILD_TARGET_ARC)
                 else if (n_fps == MAX_REG_FLOATS - 1) {
                     fps[n_fps++] = *argv_src++;
                     stacks[n_stacks++] = *argv_src++;
@@ -5249,6 +5305,7 @@ fail:
 #endif /* end of defined(BUILD_TARGET_ARM_VFP)    \
           || defined(BUILD_TARGET_THUMB_VFP)      \
           || defined(BUILD_TARGET_RISCV32_ILP32D) \
+          || defined(BUILD_TARGET_RISCV32_ILP32F) \
           || defined(BUILD_TARGET_RISCV32_ILP32)  \
           || defined(BUILD_TARGET_ARC) */
 
diff --git a/core/iwasm/compilation/aot.c b/core/iwasm/compilation/aot.c
index 2bacae87a..0a2cae1f0 100644
--- a/core/iwasm/compilation/aot.c
+++ b/core/iwasm/compilation/aot.c
@@ -540,6 +540,7 @@ aot_create_comp_data(WASMModule *module, const char *target_arch,
     /* TODO: create import memories */
 
     /* Allocate memory for memory array, reserve one AOTMemory space at least */
+    /* TODO: multi-memory */
     if (!comp_data->memory_count)
         comp_data->memory_count = 1;
 
diff --git a/core/iwasm/compilation/aot.h b/core/iwasm/compilation/aot.h
index ad765ca04..dcf9bbe12 100644
--- a/core/iwasm/compilation/aot.h
+++ b/core/iwasm/compilation/aot.h
@@ -300,7 +300,7 @@ typedef struct AOTCompData {
 
 typedef struct AOTNativeSymbol {
     bh_list_link link;
-    char symbol[32];
+    char symbol[48];
     int32 index;
 } AOTNativeSymbol;
 
diff --git a/core/iwasm/compilation/aot_emit_const.c b/core/iwasm/compilation/aot_emit_const.c
index 5665b480b..64fa3ded1 100644
--- a/core/iwasm/compilation/aot_emit_const.c
+++ b/core/iwasm/compilation/aot_emit_const.c
@@ -68,23 +68,21 @@ aot_compile_op_f32_const(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 {
     LLVMValueRef alloca, value;
 
-    if (!isnan(f32_const)) {
-        if (comp_ctx->is_indirect_mode
-            && aot_intrinsic_check_capability(comp_ctx, "f32.const")) {
-            WASMValue wasm_value;
-            memcpy(&wasm_value.f32, &f32_const, sizeof(float32));
-            value = aot_load_const_from_table(comp_ctx, func_ctx->native_symbol,
-                                              &wasm_value, VALUE_TYPE_F32);
-            if (!value) {
-                return false;
-            }
-            PUSH_F32(value);
-        }
-        else {
-            value = F32_CONST(f32_const);
-            CHECK_LLVM_CONST(value);
-            PUSH_F32(value);
+    if (comp_ctx->is_indirect_mode
+        && aot_intrinsic_check_capability(comp_ctx, "f32.const")) {
+        WASMValue wasm_value;
+        memcpy(&wasm_value.f32, &f32_const, sizeof(float32));
+        value = aot_load_const_from_table(comp_ctx, func_ctx->native_symbol,
+                                          &wasm_value, VALUE_TYPE_F32);
+        if (!value) {
+            return false;
         }
+        PUSH_F32(value);
+    }
+    else if (!isnan(f32_const)) {
+        value = F32_CONST(f32_const);
+        CHECK_LLVM_CONST(value);
+        PUSH_F32(value);
     }
     else {
         int32 i32_const;
@@ -123,23 +121,21 @@ aot_compile_op_f64_const(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 {
     LLVMValueRef alloca, value;
 
-    if (!isnan(f64_const)) {
-        if (comp_ctx->is_indirect_mode
-            && aot_intrinsic_check_capability(comp_ctx, "f64.const")) {
-            WASMValue wasm_value;
-            memcpy(&wasm_value.f64, &f64_const, sizeof(float64));
-            value = aot_load_const_from_table(comp_ctx, func_ctx->native_symbol,
-                                              &wasm_value, VALUE_TYPE_F64);
-            if (!value) {
-                return false;
-            }
-            PUSH_F64(value);
-        }
-        else {
-            value = F64_CONST(f64_const);
-            CHECK_LLVM_CONST(value);
-            PUSH_F64(value);
+    if (comp_ctx->is_indirect_mode
+        && aot_intrinsic_check_capability(comp_ctx, "f64.const")) {
+        WASMValue wasm_value;
+        memcpy(&wasm_value.f64, &f64_const, sizeof(float64));
+        value = aot_load_const_from_table(comp_ctx, func_ctx->native_symbol,
+                                          &wasm_value, VALUE_TYPE_F64);
+        if (!value) {
+            return false;
         }
+        PUSH_F64(value);
+    }
+    else if (!isnan(f64_const)) {
+        value = F64_CONST(f64_const);
+        CHECK_LLVM_CONST(value);
+        PUSH_F64(value);
     }
     else {
         int64 i64_const;
diff --git a/core/iwasm/compilation/aot_emit_memory.c b/core/iwasm/compilation/aot_emit_memory.c
index e7c9b679b..806150ff1 100644
--- a/core/iwasm/compilation/aot_emit_memory.c
+++ b/core/iwasm/compilation/aot_emit_memory.c
@@ -895,6 +895,7 @@ aot_compile_op_memory_grow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 
     POP_PAGE_COUNT(delta);
 
+    /* TODO: multi-memory aot_enlarge_memory_with_idx() */
     /* Function type of aot_enlarge_memory() */
     param_types[0] = INT8_PTR_TYPE;
     param_types[1] = I32_TYPE;
diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c
index 6471c9c3e..d738cfc0e 100644
--- a/core/iwasm/compilation/aot_llvm.c
+++ b/core/iwasm/compilation/aot_llvm.c
@@ -1690,7 +1690,15 @@ aot_create_stack_sizes(const AOTCompData *comp_data, AOTCompContext *comp_ctx)
      * avoid creating extra relocations in the precheck functions.
      */
     LLVMSetLinkage(stack_sizes, LLVMInternalLinkage);
-    LLVMSetSection(stack_sizes, aot_stack_sizes_section_name);
+    /*
+     * for AOT, place it into a dedicated section for the convenience
+     * of the AOT file generation and symbol resolutions.
+     *
+     * for JIT, it doesn't matter.
+     */
+    if (!comp_ctx->is_jit_mode) {
+        LLVMSetSection(stack_sizes, aot_stack_sizes_section_name);
+    }
     comp_ctx->stack_sizes_type = stack_sizes_type;
     comp_ctx->stack_sizes = stack_sizes;
     return true;
@@ -3108,6 +3116,16 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
         goto fail;
     }
 
+    /* Return error if ref-types and GC are disabled by command line but
+       ref-types instructions are used */
+    if (!option->enable_ref_types && !option->enable_gc
+        && wasm_module->is_ref_types_used) {
+        aot_set_last_error("ref-types instruction was found, "
+                           "try removing --disable-ref-types option "
+                           "or adding --enable-gc option.");
+        goto fail;
+    }
+
     /* Disable features when they are not actually used */
     if (!wasm_module->is_simd_used) {
         option->enable_simd = comp_ctx->enable_simd = false;
@@ -3121,7 +3139,8 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
 #endif
 
     if (option->enable_simd && strcmp(comp_ctx->target_arch, "x86_64") != 0
-        && strncmp(comp_ctx->target_arch, "aarch64", 7) != 0) {
+        && strncmp(comp_ctx->target_arch, "aarch64", 7) != 0
+        && strcmp(comp_ctx->target_arch, "arc") != 0) {
         /* Disable simd if it isn't supported by target arch */
         option->enable_simd = false;
     }
@@ -3277,6 +3296,7 @@ static bool
 insert_native_symbol(AOTCompContext *comp_ctx, const char *symbol, int32 idx)
 {
     AOTNativeSymbol *sym = wasm_runtime_malloc(sizeof(AOTNativeSymbol));
+    int ret;
 
     if (!sym) {
         aot_set_last_error("alloc native symbol failed.");
@@ -3285,7 +3305,12 @@ insert_native_symbol(AOTCompContext *comp_ctx, const char *symbol, int32 idx)
 
     memset(sym, 0, sizeof(AOTNativeSymbol));
     bh_assert(strlen(symbol) <= sizeof(sym->symbol));
-    snprintf(sym->symbol, sizeof(sym->symbol), "%s", symbol);
+    ret = snprintf(sym->symbol, sizeof(sym->symbol), "%s", symbol);
+    if (ret < 0 || ret + 1 > (int)sizeof(sym->symbol)) {
+        wasm_runtime_free(sym);
+        aot_set_last_error_v("symbol name too long: %s", symbol);
+        return false;
+    }
     sym->index = idx;
 
     if (BH_LIST_ERROR == bh_list_insert(&comp_ctx->native_symbols, sym)) {
diff --git a/core/iwasm/compilation/aot_llvm_extra.cpp b/core/iwasm/compilation/aot_llvm_extra.cpp
index fdd5517b9..e6770eb45 100644
--- a/core/iwasm/compilation/aot_llvm_extra.cpp
+++ b/core/iwasm/compilation/aot_llvm_extra.cpp
@@ -41,6 +41,9 @@
 #include <llvm/Target/CodeGenCWrappers.h>
 #include <llvm/Target/TargetMachine.h>
 #include <llvm/Target/TargetOptions.h>
+#if LLVM_VERSION_MAJOR >= 17
+#include <llvm/TargetParser/Triple.h>
+#endif
 #include <llvm/Transforms/Utils/LowerMemIntrinsics.h>
 #include <llvm/Transforms/Vectorize/LoopVectorize.h>
 #include <llvm/Transforms/Vectorize/LoadStoreVectorizer.h>
@@ -173,6 +176,9 @@ aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str)
     else if (targetArch == llvm::Triple::aarch64) {
         return subTargetInfo->checkFeatures("+neon");
     }
+    else if (targetArch == llvm::Triple::arc) {
+        return true;
+    }
     else {
         return false;
     }
diff --git a/core/iwasm/fast-jit/fe/jit_emit_memory.c b/core/iwasm/fast-jit/fe/jit_emit_memory.c
index ea245ba34..bbe82cf67 100644
--- a/core/iwasm/fast-jit/fe/jit_emit_memory.c
+++ b/core/iwasm/fast-jit/fe/jit_emit_memory.c
@@ -602,6 +602,7 @@ jit_compile_op_memory_grow(JitCompContext *cc, uint32 mem_idx)
     args[0] = get_module_inst_reg(cc->jit_frame);
     args[1] = inc_page_count;
 
+    /* TODO: multi-memory wasm_enlarge_memory_with_idx() */
     if (!jit_emit_callnative(cc, wasm_enlarge_memory, grow_res, args, 2)) {
         goto fail;
     }
diff --git a/core/iwasm/include/wasm_c_api.h b/core/iwasm/include/wasm_c_api.h
index 3f1d2b64a..4994454bd 100644
--- a/core/iwasm/include/wasm_c_api.h
+++ b/core/iwasm/include/wasm_c_api.h
@@ -691,6 +691,11 @@ WASM_API_EXTERN own wasm_instance_t* wasm_instance_new_with_args_ex(
 
 WASM_API_EXTERN void wasm_instance_exports(const wasm_instance_t*, own wasm_extern_vec_t* out);
 
+// Return total wasm functions' execution time in ms
+WASM_API_EXTERN double wasm_instance_sum_wasm_exec_time(const wasm_instance_t*);
+// Return execution time in ms of a given wasm function with
+// func_name. If the function is not found, return 0.
+WASM_API_EXTERN double wasm_instance_get_wasm_func_exec_time(const wasm_instance_t*, const char *);
 
 ///////////////////////////////////////////////////////////////////////////////
 // Convenience
diff --git a/core/iwasm/interpreter/wasm.h b/core/iwasm/interpreter/wasm.h
index ea93adb03..e043465d4 100644
--- a/core/iwasm/interpreter/wasm.h
+++ b/core/iwasm/interpreter/wasm.h
@@ -94,6 +94,14 @@ extern "C" {
 #define SHARED_MEMORY_FLAG 0x02
 #define MEMORY64_FLAG 0x04
 
+/**
+ * In the multi-memory proposal, the memarg in loads and stores are
+ * reinterpreted as a bitfield, bit 6 serves as a flag indicating the presence
+ * of the optional memory index, if it is set, then an i32 memory index follows
+ * after the alignment bitfield
+ */
+#define OPT_MEMIDX_FLAG 0x40
+
 #define DEFAULT_NUM_BYTES_PER_PAGE 65536
 #define DEFAULT_MAX_PAGES 65536
 #define DEFAULT_MEM64_MAX_PAGES UINT32_MAX
diff --git a/core/iwasm/interpreter/wasm_interp_classic.c b/core/iwasm/interpreter/wasm_interp_classic.c
index bd8a4eeb1..4a8ba4e2c 100644
--- a/core/iwasm/interpreter/wasm_interp_classic.c
+++ b/core/iwasm/interpreter/wasm_interp_classic.c
@@ -697,6 +697,44 @@ wasm_interp_get_frame_ref(WASMInterpFrame *frame)
 #define read_leb_mem_offset(p, p_end, res) read_leb_uint32(p, p_end, res)
 #endif
 
+#if WASM_ENABLE_MULTI_MEMORY != 0
+/* If the current memidx differs than the last cached one,
+ * update memory related information */
+#define read_leb_memidx(p, p_end, res)                        \
+    do {                                                      \
+        read_leb_uint32(p, p_end, res);                       \
+        if (res != memidx_cached) {                           \
+            memory = wasm_get_memory_with_idx(module, res);   \
+            linear_mem_size = GET_LINEAR_MEMORY_SIZE(memory); \
+            memidx_cached = res;                              \
+        }                                                     \
+    } while (0)
+/* First read the alignment, then if it has flag indicating following memidx,
+ * read and update memory related information, if it differs than the
+ * last(cached) one. If it doesn't have flag reset the
+ * memory instance to the default memories[0] */
+#define read_leb_memarg(p, p_end, res)                         \
+    do {                                                       \
+        read_leb_uint32(p, p_end, res);                        \
+        if (!(res & OPT_MEMIDX_FLAG))                          \
+            memidx = 0;                                        \
+        else                                                   \
+            read_leb_uint32(p, p_end, memidx);                 \
+        if (memidx != memidx_cached) {                         \
+            memory = wasm_get_memory_with_idx(module, memidx); \
+            linear_mem_size = GET_LINEAR_MEMORY_SIZE(memory);  \
+            memidx_cached = memidx;                            \
+        }                                                      \
+    } while (0)
+#else
+#define read_leb_memarg(p, p_end, res)  \
+    do {                                \
+        read_leb_uint32(p, p_end, res); \
+        (void)res;                      \
+    } while (0)
+#define read_leb_memidx(p, p_end, res) read_leb_memarg(p, p_end, res)
+#endif
+
 #if WASM_ENABLE_LABELS_AS_VALUES == 0
 #define RECOVER_FRAME_IP_END() frame_ip_end = wasm_get_func_code_end(cur_func)
 #else
@@ -1399,26 +1437,48 @@ wasm_interp_call_func_import(WASMModuleInstance *module_inst,
 #endif /* WASM_ENABLE_DEBUG_INTERP */
 #endif /* WASM_ENABLE_THREAD_MGR */
 
+#if WASM_ENABLE_THREAD_MGR != 0 && WASM_ENABLE_DEBUG_INTERP != 0
+#if BH_ATOMIC_32_IS_ATOMIC != 0
+#define GET_SIGNAL_FLAG()                                             \
+    do {                                                              \
+        signal_flag =                                                 \
+            BH_ATOMIC_32_LOAD(exec_env->current_status->signal_flag); \
+    } while (0)
+#else
+#define GET_SIGNAL_FLAG()                                    \
+    do {                                                     \
+        os_mutex_lock(&exec_env->wait_lock);                 \
+        signal_flag = exec_env->current_status->signal_flag; \
+        os_mutex_unlock(&exec_env->wait_lock);               \
+    } while (0)
+#endif
+#endif
+
 #if WASM_ENABLE_LABELS_AS_VALUES != 0
 
 #define HANDLE_OP(opcode) HANDLE_##opcode:
 #define FETCH_OPCODE_AND_DISPATCH() goto *handle_table[*frame_ip++]
 
 #if WASM_ENABLE_THREAD_MGR != 0 && WASM_ENABLE_DEBUG_INTERP != 0
-#define HANDLE_OP_END()                                                   \
-    do {                                                                  \
-        /* Record the current frame_ip, so when exception occurs,         \
-           debugger can know the exact opcode who caused the exception */ \
-        frame_ip_orig = frame_ip;                                         \
-        os_mutex_lock(&exec_env->wait_lock);                              \
-        while (exec_env->current_status->signal_flag == WAMR_SIG_SINGSTEP \
-               && exec_env->current_status->step_count++ == 1) {          \
-            exec_env->current_status->step_count = 0;                     \
-            SYNC_ALL_TO_FRAME();                                          \
-            wasm_cluster_thread_waiting_run(exec_env);                    \
-        }                                                                 \
-        os_mutex_unlock(&exec_env->wait_lock);                            \
-        goto *handle_table[*frame_ip++];                                  \
+#define HANDLE_OP_END()                                                       \
+    do {                                                                      \
+        /* Record the current frame_ip, so when exception occurs,             \
+           debugger can know the exact opcode who caused the exception */     \
+        frame_ip_orig = frame_ip;                                             \
+        /* Atomic load the exec_env's signal_flag first, and then handle      \
+           more with lock if it is WAMR_SIG_SINGSTEP */                       \
+        GET_SIGNAL_FLAG();                                                    \
+        if (signal_flag == WAMR_SIG_SINGSTEP) {                               \
+            os_mutex_lock(&exec_env->wait_lock);                              \
+            while (exec_env->current_status->signal_flag == WAMR_SIG_SINGSTEP \
+                   && exec_env->current_status->step_count++ == 1) {          \
+                exec_env->current_status->step_count = 0;                     \
+                SYNC_ALL_TO_FRAME();                                          \
+                wasm_cluster_thread_waiting_run(exec_env);                    \
+            }                                                                 \
+            os_mutex_unlock(&exec_env->wait_lock);                            \
+        }                                                                     \
+        goto *handle_table[*frame_ip++];                                      \
     } while (0)
 #else
 #define HANDLE_OP_END() FETCH_OPCODE_AND_DISPATCH()
@@ -1427,16 +1487,24 @@ wasm_interp_call_func_import(WASMModuleInstance *module_inst,
 #else /* else of WASM_ENABLE_LABELS_AS_VALUES */
 #define HANDLE_OP(opcode) case opcode:
 #if WASM_ENABLE_THREAD_MGR != 0 && WASM_ENABLE_DEBUG_INTERP != 0
-#define HANDLE_OP_END()                                            \
-    os_mutex_lock(&exec_env->wait_lock);                           \
-    if (exec_env->current_status->signal_flag == WAMR_SIG_SINGSTEP \
-        && exec_env->current_status->step_count++ == 1) {          \
-        exec_env->current_status->step_count = 0;                  \
-        SYNC_ALL_TO_FRAME();                                       \
-        wasm_cluster_thread_waiting_run(exec_env);                 \
-    }                                                              \
-    os_mutex_unlock(&exec_env->wait_lock);                         \
-    continue
+#define HANDLE_OP_END()                                                   \
+    /* Record the current frame_ip, so when exception occurs,             \
+       debugger can know the exact opcode who caused the exception */     \
+    frame_ip_orig = frame_ip;                                             \
+    /* Atomic load the exec_env's signal_flag first, and then handle      \
+       more with lock if it is WAMR_SIG_SINGSTEP */                       \
+    GET_SIGNAL_FLAG();                                                    \
+    if (signal_flag == WAMR_SIG_SINGSTEP) {                               \
+        os_mutex_lock(&exec_env->wait_lock);                              \
+        while (exec_env->current_status->signal_flag == WAMR_SIG_SINGSTEP \
+               && exec_env->current_status->step_count++ == 1) {          \
+            exec_env->current_status->step_count = 0;                     \
+            SYNC_ALL_TO_FRAME();                                          \
+            wasm_cluster_thread_waiting_run(exec_env);                    \
+        }                                                                 \
+        os_mutex_unlock(&exec_env->wait_lock);                            \
+    }                                                                     \
+    continue;
 #else
 #define HANDLE_OP_END() continue
 #endif
@@ -1537,6 +1605,10 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
     if (memory)
         is_memory64 = memory->is_memory64;
 #endif
+#if WASM_ENABLE_MULTI_MEMORY != 0
+    uint32 memidx = 0;
+    uint32 memidx_cached = (uint32)-1;
+#endif
 
 #if WASM_ENABLE_DEBUG_INTERP != 0
     uint8 *frame_ip_orig = NULL;
@@ -1545,6 +1617,9 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
         debug_instance ? &debug_instance->watch_point_list_read : NULL;
     bh_list *watch_point_list_write =
         debug_instance ? &debug_instance->watch_point_list_write : NULL;
+#if WASM_ENABLE_THREAD_MGR != 0
+    uint32 signal_flag;
+#endif
 #endif
 
 #if WASM_ENABLE_LABELS_AS_VALUES != 0
@@ -2248,8 +2323,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 bh_assert(tidx < module->module->type_count);
                 cur_type = wasm_types[tidx];
 
+                /* clang-format off */
+#if WASM_ENABLE_REF_TYPES != 0 || WASM_ENABLE_GC != 0
                 read_leb_uint32(frame_ip, frame_ip_end, tbl_idx);
+#else
+                frame_ip++;
+                tbl_idx = 0;
+#endif
                 bh_assert(tbl_idx < module->table_count);
+                /* clang-format on */
 
                 tbl_inst = wasm_get_table_inst(module, tbl_idx);
 
@@ -4252,13 +4334,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 addr = POP_MEM_OFFSET();
                 CHECK_MEMORY_OVERFLOW(4);
                 PUSH_I32(LOAD_I32(maddr));
                 CHECK_READ_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4268,13 +4349,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 addr = POP_MEM_OFFSET();
                 CHECK_MEMORY_OVERFLOW(8);
                 PUSH_I64(LOAD_I64(maddr));
                 CHECK_READ_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4283,13 +4363,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 addr = POP_MEM_OFFSET();
                 CHECK_MEMORY_OVERFLOW(1);
                 PUSH_I32(sign_ext_8_32(*(int8 *)maddr));
                 CHECK_READ_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4298,13 +4377,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 addr = POP_MEM_OFFSET();
                 CHECK_MEMORY_OVERFLOW(1);
                 PUSH_I32((uint32)(*(uint8 *)maddr));
                 CHECK_READ_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4313,13 +4391,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 addr = POP_MEM_OFFSET();
                 CHECK_MEMORY_OVERFLOW(2);
                 PUSH_I32(sign_ext_16_32(LOAD_I16(maddr)));
                 CHECK_READ_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4328,13 +4405,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 addr = POP_MEM_OFFSET();
                 CHECK_MEMORY_OVERFLOW(2);
                 PUSH_I32((uint32)(LOAD_U16(maddr)));
                 CHECK_READ_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4343,13 +4419,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 addr = POP_MEM_OFFSET();
                 CHECK_MEMORY_OVERFLOW(1);
                 PUSH_I64(sign_ext_8_64(*(int8 *)maddr));
                 CHECK_READ_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4358,13 +4433,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 addr = POP_MEM_OFFSET();
                 CHECK_MEMORY_OVERFLOW(1);
                 PUSH_I64((uint64)(*(uint8 *)maddr));
                 CHECK_READ_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4373,13 +4447,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 addr = POP_MEM_OFFSET();
                 CHECK_MEMORY_OVERFLOW(2);
                 PUSH_I64(sign_ext_16_64(LOAD_I16(maddr)));
                 CHECK_READ_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4388,13 +4461,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 addr = POP_MEM_OFFSET();
                 CHECK_MEMORY_OVERFLOW(2);
                 PUSH_I64((uint64)(LOAD_U16(maddr)));
                 CHECK_READ_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4403,14 +4475,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                opcode = *(frame_ip - 1);
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 addr = POP_MEM_OFFSET();
                 CHECK_MEMORY_OVERFLOW(4);
                 PUSH_I64(sign_ext_32_64(LOAD_I32(maddr)));
                 CHECK_READ_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4419,13 +4489,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 addr = POP_MEM_OFFSET();
                 CHECK_MEMORY_OVERFLOW(4);
                 PUSH_I64((uint64)(LOAD_U32(maddr)));
                 CHECK_READ_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4436,7 +4505,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 frame_sp--;
                 addr = POP_MEM_OFFSET();
@@ -4451,7 +4520,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     STORE_U32(maddr, frame_sp[1]);
                 }
                 CHECK_WRITE_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4461,7 +4529,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 flags;
                 mem_offset_t offset, addr;
 
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 frame_sp -= 2;
                 addr = POP_MEM_OFFSET();
@@ -4479,7 +4547,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                                     GET_I64_FROM_ADDR(frame_sp + 1));
                 }
                 CHECK_WRITE_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4491,7 +4558,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint32 sval;
 
                 opcode = *(frame_ip - 1);
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 sval = (uint32)POP_I32();
                 addr = POP_MEM_OFFSET();
@@ -4505,7 +4572,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     STORE_U16(maddr, (uint16)sval);
                 }
                 CHECK_WRITE_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
@@ -4518,7 +4584,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 uint64 sval;
 
                 opcode = *(frame_ip - 1);
-                read_leb_uint32(frame_ip, frame_ip_end, flags);
+                read_leb_memarg(frame_ip, frame_ip_end, flags);
                 read_leb_mem_offset(frame_ip, frame_ip_end, offset);
                 sval = (uint64)POP_I64();
                 addr = POP_MEM_OFFSET();
@@ -4536,29 +4602,27 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     STORE_U32(maddr, (uint32)sval);
                 }
                 CHECK_WRITE_WATCHPOINT(addr, offset);
-                (void)flags;
                 HANDLE_OP_END();
             }
 
             /* memory size and memory grow instructions */
             HANDLE_OP(WASM_OP_MEMORY_SIZE)
             {
-                uint32 reserved;
-                read_leb_uint32(frame_ip, frame_ip_end, reserved);
+                uint32 mem_idx;
+                read_leb_memidx(frame_ip, frame_ip_end, mem_idx);
                 PUSH_PAGE_COUNT(memory->cur_page_count);
-                (void)reserved;
                 HANDLE_OP_END();
             }
 
             HANDLE_OP(WASM_OP_MEMORY_GROW)
             {
-                uint32 reserved, delta,
-                    prev_page_count = memory->cur_page_count;
+                uint32 mem_idx, delta, prev_page_count;
 
-                read_leb_uint32(frame_ip, frame_ip_end, reserved);
+                read_leb_memidx(frame_ip, frame_ip_end, mem_idx);
+                prev_page_count = memory->cur_page_count;
                 delta = (uint32)POP_PAGE_COUNT();
 
-                if (!wasm_enlarge_memory(module, delta)) {
+                if (!wasm_enlarge_memory_with_idx(module, delta, mem_idx)) {
                     /* failed to memory.grow, return -1 */
                     PUSH_PAGE_COUNT(-1);
                 }
@@ -4574,7 +4638,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 #endif
                 }
 
-                (void)reserved;
                 HANDLE_OP_END();
             }
 
@@ -5570,8 +5633,12 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         uint8 *data;
 
                         read_leb_uint32(frame_ip, frame_ip_end, segment);
+#if WASM_ENABLE_MULTI_MEMORY != 0
+                        read_leb_memidx(frame_ip, frame_ip_end, memidx);
+#else
                         /* skip memory index */
                         frame_ip++;
+#endif
 
                         bytes = (uint64)(uint32)POP_I32();
                         offset = (uint64)(uint32)POP_I32();
@@ -5620,33 +5687,54 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     {
                         mem_offset_t dst, src, len;
                         uint8 *mdst, *msrc;
+                        uint64 dlen;
 
-                        frame_ip += 2;
                         len = POP_MEM_OFFSET();
                         src = POP_MEM_OFFSET();
                         dst = POP_MEM_OFFSET();
 
+#if WASM_ENABLE_MULTI_MEMORY != 0
+                        /* dst memidx */
+                        read_leb_memidx(frame_ip, frame_ip_end, memidx);
+#else
+                        /* skip dst memidx */
+                        frame_ip += 1;
+#endif
 #if WASM_ENABLE_THREAD_MGR != 0
                         linear_mem_size = get_linear_mem_size();
 #endif
-
+                        /* dst boundary check */
 #ifndef OS_ENABLE_HW_BOUND_CHECK
-                        CHECK_BULK_MEMORY_OVERFLOW(src, len, msrc);
                         CHECK_BULK_MEMORY_OVERFLOW(dst, len, mdst);
 #else
-                        if ((uint64)(uint32)src + len > linear_mem_size)
+                        if ((uint64)dst + len > linear_mem_size)
                             goto out_of_bounds;
-                        msrc = memory->memory_data + (uint32)src;
+                        mdst = memory->memory_data + dst;
+#endif
+                        dlen = linear_mem_size - dst;
 
-                        if ((uint64)(uint32)dst + len > linear_mem_size)
+#if WASM_ENABLE_MULTI_MEMORY != 0
+                        /* src memidx */
+                        read_leb_memidx(frame_ip, frame_ip_end, memidx);
+#else
+                        /* skip src memidx */
+                        frame_ip += 1;
+#endif
+#if WASM_ENABLE_THREAD_MGR != 0
+                        linear_mem_size = get_linear_mem_size();
+#endif
+                        /* src boundary check */
+#ifndef OS_ENABLE_HW_BOUND_CHECK
+                        CHECK_BULK_MEMORY_OVERFLOW(src, len, msrc);
+#else
+                        if ((uint64)src + len > linear_mem_size)
                             goto out_of_bounds;
-                        mdst = memory->memory_data + (uint32)dst;
+                        msrc = memory->memory_data + src;
 #endif
 
-                        /* allowing the destination and source to overlap */
 #if WASM_ENABLE_MEMORY64 == 0
-                        bh_memmove_s(mdst, (uint32)(linear_mem_size - dst),
-                                     msrc, (uint32)len);
+                        /* allowing the destination and source to overlap */
+                        bh_memmove_s(mdst, (uint32)dlen, msrc, (uint32)len);
 #else
                         /* use memmove when memory64 is enabled since len
                            may be larger than UINT32_MAX */
@@ -5658,7 +5746,13 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     {
                         mem_offset_t dst, len;
                         uint8 fill_val, *mdst;
+
+#if WASM_ENABLE_MULTI_MEMORY != 0
+                        read_leb_memidx(frame_ip, frame_ip_end, memidx);
+#else
+                        /* skip memory index */
                         frame_ip++;
+#endif
 
                         len = POP_MEM_OFFSET();
                         fill_val = POP_I32();
diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c
index 825079249..51963759f 100644
--- a/core/iwasm/interpreter/wasm_interp_fast.c
+++ b/core/iwasm/interpreter/wasm_interp_fast.c
@@ -3837,6 +3837,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 addr_ret = GET_OFFSET();
                 delta = (uint32)frame_lp[addr1];
 
+                /* TODO: multi-memory wasm_enlarge_memory_with_idx() */
                 if (!wasm_enlarge_memory(module, delta)) {
                     /* failed to memory.grow, return -1 */
                     frame_lp[addr_ret] = -1;
diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c
index 785b28980..13947ac82 100644
--- a/core/iwasm/interpreter/wasm_loader.c
+++ b/core/iwasm/interpreter/wasm_loader.c
@@ -56,10 +56,7 @@ has_module_memory64(WASMModule *module)
 static void
 set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
 {
-    if (error_buf != NULL) {
-        snprintf(error_buf, error_buf_size, "WASM module load failed: %s",
-                 string);
-    }
+    wasm_loader_set_error_buf(error_buf, error_buf_size, string, false);
 }
 
 #if WASM_ENABLE_MEMORY64 != 0
@@ -130,82 +127,17 @@ check_buf1(const uint8 *buf, const uint8 *buf_end, uint32 length,
 #define skip_leb_uint32(p, p_end) skip_leb(p)
 #define skip_leb_int32(p, p_end) skip_leb(p)
 #define skip_leb_mem_offset(p, p_end) skip_leb(p)
-
-static bool
-read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign,
-         uint64 *p_result, char *error_buf, uint32 error_buf_size)
-{
-    const uint8 *buf = *p_buf;
-    uint64 result = 0;
-    uint32 shift = 0;
-    uint32 offset = 0, bcnt = 0;
-    uint64 byte;
-
-    while (true) {
-        /* uN or SN must not exceed ceil(N/7) bytes */
-        if (bcnt + 1 > (maxbits + 6) / 7) {
-            set_error_buf(error_buf, error_buf_size,
-                          "integer representation too long");
-            return false;
-        }
-
-        CHECK_BUF(buf, buf_end, offset + 1);
-        byte = buf[offset];
-        offset += 1;
-        result |= ((byte & 0x7f) << shift);
-        shift += 7;
-        bcnt += 1;
-        if ((byte & 0x80) == 0) {
-            break;
-        }
-    }
-
-    if (!sign && maxbits == 32 && shift >= maxbits) {
-        /* The top bits set represent values > 32 bits */
-        if (((uint8)byte) & 0xf0)
-            goto fail_integer_too_large;
-    }
-    else if (sign && maxbits == 32) {
-        if (shift < maxbits) {
-            /* Sign extend, second-highest bit is the sign bit */
-            if ((uint8)byte & 0x40)
-                result |= (~((uint64)0)) << shift;
-        }
-        else {
-            /* The top bits should be a sign-extension of the sign bit */
-            bool sign_bit_set = ((uint8)byte) & 0x8;
-            int top_bits = ((uint8)byte) & 0xf0;
-            if ((sign_bit_set && top_bits != 0x70)
-                || (!sign_bit_set && top_bits != 0))
-                goto fail_integer_too_large;
-        }
-    }
-    else if (sign && maxbits == 64) {
-        if (shift < maxbits) {
-            /* Sign extend, second-highest bit is the sign bit */
-            if ((uint8)byte & 0x40)
-                result |= (~((uint64)0)) << shift;
-        }
-        else {
-            /* The top bits should be a sign-extension of the sign bit */
-            bool sign_bit_set = ((uint8)byte) & 0x1;
-            int top_bits = ((uint8)byte) & 0xfe;
-
-            if ((sign_bit_set && top_bits != 0x7e)
-                || (!sign_bit_set && top_bits != 0))
-                goto fail_integer_too_large;
-        }
-    }
-
-    *p_buf += offset;
-    *p_result = result;
-    return true;
-
-fail_integer_too_large:
-    set_error_buf(error_buf, error_buf_size, "integer too large");
-fail:
-    return false;
-}
+#define skip_leb_memidx(p, p_end) skip_leb(p)
+#if WASM_ENABLE_MULTI_MEMORY == 0
+#define skip_leb_align(p, p_end) skip_leb(p)
+#else
+/* Skip the following memidx if applicable */
+#define skip_leb_align(p, p_end)       \
+    do {                               \
+        if (*p++ & OPT_MEMIDX_FLAG)    \
+            skip_leb_uint32(p, p_end); \
+    } while (0)
+#endif
 
 #define read_uint8(p) TEMPLATE_READ_VALUE(uint8, p)
 #define read_uint32(p) TEMPLATE_READ_VALUE(uint32, p)
@@ -252,6 +184,40 @@ fail:
         res = (int32)res64;                                             \
     } while (0)
 
+#define read_leb_memidx(p, p_end, res) read_leb_uint32(p, p_end, res)
+#if WASM_ENABLE_MULTI_MEMORY != 0
+#define check_memidx(module, memidx)                                        \
+    do {                                                                    \
+        if (memidx >= module->import_memory_count + module->memory_count) { \
+            set_error_buf_v(error_buf, error_buf_size, "unknown memory %d", \
+                            memidx);                                        \
+            goto fail;                                                      \
+        }                                                                   \
+    } while (0)
+/* Bit 6(0x40) indicating the optional memidx, and reset bit 6 for
+ * alignment check */
+#define read_leb_memarg(p, p_end, res)                      \
+    do {                                                    \
+        read_leb_uint32(p, p_end, res);                     \
+        if (res & OPT_MEMIDX_FLAG) {                        \
+            res &= ~OPT_MEMIDX_FLAG;                        \
+            read_leb_uint32(p, p_end, memidx); /* memidx */ \
+            check_memidx(module, memidx);                   \
+        }                                                   \
+    } while (0)
+#else
+/* reserved byte 0x00 */
+#define check_memidx(module, memidx)                                        \
+    do {                                                                    \
+        (void)module;                                                       \
+        if (memidx != 0) {                                                  \
+            set_error_buf(error_buf, error_buf_size, "zero byte expected"); \
+            goto fail;                                                      \
+        }                                                                   \
+    } while (0)
+#define read_leb_memarg(p, p_end, res) read_leb_uint32(p, p_end, res)
+#endif
+
 static char *
 type2str(uint8 type)
 {
@@ -3367,11 +3333,13 @@ load_import_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
                     if (flags & 1)
                         read_leb_uint32(p, p_end, u32);
                     module->import_memory_count++;
+#if WASM_ENABLE_MULTI_MEMORY == 0
                     if (module->import_memory_count > 1) {
                         set_error_buf(error_buf, error_buf_size,
                                       "multiple memories");
                         return false;
                     }
+#endif
                     break;
 
 #if WASM_ENABLE_TAGS != 0
@@ -3982,11 +3950,14 @@ load_memory_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
     WASMMemory *memory;
 
     read_leb_uint32(p, p_end, memory_count);
+
+#if WASM_ENABLE_MULTI_MEMORY == 0
     /* a total of one memory is allowed */
     if (module->import_memory_count + memory_count > 1) {
         set_error_buf(error_buf, error_buf_size, "multiple memories");
         return false;
     }
+#endif
 
     if (memory_count) {
         module->memory_count = memory_count;
@@ -7228,10 +7199,10 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache,
             case WASM_OP_RETURN_CALL_INDIRECT:
 #endif
                 skip_leb_uint32(p, p_end); /* typeidx */
-#if WASM_ENABLE_REF_TYPES == 0 && WASM_ENABLE_GC == 0
-                u8 = read_uint8(p); /* 0x00 */
-#else
+#if WASM_ENABLE_REF_TYPES != 0 || WASM_ENABLE_GC != 0
                 skip_leb_uint32(p, p_end); /* tableidx */
+#else
+                u8 = read_uint8(p); /* 0x00 */
 #endif
                 break;
 
@@ -7337,13 +7308,13 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache,
             case WASM_OP_I64_STORE8:
             case WASM_OP_I64_STORE16:
             case WASM_OP_I64_STORE32:
-                skip_leb_uint32(p, p_end);     /* align */
+                skip_leb_align(p, p_end);      /* align */
                 skip_leb_mem_offset(p, p_end); /* offset */
                 break;
 
             case WASM_OP_MEMORY_SIZE:
             case WASM_OP_MEMORY_GROW:
-                skip_leb_uint32(p, p_end); /* 0x00 */
+                skip_leb_memidx(p, p_end); /* memidx */
                 break;
 
             case WASM_OP_I32_CONST:
@@ -7641,19 +7612,17 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache,
 #if WASM_ENABLE_BULK_MEMORY != 0
                     case WASM_OP_MEMORY_INIT:
                         skip_leb_uint32(p, p_end);
-                        /* skip memory idx */
-                        p++;
+                        skip_leb_memidx(p, p_end);
                         break;
                     case WASM_OP_DATA_DROP:
                         skip_leb_uint32(p, p_end);
                         break;
                     case WASM_OP_MEMORY_COPY:
-                        /* skip two memory idx */
-                        p += 2;
+                        skip_leb_memidx(p, p_end);
+                        skip_leb_memidx(p, p_end);
                         break;
                     case WASM_OP_MEMORY_FILL:
-                        /* skip memory idx */
-                        p++;
+                        skip_leb_memidx(p, p_end);
                         break;
 #endif /* WASM_ENABLE_BULK_MEMORY */
 #if WASM_ENABLE_REF_TYPES != 0
@@ -7780,7 +7749,6 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache,
 #if WASM_ENABLE_SHARED_MEMORY != 0
             case WASM_OP_ATOMIC_PREFIX:
             {
-                /* TODO: memory64 offset type changes */
                 uint32 opcode1;
 
                 /* atomic_op (u32_leb) + memarg (2 u32_leb) */
@@ -10955,6 +10923,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
 #else
     mem_offset_type = VALUE_TYPE_I32;
 #endif
+    uint32 memidx;
 
     global_count = module->import_global_count + module->global_count;
 
@@ -12083,6 +12052,16 @@ re_scan:
 
                 read_leb_uint32(p, p_end, type_idx);
 #if WASM_ENABLE_REF_TYPES != 0 || WASM_ENABLE_GC != 0
+#if WASM_ENABLE_WAMR_COMPILER != 0
+                if (p + 1 < p_end && *p != 0x00) {
+                    /*
+                     * Any non-0x00 byte requires the ref types proposal.
+                     * This is different from checking the table_idx value
+                     * since `0x80 0x00` etc. are all valid encodings of zero.
+                     */
+                    module->is_ref_types_used = true;
+                }
+#endif
                 read_leb_uint32(p, p_end, table_idx);
 #else
                 CHECK_BUF(p, p_end, 1);
@@ -13224,7 +13203,7 @@ re_scan:
                 }
 #endif
                 CHECK_MEMORY();
-                read_leb_uint32(p, p_end, align);          /* align */
+                read_leb_memarg(p, p_end, align);          /* align */
                 read_leb_mem_offset(p, p_end, mem_offset); /* offset */
                 if (!check_memory_access_align(opcode, align, error_buf,
                                                error_buf_size)) {
@@ -13290,12 +13269,8 @@ re_scan:
 
             case WASM_OP_MEMORY_SIZE:
                 CHECK_MEMORY();
-                /* reserved byte 0x00 */
-                if (*p++ != 0x00) {
-                    set_error_buf(error_buf, error_buf_size,
-                                  "zero byte expected");
-                    goto fail;
-                }
+                read_leb_uint32(p, p_end, memidx);
+                check_memidx(module, memidx);
                 PUSH_PAGE_COUNT();
 
                 module->possible_memory_grow = true;
@@ -13306,12 +13281,8 @@ re_scan:
 
             case WASM_OP_MEMORY_GROW:
                 CHECK_MEMORY();
-                /* reserved byte 0x00 */
-                if (*p++ != 0x00) {
-                    set_error_buf(error_buf, error_buf_size,
-                                  "zero byte expected");
-                    goto fail;
-                }
+                read_leb_uint32(p, p_end, memidx);
+                check_memidx(module, memidx);
                 POP_AND_PUSH(mem_offset_type, mem_offset_type);
 
                 module->possible_memory_grow = true;
@@ -14663,8 +14634,8 @@ re_scan:
                             && module->memory_count == 0)
                             goto fail_unknown_memory;
 
-                        if (*p++ != 0x00)
-                            goto fail_zero_byte_expected;
+                        read_leb_uint32(p, p_end, memidx);
+                        check_memidx(module, memidx);
 
                         if (data_seg_idx >= module->data_seg_count) {
                             set_error_buf_v(error_buf, error_buf_size,
@@ -14713,10 +14684,11 @@ re_scan:
                     case WASM_OP_MEMORY_COPY:
                     {
                         CHECK_BUF(p, p_end, sizeof(int16));
-                        /* both src and dst memory index should be 0 */
-                        if (*(int16 *)p != 0x0000)
-                            goto fail_zero_byte_expected;
-                        p += 2;
+                        /* check both src and dst memory index */
+                        read_leb_uint32(p, p_end, memidx);
+                        check_memidx(module, memidx);
+                        read_leb_uint32(p, p_end, memidx);
+                        check_memidx(module, memidx);
 
                         if (module->import_memory_count == 0
                             && module->memory_count == 0)
@@ -14735,9 +14707,8 @@ re_scan:
                     }
                     case WASM_OP_MEMORY_FILL:
                     {
-                        if (*p++ != 0x00) {
-                            goto fail_zero_byte_expected;
-                        }
+                        read_leb_uint32(p, p_end, memidx);
+                        check_memidx(module, memidx);
                         if (module->import_memory_count == 0
                             && module->memory_count == 0) {
                             goto fail_unknown_memory;
@@ -14753,10 +14724,6 @@ re_scan:
 #endif
                         break;
                     }
-                    fail_zero_byte_expected:
-                        set_error_buf(error_buf, error_buf_size,
-                                      "zero byte expected");
-                        goto fail;
 
                     fail_unknown_memory:
                         set_error_buf(error_buf, error_buf_size,
@@ -14990,7 +14957,6 @@ re_scan:
 #if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0)
             case WASM_OP_SIMD_PREFIX:
             {
-                /* TODO: memory64 offset type changes */
                 uint32 opcode1;
 
 #if WASM_ENABLE_WAMR_COMPILER != 0
diff --git a/core/iwasm/interpreter/wasm_mini_loader.c b/core/iwasm/interpreter/wasm_mini_loader.c
index 0012b84ae..8826f98db 100644
--- a/core/iwasm/interpreter/wasm_mini_loader.c
+++ b/core/iwasm/interpreter/wasm_mini_loader.c
@@ -44,9 +44,7 @@ has_module_memory64(WASMModule *module)
 static void
 set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
 {
-    if (error_buf != NULL)
-        snprintf(error_buf, error_buf_size, "WASM module load failed: %s",
-                 string);
+    wasm_loader_set_error_buf(error_buf, error_buf_size, string, false);
 }
 
 #define CHECK_BUF(buf, buf_end, length)                            \
@@ -64,6 +62,17 @@ set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
 #define skip_leb_uint32(p, p_end) skip_leb(p)
 #define skip_leb_int32(p, p_end) skip_leb(p)
 #define skip_leb_mem_offset(p, p_end) skip_leb(p)
+#define skip_leb_memidx(p, p_end) skip_leb(p)
+#if WASM_ENABLE_MULTI_MEMORY == 0
+#define skip_leb_align(p, p_end) skip_leb(p)
+#else
+/* Skip the following memidx if applicable */
+#define skip_leb_align(p, p_end)       \
+    do {                               \
+        if (*p++ & OPT_MEMIDX_FLAG)    \
+            skip_leb_uint32(p, p_end); \
+    } while (0)
+#endif
 
 static bool
 is_32bit_type(uint8 type)
@@ -95,71 +104,6 @@ is_byte_a_type(uint8 type)
            || (type == VALUE_TYPE_VOID);
 }
 
-static void
-read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign,
-         uint64 *p_result, char *error_buf, uint32 error_buf_size)
-{
-    const uint8 *buf = *p_buf;
-    uint64 result = 0;
-    uint32 shift = 0;
-    uint32 offset = 0, bcnt = 0;
-    uint64 byte;
-
-    while (true) {
-        bh_assert(bcnt + 1 <= (maxbits + 6) / 7);
-        CHECK_BUF(buf, buf_end, offset + 1);
-        byte = buf[offset];
-        offset += 1;
-        result |= ((byte & 0x7f) << shift);
-        shift += 7;
-        bcnt += 1;
-        if ((byte & 0x80) == 0) {
-            break;
-        }
-    }
-
-    if (!sign && maxbits == 32 && shift >= maxbits) {
-        /* The top bits set represent values > 32 bits */
-        bh_assert(!(((uint8)byte) & 0xf0));
-    }
-    else if (sign && maxbits == 32) {
-        if (shift < maxbits) {
-            /* Sign extend, second-highest bit is the sign bit */
-            if ((uint8)byte & 0x40)
-                result |= (~((uint64)0)) << shift;
-        }
-        else {
-            /* The top bits should be a sign-extension of the sign bit */
-            bool sign_bit_set = ((uint8)byte) & 0x8;
-            int top_bits = ((uint8)byte) & 0xf0;
-            bh_assert(!((sign_bit_set && top_bits != 0x70)
-                        || (!sign_bit_set && top_bits != 0)));
-            (void)top_bits;
-            (void)sign_bit_set;
-        }
-    }
-    else if (sign && maxbits == 64) {
-        if (shift < maxbits) {
-            /* Sign extend, second-highest bit is the sign bit */
-            if ((uint8)byte & 0x40)
-                result |= (~((uint64)0)) << shift;
-        }
-        else {
-            /* The top bits should be a sign-extension of the sign bit */
-            bool sign_bit_set = ((uint8)byte) & 0x1;
-            int top_bits = ((uint8)byte) & 0xfe;
-
-            bh_assert(!((sign_bit_set && top_bits != 0x7e)
-                        || (!sign_bit_set && top_bits != 0)));
-            (void)top_bits;
-            (void)sign_bit_set;
-        }
-    }
-
-    *p_buf += offset;
-    *p_result = result;
-}
-
 #define read_uint8(p) TEMPLATE_READ_VALUE(uint8, p)
 #define read_uint32(p) TEMPLATE_READ_VALUE(uint32, p)
 #define read_bool(p) TEMPLATE_READ_VALUE(bool, p)
@@ -199,6 +143,35 @@ read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign,
 #else
 #define read_leb_mem_offset(p, p_end, res) read_leb_uint32(p, p_end, res)
 #endif
+#define read_leb_memidx(p, p_end, res) read_leb_uint32(p, p_end, res)
+#if WASM_ENABLE_MULTI_MEMORY != 0
+#define check_memidx(module, memidx)                                     \
+    do {                                                                 \
+        bh_assert(memidx                                                 \
+                  < module->import_memory_count + module->memory_count); \
+        (void)memidx;                                                    \
+    } while (0)
+/* Bit 6 indicating the optional memidx, and reset bit 6 for
+ * alignment check */
+#define read_leb_memarg(p, p_end, res)                      \
+    do {                                                    \
+        read_leb_uint32(p, p_end, res);                     \
+        if (res & OPT_MEMIDX_FLAG) {                        \
+            res &= ~OPT_MEMIDX_FLAG;                        \
+            read_leb_uint32(p, p_end, memidx); /* memidx */ \
+            check_memidx(module, memidx);                   \
+        }                                                   \
+    } while (0)
+#else
+/* reserved byte 0x00 */
+#define check_memidx(module, memidx) \
+    do {                             \
+        (void)module;                \
+        bh_assert(memidx == 0);      \
+        (void)memidx;                \
+    } while (0)
+#define read_leb_memarg(p, p_end, res) read_leb_uint32(p, p_end, res)
+#endif
 
 static void *
 loader_malloc(uint64 size, char *error_buf, uint32 error_buf_size)
@@ -949,7 +922,9 @@ load_import_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
                     if (flags & 1)
                         read_leb_uint32(p, p_end, u32);
                     module->import_memory_count++;
+#if WASM_ENABLE_MULTI_MEMORY != 0
                     bh_assert(module->import_memory_count <= 1);
+#endif
                     break;
 
                 case IMPORT_KIND_GLOBAL: /* import global */
@@ -1290,7 +1265,9 @@ load_memory_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
     WASMMemory *memory;
 
     read_leb_uint32(p, p_end, memory_count);
+#if WASM_ENABLE_MULTI_MEMORY != 0
     bh_assert(module->import_memory_count + memory_count <= 1);
+#endif
 
     if (memory_count) {
         module->memory_count = memory_count;
@@ -3568,8 +3545,11 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache,
             case WASM_OP_RETURN_CALL_INDIRECT:
 #endif
                 skip_leb_uint32(p, p_end); /* typeidx */
-                CHECK_BUF(p, p_end, 1);
+#if WASM_ENABLE_REF_TYPES != 0
+                skip_leb_uint32(p, p_end); /* tableidx */
+#else
                 u8 = read_uint8(p); /* 0x00 */
+#endif
                 break;
 
 #if WASM_ENABLE_EXCE_HANDLING != 0
@@ -3649,13 +3629,13 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache,
             case WASM_OP_I64_STORE8:
             case WASM_OP_I64_STORE16:
             case WASM_OP_I64_STORE32:
-                skip_leb_uint32(p, p_end);     /* align */
+                skip_leb_align(p, p_end);      /* align */
                 skip_leb_mem_offset(p, p_end); /* offset */
                 break;
 
             case WASM_OP_MEMORY_SIZE:
             case WASM_OP_MEMORY_GROW:
-                skip_leb_uint32(p, p_end); /* 0x00 */
+                skip_leb_memidx(p, p_end); /* memidx */
                 break;
 
             case WASM_OP_I32_CONST:
@@ -3822,19 +3802,17 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache,
 #if WASM_ENABLE_BULK_MEMORY != 0
                     case WASM_OP_MEMORY_INIT:
                         skip_leb_uint32(p, p_end);
-                        /* skip memory idx */
-                        p++;
+                        skip_leb_memidx(p, p_end);
                         break;
                     case WASM_OP_DATA_DROP:
                         skip_leb_uint32(p, p_end);
                         break;
                     case WASM_OP_MEMORY_COPY:
-                        /* skip two memory idx */
-                        p += 2;
+                        skip_leb_memidx(p, p_end);
+                        skip_leb_memidx(p, p_end);
                         break;
                     case WASM_OP_MEMORY_FILL:
-                        /* skip memory idx */
-                        p++;
+                        skip_leb_memidx(p, p_end);
                         break;
 #endif
 #if WASM_ENABLE_REF_TYPES != 0
@@ -5969,7 +5947,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
     uint8 *param_types, *local_types, local_type, global_type, mem_offset_type;
     BlockType func_block_type;
     uint16 *local_offsets, local_offset;
-    uint32 count, local_idx, global_idx, u32, align, i;
+    uint32 count, local_idx, global_idx, u32, align, i, memidx;
     mem_offset_t mem_offset;
     int32 i32, i32_const = 0;
     int64 i64_const;
@@ -7331,7 +7309,7 @@ re_scan:
                 }
 #endif
                 CHECK_MEMORY();
-                read_leb_uint32(p, p_end, align);          /* align */
+                read_leb_memarg(p, p_end, align);          /* align */
                 read_leb_mem_offset(p, p_end, mem_offset); /* offset */
 #if WASM_ENABLE_FAST_INTERP != 0
                 emit_uint32(loader_ctx, mem_offset);
@@ -7393,9 +7371,8 @@ re_scan:
 
             case WASM_OP_MEMORY_SIZE:
                 CHECK_MEMORY();
-                /* reserved byte 0x00 */
-                bh_assert(*p == 0x00);
-                p++;
+                read_leb_memidx(p, p_end, memidx);
+                check_memidx(module, memidx);
                 PUSH_PAGE_COUNT();
 
                 module->possible_memory_grow = true;
@@ -7406,9 +7383,8 @@ re_scan:
 
             case WASM_OP_MEMORY_GROW:
                 CHECK_MEMORY();
-                /* reserved byte 0x00 */
-                bh_assert(*p == 0x00);
-                p++;
+                read_leb_memidx(p, p_end, memidx);
+                check_memidx(module, memidx);
                 POP_AND_PUSH(mem_offset_type, mem_offset_type);
 
                 module->possible_memory_grow = true;
@@ -7746,16 +7722,13 @@ re_scan:
 #if WASM_ENABLE_BULK_MEMORY != 0
                     case WASM_OP_MEMORY_INIT:
                     {
+                        CHECK_MEMORY();
                         read_leb_uint32(p, p_end, segment_index);
 #if WASM_ENABLE_FAST_INTERP != 0
                         emit_uint32(loader_ctx, segment_index);
 #endif
-                        bh_assert(module->import_memory_count
-                                      + module->memory_count
-                                  > 0);
-
-                        bh_assert(*p == 0x00);
-                        p++;
+                        read_leb_memidx(p, p_end, memidx);
+                        check_memidx(module, memidx);
 
                         bh_assert(segment_index < module->data_seg_count);
                         bh_assert(module->data_seg_count1 > 0);
@@ -7783,14 +7756,13 @@ re_scan:
                     }
                     case WASM_OP_MEMORY_COPY:
                     {
+                        CHECK_MEMORY();
                         CHECK_BUF(p, p_end, sizeof(int16));
-                        /* both src and dst memory index should be 0 */
-                        bh_assert(*(int16 *)p == 0x0000);
-                        p += 2;
-
-                        bh_assert(module->import_memory_count
-                                      + module->memory_count
-                                  > 0);
+                        /* check both src and dst memory index */
+                        read_leb_memidx(p, p_end, memidx);
+                        check_memidx(module, memidx);
+                        read_leb_memidx(p, p_end, memidx);
+                        check_memidx(module, memidx);
 
                         POP_MEM_OFFSET();
                         POP_MEM_OFFSET();
@@ -7802,12 +7774,9 @@ re_scan:
                     }
                     case WASM_OP_MEMORY_FILL:
                     {
-                        bh_assert(*p == 0);
-                        p++;
-
-                        bh_assert(module->import_memory_count
-                                      + module->memory_count
-                                  > 0);
+                        CHECK_MEMORY();
+                        read_leb_memidx(p, p_end, memidx);
+                        check_memidx(module, memidx);
 
                         POP_MEM_OFFSET();
                         POP_I32();
diff --git a/core/iwasm/interpreter/wasm_runtime.c b/core/iwasm/interpreter/wasm_runtime.c
index 0d4d0b37d..f70f9cb73 100644
--- a/core/iwasm/interpreter/wasm_runtime.c
+++ b/core/iwasm/interpreter/wasm_runtime.c
@@ -194,114 +194,119 @@ memory_instantiate(WASMModuleInstance *module_inst, WASMModuleInstance *parent,
     default_max_page =
         memory->is_memory64 ? DEFAULT_MEM64_MAX_PAGES : DEFAULT_MAX_PAGES;
 
-    if (heap_size > 0 && module_inst->module->malloc_function != (uint32)-1
-        && module_inst->module->free_function != (uint32)-1) {
-        /* Disable app heap, use malloc/free function exported
-           by wasm app to allocate/free memory instead */
-        heap_size = 0;
-    }
+    /* The app heap should be in the default memory */
+    if (memory_idx == 0) {
+        if (heap_size > 0 && module_inst->module->malloc_function != (uint32)-1
+            && module_inst->module->free_function != (uint32)-1) {
+            /* Disable app heap, use malloc/free function exported
+               by wasm app to allocate/free memory instead */
+            heap_size = 0;
+        }
 
-    /* If initial memory is the largest size allowed, disallowing insert host
-     * managed heap */
-    if (heap_size > 0
-        && heap_offset == GET_MAX_LINEAR_MEMORY_SIZE(memory->is_memory64)) {
-        set_error_buf(error_buf, error_buf_size,
-                      "failed to insert app heap into linear memory, "
-                      "try using `--heap-size=0` option");
-        return NULL;
-    }
-
-    if (init_page_count == max_page_count && init_page_count == 1) {
-        /* If only one page and at most one page, we just append
-           the app heap to the end of linear memory, enlarge the
-           num_bytes_per_page, and don't change the page count */
-        heap_offset = num_bytes_per_page;
-        num_bytes_per_page += heap_size;
-        if (num_bytes_per_page < heap_size) {
+        /* If initial memory is the largest size allowed, disallowing insert
+         * host managed heap */
+        if (heap_size > 0
+            && heap_offset == GET_MAX_LINEAR_MEMORY_SIZE(memory->is_memory64)) {
             set_error_buf(error_buf, error_buf_size,
                           "failed to insert app heap into linear memory, "
                           "try using `--heap-size=0` option");
             return NULL;
         }
-    }
-    else if (heap_size > 0) {
-        if (init_page_count == max_page_count && init_page_count == 0) {
-            /* If the memory data size is always 0, we resize it to
-               one page for app heap */
-            num_bytes_per_page = heap_size;
-            heap_offset = 0;
-            inc_page_count = 1;
-        }
-        else if (module->aux_heap_base_global_index != (uint32)-1
-                 && module->aux_heap_base
-                        < (uint64)num_bytes_per_page * init_page_count) {
-            /* Insert app heap before __heap_base */
-            aux_heap_base = module->aux_heap_base;
-            bytes_of_last_page = aux_heap_base % num_bytes_per_page;
-            if (bytes_of_last_page == 0)
-                bytes_of_last_page = num_bytes_per_page;
-            bytes_to_page_end = num_bytes_per_page - bytes_of_last_page;
-            inc_page_count =
-                (heap_size - bytes_to_page_end + num_bytes_per_page - 1)
-                / num_bytes_per_page;
-            heap_offset = aux_heap_base;
-            aux_heap_base += heap_size;
 
-            bytes_of_last_page = aux_heap_base % num_bytes_per_page;
-            if (bytes_of_last_page == 0)
-                bytes_of_last_page = num_bytes_per_page;
-            bytes_to_page_end = num_bytes_per_page - bytes_of_last_page;
-            if (bytes_to_page_end < 1 * BH_KB) {
-                aux_heap_base += 1 * BH_KB;
-                inc_page_count++;
+        if (init_page_count == max_page_count && init_page_count == 1) {
+            /* If only one page and at most one page, we just append
+               the app heap to the end of linear memory, enlarge the
+               num_bytes_per_page, and don't change the page count */
+            heap_offset = num_bytes_per_page;
+            num_bytes_per_page += heap_size;
+            if (num_bytes_per_page < heap_size) {
+                set_error_buf(error_buf, error_buf_size,
+                              "failed to insert app heap into linear memory, "
+                              "try using `--heap-size=0` option");
+                return NULL;
             }
+        }
+        else if (heap_size > 0) {
+            if (init_page_count == max_page_count && init_page_count == 0) {
+                /* If the memory data size is always 0, we resize it to
+                   one page for app heap */
+                num_bytes_per_page = heap_size;
+                heap_offset = 0;
+                inc_page_count = 1;
+            }
+            else if (module->aux_heap_base_global_index != (uint32)-1
+                     && module->aux_heap_base
+                            < (uint64)num_bytes_per_page * init_page_count) {
+                /* Insert app heap before __heap_base */
+                aux_heap_base = module->aux_heap_base;
+                bytes_of_last_page = aux_heap_base % num_bytes_per_page;
+                if (bytes_of_last_page == 0)
+                    bytes_of_last_page = num_bytes_per_page;
+                bytes_to_page_end = num_bytes_per_page - bytes_of_last_page;
+                inc_page_count =
+                    (heap_size - bytes_to_page_end + num_bytes_per_page - 1)
+                    / num_bytes_per_page;
+                heap_offset = aux_heap_base;
+                aux_heap_base += heap_size;
 
-            /* Adjust __heap_base global value */
-            global_idx = module->aux_heap_base_global_index;
-            bh_assert(module_inst->e->globals
-                      && global_idx < module_inst->e->global_count);
-            global_addr = module_inst->global_data
-                          + module_inst->e->globals[global_idx].data_offset;
+                bytes_of_last_page = aux_heap_base % num_bytes_per_page;
+                if (bytes_of_last_page == 0)
+                    bytes_of_last_page = num_bytes_per_page;
+                bytes_to_page_end = num_bytes_per_page - bytes_of_last_page;
+                if (bytes_to_page_end < 1 * BH_KB) {
+                    aux_heap_base += 1 * BH_KB;
+                    inc_page_count++;
+                }
+
+                /* Adjust __heap_base global value */
+                global_idx = module->aux_heap_base_global_index;
+                bh_assert(module_inst->e->globals
+                          && global_idx < module_inst->e->global_count);
+                global_addr = module_inst->global_data
+                              + module_inst->e->globals[global_idx].data_offset;
 #if WASM_ENABLE_MEMORY64 != 0
-            if (memory->is_memory64) {
-                /* For memory64, the global value should be i64 */
-                *(uint64 *)global_addr = aux_heap_base;
-            }
-            else
+                if (memory->is_memory64) {
+                    /* For memory64, the global value should be i64 */
+                    *(uint64 *)global_addr = aux_heap_base;
+                }
+                else
 #endif
-            {
-                /* For memory32, the global value should be i32 */
-                *(uint32 *)global_addr = (uint32)aux_heap_base;
+                {
+                    /* For memory32, the global value should be i32 */
+                    *(uint32 *)global_addr = (uint32)aux_heap_base;
+                }
+                LOG_VERBOSE("Reset __heap_base global to %" PRIu64,
+                            aux_heap_base);
+            }
+            else {
+                /* Insert app heap before new page */
+                inc_page_count =
+                    (heap_size + num_bytes_per_page - 1) / num_bytes_per_page;
+                heap_offset = (uint64)num_bytes_per_page * init_page_count;
+                heap_size = (uint64)num_bytes_per_page * inc_page_count;
+                if (heap_size > 0)
+                    heap_size -= 1 * BH_KB;
+            }
+            init_page_count += inc_page_count;
+            max_page_count += inc_page_count;
+            if (init_page_count > default_max_page) {
+                set_error_buf(error_buf, error_buf_size,
+                              "failed to insert app heap into linear memory, "
+                              "try using `--heap-size=0` option");
+                return NULL;
             }
-            LOG_VERBOSE("Reset __heap_base global to %" PRIu64, aux_heap_base);
-        }
-        else {
-            /* Insert app heap before new page */
-            inc_page_count =
-                (heap_size + num_bytes_per_page - 1) / num_bytes_per_page;
-            heap_offset = (uint64)num_bytes_per_page * init_page_count;
-            heap_size = (uint64)num_bytes_per_page * inc_page_count;
-            if (heap_size > 0)
-                heap_size -= 1 * BH_KB;
-        }
-        init_page_count += inc_page_count;
-        max_page_count += inc_page_count;
-        if (init_page_count > default_max_page) {
-            set_error_buf(error_buf, error_buf_size,
-                          "failed to insert app heap into linear memory, "
-                          "try using `--heap-size=0` option");
-            return NULL;
-        }
 
-        if (max_page_count > default_max_page)
-            max_page_count = default_max_page;
+            if (max_page_count > default_max_page)
+                max_page_count = default_max_page;
+        }
     }
 
     LOG_VERBOSE("Memory instantiate:");
     LOG_VERBOSE("  page bytes: %u, init pages: %u, max pages: %u",
                 num_bytes_per_page, init_page_count, max_page_count);
-    LOG_VERBOSE("  heap offset: %" PRIu64 ", heap size: %u\n", heap_offset,
-                heap_size);
+    if (memory_idx == 0)
+        LOG_VERBOSE("  heap offset: %" PRIu64 ", heap size: %u\n", heap_offset,
+                    heap_size);
 
     max_memory_data_size = (uint64)num_bytes_per_page * max_page_count;
     bh_assert(max_memory_data_size
@@ -326,12 +331,14 @@ memory_instantiate(WASMModuleInstance *module_inst, WASMModuleInstance *parent,
     memory->max_page_count = max_page_count;
     memory->memory_data_size = memory_data_size;
 
-    memory->heap_data = memory->memory_data + heap_offset;
-    memory->heap_data_end = memory->heap_data + heap_size;
-    memory->memory_data_end = memory->memory_data + memory_data_size;
+    if (memory_idx == 0) {
+        memory->heap_data = memory->memory_data + heap_offset;
+        memory->heap_data_end = memory->heap_data + heap_size;
+        memory->memory_data_end = memory->memory_data + memory_data_size;
+    }
 
     /* Initialize heap */
-    if (heap_size > 0) {
+    if (memory_idx == 0 && heap_size > 0) {
         uint32 heap_struct_size = mem_allocator_get_heap_struct_size();
 
         if (!(memory->heap_handle = runtime_malloc(
@@ -361,7 +368,7 @@ memory_instantiate(WASMModuleInstance *module_inst, WASMModuleInstance *parent,
     return memory;
 
 fail2:
-    if (heap_size > 0)
+    if (memory_idx == 0 && heap_size > 0)
         wasm_runtime_free(memory->heap_handle);
 fail1:
     if (memory->memory_data)
@@ -1351,7 +1358,45 @@ export_globals_instantiate(const WASMModule *module,
     bh_assert((uint32)(export_global - export_globals) == export_glob_count);
     return export_globals;
 }
-#endif
+
+#if WASM_ENABLE_MULTI_MEMORY != 0
+static void
+export_memories_deinstantiate(WASMExportMemInstance *memories)
+{
+    if (memories)
+        wasm_runtime_free(memories);
+}
+
+static WASMExportMemInstance *
+export_memories_instantiate(const WASMModule *module,
+                            WASMModuleInstance *module_inst,
+                            uint32 export_mem_count, char *error_buf,
+                            uint32 error_buf_size)
+{
+    WASMExportMemInstance *export_memories, *export_memory;
+    WASMExport *export = module->exports;
+    uint32 i;
+    uint64 total_size =
+        sizeof(WASMExportMemInstance) * (uint64)export_mem_count;
+
+    if (!(export_memory = export_memories =
+              runtime_malloc(total_size, error_buf, error_buf_size))) {
+        return NULL;
+    }
+
+    for (i = 0; i < module->export_count; i++, export ++)
+        if (export->kind == EXPORT_KIND_MEMORY) {
+            export_memory->name = export->name;
+            export_memory->memory = module_inst->memories[export->index];
+            export_memory++;
+        }
+
+    bh_assert((uint32)(export_memory - export_memories) == export_mem_count);
+    return export_memories;
+}
+#endif /* end of if WASM_ENABLE_MULTI_MEMORY != 0 */
+
+#endif /* end of if WASM_ENABLE_MULTI_MODULE != 0 */
 
 static WASMFunctionInstance *
 lookup_post_instantiate_func(WASMModuleInstance *module_inst,
@@ -2387,6 +2432,12 @@ wasm_instantiate(WASMModule *module, WASMModuleInstance *parent,
                      module, module_inst, module_inst->export_global_count,
                      error_buf, error_buf_size)))
 #endif
+#if WASM_ENABLE_MULTI_MODULE != 0 && WASM_ENABLE_MULTI_MEMORY != 0
+        || (module_inst->export_memory_count > 0
+            && !(module_inst->export_memories = export_memories_instantiate(
+                     module, module_inst, module_inst->export_memory_count,
+                     error_buf, error_buf_size)))
+#endif
 #if WASM_ENABLE_JIT != 0
         || (module_inst->e->function_count > 0
             && !init_func_ptrs(module_inst, module, error_buf, error_buf_size))
@@ -2990,16 +3041,7 @@ wasm_instantiate(WASMModule *module, WASMModuleInstance *parent,
     /* Initialize the thread related data */
     if (stack_size == 0)
         stack_size = DEFAULT_WASM_STACK_SIZE;
-#if WASM_ENABLE_SPEC_TEST != 0
-#if WASM_ENABLE_TAIL_CALL == 0
-    if (stack_size < 128 * 1024)
-        stack_size = 128 * 1024;
-#else
-    /* Some tail-call cases require large operand stack */
-    if (stack_size < 10 * 1024 * 1024)
-        stack_size = 10 * 1024 * 1024;
-#endif
-#endif
+
     module_inst->default_wasm_stack_size = stack_size;
 
     if (module->malloc_function != (uint32)-1) {
@@ -3198,6 +3240,10 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
     export_globals_deinstantiate(module_inst->export_globals);
 #endif
 
+#if WASM_ENABLE_MULTI_MODULE != 0 && WASM_ENABLE_MULTI_MEMORY != 0
+    export_memories_deinstantiate(module_inst->export_memories);
+#endif
+
 #if WASM_ENABLE_GC == 0 && WASM_ENABLE_REF_TYPES != 0
     wasm_externref_cleanup((WASMModuleInstanceCommon *)module_inst);
 #endif
@@ -3260,12 +3306,16 @@ wasm_lookup_global(const WASMModuleInstance *module_inst, const char *name)
 WASMMemoryInstance *
 wasm_lookup_memory(const WASMModuleInstance *module_inst, const char *name)
 {
-    /**
-     * using a strong assumption that one module instance only has
-     * one memory instance
-     */
+#if WASM_ENABLE_MULTI_MEMORY != 0
+    uint32 i;
+    for (i = 0; i < module_inst->export_memory_count; i++)
+        if (!strcmp(module_inst->export_memories[i].name, name))
+            return module_inst->export_memories[i].memory;
+    return NULL;
+#else
     (void)module_inst->export_memories;
     return module_inst->memories[0];
+#endif
 }
 
 WASMTableInstance *
diff --git a/core/iwasm/interpreter/wasm_runtime.h b/core/iwasm/interpreter/wasm_runtime.h
index 80aee8742..8666541f2 100644
--- a/core/iwasm/interpreter/wasm_runtime.h
+++ b/core/iwasm/interpreter/wasm_runtime.h
@@ -620,9 +620,16 @@ wasm_check_app_addr_and_convert(WASMModuleInstance *module_inst, bool is_str,
 WASMMemoryInstance *
 wasm_get_default_memory(WASMModuleInstance *module_inst);
 
+WASMMemoryInstance *
+wasm_get_memory_with_idx(WASMModuleInstance *module_inst, uint32 index);
+
 bool
 wasm_enlarge_memory(WASMModuleInstance *module_inst, uint32 inc_page_count);
 
+bool
+wasm_enlarge_memory_with_idx(WASMModuleInstance *module_inst,
+                             uint32 inc_page_count, uint32 memidx);
+
 bool
 wasm_call_indirect(WASMExecEnv *exec_env, uint32 tbl_idx, uint32 elem_idx,
                    uint32 argc, uint32 argv[]);
diff --git a/core/iwasm/libraries/lib-pthread/SConscript b/core/iwasm/libraries/lib-pthread/SConscript
index d03936c2f..1eb1cc24c 100644
--- a/core/iwasm/libraries/lib-pthread/SConscript
+++ b/core/iwasm/libraries/lib-pthread/SConscript
@@ -9,12 +9,12 @@ from building import *
 cwd     = GetCurrentDir()
 
 src = Split('''
-libc_pthread_wrapper.c
+lib_pthread_wrapper.c
 ''')
 
 CPPPATH = [cwd]
 
 
-group = DefineGroup('iwasm_libc_pthread', src, depend = [''], CPPPATH = CPPPATH)
+group = DefineGroup('iwasm_lib_pthread', src, depend = [''], CPPPATH = CPPPATH)
 
 Return('group')
diff --git a/core/iwasm/libraries/lib-wasi-threads/SConscript b/core/iwasm/libraries/lib-wasi-threads/SConscript
new file mode 100755
index 000000000..c4d62e3db
--- /dev/null
+++ b/core/iwasm/libraries/lib-wasi-threads/SConscript
@@ -0,0 +1,15 @@
+#
+# Copyright 2024 Sony Semiconductor Solutions Corporation.
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+
+from building import *
+
+cwd     = GetCurrentDir()
+src     = Glob('*.c')
+CPPPATH = [cwd]
+
+group = DefineGroup('iwasm_lib_wasi_threads', src, depend = [''], CPPPATH = CPPPATH)
+
+Return('group')
diff --git a/core/iwasm/libraries/libc-builtin/libc_builtin_wrapper.c b/core/iwasm/libraries/libc-builtin/libc_builtin_wrapper.c
index f4cf6b8ef..a68c07494 100644
--- a/core/iwasm/libraries/libc-builtin/libc_builtin_wrapper.c
+++ b/core/iwasm/libraries/libc-builtin/libc_builtin_wrapper.c
@@ -16,7 +16,7 @@
 void
 wasm_runtime_set_exception(wasm_module_inst_t module, const char *exception);
 
-uint32
+uint64
 wasm_runtime_module_realloc(wasm_module_inst_t module, uint64 ptr, uint64 size,
                             void **p_native_addr);
 
@@ -683,9 +683,12 @@ calloc_wrapper(wasm_exec_env_t exec_env, uint32 nmemb, uint32 size)
 static uint32
 realloc_wrapper(wasm_exec_env_t exec_env, uint32 ptr, uint32 new_size)
 {
+    uint64 ret_offset = 0;
     wasm_module_inst_t module_inst = get_module_inst(exec_env);
 
-    return wasm_runtime_module_realloc(module_inst, ptr, new_size, NULL);
+    ret_offset = wasm_runtime_module_realloc(module_inst, ptr, new_size, NULL);
+    bh_assert(ret_offset < UINT32_MAX);
+    return (uint32)ret_offset;
 }
 
 static void
@@ -1005,7 +1008,7 @@ print_i32_wrapper(wasm_exec_env_t exec_env, int32 i32)
 static void
 print_i64_wrapper(wasm_exec_env_t exec_env, int64 i64)
 {
-    os_printf("in specttest.print_i64(%" PRId32 ")\n", i64);
+    os_printf("in specttest.print_i64(%" PRId64 ")\n", i64);
 }
 
 static void
diff --git a/core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c b/core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c
index 969955415..427bfd656 100644
--- a/core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c
+++ b/core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c
@@ -495,6 +495,24 @@ emscripten_notify_memory_growth_wrapper(wasm_exec_env_t exec_env, int i)
     (void)i;
 }
 
+static void
+emscripten_sleep_wrapper(wasm_exec_env_t exec_env, int timeout_ms)
+{
+    unsigned int sec;
+    useconds_t us;
+
+    if (timeout_ms <= 0)
+        return;
+
+    sec = timeout_ms / 1000;
+    us = (timeout_ms % 1000) * 1000;
+
+    if (sec > 0)
+        sleep(sec);
+    if (us > 0)
+        usleep(us);
+}
+
 static void
 emscripten_thread_sleep_wrapper(wasm_exec_env_t exec_env, double timeout_ms)
 {
@@ -544,6 +562,7 @@ static NativeSymbol native_symbols_libc_emcc[] = {
     REG_NATIVE_FUNC(__sys_getcwd, "(*~)i"),
     REG_NATIVE_FUNC(__sys_uname, "(*)i"),
     REG_NATIVE_FUNC(emscripten_notify_memory_growth, "(i)"),
+    REG_NATIVE_FUNC(emscripten_sleep, "(i)"),
     REG_NATIVE_FUNC(emscripten_thread_sleep, "(F)"),
 #endif /* end of BH_PLATFORM_LINUX_SGX */
 };
diff --git a/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/posix.c b/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/posix.c
index 627bbbb38..a313b9be5 100644
--- a/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/posix.c
+++ b/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/posix.c
@@ -459,8 +459,27 @@ fd_determine_type_rights(os_file_handle fd, __wasi_filetype_t *type,
                          __wasi_rights_t *rights_inheriting)
 {
     struct __wasi_filestat_t buf;
-    __wasi_errno_t error = os_fstat(fd, &buf);
+    __wasi_errno_t error;
 
+    if (os_is_stdin_handle(fd)) {
+        *rights_base = RIGHTS_STDIN;
+        *rights_inheriting = RIGHTS_STDIN;
+        return __WASI_ESUCCESS;
+    }
+
+    if (os_is_stdout_handle(fd)) {
+        *rights_base = RIGHTS_STDOUT;
+        *rights_inheriting = RIGHTS_STDOUT;
+        return __WASI_ESUCCESS;
+    }
+
+    if (os_is_stderr_handle(fd)) {
+        *rights_base = RIGHTS_STDERR;
+        *rights_inheriting = RIGHTS_STDERR;
+        return __WASI_ESUCCESS;
+    }
+
+    error = os_fstat(fd, &buf);
     if (error != __WASI_ESUCCESS)
         return error;
 
diff --git a/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/rights.h b/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/rights.h
index 4f5838159..41ff56f27 100644
--- a/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/rights.h
+++ b/core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/rights.h
@@ -47,6 +47,19 @@
 #define RIGHTS_CHARACTER_DEVICE_BASE RIGHTS_ALL
 #define RIGHTS_CHARACTER_DEVICE_INHERITING RIGHTS_ALL
 
+#define RIGHTS_STDIN \
+  (__WASI_RIGHT_FD_ADVISE | __WASI_RIGHT_FD_FILESTAT_GET | \
+  __WASI_RIGHT_FD_READ | __WASI_RIGHT_FD_WRITE | \
+  __WASI_RIGHT_POLL_FD_READWRITE)
+
+#define RIGHTS_STDOUT \
+  (__WASI_RIGHT_FD_ADVISE | __WASI_RIGHT_FD_DATASYNC | \
+  __WASI_RIGHT_FD_FILESTAT_GET | __WASI_RIGHT_FD_SYNC | \
+  __WASI_RIGHT_FD_READ | __WASI_RIGHT_FD_WRITE | \
+  __WASI_RIGHT_POLL_FD_READWRITE)
+
+#define RIGHTS_STDERR RIGHTS_STDOUT
+
 // Only allow directory operations on directories. Directories can only
 // yield file descriptors to other directories and files.
 #define RIGHTS_DIRECTORY_BASE                                          \
diff --git a/core/iwasm/libraries/thread-mgr/thread_manager.h b/core/iwasm/libraries/thread-mgr/thread_manager.h
index 0cbba888f..f5ca1eaed 100644
--- a/core/iwasm/libraries/thread-mgr/thread_manager.h
+++ b/core/iwasm/libraries/thread-mgr/thread_manager.h
@@ -184,9 +184,9 @@ wasm_cluster_is_thread_terminated(WASMExecEnv *exec_env);
     ((signo) == WAMR_SIG_STOP || (signo) == WAMR_SIG_TRAP)
 
 struct WASMCurrentEnvStatus {
-    uint64 signal_flag : 32;
-    uint64 step_count : 16;
-    uint64 running_status : 16;
+    uint32 signal_flag;
+    uint16 step_count;
+    uint16 running_status;
 };
 
 WASMCurrentEnvStatus *
diff --git a/core/iwasm/libraries/wasi-nn/README.md b/core/iwasm/libraries/wasi-nn/README.md
index 7eb156ffc..e0d3a25ce 100644
--- a/core/iwasm/libraries/wasi-nn/README.md
+++ b/core/iwasm/libraries/wasi-nn/README.md
@@ -19,6 +19,13 @@ $ cmake -DWAMR_BUILD_WASI_NN=1 <other options> ...
 > ![Caution]
 > If enable `WAMR_BUID_WASI_NN`, iwasm will link a shared WAMR library instead of a static one. Wasi-nn backends will be loaded dynamically at runtime. Users shall specify the path of the backend library and register it to the iwasm runtime with `--native-lib=<path of backend library>`. All shared libraries should be placed in the `LD_LIBRARY_PATH`.
 
+#### Compilation options
+
+- `WAMR_BUILD_WASI_NN`. enable wasi-nn support. can't work alone. need to identify a backend. Match legacy wasi-nn spec naming convention. use `wasi_nn` as import module names.
+- `WAMR_BUILD_WASI_EPHEMERAL_NN`. Match latest wasi-nn spec naming convention. use `wasi_ephemeral_nn` as import module names.
+- `WAMR_BUILD_WASI_NN_TFLITE`. identify the backend as TensorFlow Lite.
+- `WAMR_BUILD_WASI_NN_OPENVINO`. identify the backend as OpenVINO.
+
 ### Wasm
 
 The definition of functions provided by WASI-NN (Wasm imports) is in the header file [wasi_nn.h](_core/iwasm/libraries/wasi-nn/wasi_nn.h_). By only including this file in a WASM application you will bind WASI-NN into your module.
@@ -37,6 +44,12 @@ typedef enum { fp16 = 0, fp32, up8, ip32 } tensor_type;
 
 It is required to recompile the Wasm application if you want to switch between the two sets of functions.
 
+#### Openvino
+
+If you're planning to use OpenVINO backends, the first step is to install OpenVINO on your computer. To do this correctly, please follow the official installation guide which you can find at this link: https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-archive-linux.html.
+
+After you've installed OpenVINO, you'll need to let cmake system know where to find it. You can do this by setting an environment variable named `OpenVINO_DIR`. This variable should point to the place on your computer where OpenVINO is installed. By setting this variable, your system will be able to locate and use OpenVINO when needed. You can find installation path by running the following command if using APT `$dpkg -L openvino`. The path should be _/opt/intel/openvino/_ or _/usr/lib/openvino_.
+
 ## Tests
 
 To run the tests we assume that the current directory is the root of the repository.
@@ -167,7 +180,7 @@ Due to the different requirements of each backend, we'll use a Docker container
 $ pwd
 /workspaces/wasm-micro-runtime/
 
-$ docker build -t wasi-nn-smoke:v1.0 -f Dockerfile.wasi-nn-smoke .
+$ docker build -t wasi-nn-smoke:v1.0 -f ./core/iwasm/libraries/wasi-nn/test/Dockerfile.wasi-nn-smoke .
 ```
 
 #### Execute
diff --git a/core/iwasm/libraries/wasi-nn/cmake/Findtensorflow_lite.cmake b/core/iwasm/libraries/wasi-nn/cmake/Findtensorflow_lite.cmake
index 59fdaf758..052dd9804 100644
--- a/core/iwasm/libraries/wasi-nn/cmake/Findtensorflow_lite.cmake
+++ b/core/iwasm/libraries/wasi-nn/cmake/Findtensorflow_lite.cmake
@@ -1,12 +1,13 @@
 # Copyright (C) 2019 Intel Corporation. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-
-find_library(TENSORFLOW_LITE 
-     NAMES tensorflow-lite
+find_library(TENSORFLOW_LITE
+  NAMES tensorflow-lite
+  HINTS ${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite
+  NO_DEFAULT_PATHS
 )
 
-if(NOT EXISTS ${TENSORFLOW_LITE})
+if(NOT TENSORFLOW_LITE)
   if(NOT EXISTS "${WAMR_ROOT_DIR}/core/deps/tensorflow-src")
     execute_process(
       COMMAND "${WAMR_ROOT_DIR}/core/deps/install_tensorflow.sh"
@@ -32,11 +33,15 @@ if(NOT EXISTS ${TENSORFLOW_LITE})
     "${TENSORFLOW_SOURCE_DIR}/tensorflow/lite"
     "${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite"
     EXCLUDE_FROM_ALL
-  )  
-
-  set(TENSORFLOW_LITE_INCLUDE_DIR "${TENSORFLOW_SOURCE_DIR}")
-  set(FLATBUFFER_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers/include")
-
-  include_directories(${TENSORFLOW_LITE_INCLUDE_DIR})
-  include_directories(${FLATBUFFER_INCLUDE_DIR})
+  )
+else ()
+  message(STATUS "TensorFlow Lite library found: ${TENSORFLOW_LITE}")
+  set(TENSORFLOW_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src")
 endif()
+
+set(TENSORFLOW_LITE_INCLUDE_DIR "${TENSORFLOW_SOURCE_DIR}/tensorflow/lite")
+set(FLATBUFFER_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers/include")
+
+include_directories(${TENSORFLOW_SOURCE_DIR})
+include_directories(${FLATBUFFER_INCLUDE_DIR})
+link_directories(${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite)
diff --git a/core/iwasm/libraries/wasi-nn/cmake/wasi_nn.cmake b/core/iwasm/libraries/wasi-nn/cmake/wasi_nn.cmake
index 172205790..e2ad257e0 100644
--- a/core/iwasm/libraries/wasi-nn/cmake/wasi_nn.cmake
+++ b/core/iwasm/libraries/wasi-nn/cmake/wasi_nn.cmake
@@ -3,54 +3,72 @@
 
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR})
 
-# Find tensorflow-lite
-find_package(tensorflow_lite REQUIRED)
+if(WAMR_BUILD_WASI_NN_TFLITE EQUAL 1)
+  # Find tensorflow-lite
+  find_package(tensorflow_lite REQUIRED)
+endif()
 
-set(WASI_NN_ROOT ${CMAKE_CURRENT_LIST_DIR}/..)
+if(WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1)
+  if(NOT DEFINED ENV{OpenVINO_DIR})
+    message(FATAL_ERROR
+        "OpenVINO_DIR is not defined. "
+        "Please follow https://docs.openvino.ai/2024/get-started/install-openvino.html,"
+        "install openvino, and set environment variable OpenVINO_DIR."
+        "Like OpenVINO_DIR=/usr/lib/openvino-2023.2/ cmake ..."
+        "Or OpenVINO_DIR=/opt/intel/openvino/ cmake ..."
+    )
+  endif()
+
+  list(APPEND CMAKE_MODULE_PATH $ENV{OpenVINO_DIR})
+  # Find OpenVINO
+  find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+endif()
 
 #
 # wasi-nn general
-add_library(
-  wasi-nn-general
-  SHARED
-    ${WASI_NN_ROOT}/src/wasi_nn.c
-    ${WASI_NN_ROOT}/src/utils/wasi_nn_app_native.c
+set(WASI_NN_ROOT ${CMAKE_CURRENT_LIST_DIR}/..)
+set(WASI_NN_SOURCES
+  ${WASI_NN_ROOT}/src/wasi_nn.c
+  ${WASI_NN_ROOT}/src/utils/wasi_nn_app_native.c
 )
-target_include_directories(
-  wasi-nn-general
-  PUBLIC
-    ${WASI_NN_ROOT}/include
-    ${WASI_NN_ROOT}/src
-    ${WASI_NN_ROOT}/src/utils
-)
-target_link_libraries(
-  wasi-nn-general
-  PUBLIC
-    libiwasm
-)
-target_compile_definitions(
-  wasi-nn-general
-  PUBLIC
-   $<$<CONFIG:Debug>:NN_LOG_LEVEL=0>
-   $<$<CONFIG:Release>:NN_LOG_LEVEL=2>
+include_directories(${WASI_NN_ROOT}/include)
+add_compile_definitions(
+  $<$<CONFIG:Debug>:NN_LOG_LEVEL=0>
+  $<$<CONFIG:Release>:NN_LOG_LEVEL=2>
 )
 
 #
 # wasi-nn backends
-add_library(
-  wasi-nn-tflite
-  SHARED
-    ${WASI_NN_ROOT}/src/wasi_nn_tensorflowlite.cpp
-)
-#target_link_options(
-#  wasi-nn-tflite
-#  PRIVATE
-#    -Wl,--whole-archive libwasi-nn-general.a
-#    -Wl,--no-whole-archive
-#)
-target_link_libraries(
-  wasi-nn-tflite
-  PUBLIC
-    tensorflow-lite
-    wasi-nn-general
-)
+#
+# - tflite
+if(WAMR_BUILD_WASI_NN_TFLITE EQUAL 1)
+  add_library(
+    wasi_nn_tflite
+    SHARED
+      ${WASI_NN_ROOT}/src/wasi_nn_tensorflowlite.cpp
+  )
+
+  target_link_libraries(
+    wasi_nn_tflite
+    PUBLIC
+      libiwasm
+      tensorflow-lite
+  )
+endif()
+
+# - openvino
+if(WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1)
+  add_library(
+    wasi_nn_openvino
+    SHARED
+      ${WASI_NN_ROOT}/src/wasi_nn_openvino.c
+  )
+
+  target_link_libraries(
+    wasi_nn_openvino
+    PUBLIC
+      libiwasm
+      openvino::runtime
+      openvino::runtime::c
+  )
+endif()
\ No newline at end of file
diff --git a/core/iwasm/libraries/wasi-nn/include/wasi_nn_host.h b/core/iwasm/libraries/wasi-nn/include/wasi_nn_host.h
new file mode 100644
index 000000000..cb056915f
--- /dev/null
+++ b/core/iwasm/libraries/wasi-nn/include/wasi_nn_host.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef WASI_NN_HOST_H
+#define WASI_NN_HOST_H
+
+#include "lib_export.h"
+
+uint32_t
+get_wasi_nn_export_apis(NativeSymbol **p_native_symbols);
+
+bool
+wasi_nn_initialize();
+
+void
+wasi_nn_destroy();
+
+#endif /* WASI_NN_HOST_H */
\ No newline at end of file
diff --git a/core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h b/core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h
index 75f14eb70..d36f5977c 100644
--- a/core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h
+++ b/core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h
@@ -126,6 +126,7 @@ typedef enum {
     tensorflowlite,
     ggml,
     autodetect,
+    unknown_backend,
 } graph_encoding;
 
 // Define where the graph should be executed.
@@ -161,8 +162,9 @@ typedef struct {
     BACKEND_DEINITIALIZE deinit;
 } api_function;
 
-bool
-wasi_nn_register_backend(api_function apis);
+void
+wasi_nn_dump_tensor_dimension(tensor_dimensions *dim, int32_t output_len,
+                              char *output);
 
 #ifdef __cplusplus
 }
diff --git a/core/iwasm/libraries/wasi-nn/src/utils/wasi_nn_app_native.c b/core/iwasm/libraries/wasi-nn/src/utils/wasi_nn_app_native.c
index 07516f34d..6e91c949b 100644
--- a/core/iwasm/libraries/wasi-nn/src/utils/wasi_nn_app_native.c
+++ b/core/iwasm/libraries/wasi-nn/src/utils/wasi_nn_app_native.c
@@ -88,7 +88,7 @@ graph_builder_array_app_native(wasm_module_inst_t instance,
         }
 
         NN_DBG_PRINTF("Graph builder %d contains %d elements", i,
-                      builder->size);
+                      builder[i].size);
     }
 
     builder_array->buf = builder;
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn.c b/core/iwasm/libraries/wasi-nn/src/wasi_nn.c
index de931b41b..0d56981fc 100644
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn.c
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn.c
@@ -10,38 +10,37 @@
 #include <errno.h>
 #include <string.h>
 #include <stdint.h>
+#include <dlfcn.h>
 
 #include "wasi_nn_private.h"
-#include "wasi_nn_app_native.h"
-#include "logger.h"
+#include "utils/wasi_nn_app_native.h"
+#include "utils/logger.h"
 
 #include "bh_platform.h"
 #include "wasi_nn_types.h"
 #include "wasm_export.h"
 
 #define HASHMAP_INITIAL_SIZE 20
+#define TFLITE_BACKEND_LIB "libwasi_nn_tflite.so"
+#define OPENVINO_BACKEND_LIB "libwasi_nn_openvino.so"
+#define LLAMACPP_BACKEND_LIB "libwasi_nn_llamacpp.so"
 
 /* Global variables */
-// if using `load_by_name`, there is no known `encoding` at the time of loading
-// so, just keep one `api_function` is enough
-static api_function lookup = { 0 };
+struct backends_api_functions {
+    void *backend_handle;
+    api_function functions;
+} lookup[autodetect] = { 0 };
 
-#define call_wasi_nn_func(wasi_error, func, ...)                 \
-    do {                                                         \
-        if (lookup.func) {                                       \
-            wasi_error = lookup.func(__VA_ARGS__);               \
-        }                                                        \
-        else {                                                   \
-            NN_ERR_PRINTF("Error: %s is not registered", #func); \
-            wasi_error = unsupported_operation;                  \
-        }                                                        \
+#define call_wasi_nn_func(backend_encoding, func, wasi_error, ...)         \
+    do {                                                                   \
+        wasi_error = lookup[backend_encoding].functions.func(__VA_ARGS__); \
+        if (wasi_error != success)                                         \
+            NN_ERR_PRINTF("Error %s() -> %d", #func, wasi_error);          \
     } while (0)
 
+/* HashMap utils */
 static HashMap *hashmap;
 
-static void
-wasi_nn_ctx_destroy(WASINNContext *wasi_nn_ctx);
-
 static uint32
 hash_func(const void *key)
 {
@@ -68,7 +67,31 @@ key_equal_func(void *key1, void *key2)
 
 static void
 key_destroy_func(void *key1)
-{}
+{
+    /* key type is wasm_module_inst_t*. do nothing */
+}
+
+static void
+wasi_nn_ctx_destroy(WASINNContext *wasi_nn_ctx)
+{
+    NN_DBG_PRINTF("[WASI NN] DEINIT...");
+
+    if (wasi_nn_ctx == NULL) {
+        NN_ERR_PRINTF(
+            "Error when deallocating memory. WASI-NN context is NULL");
+        return;
+    }
+    NN_DBG_PRINTF("Freeing wasi-nn");
+    NN_DBG_PRINTF("-> is_model_loaded: %d", wasi_nn_ctx->is_model_loaded);
+    NN_DBG_PRINTF("-> current_encoding: %d", wasi_nn_ctx->backend);
+
+    /* deinit() the backend */
+    wasi_nn_error res;
+    call_wasi_nn_func(wasi_nn_ctx->backend, deinit, res,
+                      wasi_nn_ctx->backend_ctx);
+
+    wasm_runtime_free(wasi_nn_ctx);
+}
 
 static void
 value_destroy_func(void *value)
@@ -76,34 +99,11 @@ value_destroy_func(void *value)
     wasi_nn_ctx_destroy((WASINNContext *)value);
 }
 
-static WASINNContext *
-wasi_nn_initialize_context()
-{
-    NN_DBG_PRINTF("Initializing wasi-nn context");
-    WASINNContext *wasi_nn_ctx =
-        (WASINNContext *)wasm_runtime_malloc(sizeof(WASINNContext));
-    if (wasi_nn_ctx == NULL) {
-        NN_ERR_PRINTF("Error when allocating memory for WASI-NN context");
-        return NULL;
-    }
-    wasi_nn_ctx->is_model_loaded = false;
-
-    /* only one backend can be registered */
-    wasi_nn_error res;
-    call_wasi_nn_func(res, init, &wasi_nn_ctx->backend_ctx);
-    if (res != success) {
-        NN_ERR_PRINTF("Error while initializing backend");
-        wasm_runtime_free(wasi_nn_ctx);
-        return NULL;
-    }
-
-    return wasi_nn_ctx;
-}
-
-static bool
+bool
 wasi_nn_initialize()
 {
-    NN_DBG_PRINTF("Initializing wasi-nn");
+    NN_DBG_PRINTF("[WASI NN General] Initializing wasi-nn");
+
     // hashmap { instance: wasi_nn_ctx }
     hashmap = bh_hash_map_create(HASHMAP_INITIAL_SIZE, true, hash_func,
                                  key_equal_func, key_destroy_func,
@@ -112,9 +112,26 @@ wasi_nn_initialize()
         NN_ERR_PRINTF("Error while initializing hashmap");
         return false;
     }
+
     return true;
 }
 
+static WASINNContext *
+wasi_nn_initialize_context()
+{
+    NN_DBG_PRINTF("[WASI NN] INIT...");
+
+    WASINNContext *wasi_nn_ctx =
+        (WASINNContext *)wasm_runtime_malloc(sizeof(WASINNContext));
+    if (wasi_nn_ctx == NULL) {
+        NN_ERR_PRINTF("Error when allocating memory for WASI-NN context");
+        return NULL;
+    }
+
+    memset(wasi_nn_ctx, 0, sizeof(WASINNContext));
+    return wasi_nn_ctx;
+}
+
 /* Get wasi-nn context from module instance */
 static WASINNContext *
 wasm_runtime_get_wasi_nn_ctx(wasm_module_inst_t instance)
@@ -125,6 +142,7 @@ wasm_runtime_get_wasi_nn_ctx(wasm_module_inst_t instance)
         wasi_nn_ctx = wasi_nn_initialize_context();
         if (wasi_nn_ctx == NULL)
             return NULL;
+
         bool ok =
             bh_hash_map_insert(hashmap, (void *)instance, (void *)wasi_nn_ctx);
         if (!ok) {
@@ -133,41 +151,35 @@ wasm_runtime_get_wasi_nn_ctx(wasm_module_inst_t instance)
             return NULL;
         }
     }
-    NN_DBG_PRINTF("Returning ctx");
+
     return wasi_nn_ctx;
 }
 
-static void
-wasi_nn_ctx_destroy(WASINNContext *wasi_nn_ctx)
-{
-    if (wasi_nn_ctx == NULL) {
-        NN_ERR_PRINTF(
-            "Error when deallocating memory. WASI-NN context is NULL");
-        return;
-    }
-    NN_DBG_PRINTF("Freeing wasi-nn");
-    NN_DBG_PRINTF("-> is_model_loaded: %d", wasi_nn_ctx->is_model_loaded);
-    NN_DBG_PRINTF("-> current_encoding: %d", wasi_nn_ctx->current_encoding);
-
-    /* only one backend can be registered */
-    wasi_nn_error res;
-    call_wasi_nn_func(res, deinit, wasi_nn_ctx->backend_ctx);
-    if (res != success) {
-        NN_ERR_PRINTF("Error while destroyging backend");
-    }
-
-    wasm_runtime_free(wasi_nn_ctx);
-}
-
 void
 wasi_nn_destroy()
 {
     // destroy hashmap will destroy keys and values
     bh_hash_map_destroy(hashmap);
+
+    // close backends' libraries and registered functions
+    for (unsigned i = 0; i < sizeof(lookup) / sizeof(lookup[0]); i++) {
+        if (lookup[i].backend_handle) {
+            dlclose(lookup[i].backend_handle);
+            lookup[i].backend_handle = NULL;
+        }
+
+        lookup[i].functions.init = NULL;
+        lookup[i].functions.deinit = NULL;
+        lookup[i].functions.load = NULL;
+        lookup[i].functions.load_by_name = NULL;
+        lookup[i].functions.init_execution_context = NULL;
+        lookup[i].functions.set_input = NULL;
+        lookup[i].functions.compute = NULL;
+        lookup[i].functions.get_output = NULL;
+    }
 }
 
 /* Utils */
-
 static wasi_nn_error
 is_model_initialized(WASINNContext *wasi_nn_ctx)
 {
@@ -178,8 +190,169 @@ is_model_initialized(WASINNContext *wasi_nn_ctx)
     return success;
 }
 
-/* WASI-NN implementation */
+/*
+ *TODO: choose a proper backend based on
+ * - hardware
+ * - model file format
+ * - on device ML framework
+ */
+static graph_encoding
+choose_a_backend()
+{
+    void *handle;
 
+    handle = dlopen(LLAMACPP_BACKEND_LIB, RTLD_LAZY);
+    if (handle) {
+        NN_INFO_PRINTF("Using llama.cpp backend");
+        dlclose(handle);
+        return ggml;
+    }
+
+    handle = dlopen(OPENVINO_BACKEND_LIB, RTLD_LAZY);
+    if (handle) {
+        NN_INFO_PRINTF("Using openvino backend");
+        dlclose(handle);
+        return openvino;
+    }
+
+    handle = dlopen(TFLITE_BACKEND_LIB, RTLD_LAZY);
+    if (handle) {
+        NN_INFO_PRINTF("Using tflite backend");
+        dlclose(handle);
+        return tensorflowlite;
+    }
+
+    return unknown_backend;
+}
+
+static bool
+register_backend(void *handle, api_function *functions)
+{
+    BACKEND_INITIALIZE init = (BACKEND_INITIALIZE)dlsym(handle, "init_backend");
+    if (!init) {
+        NN_WARN_PRINTF("init_backend() not found");
+        return false;
+    }
+    functions->init = init;
+
+    BACKEND_DEINITIALIZE deinit =
+        (BACKEND_DEINITIALIZE)dlsym(handle, "deinit_backend");
+    if (!deinit) {
+        NN_WARN_PRINTF("deinit_backend() not found");
+        return false;
+    }
+    functions->deinit = deinit;
+
+    LOAD load = (LOAD)dlsym(handle, "load");
+    if (!load) {
+        NN_WARN_PRINTF("load() not found");
+        return false;
+    }
+    functions->load = load;
+
+    LOAD_BY_NAME load_by_name = (LOAD_BY_NAME)dlsym(handle, "load_by_name");
+    if (!load_by_name) {
+        NN_WARN_PRINTF("load_by_name() not found");
+        return false;
+    }
+    functions->load_by_name = load_by_name;
+
+    INIT_EXECUTION_CONTEXT init_execution_context =
+        (INIT_EXECUTION_CONTEXT)dlsym(handle, "init_execution_context");
+    if (!init_execution_context) {
+        NN_WARN_PRINTF("init_execution_context() not found");
+        return false;
+    }
+    functions->init_execution_context = init_execution_context;
+
+    SET_INPUT set_input = (SET_INPUT)dlsym(handle, "set_input");
+    if (!set_input) {
+        NN_WARN_PRINTF("set_input() not found");
+        return false;
+    }
+    functions->set_input = set_input;
+
+    COMPUTE compute = (COMPUTE)dlsym(handle, "compute");
+    if (!compute) {
+        NN_WARN_PRINTF("compute() not found");
+        return false;
+    }
+    functions->compute = compute;
+
+    GET_OUTPUT get_output = (GET_OUTPUT)dlsym(handle, "get_output");
+    if (!get_output) {
+        NN_WARN_PRINTF("get_output() not found");
+        return false;
+    }
+    functions->get_output = get_output;
+
+    return true;
+}
+
+static bool
+prepare_backend(const char *lib_name, struct backends_api_functions *backend)
+{
+    NN_DBG_PRINTF("[Native Register] prepare_backend %s", lib_name);
+
+    void *handle;
+    handle = dlopen(lib_name, RTLD_LAZY);
+    if (!handle) {
+        NN_ERR_PRINTF("Error loading %s. %s", lib_name, dlerror());
+        return false;
+    }
+
+    if (!register_backend(handle, &(backend->functions))) {
+        NN_ERR_PRINTF("Error when registering functions of %s", lib_name);
+        dlclose(handle);
+        return false;
+    }
+
+    backend->backend_handle = handle;
+    return true;
+}
+
+static const char *
+graph_encoding_to_backend_lib_name(graph_encoding encoding)
+{
+    switch (encoding) {
+        case openvino:
+            return OPENVINO_BACKEND_LIB;
+        case tensorflowlite:
+            return TFLITE_BACKEND_LIB;
+        case ggml:
+            return LLAMACPP_BACKEND_LIB;
+        default:
+            return NULL;
+    }
+}
+
+static bool
+detect_and_load_backend(graph_encoding backend_hint,
+                        struct backends_api_functions *backends,
+                        graph_encoding *loaded_backed)
+{
+    if (backend_hint >= autodetect)
+        return false;
+
+    if (backend_hint == autodetect)
+        backend_hint = choose_a_backend();
+
+    /* if already loaded */
+    if (lookup[backend_hint].backend_handle) {
+        *loaded_backed = backend_hint;
+        return true;
+    }
+
+    *loaded_backed = backend_hint;
+    const char *backend_lib_name =
+        graph_encoding_to_backend_lib_name(backend_hint);
+    if (!backend_lib_name)
+        return false;
+
+    return prepare_backend(backend_lib_name, backends + backend_hint);
+}
+
+/* WASI-NN implementation */
 #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
 wasi_nn_error
 wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_wasm *builder,
@@ -191,7 +364,7 @@ wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_array_wasm *builder,
              graph_encoding encoding, execution_target target, graph *g)
 #endif /* WASM_ENABLE_WASI_EPHEMERAL_NN != 0 */
 {
-    NN_DBG_PRINTF("Running wasi_nn_load [encoding=%d, target=%d]...", encoding,
+    NN_DBG_PRINTF("[WASI NN] LOAD [encoding=%d, target=%d]...", encoding,
                   target);
 
     wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
@@ -219,14 +392,28 @@ wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_array_wasm *builder,
         goto fail;
     }
 
+    graph_encoding loaded_backed = autodetect;
+    if (!detect_and_load_backend(encoding, lookup, &loaded_backed)) {
+        res = invalid_encoding;
+        NN_ERR_PRINTF("load backend failed");
+        goto fail;
+    }
+
     WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
-    call_wasi_nn_func(res, load, wasi_nn_ctx->backend_ctx, &builder_native,
-                      encoding, target, g);
-    NN_DBG_PRINTF("wasi_nn_load finished with status %d [graph=%d]", res, *g);
+    wasi_nn_ctx->backend = loaded_backed;
+
+    /* init() the backend */
+    call_wasi_nn_func(wasi_nn_ctx->backend, init, res,
+                      &wasi_nn_ctx->backend_ctx);
     if (res != success)
         goto fail;
 
-    wasi_nn_ctx->current_encoding = encoding;
+    call_wasi_nn_func(wasi_nn_ctx->backend, load, res, wasi_nn_ctx->backend_ctx,
+                      &builder_native, encoding, target, g);
+    if (res != success)
+        goto fail;
+
+    wasi_nn_ctx->backend = loaded_backed;
     wasi_nn_ctx->is_model_loaded = true;
 
 fail:
@@ -241,7 +428,7 @@ wasi_nn_error
 wasi_nn_load_by_name(wasm_exec_env_t exec_env, char *name, uint32_t name_len,
                      graph *g)
 {
-    NN_DBG_PRINTF("Running wasi_nn_load_by_name ...");
+    NN_DBG_PRINTF("[WASI NN] LOAD_BY_NAME %s...", name);
 
     wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
     if (!instance) {
@@ -249,23 +436,37 @@ wasi_nn_load_by_name(wasm_exec_env_t exec_env, char *name, uint32_t name_len,
     }
 
     if (!wasm_runtime_validate_native_addr(instance, name, name_len)) {
+        NN_ERR_PRINTF("name is invalid");
         return invalid_argument;
     }
 
     if (!wasm_runtime_validate_native_addr(instance, g,
                                            (uint64)sizeof(graph))) {
+        NN_ERR_PRINTF("graph is invalid");
         return invalid_argument;
     }
 
+    graph_encoding loaded_backed = autodetect;
+    if (detect_and_load_backend(autodetect, lookup, &loaded_backed)) {
+        NN_ERR_PRINTF("load backend failed");
+        return invalid_encoding;
+    }
+
     WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
     wasi_nn_error res;
-    call_wasi_nn_func(res, load_by_name, wasi_nn_ctx->backend_ctx, name,
-                      name_len, g);
-    NN_DBG_PRINTF("wasi_nn_load_by_name finished with status %d", *g);
+
+    /* init() the backend */
+    call_wasi_nn_func(wasi_nn_ctx->backend, init, res,
+                      &wasi_nn_ctx->backend_ctx);
     if (res != success)
         return res;
 
-    wasi_nn_ctx->current_encoding = autodetect;
+    call_wasi_nn_func(wasi_nn_ctx->backend, load_by_name, res,
+                      wasi_nn_ctx->backend_ctx, name, name_len, g);
+    if (res != success)
+        return res;
+
+    wasi_nn_ctx->backend = loaded_backed;
     wasi_nn_ctx->is_model_loaded = true;
     return success;
 }
@@ -274,7 +475,7 @@ wasi_nn_error
 wasi_nn_init_execution_context(wasm_exec_env_t exec_env, graph g,
                                graph_execution_context *ctx)
 {
-    NN_DBG_PRINTF("Running wasi_nn_init_execution_context [graph=%d]...", g);
+    NN_DBG_PRINTF("[WASI NN] INIT_EXECUTION_CONTEXT...");
 
     wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
     if (!instance) {
@@ -293,11 +494,8 @@ wasi_nn_init_execution_context(wasm_exec_env_t exec_env, graph g,
         return invalid_argument;
     }
 
-    call_wasi_nn_func(res, init_execution_context, wasi_nn_ctx->backend_ctx, g,
-                      ctx);
-    NN_DBG_PRINTF(
-        "wasi_nn_init_execution_context finished with status %d [ctx=%d]", res,
-        *ctx);
+    call_wasi_nn_func(wasi_nn_ctx->backend, init_execution_context, res,
+                      wasi_nn_ctx->backend_ctx, g, ctx);
     return res;
 }
 
@@ -305,8 +503,7 @@ wasi_nn_error
 wasi_nn_set_input(wasm_exec_env_t exec_env, graph_execution_context ctx,
                   uint32_t index, tensor_wasm *input_tensor)
 {
-    NN_DBG_PRINTF("Running wasi_nn_set_input [ctx=%d, index=%d]...", ctx,
-                  index);
+    NN_DBG_PRINTF("[WASI NN] SET_INPUT [ctx=%d, index=%d]...", ctx, index);
 
     wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
     if (!instance) {
@@ -325,20 +522,20 @@ wasi_nn_set_input(wasm_exec_env_t exec_env, graph_execution_context ctx,
                                     &input_tensor_native)))
         return res;
 
-    call_wasi_nn_func(res, set_input, wasi_nn_ctx->backend_ctx, ctx, index,
+    call_wasi_nn_func(wasi_nn_ctx->backend, set_input, res,
+                      wasi_nn_ctx->backend_ctx, ctx, index,
                       &input_tensor_native);
     // XXX: Free intermediate structure pointers
     if (input_tensor_native.dimensions)
         wasm_runtime_free(input_tensor_native.dimensions);
 
-    NN_DBG_PRINTF("wasi_nn_set_input finished with status %d", res);
     return res;
 }
 
 wasi_nn_error
 wasi_nn_compute(wasm_exec_env_t exec_env, graph_execution_context ctx)
 {
-    NN_DBG_PRINTF("Running wasi_nn_compute [ctx=%d]...", ctx);
+    NN_DBG_PRINTF("[WASI NN] COMPUTE [ctx=%d]...", ctx);
 
     wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
     if (!instance) {
@@ -351,8 +548,8 @@ wasi_nn_compute(wasm_exec_env_t exec_env, graph_execution_context ctx)
     if (success != (res = is_model_initialized(wasi_nn_ctx)))
         return res;
 
-    call_wasi_nn_func(res, compute, wasi_nn_ctx->backend_ctx, ctx);
-    NN_DBG_PRINTF("wasi_nn_compute finished with status %d", res);
+    call_wasi_nn_func(wasi_nn_ctx->backend, compute, res,
+                      wasi_nn_ctx->backend_ctx, ctx);
     return res;
 }
 
@@ -368,8 +565,7 @@ wasi_nn_get_output(wasm_exec_env_t exec_env, graph_execution_context ctx,
                    uint32_t *output_tensor_size)
 #endif /* WASM_ENABLE_WASI_EPHEMERAL_NN != 0 */
 {
-    NN_DBG_PRINTF("Running wasi_nn_get_output [ctx=%d, index=%d]...", ctx,
-                  index);
+    NN_DBG_PRINTF("[WASI NN] GET_OUTPUT [ctx=%d, index=%d]...", ctx, index);
 
     wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
     if (!instance) {
@@ -389,15 +585,15 @@ wasi_nn_get_output(wasm_exec_env_t exec_env, graph_execution_context ctx,
     }
 
 #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
-    call_wasi_nn_func(res, get_output, wasi_nn_ctx->backend_ctx, ctx, index,
-                      output_tensor, &output_tensor_len);
+    call_wasi_nn_func(wasi_nn_ctx->backend, get_output, res,
+                      wasi_nn_ctx->backend_ctx, ctx, index, output_tensor,
+                      &output_tensor_len);
     *output_tensor_size = output_tensor_len;
 #else  /* WASM_ENABLE_WASI_EPHEMERAL_NN == 0 */
-    call_wasi_nn_func(res, get_output, wasi_nn_ctx->backend_ctx, ctx, index,
-                      output_tensor, output_tensor_size);
+    call_wasi_nn_func(wasi_nn_ctx->backend, get_output, res,
+                      wasi_nn_ctx->backend_ctx, ctx, index, output_tensor,
+                      output_tensor_size);
 #endif /* WASM_ENABLE_WASI_EPHEMERAL_NN != 0 */
-    NN_DBG_PRINTF("wasi_nn_get_output finished with status %d [data_size=%d]",
-                  res, *output_tensor_size);
     return res;
 }
 
@@ -431,44 +627,3 @@ get_wasi_nn_export_apis(NativeSymbol **p_native_symbols)
     *p_native_symbols = native_symbols_wasi_nn;
     return sizeof(native_symbols_wasi_nn) / sizeof(NativeSymbol);
 }
-
-__attribute__((used)) uint32_t
-get_native_lib(char **p_module_name, NativeSymbol **p_native_symbols)
-{
-    NN_DBG_PRINTF("--|> get_native_lib");
-
-#if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
-    *p_module_name = "wasi_ephemeral_nn";
-#else  /* WASM_ENABLE_WASI_EPHEMERAL_NN == 0 */
-    *p_module_name = "wasi_nn";
-#endif /* WASM_ENABLE_WASI_EPHEMERAL_NN != 0 */
-
-    return get_wasi_nn_export_apis(p_native_symbols);
-}
-
-__attribute__((used)) int
-init_native_lib()
-{
-    NN_DBG_PRINTF("--|> init_native_lib");
-
-    if (!wasi_nn_initialize())
-        return 1;
-
-    return 0;
-}
-
-__attribute__((used)) void
-deinit_native_lib()
-{
-    NN_DBG_PRINTF("--|> deinit_native_lib");
-
-    wasi_nn_destroy();
-}
-
-__attribute__((used)) bool
-wasi_nn_register_backend(api_function apis)
-{
-    NN_DBG_PRINTF("--|> wasi_nn_register_backend");
-    lookup = apis;
-    return true;
-}
\ No newline at end of file
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.c b/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.c
new file mode 100644
index 000000000..db2f91db0
--- /dev/null
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.c
@@ -0,0 +1,549 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "wasi_nn_types.h"
+#include "wasi_nn_openvino.h"
+#include "utils/logger.h"
+#include "bh_platform.h"
+
+#include "openvino/c/openvino.h"
+
+/*
+ * refer to
+ * https://docs.openvino.ai/2024/openvino-workflow/running-inference/integrate-openvino-with-your-application.html
+ *
+ * Steps about integrating OpenVINO are:
+ *
+ * 1. Create OpenVINO Runtime Core
+ * 2. Compile Model
+ * 3. Create Inference Request
+ * 4. Set Inputs
+ * 5. Start Inference
+ * 6. Process Inference Results
+ *
+ * from 4. to 6. is the Inference Loop
+ */
+
+typedef struct {
+    ov_core_t *core;
+    /* keep input model files */
+    void *weight_data;
+    ov_tensor_t *weights_tensor;
+    ov_model_t *model;
+    /* add prepostprocess */
+    ov_model_t *new_model;
+    ov_compiled_model_t *compiled_model;
+    ov_infer_request_t *infer_request;
+    ov_tensor_t *input_tensor;
+} OpenVINOContext;
+
+/*
+ * BE AWARE OF "goto fail"
+ */
+#define CHECK_OV_STATUS(status, error_code)                \
+    do {                                                   \
+        ov_status_e s = status;                            \
+        if (s != OK) {                                     \
+            NN_ERR_PRINTF("return status \"%s\", line %d", \
+                          ov_get_error_info(s), __LINE__); \
+            error_code = runtime_error;                    \
+            goto fail;                                     \
+        }                                                  \
+    } while (0)
+
+static void
+dump_ov_shape_t(const ov_shape_t *shape, int32_t output_len, char *output)
+{
+    int ret = 0;
+
+    ret = snprintf(output, output_len, "%ld,[", shape->rank);
+    if (!ret)
+        return;
+
+    output_len -= ret;
+    output += ret;
+
+    for (unsigned i = 0; i < shape->rank && output_len; i++) {
+        ret = snprintf(output, output_len, " %ld", shape->dims[i]);
+        if (!ret)
+            return;
+
+        output_len -= ret;
+        output += ret;
+    }
+
+    snprintf(output, output_len, "]");
+    return;
+}
+
+#ifndef NDEBUG
+static void
+print_model_input_output_info(ov_model_t *model)
+{
+    wasi_nn_error ov_error = success;
+    char *friendly_name = NULL;
+    size_t input_size = 0;
+    ov_output_const_port_t *input_port = NULL;
+    ov_shape_t input_shape = { 0 };
+    ov_element_type_e input_type;
+    char shape_info[64] = { 0 };
+    ov_output_const_port_t *output_port = NULL;
+    ov_shape_t output_shape = { 0 };
+    ov_element_type_e output_type;
+
+    CHECK_OV_STATUS(ov_model_get_friendly_name(model, &friendly_name),
+                    ov_error);
+    NN_DBG_PRINTF("model name: %s", friendly_name);
+
+    ov_model_inputs_size(model, &input_size);
+    for (unsigned i = 0; i < input_size; i++) {
+        CHECK_OV_STATUS(ov_model_const_input_by_index(model, i, &input_port),
+                        ov_error);
+        CHECK_OV_STATUS(ov_const_port_get_shape(input_port, &input_shape),
+                        ov_error);
+        CHECK_OV_STATUS(ov_port_get_element_type(input_port, &input_type),
+                        ov_error);
+
+        dump_ov_shape_t(&input_shape, 60, shape_info);
+        NN_DBG_PRINTF("model input[%u]. element_type: %d, shape: %s", i,
+                      input_type, shape_info);
+
+        ov_shape_free(&input_shape);
+        memset(&input_shape, 0, sizeof(input_shape));
+        ov_output_const_port_free(input_port);
+        input_port = NULL;
+    }
+
+    size_t output_size = 0;
+    ov_model_outputs_size(model, &output_size);
+    for (unsigned i = 0; i < output_size; i++) {
+        CHECK_OV_STATUS(ov_model_const_output_by_index(model, i, &output_port),
+                        ov_error);
+        CHECK_OV_STATUS(ov_const_port_get_shape(output_port, &output_shape),
+                        ov_error);
+        CHECK_OV_STATUS(ov_port_get_element_type(output_port, &output_type),
+                        ov_error);
+
+        dump_ov_shape_t(&output_shape, 60, shape_info);
+        NN_DBG_PRINTF("model output[%u]. element_type: %d, shape: %s", i,
+                      output_type, shape_info);
+
+        ov_shape_free(&output_shape);
+        memset(&output_shape, 0, sizeof(output_shape));
+        ov_output_const_port_free(output_port);
+        output_port = NULL;
+    }
+
+    ov_error = ov_error;
+fail:
+    if (friendly_name)
+        ov_free(friendly_name);
+    ov_shape_free(&input_shape);
+    if (input_port)
+        ov_output_const_port_free(input_port);
+    ov_shape_free(&output_shape);
+    if (output_port)
+        ov_output_const_port_free(output_port);
+    return;
+}
+#endif
+
+static ov_element_type_e
+wasi_nn_tensor_type_to_openvino_element_type(tensor_type wasi_nn_type)
+{
+    switch (wasi_nn_type) {
+        case fp16:
+            return F16;
+        case fp32:
+            return F32;
+#if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
+        case fp64:
+            return F64;
+        case bf16:
+            return BF16;
+        case i64:
+            return I64;
+        case u8:
+            return U8;
+        case i32:
+            return I32;
+#else
+        case up8:
+            return U8;
+        case ip32:
+            return I32;
+#endif
+        default:
+            break;
+    }
+
+    NN_ERR_PRINTF("%d is an undefined tensor type", wasi_nn_type);
+    return UNDEFINED;
+}
+
+static wasi_nn_error
+uint32_array_to_int64_array(uint32_t array_size, uint32_t *src, int64_t **dst)
+{
+    *dst = os_malloc(array_size * sizeof(int64_t));
+    if (!(*dst))
+        return runtime_error;
+
+    for (unsigned i = 0; i < array_size; i++) {
+        (*dst)[i] = src[i];
+    }
+
+    return success;
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+load(void *ctx, graph_builder_array *builder, graph_encoding encoding,
+     execution_target target, graph *g)
+{
+    OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
+    wasi_nn_error ret = unsupported_operation;
+
+    if (encoding != openvino) {
+        NN_ERR_PRINTF("Unexpected encoding %d.", encoding);
+        return invalid_argument;
+    }
+
+    /*FIXME: unblock non-cpu device after supporting */
+    if (target != cpu) {
+        NN_ERR_PRINTF("Unexpected device %d.", target);
+        return invalid_argument;
+    }
+
+    if (builder->size != 2) {
+        NN_ERR_PRINTF("Unexpected builder format.");
+        return invalid_argument;
+    }
+
+    /*
+     * The first builder is the XML file.
+     * The second builder is the weight file.
+     */
+    graph_builder xml = builder->buf[0];
+    graph_builder weight = builder->buf[1];
+
+    /* if xml is a String with a model in IR */
+    if (!(xml.buf[xml.size] == '\0' && xml.buf[xml.size - 1] != '\0')) {
+        NN_ERR_PRINTF("Invalid xml string.");
+        return invalid_argument;
+    }
+
+    /* transfer weight to an ov tensor */
+    {
+        ov_ctx->weight_data = os_malloc(weight.size);
+        if (!ov_ctx->weight_data)
+            goto fail;
+        memcpy(ov_ctx->weight_data, weight.buf, weight.size);
+
+        ov_element_type_e type = U8;
+        int64_t dims[1] = { weight.size };
+        ov_shape_t shape = { 1, dims };
+        CHECK_OV_STATUS(ov_tensor_create_from_host_ptr(type, shape,
+                                                       ov_ctx->weight_data,
+                                                       &ov_ctx->weights_tensor),
+                        ret);
+    }
+
+    /* load model from buffer */
+    CHECK_OV_STATUS(ov_core_read_model_from_memory_buffer(
+                        ov_ctx->core, (char *)xml.buf, xml.size,
+                        ov_ctx->weights_tensor, &ov_ctx->model),
+                    ret);
+#ifndef NDEBUG
+    print_model_input_output_info(ov_ctx->model);
+#endif
+
+    ret = success;
+fail:
+    return ret;
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+load_by_name(void *ctx, const char *filename, uint32_t filename_len, graph *g)
+{
+    OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
+    wasi_nn_error ret = unsupported_operation;
+
+    CHECK_OV_STATUS(
+        ov_core_read_model(ov_ctx->core, filename, NULL, &ov_ctx->model), ret);
+
+    ret = success;
+fail:
+    return ret;
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+init_execution_context(void *ctx, graph g, graph_execution_context *exec_ctx)
+{
+    return success;
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+set_input(void *ctx, graph_execution_context exec_ctx, uint32_t index,
+          tensor *wasi_nn_tensor)
+{
+    OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
+    wasi_nn_error ret = unsupported_operation;
+    ov_shape_t input_shape = { 0 };
+    int64_t *ov_dims = NULL;
+
+    ov_preprocess_prepostprocessor_t *ppp = NULL;
+    ov_preprocess_input_info_t *input_info = NULL;
+    ov_preprocess_input_tensor_info_t *input_tensor_info = NULL;
+    ov_layout_t *input_layout = NULL;
+    ov_preprocess_preprocess_steps_t *input_process = NULL;
+    ov_preprocess_input_model_info_t *p_input_model = NULL;
+    ov_layout_t *model_layout = NULL;
+    ov_preprocess_output_info_t *output_info = NULL;
+    ov_preprocess_output_tensor_info_t *output_tensor_info = NULL;
+
+    /* wasi_nn_tensor -> ov_tensor */
+    {
+        ret = uint32_array_to_int64_array(wasi_nn_tensor->dimensions->size,
+                                          wasi_nn_tensor->dimensions->buf,
+                                          &ov_dims);
+        if (ret != success)
+            goto fail;
+
+        /* NCHW -> NHWC */
+        if (wasi_nn_tensor->dimensions->size == 4 || ov_dims[1] == 3) {
+            /* N */
+            /* H */
+            ov_dims[1] = ov_dims[2];
+            /* W */
+            ov_dims[2] = ov_dims[3];
+            /* C */
+            ov_dims[3] = 3;
+        }
+
+        CHECK_OV_STATUS(ov_shape_create(wasi_nn_tensor->dimensions->size,
+                                        ov_dims, &input_shape),
+                        ret);
+
+        ov_element_type_e input_type =
+            wasi_nn_tensor_type_to_openvino_element_type(wasi_nn_tensor->type);
+        if (input_type == UNDEFINED)
+            goto fail;
+
+        char shape_info[64] = { 0 };
+        dump_ov_shape_t(&input_shape, 60, shape_info);
+        NN_DBG_PRINTF("input tensor. element_type: %d, shape: %s", input_type,
+                      shape_info);
+
+        CHECK_OV_STATUS(ov_tensor_create_from_host_ptr(input_type, input_shape,
+                                                       wasi_nn_tensor->data,
+                                                       &ov_ctx->input_tensor),
+                        ret);
+    }
+
+    /* set preprocess based on wasi_nn_tensor */
+    {
+        CHECK_OV_STATUS(
+            ov_preprocess_prepostprocessor_create(ov_ctx->model, &ppp), ret);
+
+        /* reuse user' created tensor's info */
+        CHECK_OV_STATUS(ov_preprocess_prepostprocessor_get_input_info_by_index(
+                            ppp, index, &input_info),
+                        ret);
+        CHECK_OV_STATUS(ov_preprocess_input_info_get_tensor_info(
+                            input_info, &input_tensor_info),
+                        ret);
+        CHECK_OV_STATUS(ov_preprocess_input_tensor_info_set_from(
+                            input_tensor_info, ov_ctx->input_tensor),
+                        ret);
+        /* ! HAS TO BE NHWC. Match previous layout conversion */
+        CHECK_OV_STATUS(ov_layout_create("NHWC", &input_layout), ret);
+        CHECK_OV_STATUS(ov_preprocess_input_tensor_info_set_layout(
+                            input_tensor_info, input_layout),
+                        ret);
+
+        /* add RESIZE */
+        CHECK_OV_STATUS(ov_preprocess_input_info_get_preprocess_steps(
+                            input_info, &input_process),
+                        ret);
+        CHECK_OV_STATUS(
+            ov_preprocess_preprocess_steps_resize(input_process, RESIZE_LINEAR),
+            ret);
+
+        /* input model */
+        CHECK_OV_STATUS(
+            ov_preprocess_input_info_get_model_info(input_info, &p_input_model),
+            ret);
+        // TODO: what if not?
+        CHECK_OV_STATUS(ov_layout_create("NCHW", &model_layout), ret);
+        CHECK_OV_STATUS(ov_preprocess_input_model_info_set_layout(p_input_model,
+                                                                  model_layout),
+                        ret);
+
+        /* output -> F32(possibility) */
+        CHECK_OV_STATUS(ov_preprocess_prepostprocessor_get_output_info_by_index(
+                            ppp, index, &output_info),
+                        ret);
+        CHECK_OV_STATUS(ov_preprocess_output_info_get_tensor_info(
+                            output_info, &output_tensor_info),
+                        ret);
+        CHECK_OV_STATUS(
+            ov_preprocess_output_set_element_type(output_tensor_info, F32),
+            ret);
+
+        CHECK_OV_STATUS(
+            ov_preprocess_prepostprocessor_build(ppp, &ov_ctx->new_model), ret);
+    }
+
+    CHECK_OV_STATUS(ov_core_compile_model(ov_ctx->core, ov_ctx->new_model,
+                                          "CPU", 0, &ov_ctx->compiled_model),
+                    ret);
+
+    CHECK_OV_STATUS(ov_compiled_model_create_infer_request(
+                        ov_ctx->compiled_model, &ov_ctx->infer_request),
+                    ret);
+
+    /* install ov_tensor -> infer_request */
+    CHECK_OV_STATUS(ov_infer_request_set_input_tensor_by_index(
+                        ov_ctx->infer_request, index, ov_ctx->input_tensor),
+                    ret);
+    ret = success;
+
+fail:
+    if (ov_dims)
+        os_free(ov_dims);
+    ov_shape_free(&input_shape);
+    if (ppp)
+        ov_preprocess_prepostprocessor_free(ppp);
+    if (input_info)
+        ov_preprocess_input_info_free(input_info);
+    if (input_tensor_info)
+        ov_preprocess_input_tensor_info_free(input_tensor_info);
+    if (input_layout)
+        ov_layout_free(input_layout);
+    if (input_process)
+        ov_preprocess_preprocess_steps_free(input_process);
+    if (p_input_model)
+        ov_preprocess_input_model_info_free(p_input_model);
+    if (model_layout)
+        ov_layout_free(model_layout);
+    if (output_info)
+        ov_preprocess_output_info_free(output_info);
+    if (output_tensor_info)
+        ov_preprocess_output_tensor_info_free(output_tensor_info);
+
+    return ret;
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+compute(void *ctx, graph_execution_context exec_ctx)
+{
+    OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
+    wasi_nn_error ret = unsupported_operation;
+
+    CHECK_OV_STATUS(ov_infer_request_infer(ov_ctx->infer_request), ret);
+    ret = success;
+fail:
+    return ret;
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index,
+           tensor_data output_tensor, uint32_t *output_tensor_size)
+{
+    OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
+    wasi_nn_error ret = unsupported_operation;
+    ov_tensor_t *ov_tensor = NULL;
+    void *data = NULL;
+    size_t byte_size = 0;
+
+    CHECK_OV_STATUS(ov_infer_request_get_output_tensor_by_index(
+                        ov_ctx->infer_request, index, &ov_tensor),
+                    ret);
+
+    CHECK_OV_STATUS(ov_tensor_get_byte_size(ov_tensor, &byte_size), ret);
+
+    CHECK_OV_STATUS(ov_tensor_data(ov_tensor, &data), ret);
+
+    memcpy(output_tensor, data, byte_size);
+
+    *output_tensor_size = (uint32_t)byte_size;
+
+    ret = success;
+
+fail:
+    if (ov_tensor)
+        ov_tensor_free(ov_tensor);
+    return ret;
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+init_backend(void **ctx)
+{
+    ov_version_t version;
+    OpenVINOContext *ov_ctx = NULL;
+    wasi_nn_error ret = unsupported_operation;
+
+    if (!ctx) {
+        ret = invalid_argument;
+        goto fail;
+    }
+
+    /* Get OpenVINO runtime version */
+    CHECK_OV_STATUS(ov_get_openvino_version(&version), ret);
+    NN_INFO_PRINTF("OpenVINO INFO:");
+    NN_INFO_PRINTF("  Description : %s", version.description);
+    NN_INFO_PRINTF("  Build Number: %s", version.buildNumber);
+    ov_version_free(&version);
+
+    ov_ctx = (OpenVINOContext *)os_malloc(sizeof(OpenVINOContext));
+    if (!ov_ctx) {
+        NN_ERR_PRINTF("Allocate for OpenVINOContext failed");
+        ret = runtime_error;
+        goto fail;
+    }
+
+    memset(ov_ctx, 0, sizeof(OpenVINOContext));
+
+    /* Initialize OpenVINO Runtime Core */
+    CHECK_OV_STATUS(ov_core_create(&ov_ctx->core), ret);
+
+    *ctx = (void *)ov_ctx;
+    return success;
+fail:
+    openvino_destroy((void *)ov_ctx);
+    return ret;
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+deinit_backend(void *ctx)
+{
+    OpenVINOContext *ov_ctx = (OpenVINOContext *)ctx;
+
+    if (!ov_ctx)
+        return invalid_argument;
+
+    if (ov_ctx->weight_data)
+        os_free(ov_ctx->weight_data);
+
+    if (ov_ctx->weights_tensor)
+        ov_tensor_free(ov_ctx->weights_tensor);
+
+    if (ov_ctx->input_tensor)
+        ov_tensor_free(ov_ctx->input_tensor);
+
+    if (ov_ctx->infer_request)
+        ov_infer_request_free(ov_ctx->infer_request);
+
+    if (ov_ctx->compiled_model)
+        ov_compiled_model_free(ov_ctx->compiled_model);
+
+    if (ov_ctx->model)
+        ov_model_free(ov_ctx->model);
+
+    if (ov_ctx->core)
+        ov_core_free(ov_ctx->core);
+
+    os_free(ov_ctx);
+    return success;
+}
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.h b/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.h
new file mode 100644
index 000000000..ea03a226f
--- /dev/null
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_openvino.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef WASI_NN_OPENVINO_HPP
+#define WASI_NN_OPENVINO_HPP
+
+#include "wasi_nn_types.h"
+
+__attribute__((visibility("default"))) wasi_nn_error
+load(void *ctx, graph_builder_array *builder, graph_encoding encoding,
+     execution_target target, graph *g);
+
+__attribute__((visibility("default"))) wasi_nn_error
+init_execution_context(void *ctx, graph g, graph_execution_context *exec_ctx);
+
+__attribute__((visibility("default"))) wasi_nn_error
+set_input(void *ctx, graph_execution_context exec_ctx, uint32_t index,
+          tensor *input_tensor);
+
+__attribute__((visibility("default"))) wasi_nn_error
+compute(void *ctx, graph_execution_context exec_ctx);
+
+__attribute__((visibility("default"))) wasi_nn_error
+get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index,
+           tensor_data output_tensor, uint32_t *output_tensor_size);
+
+__attribute__((visibility("default"))) wasi_nn_error
+init_backend(void **ctx);
+
+__attribute__((visibility("default"))) wasi_nn_error
+deinit_backend(void *ctx);
+
+#endif /* WASI_NN_OPENVINO_HPP */
\ No newline at end of file
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h b/core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h
index df5080dea..bacae99ad 100644
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h
@@ -11,8 +11,7 @@
 
 typedef struct {
     bool is_model_loaded;
-    // Optional
-    graph_encoding current_encoding;
+    graph_encoding backend;
     void *backend_ctx;
 } WASINNContext;
 
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
index 606aca243..6a3b5a47d 100644
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
@@ -4,7 +4,7 @@
  */
 
 #include "wasi_nn_tensorflowlite.hpp"
-#include "logger.h"
+#include "utils/logger.h"
 
 #include "bh_platform.h"
 #include "wasi_nn_types.h"
@@ -113,10 +113,9 @@ is_valid_graph_execution_context(TFLiteContext *tfl_ctx,
 }
 
 /* WASI-NN (tensorflow) implementation */
-
-wasi_nn_error
-tensorflowlite_load(void *tflite_ctx, graph_builder_array *builder,
-                    graph_encoding encoding, execution_target target, graph *g)
+__attribute__((visibility("default"))) wasi_nn_error
+load(void *tflite_ctx, graph_builder_array *builder, graph_encoding encoding,
+     execution_target target, graph *g)
 {
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
@@ -168,9 +167,9 @@ tensorflowlite_load(void *tflite_ctx, graph_builder_array *builder,
     return success;
 }
 
-wasi_nn_error
-tensorflowlite_load_by_name(void *tflite_ctx, const char *filename,
-                            uint32_t filename_len, graph *g)
+__attribute__((visibility("default"))) wasi_nn_error
+load_by_name(void *tflite_ctx, const char *filename, uint32_t filename_len,
+             graph *g)
 {
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
@@ -192,9 +191,8 @@ tensorflowlite_load_by_name(void *tflite_ctx, const char *filename,
     return success;
 }
 
-wasi_nn_error
-tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
-                                      graph_execution_context *ctx)
+__attribute__((visibility("default"))) wasi_nn_error
+init_execution_context(void *tflite_ctx, graph g, graph_execution_context *ctx)
 {
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
@@ -281,9 +279,9 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
     return success;
 }
 
-wasi_nn_error
-tensorflowlite_set_input(void *tflite_ctx, graph_execution_context ctx,
-                         uint32_t index, tensor *input_tensor)
+__attribute__((visibility("default"))) wasi_nn_error
+set_input(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
+          tensor *input_tensor)
 {
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
@@ -352,8 +350,8 @@ tensorflowlite_set_input(void *tflite_ctx, graph_execution_context ctx,
     return success;
 }
 
-wasi_nn_error
-tensorflowlite_compute(void *tflite_ctx, graph_execution_context ctx)
+__attribute__((visibility("default"))) wasi_nn_error
+compute(void *tflite_ctx, graph_execution_context ctx)
 {
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
@@ -365,10 +363,9 @@ tensorflowlite_compute(void *tflite_ctx, graph_execution_context ctx)
     return success;
 }
 
-wasi_nn_error
-tensorflowlite_get_output(void *tflite_ctx, graph_execution_context ctx,
-                          uint32_t index, tensor_data output_tensor,
-                          uint32_t *output_tensor_size)
+__attribute__((visibility("default"))) wasi_nn_error
+get_output(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
+           tensor_data output_tensor, uint32_t *output_tensor_size)
 {
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
@@ -434,8 +431,8 @@ tensorflowlite_get_output(void *tflite_ctx, graph_execution_context ctx,
     return success;
 }
 
-wasi_nn_error
-tensorflowlite_initialize(void **tflite_ctx)
+__attribute__((visibility("default"))) wasi_nn_error
+init_backend(void **tflite_ctx)
 {
     TFLiteContext *tfl_ctx = new TFLiteContext();
     if (tfl_ctx == NULL) {
@@ -461,8 +458,8 @@ tensorflowlite_initialize(void **tflite_ctx)
     return success;
 }
 
-wasi_nn_error
-tensorflowlite_destroy(void *tflite_ctx)
+__attribute__((visibility("default"))) wasi_nn_error
+deinit_backend(void *tflite_ctx)
 {
     /*
         TensorFlow Lite memory is internally managed by tensorflow
@@ -513,19 +510,3 @@ tensorflowlite_destroy(void *tflite_ctx)
     NN_DBG_PRINTF("Memory free'd.");
     return success;
 }
-
-__attribute__((constructor(200))) void
-tflite_register_backend()
-{
-    api_function apis = {
-        .load = tensorflowlite_load,
-        .load_by_name = tensorflowlite_load_by_name,
-        .init_execution_context = tensorflowlite_init_execution_context,
-        .set_input = tensorflowlite_set_input,
-        .compute = tensorflowlite_compute,
-        .get_output = tensorflowlite_get_output,
-        .init = tensorflowlite_initialize,
-        .deinit = tensorflowlite_destroy,
-    };
-    wasi_nn_register_backend(apis);
-}
\ No newline at end of file
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.hpp b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.hpp
index 630e741c0..d6e04ab0e 100644
--- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.hpp
+++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.hpp
@@ -12,31 +12,33 @@
 extern "C" {
 #endif
 
-wasi_nn_error
-tensorflowlite_load(void *tflite_ctx, graph_builder_array *builder,
-                    graph_encoding encoding, execution_target target, graph *g);
+__attribute__((visibility("default"))) wasi_nn_error
+load(void *tflite_ctx, graph_builder_array *builder, graph_encoding encoding,
+     execution_target target, graph *g);
 
-wasi_nn_error
-tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
-                                      graph_execution_context *ctx);
+__attribute__((visibility("default"))) wasi_nn_error
+load_by_name(void *tflite_ctx, const char *filename, uint32_t filename_len,
+             graph *g);
 
-wasi_nn_error
-tensorflowlite_set_input(void *tflite_ctx, graph_execution_context ctx,
-                         uint32_t index, tensor *input_tensor);
+__attribute__((visibility("default"))) wasi_nn_error
+init_execution_context(void *tflite_ctx, graph g, graph_execution_context *ctx);
 
-wasi_nn_error
-tensorflowlite_compute(void *tflite_ctx, graph_execution_context ctx);
+__attribute__((visibility("default"))) wasi_nn_error
+set_input(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
+          tensor *input_tensor);
 
-wasi_nn_error
-tensorflowlite_get_output(void *tflite_ctx, graph_execution_context ctx,
-                          uint32_t index, tensor_data output_tensor,
-                          uint32_t *output_tensor_size);
+__attribute__((visibility("default"))) wasi_nn_error
+compute(void *tflite_ctx, graph_execution_context ctx);
 
-wasi_nn_error
-tensorflowlite_initialize(void **tflite_ctx);
+__attribute__((visibility("default"))) wasi_nn_error
+get_output(void *tflite_ctx, graph_execution_context ctx, uint32_t index,
+           tensor_data output_tensor, uint32_t *output_tensor_size);
 
-wasi_nn_error
-tensorflowlite_destroy(void *tflite_ctx);
+__attribute__((visibility("default"))) wasi_nn_error
+init_backend(void **tflite_ctx);
+
+__attribute__((visibility("default"))) wasi_nn_error
+deinit_backend(void *tflite_ctx);
 
 #ifdef __cplusplus
 }
diff --git a/core/iwasm/libraries/wasi-nn/test/Dockerfile.wasi-nn-smoke b/core/iwasm/libraries/wasi-nn/test/Dockerfile.wasi-nn-smoke
index 997080c90..261c77261 100644
--- a/core/iwasm/libraries/wasi-nn/test/Dockerfile.wasi-nn-smoke
+++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.wasi-nn-smoke
@@ -1,6 +1,8 @@
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+# hadolint global ignore=DL3003,DL3008,DL3009,DL3059
+
 FROM mcr.microsoft.com/devcontainers/rust:1-1-bullseye@sha256:ddc1ee022d327f024c07484c9333db3fbbfd504bc096cdb66635653a2bebb33e
 
 ARG DEBIAN_FRONTEND=noninteractive
@@ -8,7 +10,10 @@ ENV TZ=Asian/Shanghai
 
 # hadolint ignore=DL3009
 RUN apt-get update \
-  && apt-get upgrade -y
+  && apt-get upgrade -y \
+  && apt-get install -y --no-install-recommends cmake
+
+RUN rustup target add wasm32-wasi
 
 #
 # Openvino
@@ -17,33 +22,27 @@ RUN apt-get update \
 #   - https://docs.openvino.ai/2023.3/openvino_docs_install_guides_installing_openvino_from_archive_linux.html
 #   - https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-archive-linux.html
 #
-# FIXME: upgrade to 2024.1 or latest after wasi-nn(rust binding) is ready
-WORKDIR /opt/intel
-RUN wget -q https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3.2/linux/l_openvino_toolkit_ubuntu20_2022.3.2.9279.e2c7e4d7b4d_x86_64.tgz
-RUN tar -xf l_openvino_toolkit_ubuntu20_2022.3.2.9279.e2c7e4d7b4d_x86_64.tgz \
-  && rm l_openvino_toolkit_ubuntu20_2022.3.2.9279.e2c7e4d7b4d_x86_64.tgz \
-  && mv l_openvino_toolkit_ubuntu20_2022.3.2.9279.e2c7e4d7b4d_x86_64 /opt/intel/openvino
-
-WORKDIR /opt/intel/openvino
-RUN ./install_dependencies/install_openvino_dependencies.sh -y \
-  && ./setupvars.sh
-
-#
-# wasmtime
-WORKDIR /opt
-RUN wget -q https://github.com/bytecodealliance/wasmtime/releases/download/v21.0.0/wasmtime-v21.0.0-x86_64-linux.tar.xz
-RUN tar -xf wasmtime-v21.0.0-x86_64-linux.tar.xz \
-  && rm wasmtime-v21.0.0-x86_64-linux.tar.xz \
-  && ln -sf "$(realpath ./wasmtime-v21.0.0-x86_64-linux/wasmtime)" /usr/local/bin/wasmtime
+RUN wget -q https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+RUN apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
+  && echo "deb https://apt.repos.intel.com/openvino/2023 ubuntu20 main" | tee /etc/apt/sources.list.d/intel-openvino-2023.list
+RUN apt-get update \
+  && apt-get upgrade -y \
+  && apt-get install --no-install-recommends -y openvino-2023.2.0
 
 #
 # wasi-nn
 # compilation requirements
-RUN rustup target add wasm32-wasi wasm32-unknown-unknown
 WORKDIR /workspaces/wasi-nn
 RUN git clone --depth 1 https://github.com/bytecodealliance/wasi-nn.git .
-# hadolint ignore=DL3059
-#RUN ./build.sh rust
+
+WORKDIR /workspaces/wasi-nn/rust/examples/classification-example/
+RUN cargo build --target=wasm32-wasi
+
+WORKDIR /workspaces/wasi-nn/rust/examples/classification-example/build
+RUN cp ../target/wasm32-wasi/debug/wasi-nn-example.wasm . \
+  && wget -q --no-clobber https://github.com/intel/openvino-rs/raw/main/crates/openvino/tests/fixtures/mobilenet/mobilenet.xml \
+  && wget -q --no-clobber https://github.com/intel/openvino-rs/raw/main/crates/openvino/tests/fixtures/mobilenet/mobilenet.bin
 # There are model files(mobilenet*) and wasm files(wasi-nn-example.wasm) in the directory,
 # /workspaces/wasi-nn/rust/examples/classification-example/build
 
@@ -52,14 +51,32 @@ RUN git clone --depth 1 https://github.com/bytecodealliance/wasi-nn.git .
 WORKDIR /tmp
 RUN wget -q https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh \
   && chmod a+x ./install.sh
-RUN ./install.sh -p /opt/wasmedge --plugins wasi_nn-tensorflowlite
+# RUN ./install.sh -p /opt/wasmedge --plugins wasi_nn-tensorflowlite wasi_nn-openvino
+RUN ./install.sh -r yes -D -p /opt/wasmedge --plugins wasi_nn-openvino --dist ubuntu20.04 \
+  && /opt/wasmedge/bin/wasmedge --version
 ENV PATH=/opt/wasmedge/bin:${PATH}
-ENV WASMEDGE_LIB_DIR=/opt/wasmedge/lib
+# ENV WASMEDGE_LIB_DIR=/opt/wasmedge/lib
 
 #
 # wasmedge-wasinn-examples
 WORKDIR /workspaces/wasmedge-wasinn-examples
 RUN git clone --depth 1 https://github.com/second-state/WasmEdge-WASINN-examples.git .
+COPY core/iwasm/libraries/wasi-nn/test/bump_wasi_nn_to_0_6_0.patch .
+RUN git apply ./bump_wasi_nn_to_0_6_0.patch
+# recompile with wasi-nn 0.6.0
+RUN cd openvino-mobilenet-image/rust && cargo build --target=wasm32-wasi
+RUN cd openvino-mobilenet-raw/rust && cargo build --target=wasm32-wasi
+RUN cd openvino-road-segmentation-adas/openvino-road-seg-adas && cargo build --target=wasm32-wasi
+RUN cd tflite-birds_v1-image/rust && cargo build --target=wasm32-wasi
+
+# preparation
+RUN cd openvino-mobilenet-image \
+  && ./download_mobilenet.sh . \
+  && ls -l mobilenet.xml mobilenet.bin
+
+RUN cd openvino-mobilenet-raw \
+  && ./download_mobilenet.sh . \
+  && ls -l mobilenet.xml mobilenet.bin tensor-1x224x224x3-f32.bgr
 
 #
 # iwasm. build from source
@@ -67,11 +84,16 @@ WORKDIR /workspaces/wamr
 COPY . .
 
 WORKDIR /workspaces/wamr/product-mini/platforms/linux
-RUN cmake -S . -B build -DWAMR_BUILD_WASI_NN=1 -DWAMR_BUILD_WASI_EPHEMERAL_NN=1 \
-  && cmake --build build
-RUN ln -sf "$(realpath ./build/iwasm)" /usr/local/bin/iwasm
 
-#
+RUN OpenVINO_DIR=/usr/lib/openvino-2023.2.0 \
+    cmake -S . -B build \
+    -DWAMR_BUILD_WASI_NN=1 -DWAMR_BUILD_WASI_EPHEMERAL_NN=1 \
+    -DWAMR_BUILD_WASI_NN_OPENVINO=1 -DWAMR_BUILD_WASI_NN_TFLITE=1 \
+  && cmake --build build
+
+ENV PATH=/workspaces/wamr/product-mini/platforms/linux/build:${PATH}
+ENV LD_LIBRARY_PATH=/workspaces/wamr/product-mini/platforms/linux/build
+
 # add smoke test script
 COPY core/iwasm/libraries/wasi-nn/test/run_smoke_test.py /
 
diff --git a/core/iwasm/libraries/wasi-nn/test/bump_wasi_nn_to_0_6_0.patch b/core/iwasm/libraries/wasi-nn/test/bump_wasi_nn_to_0_6_0.patch
new file mode 100644
index 000000000..46e152b27
--- /dev/null
+++ b/core/iwasm/libraries/wasi-nn/test/bump_wasi_nn_to_0_6_0.patch
@@ -0,0 +1,47 @@
+diff --git a/openvino-mobilenet-image/rust/Cargo.toml b/openvino-mobilenet-image/rust/Cargo.toml
+index d09e0a4..c7083fb 100644
+--- a/openvino-mobilenet-image/rust/Cargo.toml
++++ b/openvino-mobilenet-image/rust/Cargo.toml
+@@ -8,6 +8,6 @@ publish = false
+ 
+ [dependencies]
+ image = { version = "0.23.14", default-features = false, features = ["gif", "jpeg", "ico", "png", "pnm", "tga", "tiff", "webp", "bmp", "hdr", "dxt", "dds", "farbfeld"]  }
+-wasi-nn = { version = "0.4.0" }
++wasi-nn = { version = "0.6.0" }
+ 
+ [workspace]
+diff --git a/openvino-mobilenet-raw/rust/Cargo.toml b/openvino-mobilenet-raw/rust/Cargo.toml
+index 8eab25b..3f00aec 100644
+--- a/openvino-mobilenet-raw/rust/Cargo.toml
++++ b/openvino-mobilenet-raw/rust/Cargo.toml
+@@ -7,6 +7,6 @@ edition = "2021"
+ publish = false
+ 
+ [dependencies]
+-wasi-nn = { version = "0.4.0" }
++wasi-nn = { version = "0.6.0" }
+ 
+ [workspace]
+diff --git a/openvino-road-segmentation-adas/openvino-road-seg-adas/Cargo.toml b/openvino-road-segmentation-adas/openvino-road-seg-adas/Cargo.toml
+index 998f391..93f91e0 100644
+--- a/openvino-road-segmentation-adas/openvino-road-seg-adas/Cargo.toml
++++ b/openvino-road-segmentation-adas/openvino-road-seg-adas/Cargo.toml
+@@ -5,5 +5,5 @@ name = "openvino-road-seg-adas"
+ version = "0.2.0"
+ 
+ [dependencies]
+-wasi-nn = "0.4.0"
++wasi-nn = "0.6.0"
+ image = { version = "0.23.14", default-features = false, features = ["gif", "jpeg", "ico", "png", "pnm", "tga", "tiff", "webp", "bmp", "hdr", "dxt", "dds", "farbfeld"]  }
+diff --git a/tflite-birds_v1-image/rust/Cargo.toml b/tflite-birds_v1-image/rust/Cargo.toml
+index 572ecb9..9e89e87 100644
+--- a/tflite-birds_v1-image/rust/Cargo.toml
++++ b/tflite-birds_v1-image/rust/Cargo.toml
+@@ -8,6 +8,6 @@ publish = false
+ 
+ [dependencies]
+ image = { version = "0.23.14", default-features = false, features = ["gif", "jpeg", "ico", "png", "pnm", "tga", "tiff", "webp", "bmp", "hdr", "dxt", "dds", "farbfeld"]  }
+-wasi-nn = "0.4.0"
++wasi-nn = "0.6.0"
+ 
+ [workspace]
diff --git a/core/iwasm/libraries/wasi-nn/test/requirements.txt b/core/iwasm/libraries/wasi-nn/test/requirements.txt
index 0d031ffdd..177c22c19 100644
--- a/core/iwasm/libraries/wasi-nn/test/requirements.txt
+++ b/core/iwasm/libraries/wasi-nn/test/requirements.txt
@@ -1,2 +1,2 @@
-tensorflow==2.11.1
+tensorflow==2.12.1
 numpy==1.26.4
diff --git a/core/iwasm/libraries/wasi-nn/test/run_smoke_test.py b/core/iwasm/libraries/wasi-nn/test/run_smoke_test.py
index 09e775be8..a62d9cb7a 100644
--- a/core/iwasm/libraries/wasi-nn/test/run_smoke_test.py
+++ b/core/iwasm/libraries/wasi-nn/test/run_smoke_test.py
@@ -4,6 +4,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #
 
+from dataclasses import dataclass
 from pathlib import Path
 from pprint import pprint
 import re
@@ -13,93 +14,144 @@ import subprocess
 from typing import List
 
 
-def execute_tflite_birds_v1_image_once(
-    runtime_bin: str, runtime_args: List[str], cwd: Path
+@dataclass
+class WasmEdgeExampleResult:
+    class_id: int
+    possibility: float
+
+
+def execute_once(
+    runtime_bin: str,
+    runtime_args: List[str],
+    wasm_file: str,
+    wasm_args: List[str],
+    cwd: Path,
 ) -> str:
-    """
-    execute tflite_birds_v1_image example with
-
-    ```
-    iwasm --native-lib=somewhere/libwasi-nn-tflite.so --map-dir=.:. \
-        ./wasmedge-wasinn-example-tflite-bird-image.wasm  \
-        lite-model_aiy_vision_classifier_birds_V1_3.tflite \
-        bird.jpg
-    ```
-
-    or
-
-    ```
-    wasmedge --dir=.:. \
-        ./wasmedge-wasinn-example-tflite-bird-image.wasm  \
-        lite-model_aiy_vision_classifier_birds_V1_3.tflite \
-        bird.jpg
-    ```
-
-    assumption:
-    - under the right directory, tflite-birds_v1-image
-    - every materials are ready
-    """
-
-    wasm_file = "./wasmedge-wasinn-example-tflite-bird-image.wasm"
-    wasm_args = ["lite-model_aiy_vision_classifier_birds_V1_3.tflite", "bird.jpg"]
-
     cmd = [runtime_bin]
     cmd.extend(runtime_args)
     cmd.append(wasm_file)
     cmd.extend(wasm_args)
 
-    try:
-        p = subprocess.run(
-            cmd,
-            cwd=cwd,
-            capture_output=True,
-            check=True,
-            text=True,
-            universal_newlines=True,
-        )
-        return p.stdout
-    except subprocess.CalledProcessError as e:
-        print(e.stderr)
-        print()
-        print(e.stdout)
+    # print(f'Execute: {" ".join(cmd)}')
+
+    p = subprocess.run(
+        cmd,
+        cwd=cwd,
+        capture_output=True,
+        check=True,
+        text=True,
+        universal_newlines=True,
+    )
+    return p.stdout
 
 
-def filter_output_tflite_birds_v1_image(output: str) -> List[str]:
+def execute_openvino_road_segmentation_adas_once(
+    runtime_bin: str, runtime_args: List[str], cwd: Path
+) -> str:
+    """
+    execute openvino-road-segmentation-adas with iwasm and wasmedge
     """
-    not all output is needed for comparision
 
-    pick lines like: "   1.) [526](136)Cathartes burrovianus"
+    wasm_file = (
+        "./openvino-road-seg-adas/target/wasm32-wasi/debug/openvino-road-seg-adas.wasm"
+    )
+    wasm_args = [
+        "./model/road-segmentation-adas-0001.xml",
+        "./model/road-segmentation-adas-0001.bin",
+        "./image/empty_road_mapillary.jpg",
+    ]
+    return execute_once(runtime_bin, runtime_args, wasm_file, wasm_args, cwd)
+
+
+def execute_openvino_mobilenet_raw_once(
+    runtime_bin: str, runtime_args: List[str], cwd: Path
+) -> str:
+    """
+    execute openvino-mobilenet-image with iwasm and wasmedge
+    """
+
+    wasm_file = "./rust/target/wasm32-wasi/debug/wasmedge-wasinn-example-mobilenet.wasm"
+    wasm_args = [
+        "mobilenet.xml",
+        "mobilenet.bin",
+        "./tensor-1x224x224x3-f32.bgr",
+    ]
+    return execute_once(runtime_bin, runtime_args, wasm_file, wasm_args, cwd)
+
+
+def execute_openvino_mobilenet_image_once(
+    runtime_bin: str, runtime_args: List[str], cwd: Path
+) -> str:
+    """
+    execute openvino-mobilenet-image with iwasm and wasmedge
+    """
+
+    wasm_file = (
+        "./rust/target/wasm32-wasi/debug/wasmedge-wasinn-example-mobilenet-image.wasm"
+    )
+    wasm_args = [
+        "mobilenet.xml",
+        "mobilenet.bin",
+        "input.jpg",
+    ]
+    return execute_once(runtime_bin, runtime_args, wasm_file, wasm_args, cwd)
+
+
+def execute_tflite_birds_v1_image_once(
+    runtime_bin: str, runtime_args: List[str], cwd: Path
+) -> str:
+    """
+    execute openvino-mobilenet-image with iwasm and wasmedge
+    """
+
+    wasm_file = (
+        "rust/target/wasm32-wasi/debug/wasmedge-wasinn-example-tflite-bird-image.wasm"
+    )
+    wasm_args = ["lite-model_aiy_vision_classifier_birds_V1_3.tflite", "bird.jpg"]
+    return execute_once(runtime_bin, runtime_args, wasm_file, wasm_args, cwd)
+
+
+def filter_output(output: str) -> List[WasmEdgeExampleResult]:
+    """
+    not all output is required for comparison
+
+    pick lines like: " 1.) [166](198)Aix galericulata"
     """
     filtered = []
-    PATTERN = re.compile(r"^\s+\d\.\)\s+\[\d+\]\(\d+\)\w+")
+    PATTERN = re.compile(r"^\s+\d\.\)\s+\[(\d+)\]\(([.0-9]+)\)\w+")
     for line in output.split("\n"):
-        if PATTERN.search(line):
-            filtered.append(line.strip())
+        m = PATTERN.search(line)
+        if m:
+            class_id, possibility = m.groups()
+            filtered.append(WasmEdgeExampleResult(class_id, possibility))
 
+    assert len(filtered)
     return filtered
 
 
-def execute_tflite_birds_v1_image(iwasm_bin: str, wasmedge_bin: str, cwd: Path):
-    iwasm_output = execute_tflite_birds_v1_image_once(
-        iwasm_bin,
-        [
-            "--native-lib=/workspaces/wamr/product-mini/platforms/linux/build/libwasi-nn-tflite.so",
-            "--map-dir=.:.",
-        ],
-        cwd,
+def compare_output(
+    iwasm_output: List[WasmEdgeExampleResult],
+    wasmedge_output: List[WasmEdgeExampleResult],
+) -> bool:
+    """
+    only compare top 2 and ignore possibility
+    """
+    return (iwasm_output[0].class_id, iwasm_output[1].class_id) == (
+        wasmedge_output[0].class_id,
+        wasmedge_output[1].class_id,
     )
-    iwasm_output = filter_output_tflite_birds_v1_image(iwasm_output)
 
-    wasmedge_output = execute_tflite_birds_v1_image_once(
-        wasmedge_bin, ["--dir=.:."], cwd
-    )
-    wasmedge_output = filter_output_tflite_birds_v1_image(wasmedge_output)
 
-    if iwasm_output == wasmedge_output:
-        print("- tflite_birds_v1_image. PASS")
+def summarizer_result(
+    example_name: str,
+    iwasm_output: List[WasmEdgeExampleResult],
+    wasmedge_output: List[WasmEdgeExampleResult],
+):
+    if compare_output(iwasm_output, wasmedge_output):
+        print(f"- {example_name}. PASS")
         return
 
-    print("- tflite_birds_v1_image. FAILED")
+    print(f"- {example_name}. FAILED")
     print("------------------------------------------------------------")
     pprint(iwasm_output)
     print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
@@ -107,14 +159,129 @@ def execute_tflite_birds_v1_image(iwasm_bin: str, wasmedge_bin: str, cwd: Path):
     print("------------------------------------------------------------")
 
 
-def execute_wasmedge_wasinn_exmaples(iwasm_bin: str, wasmedge_bin: str):
+def execute_tflite_birds_v1_image(iwasm_bin: str, wasmedge_bin: str, cwd: Path):
+    iwasm_output = execute_tflite_birds_v1_image_once(
+        iwasm_bin,
+        [
+            "--map-dir=.:.",
+        ],
+        cwd,
+    )
+    iwasm_output = filter_output(iwasm_output)
+
+    wasmedge_output = execute_tflite_birds_v1_image_once(
+        wasmedge_bin, ["--dir=.:."], cwd
+    )
+    wasmedge_output = filter_output(wasmedge_output)
+
+    summarizer_result("tf_lite_birds_v1_image", iwasm_output, wasmedge_output)
+
+
+def execute_openvino_mobilenet_image(iwasm_bin: str, wasmedge_bin: str, cwd: Path):
+    iwasm_output = execute_openvino_mobilenet_image_once(
+        iwasm_bin,
+        [
+            "--map-dir=.:.",
+        ],
+        cwd,
+    )
+    iwasm_output = filter_output(iwasm_output)
+
+    wasmedge_output = execute_openvino_mobilenet_image_once(
+        wasmedge_bin, ["--dir=.:."], cwd
+    )
+    wasmedge_output = filter_output(wasmedge_output)
+
+    summarizer_result("openvino_mobile_image", iwasm_output, wasmedge_output)
+
+
+def execute_openvino_mobilenet_raw(iwasm_bin: str, wasmedge_bin: str, cwd: Path):
+    iwasm_output = execute_openvino_mobilenet_raw_once(
+        iwasm_bin,
+        [
+            "--map-dir=.:.",
+        ],
+        cwd,
+    )
+    iwasm_output = filter_output(iwasm_output)
+
+    wasmedge_output = execute_openvino_mobilenet_raw_once(
+        wasmedge_bin, ["--dir=.:."], cwd
+    )
+    wasmedge_output = filter_output(wasmedge_output)
+
+    summarizer_result("openvino_mobile_raw", iwasm_output, wasmedge_output)
+
+
+def execute_openvino_road_segmentation_adas(
+    iwasm_bin: str, wasmedge_bin: str, cwd: Path
+):
+    def filter_output(output: str) -> str:
+        """
+        focus on lines:
+           The size of the output buffer is 7340032 bytes
+           dump tensor to "wasinn-openvino-inference-output-1x4x512x896xf32.tensor"
+        """
+        for line in output.split("\n"):
+            if "The size of the output buffer is" in line:
+                dump_tensor_size = int(line.split(" ")[-2])
+                continue
+
+            if "dump tensor to " in line:
+                dump_tensor_file = line.split(" ")[-1]
+                continue
+
+        return (dump_tensor_file, dump_tensor_size)
+
+    iwasm_output = execute_openvino_road_segmentation_adas_once(
+        iwasm_bin,
+        [
+            "--map-dir=.:.",
+        ],
+        cwd,
+    )
+    iwasm_tensor_file, iwasm_tensor_size = filter_output(iwasm_output)
+
+    wasmedge_output = execute_openvino_road_segmentation_adas_once(
+        wasmedge_bin, ["--dir=.:."], cwd
+    )
+    wasmedge_tensor_file, wasmedge_tensor_size = filter_output(wasmedge_output)
+
+    # TODO: binary compare?
+    if iwasm_tensor_size == wasmedge_tensor_size:
+        print(f"- openvino_road_segmentation_adas. PASS")
+        return
+
+    print(f"- openvino_road_segmentation_adas. FAILED")
+    print("------------------------------------------------------------")
+    print(f"FILE:{iwasm_tensor_file}, SIZE:{iwasm_tensor_size}")
+    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
+    print(f"FILE:{wasmedge_tensor_file}, SIZE:{wasmedge_tensor_size}")
+    print("------------------------------------------------------------")
+
+
+def execute_wasmedge_wasinn_examples(iwasm_bin: str, wasmedge_bin: str):
     assert Path.cwd().name == "wasmedge-wasinn-examples"
     assert shutil.which(iwasm_bin)
     assert shutil.which(wasmedge_bin)
 
-    tflite_birds_v1_image_dir = Path.cwd().joinpath("./tflite-birds_v1-image")
-    execute_tflite_birds_v1_image(iwasm_bin, wasmedge_bin, tflite_birds_v1_image_dir)
+    # TODO: keep commenting until https://github.com/bytecodealliance/wasm-micro-runtime/pull/3597 is merged
+    # tflite_birds_v1_image_dir = Path.cwd().joinpath("./tflite-birds_v1-image")
+    # execute_tflite_birds_v1_image(iwasm_bin, wasmedge_bin, tflite_birds_v1_image_dir)
+
+    openvino_mobile_image_dir = Path.cwd().joinpath("./openvino-mobilenet-image")
+    execute_openvino_mobilenet_image(iwasm_bin, wasmedge_bin, openvino_mobile_image_dir)
+
+    openvino_mobile_raw_dir = Path.cwd().joinpath("./openvino-mobilenet-raw")
+    execute_openvino_mobilenet_raw(iwasm_bin, wasmedge_bin, openvino_mobile_raw_dir)
+
+    openvino_road_segmentation_adas_dir = Path.cwd().joinpath(
+        "./openvino-road-segmentation-adas"
+    )
+    execute_openvino_road_segmentation_adas(
+        iwasm_bin, wasmedge_bin, openvino_road_segmentation_adas_dir
+    )
 
 
 if __name__ == "__main__":
-    execute_wasmedge_wasinn_exmaples("iwasm", "wasmedge")
+    execute_wasmedge_wasinn_examples("iwasm", "wasmedge")
diff --git a/core/shared/mem-alloc/ems/ems_gc.h b/core/shared/mem-alloc/ems/ems_gc.h
index 59cc00285..ff65b4e7c 100644
--- a/core/shared/mem-alloc/ems/ems_gc.h
+++ b/core/shared/mem-alloc/ems/ems_gc.h
@@ -309,10 +309,10 @@ void
 wasm_runtime_set_wasm_object_extra_info_flag(gc_object_t obj, bool set);
 
 void
-wasm_runtime_gc_prepare();
+wasm_runtime_gc_prepare(void *exec_env);
 
 void
-wasm_runtime_gc_finalize();
+wasm_runtime_gc_finalize(void *exec_env);
 #endif /* end of WASM_ENABLE_GC != 0 */
 
 #define GC_HEAP_STAT_SIZE (128 / 4)
diff --git a/core/shared/platform/common/libc-util/SConscript b/core/shared/platform/common/libc-util/SConscript
new file mode 100644
index 000000000..7180b26c4
--- /dev/null
+++ b/core/shared/platform/common/libc-util/SConscript
@@ -0,0 +1,20 @@
+#
+# Copyright 2024 Sony Semiconductor Solutions Corporation.
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+
+from building import *
+import re
+
+Import('rtconfig')
+
+cwd     = GetCurrentDir()
+src     = Split('''
+libc_errno.c
+''')
+CPPPATH = [cwd]
+
+group = DefineGroup('iwasm_libc_util', src, depend = [''], CPPPATH = CPPPATH)
+
+Return('group')
diff --git a/core/shared/platform/common/posix/SConscript b/core/shared/platform/common/posix/SConscript
new file mode 100644
index 000000000..48cffda25
--- /dev/null
+++ b/core/shared/platform/common/posix/SConscript
@@ -0,0 +1,20 @@
+#
+# Copyright 2024 Sony Semiconductor Solutions Corporation.
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+
+from building import *
+import re
+
+Import('rtconfig')
+
+cwd     = GetCurrentDir()
+src     = Split('''
+posix_file.c
+''')
+CPPPATH = [cwd]
+
+group = DefineGroup('iwasm_common_posix', src, depend = [''], CPPPATH = CPPPATH)
+
+Return('group')
diff --git a/core/shared/platform/common/posix/posix_file.c b/core/shared/platform/common/posix/posix_file.c
index ac7e58537..9ae0a03a2 100644
--- a/core/shared/platform/common/posix/posix_file.c
+++ b/core/shared/platform/common/posix/posix_file.c
@@ -26,7 +26,7 @@
  * (platform_internal.h)
  */
 #if !defined(CONFIG_HAS_D_INO)
-#if !defined(__NuttX__)
+#if !defined(__NuttX__) && !defined(__RTTHREAD__)
 #define CONFIG_HAS_D_INO 1
 #define CONFIG_HAS_ISATTY 1
 #else
@@ -54,6 +54,18 @@
 #define CONFIG_HAS_O_SYNC
 #endif
 
+#ifndef STDIN_FILENO
+#define STDIN_FILENO 0
+#endif
+
+#ifndef STDOUT_FILENO
+#define STDOUT_FILENO 1
+#endif
+
+#ifndef STDERR_FILENO
+#define STDERR_FILENO 2
+#endif
+
 // Converts a POSIX timespec to a WASI timestamp.
 static __wasi_timestamp_t
 convert_timespec(const struct timespec *ts)
@@ -858,30 +870,39 @@ os_isatty(os_file_handle handle)
 #endif
 }
 
+bool
+os_is_stdin_handle(os_file_handle fd)
+{
+    return fd == STDIN_FILENO;
+}
+
+bool
+os_is_stdout_handle(os_file_handle fd)
+{
+    return fd == STDOUT_FILENO;
+}
+
+bool
+os_is_stderr_handle(os_file_handle fd)
+{
+    return fd == STDERR_FILENO;
+}
+
 os_file_handle
 os_convert_stdin_handle(os_raw_file_handle raw_stdin)
 {
-#ifndef STDIN_FILENO
-#define STDIN_FILENO 0
-#endif
     return raw_stdin >= 0 ? raw_stdin : STDIN_FILENO;
 }
 
 os_file_handle
 os_convert_stdout_handle(os_raw_file_handle raw_stdout)
 {
-#ifndef STDOUT_FILENO
-#define STDOUT_FILENO 1
-#endif
     return raw_stdout >= 0 ? raw_stdout : STDOUT_FILENO;
 }
 
 os_file_handle
 os_convert_stderr_handle(os_raw_file_handle raw_stderr)
 {
-#ifndef STDERR_FILENO
-#define STDERR_FILENO 2
-#endif
     return raw_stderr >= 0 ? raw_stderr : STDERR_FILENO;
 }
 
diff --git a/core/shared/platform/common/posix/posix_thread.c b/core/shared/platform/common/posix/posix_thread.c
index 189092e9b..5ec957e52 100644
--- a/core/shared/platform/common/posix/posix_thread.c
+++ b/core/shared/platform/common/posix/posix_thread.c
@@ -4,8 +4,10 @@
  */
 
 #ifndef _GNU_SOURCE
+#if !defined(__RTTHREAD__)
 #define _GNU_SOURCE
 #endif
+#endif
 #include "platform_api_vmcore.h"
 #include "platform_api_extension.h"
 
@@ -46,6 +48,13 @@ os_thread_wrapper(void *arg)
 #endif
 #ifdef OS_ENABLE_WAKEUP_BLOCKING_OP
     os_end_blocking_op();
+#endif
+#if BH_DEBUG != 0
+#if defined __APPLE__
+    pthread_setname_np("wamr");
+#else
+    pthread_setname_np(pthread_self(), "wamr");
+#endif
 #endif
     start_func(thread_arg);
 #ifdef OS_ENABLE_HW_BOUND_CHECK
@@ -448,7 +457,7 @@ os_thread_get_stack_boundary()
         addr += guard_size;
     }
     (void)stack_size;
-#elif defined(__APPLE__) || defined(__NuttX__)
+#elif defined(__APPLE__) || defined(__NuttX__) || defined(__RTTHREAD__)
     if ((addr = (uint8 *)pthread_get_stackaddr_np(self))) {
         stack_size = pthread_get_stacksize_np(self);
 
@@ -495,6 +504,8 @@ static os_thread_local_attribute bool thread_signal_inited = false;
 #if WASM_DISABLE_STACK_HW_BOUND_CHECK == 0
 /* The signal alternate stack base addr */
 static os_thread_local_attribute uint8 *sigalt_stack_base_addr;
+/* The previous signal alternate stack */
+static os_thread_local_attribute stack_t prev_sigalt_stack;
 
 /*
  * ASAN is not designed to work with custom stack unwind or other low-level
@@ -674,7 +685,9 @@ os_thread_signal_init(os_signal_handler handler)
     sigalt_stack_info.ss_sp = map_addr;
     sigalt_stack_info.ss_size = map_size;
     sigalt_stack_info.ss_flags = 0;
-    if (sigaltstack(&sigalt_stack_info, NULL) != 0) {
+    memset(&prev_sigalt_stack, 0, sizeof(stack_t));
+    /* Set signal alternate stack and save the previous one */
+    if (sigaltstack(&sigalt_stack_info, &prev_sigalt_stack) != 0) {
         os_printf("Failed to init signal alternate stack\n");
         goto fail2;
     }
@@ -720,19 +733,12 @@ fail1:
 void
 os_thread_signal_destroy()
 {
-#if WASM_DISABLE_STACK_HW_BOUND_CHECK == 0
-    stack_t sigalt_stack_info;
-#endif
-
     if (!thread_signal_inited)
         return;
 
 #if WASM_DISABLE_STACK_HW_BOUND_CHECK == 0
-    /* Disable signal alternate stack */
-    memset(&sigalt_stack_info, 0, sizeof(stack_t));
-    sigalt_stack_info.ss_flags = SS_DISABLE;
-    sigalt_stack_info.ss_size = SIG_ALT_STACK_SIZE;
-    sigaltstack(&sigalt_stack_info, NULL);
+    /* Restore the previous signal alternate stack */
+    sigaltstack(&prev_sigalt_stack, NULL);
 
     os_munmap(sigalt_stack_base_addr, SIG_ALT_STACK_SIZE);
 
diff --git a/core/shared/platform/esp-idf/espidf_file.c b/core/shared/platform/esp-idf/espidf_file.c
index ac7e58537..be50a2900 100644
--- a/core/shared/platform/esp-idf/espidf_file.c
+++ b/core/shared/platform/esp-idf/espidf_file.c
@@ -54,6 +54,18 @@
 #define CONFIG_HAS_O_SYNC
 #endif
 
+#ifndef STDIN_FILENO
+#define STDIN_FILENO 0
+#endif
+
+#ifndef STDOUT_FILENO
+#define STDOUT_FILENO 1
+#endif
+
+#ifndef STDERR_FILENO
+#define STDERR_FILENO 2
+#endif
+
 // Converts a POSIX timespec to a WASI timestamp.
 static __wasi_timestamp_t
 convert_timespec(const struct timespec *ts)
@@ -858,30 +870,39 @@ os_isatty(os_file_handle handle)
 #endif
 }
 
+bool
+os_is_stdin_handle(os_file_handle fd)
+{
+    return fd == STDIN_FILENO;
+}
+
+bool
+os_is_stdout_handle(os_file_handle fd)
+{
+    return fd == STDOUT_FILENO;
+}
+
+bool
+os_is_stderr_handle(os_file_handle fd)
+{
+    return fd == STDERR_FILENO;
+}
+
 os_file_handle
 os_convert_stdin_handle(os_raw_file_handle raw_stdin)
 {
-#ifndef STDIN_FILENO
-#define STDIN_FILENO 0
-#endif
     return raw_stdin >= 0 ? raw_stdin : STDIN_FILENO;
 }
 
 os_file_handle
 os_convert_stdout_handle(os_raw_file_handle raw_stdout)
 {
-#ifndef STDOUT_FILENO
-#define STDOUT_FILENO 1
-#endif
     return raw_stdout >= 0 ? raw_stdout : STDOUT_FILENO;
 }
 
 os_file_handle
 os_convert_stderr_handle(os_raw_file_handle raw_stderr)
 {
-#ifndef STDERR_FILENO
-#define STDERR_FILENO 2
-#endif
     return raw_stderr >= 0 ? raw_stderr : STDERR_FILENO;
 }
 
diff --git a/core/shared/platform/include/platform_api_extension.h b/core/shared/platform/include/platform_api_extension.h
index 7c6120ba2..b1c3b4f4a 100644
--- a/core/shared/platform/include/platform_api_extension.h
+++ b/core/shared/platform/include/platform_api_extension.h
@@ -104,8 +104,9 @@ os_thread_exit(void *retval);
 #endif
 
 /* Clang's __GNUC_PREREQ macro has a different meaning than GCC one,
-   so we have to handle this case specially */
-#if defined(__clang__)
+   so we have to handle this case specially(except the CCAC compiler
+   provided by MetaWare, which doesn't support atomic operations) */
+#if defined(__clang__) && !defined(__CCAC__)
 /* Clang provides stdatomic.h since 3.6.0
    See https://releases.llvm.org/3.6.0/tools/clang/docs/ReleaseNotes.html */
 #if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 6)
@@ -1502,6 +1503,33 @@ os_convert_stdout_handle(os_raw_file_handle raw_stdout);
 os_file_handle
 os_convert_stderr_handle(os_raw_file_handle raw_stderr);
 
+/**
+ *
+ * @param fd a file handle
+ *
+ * @return true if it is stdin
+ */
+bool
+os_is_stdin_handle(os_file_handle fd);
+
+/**
+ *
+ * @param fd a file handle
+ *
+ * @return true if it is stdout
+ */
+bool
+os_is_stdout_handle(os_file_handle fd);
+
+/**
+ *
+ * @param fd a file handle
+ *
+ * @return true if it is stderr
+ */
+bool
+os_is_stderr_handle(os_file_handle fd);
+
 /**
  * Open a directory stream for the provided directory handle. The returned
  * directory stream will be positioned at the first entry in the directory.
diff --git a/core/shared/platform/rt-thread/platform_internal.h b/core/shared/platform/rt-thread/platform_internal.h
index 4ebdabb10..69d6d5581 100644
--- a/core/shared/platform/rt-thread/platform_internal.h
+++ b/core/shared/platform/rt-thread/platform_internal.h
@@ -7,7 +7,16 @@
 #ifndef RTTHREAD_PLATFORM_INTERNAL_H
 #define RTTHREAD_PLATFORM_INTERNAL_H
 
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <errno.h>
+#include <poll.h>
+#if defined(RT_USING_PTHREADS)
+#include <pthread.h>
+#else
 #include <rtthread.h>
+#endif
 #include <stdbool.h>
 #include <string.h>
 #include <stdio.h>
@@ -15,6 +24,8 @@
 #include <math.h>
 #include <stdint.h>
 #include <ctype.h>
+#include <dirent.h>
+#include <assert.h>
 
 #if defined(WASM_ENABLE_AOT)
 #if defined(RTT_WAMR_BUILD_TARGET_THUMB)
@@ -32,12 +43,67 @@
 #endif
 #endif /* WASM_ENABLE_AOT */
 
+/* Use rt-thread's definition as default */
+#if 0 // defined(RT_USING_PTHREADS)
+typedef pthread_t korp_tid;
+typedef pthread_mutex_t korp_mutex;
+typedef pthread_cond_t korp_cond;
+typedef pthread_t korp_thread;
+#else
 typedef rt_thread_t korp_tid;
 typedef struct rt_mutex korp_mutex;
 typedef struct rt_thread korp_cond;
 typedef struct rt_thread korp_thread;
+#endif
 typedef unsigned int korp_sem;
 
+#if !defined(socklen_t) && !defined(SOCKLEN_T_DEFINED)
+typedef uint32_t socklen_t;
+#endif
+
+#if !defined(SOL_SOCKET)
+#define SOL_SOCKET 1
+#endif
+
+#if !defined(SO_TYPE)
+#define SO_TYPE 3
+#endif
+
+#if !defined(SOCK_DGRAM)
+#define SOCK_DGRAM 2
+#endif
+
+#if !defined(SOCK_STREAM)
+#define SOCK_STREAM 1
+#endif
+
+#if !defined(UTIME_NOW)
+#define UTIME_NOW -2L
+#endif
+
+#if !defined(UTIME_OMIT)
+#define UTIME_OMIT -1L
+#endif
+
+#if !defined(AT_SYMLINK_NOFOLLOW)
+#define AT_SYMLINK_NOFOLLOW 2
+#endif
+
+#if !defined(AT_SYMLINK_FOLLOW)
+#define AT_SYMLINK_FOLLOW 4
+#endif
+
+#if !defined(AT_REMOVEDIR)
+#define AT_REMOVEDIR 8
+#endif
+
+#define DT_BLK 0x06
+#define DT_CHR 0x02
+#define DT_LNK 0x0A
+
+#define PTHREAD_STACK_MIN 1024
+#define BH_THREAD_DEFAULT_PRIORITY 30
+
 /* korp_rwlock is used in platform_api_extension.h,
    we just define the type to make the compiler happy */
 typedef struct {
diff --git a/core/shared/platform/rt-thread/rtt_file.c b/core/shared/platform/rt-thread/rtt_file.c
new file mode 100644
index 000000000..b9fd1f9fa
--- /dev/null
+++ b/core/shared/platform/rt-thread/rtt_file.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2024 Sony Semiconductor Solutions Corporation.
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "platform_api_vmcore.h"
+#include "platform_api_extension.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <stdint.h>
+
+struct iovec {
+    void *iov_base;
+    size_t iov_len;
+};
+
+ssize_t
+readv(int fd, const struct iovec *iov, int iovcnt)
+{
+    ssize_t ntotal;
+    ssize_t nread;
+    size_t remaining;
+    uint8_t *buffer;
+    int i;
+
+    /* Process each entry in the struct iovec array */
+
+    for (i = 0, ntotal = 0; i < iovcnt; i++) {
+        /* Ignore zero-length reads */
+
+        if (iov[i].iov_len > 0) {
+            buffer = iov[i].iov_base;
+            remaining = iov[i].iov_len;
+
+            /* Read repeatedly as necessary to fill buffer */
+
+            do {
+                /* NOTE:  read() is a cancellation point */
+
+                nread = read(fd, buffer, remaining);
+
+                /* Check for a read error */
+
+                if (nread < 0) {
+                    return nread;
+                }
+
+                /* Check for an end-of-file condition */
+
+                else if (nread == 0) {
+                    return ntotal;
+                }
+
+                /* Update pointers and counts in order to handle partial
+                 * buffer reads.
+                 */
+
+                buffer += nread;
+                remaining -= nread;
+                ntotal += nread;
+            } while (remaining > 0);
+        }
+    }
+
+    return ntotal;
+}
+
+ssize_t
+writev(int fd, const struct iovec *iov, int iovcnt)
+{
+    uint16_t i, num;
+    int length;
+
+    num = 0;
+    for (i = 0; i < iovcnt; i++) {
+        if (iov[i].iov_len > 0) {
+            length = write(fd, iov[i].iov_base, iov[i].iov_len);
+            if (length != iov[i].iov_len)
+                return errno;
+
+            num += iov[i].iov_len;
+        }
+    }
+    return num;
+}
+
+int
+fstatat(int fd, const char *path, struct stat *buf, int flag)
+{
+    errno = ENOSYS;
+    return -1;
+}
+
+int
+mkdirat(int fd, const char *path, mode_t mode)
+{
+    errno = ENOSYS;
+    return -1;
+}
+
+ssize_t
+readlinkat(int fd, const char *path, char *buf, size_t bufsize)
+{
+    errno = EINVAL;
+    return -1;
+}
+
+int
+linkat(int fd1, const char *path1, int fd2, const char *path2, int flag)
+{
+    errno = ENOSYS;
+    return -1;
+}
+
+int
+renameat(int fromfd, const char *from, int tofd, const char *to)
+{
+    errno = ENOSYS;
+    return -1;
+}
+
+int
+symlinkat(const char *target, int fd, const char *path)
+{
+    errno = ENOSYS;
+    return -1;
+}
+
+int
+unlinkat(int fd, const char *path, int flag)
+{
+    errno = ENOSYS;
+    return -1;
+}
+
+int
+utimensat(int fd, const char *path, const struct timespec *ts, int flag)
+{
+    errno = ENOSYS;
+    return -1;
+}
+
+DIR *
+fdopendir(int fd)
+{
+    errno = ENOSYS;
+    return NULL;
+}
+
+int
+fdatasync(int fd)
+{
+    errno = ENOSYS;
+    return -1;
+}
+
+ssize_t
+preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+{
+    errno = ENOSYS;
+    return 0;
+}
+
+ssize_t
+pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+{
+    errno = ENOSYS;
+    return 0;
+}
+
+char *
+realpath(char *path, char *resolved_path)
+{
+    errno = ENOSYS;
+    return NULL;
+}
+
+int
+futimens(int fd, const struct timespec *times)
+{
+    errno = ENOSYS;
+    return -1;
+}
+
+int
+posix_fallocate(int __fd, off_t __offset, off_t __length)
+{
+    errno = ENOSYS;
+    return -1;
+}
diff --git a/core/shared/platform/rt-thread/rtt_platform.c b/core/shared/platform/rt-thread/rtt_platform.c
index 10c59a7fa..904bb52ed 100644
--- a/core/shared/platform/rt-thread/rtt_platform.c
+++ b/core/shared/platform/rt-thread/rtt_platform.c
@@ -134,84 +134,41 @@ os_time_thread_cputime_us(void)
     return os_time_get_boot_us();
 }
 
-korp_tid
-os_self_thread(void)
-{
-    return rt_thread_self();
-}
-
-uint8 *
-os_thread_get_stack_boundary(void)
-{
-    rt_thread_t tid = rt_thread_self();
-    return tid->stack_addr;
-}
-
-void
-os_thread_jit_write_protect_np(bool enabled)
-{}
-
-int
-os_mutex_init(korp_mutex *mutex)
-{
-    return rt_mutex_init(mutex, "wamr0", RT_IPC_FLAG_FIFO);
-}
-
-int
-os_mutex_destroy(korp_mutex *mutex)
-{
-    return rt_mutex_detach(mutex);
-}
-
-int
-os_mutex_lock(korp_mutex *mutex)
-{
-    return rt_mutex_take(mutex, RT_WAITING_FOREVER);
-}
-
-int
-os_mutex_unlock(korp_mutex *mutex)
-{
-    return rt_mutex_release(mutex);
-}
-
-/*
- * functions below was not implement
- */
-
-int
-os_cond_init(korp_cond *cond)
-{
-    return 0;
-}
-
-int
-os_cond_destroy(korp_cond *cond)
-{
-    return 0;
-}
-
-int
-os_cond_wait(korp_cond *cond, korp_mutex *mutex)
-{
-    return 0;
-}
-
 void *
 os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file)
 {
-    void *addr;
+    void *buf_origin;
+    void *buf_fixed;
+    rt_ubase_t *addr_field;
 
-    if ((addr = rt_malloc(size)))
-        memset(addr, 0, size);
+    buf_origin = rt_malloc(size + 8 + sizeof(rt_ubase_t));
+    if (!buf_origin)
+        return NULL;
 
-    return addr;
+    buf_fixed = buf_origin + sizeof(void *);
+    if ((rt_ubase_t)buf_fixed & 0x7) {
+        buf_fixed = (void *)((rt_ubase_t)(buf_fixed + 8) & (~7));
+    }
+
+    addr_field = buf_fixed - sizeof(rt_ubase_t);
+    *addr_field = (rt_ubase_t)buf_origin;
+
+    memset(buf_origin, 0, size + 8 + sizeof(rt_ubase_t));
+    return buf_fixed;
 }
 
 void
 os_munmap(void *addr, size_t size)
 {
-    rt_free(addr);
+    void *mem_origin;
+    rt_ubase_t *addr_field;
+
+    if (addr) {
+        addr_field = addr - sizeof(rt_ubase_t);
+        mem_origin = (void *)(*addr_field);
+
+        rt_free(mem_origin);
+    }
 }
 
 int
@@ -227,3 +184,29 @@ os_dcache_flush(void)
 void
 os_icache_flush(void *start, size_t len)
 {}
+
+int
+os_getpagesize(void)
+{
+    return 4096;
+}
+
+void *
+os_mremap(void *in, size_t old_size, size_t new_size)
+{
+    return os_realloc(in, new_size);
+}
+
+__wasi_errno_t
+os_clock_time_get(__wasi_clockid_t clock_id, __wasi_timestamp_t precision,
+                  __wasi_timestamp_t *time)
+{
+    *time = rt_tick_get() * 1000ll * 1000ll;
+    return 0;
+}
+
+__wasi_errno_t
+os_clock_res_get(__wasi_clockid_t clock_id, __wasi_timestamp_t *resolution)
+{
+    return 0;
+}
diff --git a/core/shared/platform/rt-thread/rtt_socket.c b/core/shared/platform/rt-thread/rtt_socket.c
new file mode 100644
index 000000000..ae1d9ed33
--- /dev/null
+++ b/core/shared/platform/rt-thread/rtt_socket.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright 2024 Sony Semiconductor Solutions Corporation.
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "platform_api_vmcore.h"
+#include "platform_api_extension.h"
+
+int
+os_socket_accept(bh_socket_t server_sock, bh_socket_t *sock, void *addr,
+                 unsigned int *addrlen)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_connect(bh_socket_t socket, const char *addr, int port)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_recv_from(bh_socket_t socket, void *buf, unsigned int len, int flags,
+                    bh_sockaddr_t *src_addr)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_send_to(bh_socket_t socket, const void *buf, unsigned int len,
+                  int flags, const bh_sockaddr_t *dest_addr)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_addr_resolve(const char *host, const char *service,
+                       uint8_t *hint_is_tcp, uint8_t *hint_is_ipv4,
+                       bh_addr_info_t *addr_info, size_t addr_info_size,
+                       size_t *max_info_size)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_close(bh_socket_t socket)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_addr_local(bh_socket_t socket, bh_sockaddr_t *sockaddr)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_addr_remote(bh_socket_t socket, bh_sockaddr_t *sockaddr)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_bind(bh_socket_t socket, const char *host, int *port)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_listen(bh_socket_t socket, int max_client)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_create(bh_socket_t *sock, bool is_ipv4, bool is_tcp)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_send(bh_socket_t socket, const void *buf, unsigned int len)
+{
+    return BHT_ERROR;
+}
+
+__wasi_errno_t
+os_socket_shutdown(bh_socket_t socket)
+{
+    return __WASI_ENOSYS;
+}
+
+int
+os_socket_inet_network(bool is_ipv4, const char *cp, bh_ip_addr_buffer_t *out)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_send_timeout(bh_socket_t socket, uint64 timeout_us)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_recv_timeout(bh_socket_t socket, uint64 *timeout_us)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_send_buf_size(bh_socket_t socket, size_t bufsiz)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_send_buf_size(bh_socket_t socket, size_t *bufsiz)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_recv_buf_size(bh_socket_t socket, size_t bufsiz)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_recv_buf_size(bh_socket_t socket, size_t *bufsiz)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_broadcast(bh_socket_t socket, bool is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_broadcast(bh_socket_t socket, bool *is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_send_timeout(bh_socket_t socket, uint64 *timeout_us)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_recv_timeout(bh_socket_t socket, uint64 timeout_us)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_keep_alive(bh_socket_t socket, bool is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_keep_alive(bh_socket_t socket, bool *is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_reuse_addr(bh_socket_t socket, bool is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_reuse_addr(bh_socket_t socket, bool *is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_reuse_port(bh_socket_t socket, bool is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_reuse_port(bh_socket_t socket, bool *is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_linger(bh_socket_t socket, bool is_enabled, int linger_s)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_linger(bh_socket_t socket, bool *is_enabled, int *linger_s)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_tcp_no_delay(bh_socket_t socket, bool is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_tcp_no_delay(bh_socket_t socket, bool *is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_tcp_quick_ack(bh_socket_t socket, bool is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_tcp_quick_ack(bh_socket_t socket, bool *is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_tcp_keep_idle(bh_socket_t socket, uint32 time_s)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_tcp_keep_idle(bh_socket_t socket, uint32 *time_s)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_tcp_keep_intvl(bh_socket_t socket, uint32 time_s)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_tcp_keep_intvl(bh_socket_t socket, uint32 *time_s)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_tcp_fastopen_connect(bh_socket_t socket, bool is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_tcp_fastopen_connect(bh_socket_t socket, bool *is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_ip_multicast_loop(bh_socket_t socket, bool ipv6, bool is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_ip_multicast_loop(bh_socket_t socket, bool ipv6, bool *is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_ip_add_membership(bh_socket_t socket,
+                                bh_ip_addr_buffer_t *imr_multiaddr,
+                                uint32_t imr_interface, bool is_ipv6)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_ip_drop_membership(bh_socket_t socket,
+                                 bh_ip_addr_buffer_t *imr_multiaddr,
+                                 uint32_t imr_interface, bool is_ipv6)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_ip_ttl(bh_socket_t socket, uint8_t ttl_s)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_ip_ttl(bh_socket_t socket, uint8_t *ttl_s)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_ip_multicast_ttl(bh_socket_t socket, uint8_t ttl_s)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_ip_multicast_ttl(bh_socket_t socket, uint8_t *ttl_s)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_set_ipv6_only(bh_socket_t socket, bool is_enabled)
+{
+    return BHT_ERROR;
+}
+
+int
+os_socket_get_ipv6_only(bh_socket_t socket, bool *is_enabled)
+{
+    return BHT_ERROR;
+}
+
+static void
+swap16(uint8 *pData)
+{
+    uint8 value = *pData;
+    *(pData) = *(pData + 1);
+    *(pData + 1) = value;
+}
+
+static void
+swap32(uint8 *pData)
+{
+    uint8 value = *pData;
+    *pData = *(pData + 3);
+    *(pData + 3) = value;
+
+    value = *(pData + 1);
+    *(pData + 1) = *(pData + 2);
+    *(pData + 2) = value;
+}
+
+/** In-enclave implementation of POSIX functions **/
+static bool
+is_little_endian()
+{
+    long i = 0x01020304;
+    unsigned char *c = (unsigned char *)&i;
+    return (*c == 0x04) ? true : false;
+}
+
+uint16
+htons(uint16 value)
+{
+    uint16 ret;
+    if (is_little_endian()) {
+        ret = value;
+        swap16((uint8 *)&ret);
+        return ret;
+    }
+
+    return value;
+}
+
+uint32
+htonl(uint32 value)
+{
+    uint32 ret;
+    if (is_little_endian()) {
+        ret = value;
+        swap32((uint8 *)&ret);
+        return ret;
+    }
+
+    return value;
+}
diff --git a/core/shared/platform/rt-thread/rtt_thread.c b/core/shared/platform/rt-thread/rtt_thread.c
new file mode 100644
index 000000000..5f988fad0
--- /dev/null
+++ b/core/shared/platform/rt-thread/rtt_thread.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright 2024 Sony Semiconductor Solutions Corporation.
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "platform_api_vmcore.h"
+#include "platform_api_extension.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <stdint.h>
+
+struct os_thread_data;
+typedef struct os_thread_wait_node *os_thread_wait_list;
+typedef struct os_thread_wait_node {
+    /* Binary semaphore */
+    rt_sem_t sem;
+    os_thread_wait_list next;
+} os_thread_wait_node;
+
+typedef struct os_thread_data {
+    /* Next thread data */
+    struct os_thread_data *next;
+    /* Thread handle */
+    rt_thread_t handle;
+    /* Thread start routine */
+    thread_start_routine_t start_routine;
+    /* Thread start routine argument */
+    void *arg;
+    /* Wait node of current thread */
+    os_thread_wait_node wait_node;
+    /* Lock for waiting list */
+    rt_mutex_t wait_list_lock;
+    /* Waiting list of other threads who are joining this thread */
+    os_thread_wait_list thread_wait_list;
+} os_thread_data;
+
+/* Lock for thread data list */
+static rt_mutex_t thread_data_lock;
+
+static bool is_thread_sys_inited = false;
+
+/* Thread data list */
+static os_thread_data *thread_data_list = NULL;
+
+/* Thread data of supervisor thread */
+static os_thread_data supervisor_thread_data;
+
+/* Thread name index */
+static int thread_name_index = 0;
+
+static void
+thread_data_list_add(os_thread_data *thread_data)
+{
+    rt_mutex_take(thread_data_lock, RT_WAITING_FOREVER);
+    if (!thread_data_list)
+        thread_data_list = thread_data;
+    else {
+        /* If already in list, just return */
+        os_thread_data *p = thread_data_list;
+        while (p) {
+            if (p == thread_data) {
+                rt_mutex_release(thread_data_lock);
+                return;
+            }
+            p = p->next;
+        }
+
+        /* Set as head of list */
+        thread_data->next = thread_data_list;
+        thread_data_list = thread_data;
+    }
+    rt_mutex_release(thread_data_lock);
+}
+
+static void
+os_thread_wrapper(void *arg)
+{
+    os_thread_data *thread_data = arg;
+
+    thread_data->handle = rt_thread_self();
+    thread_data_list_add(thread_data);
+
+    thread_data->start_routine(thread_data->arg);
+    rt_kprintf("start_routine quit\n");
+    os_thread_exit(NULL);
+}
+
+static void
+thread_data_list_remove(os_thread_data *thread_data)
+{
+    rt_mutex_take(thread_data_lock, RT_WAITING_FOREVER);
+    if (thread_data_list) {
+        if (thread_data_list == thread_data)
+            thread_data_list = thread_data_list->next;
+        else {
+            /* Search and remove it from list */
+            os_thread_data *p = thread_data_list;
+            while (p && p->next != thread_data)
+                p = p->next;
+            if (p && p->next == thread_data)
+                p->next = p->next->next;
+        }
+    }
+    rt_mutex_release(thread_data_lock);
+}
+
+static os_thread_data *
+thread_data_list_lookup(rt_thread_t handle)
+{
+    rt_mutex_take(thread_data_lock, RT_WAITING_FOREVER);
+    if (thread_data_list) {
+        os_thread_data *p = thread_data_list;
+        while (p) {
+            if (p->handle == handle) {
+                /* Found */
+                rt_mutex_release(thread_data_lock);
+                return p;
+            }
+            p = p->next;
+        }
+    }
+    rt_mutex_release(thread_data_lock);
+    return NULL;
+}
+
+static os_thread_data *
+thread_data_current()
+{
+    rt_thread_t handle = rt_thread_self();
+    return thread_data_list_lookup(handle);
+}
+
+int
+os_thread_sys_init()
+{
+    if (is_thread_sys_inited)
+        return BHT_OK;
+
+    if (!(thread_data_lock =
+              rt_mutex_create("thread_data_lock_mutex", RT_IPC_FLAG_FIFO)))
+        return BHT_ERROR;
+
+    /* Initialize supervisor thread data */
+    memset(&supervisor_thread_data, 0, sizeof(supervisor_thread_data));
+
+    if (!(supervisor_thread_data.wait_node.sem =
+              rt_sem_create("spvr", 0, RT_IPC_FLAG_PRIO))) {
+        rt_mutex_delete(thread_data_lock);
+        return BHT_ERROR;
+    }
+
+    supervisor_thread_data.handle = rt_thread_self();
+    /* Set as head of thread data list */
+    thread_data_list = &supervisor_thread_data;
+
+    is_thread_sys_inited = true;
+    return BHT_OK;
+}
+
+void
+os_thread_sys_destroy()
+{
+    if (is_thread_sys_inited) {
+        rt_sem_release(supervisor_thread_data.wait_node.sem);
+        rt_mutex_delete(thread_data_lock);
+        is_thread_sys_inited = false;
+    }
+}
+
+korp_tid
+os_self_thread(void)
+{
+    return rt_thread_self();
+}
+
+uint8 *
+os_thread_get_stack_boundary(void)
+{
+    rt_thread_t tid = rt_thread_self();
+    return tid->stack_addr;
+}
+
+void
+os_thread_jit_write_protect_np(bool enabled)
+{}
+
+int
+os_mutex_init(korp_mutex *mutex)
+{
+    return rt_mutex_init(mutex, "wamr0", RT_IPC_FLAG_FIFO);
+}
+
+int
+os_mutex_destroy(korp_mutex *mutex)
+{
+    return rt_mutex_detach(mutex);
+}
+
+int
+os_mutex_lock(korp_mutex *mutex)
+{
+    return rt_mutex_take(mutex, RT_WAITING_FOREVER);
+}
+
+int
+os_mutex_unlock(korp_mutex *mutex)
+{
+    return rt_mutex_release(mutex);
+}
+
+/*
+ * functions below was not implement
+ */
+
+int
+os_cond_init(korp_cond *cond)
+{
+    return 0;
+}
+
+int
+os_cond_destroy(korp_cond *cond)
+{
+    return 0;
+}
+
+int
+os_cond_wait(korp_cond *cond, korp_mutex *mutex)
+{
+    return 0;
+}
+
+int
+os_cond_signal(korp_cond *cond)
+{
+    return 0;
+}
+
+int
+os_cond_reltimedwait(korp_cond *cond, korp_mutex *mutex, uint64 useconds)
+{
+    return 0;
+}
+
+int
+os_rwlock_init(korp_rwlock *lock)
+{
+    return BHT_OK;
+}
+
+int
+os_rwlock_rdlock(korp_rwlock *lock)
+{
+
+    return BHT_OK;
+}
+
+int
+os_rwlock_wrlock(korp_rwlock *lock)
+{
+
+    return BHT_OK;
+}
+
+int
+os_rwlock_unlock(korp_rwlock *lock)
+{
+    return BHT_OK;
+}
+
+int
+os_rwlock_destroy(korp_rwlock *lock)
+{
+    return BHT_OK;
+}
+
+int
+os_thread_create_with_prio(korp_tid *p_tid, thread_start_routine_t start,
+                           void *arg, unsigned int stack_size, int prio)
+{
+    os_thread_data *thread_data;
+    char thread_name[32];
+    void *stack;
+
+    if (!p_tid || !stack_size)
+        return BHT_ERROR;
+
+    /* Create and initialize thread data */
+    if (!(thread_data = rt_malloc(sizeof(os_thread_data))))
+        return BHT_ERROR;
+
+    memset(thread_data, 0, sizeof(os_thread_data));
+
+    thread_data->start_routine = start;
+    thread_data->arg = arg;
+
+    if (!(thread_data->wait_node.sem =
+              rt_sem_create("sem", 0, RT_IPC_FLAG_PRIO)))
+        goto fail1;
+
+    if (!(thread_data->wait_list_lock =
+              rt_mutex_create("wait_list_lock_mutex", RT_IPC_FLAG_FIFO)))
+        goto fail2;
+
+    snprintf(thread_name, sizeof(thread_name), "%s%d", "wasm-thread-",
+             ++thread_name_index);
+
+    thread_data->handle = rt_thread_create(thread_name, os_thread_wrapper,
+                                           thread_data, stack_size, 15, 5);
+    if (thread_data->handle == RT_NULL) {
+        rt_kprintf("os_thread_create_with_prio failed, tid=%d\n",
+                   thread_data->handle);
+        goto fail3;
+    }
+
+    thread_data_list_add(thread_data);
+    *p_tid = thread_data->handle;
+    rt_thread_startup(*p_tid);
+    return BHT_OK;
+
+fail3:
+    rt_mutex_delete(thread_data->wait_list_lock);
+fail2:
+    rt_sem_delete(thread_data->wait_node.sem);
+fail1:
+    rt_free(thread_data);
+    return BHT_ERROR;
+}
+
+int
+os_thread_create(korp_tid *p_tid, thread_start_routine_t start, void *arg,
+                 unsigned int stack_size)
+{
+    return os_thread_create_with_prio(p_tid, start, arg, stack_size,
+                                      BH_THREAD_DEFAULT_PRIORITY);
+}
+
+int
+os_thread_detach(korp_tid thread)
+{
+    /* Do nothing */
+    (void)thread;
+    return BHT_OK;
+}
+
+int
+os_thread_join(korp_tid thread, void **value_ptr)
+{
+    os_thread_data *thread_data, *curr_thread_data;
+    rt_thread_t handle = thread;
+
+    (void)value_ptr;
+
+    /* Get thread data of current thread */
+    curr_thread_data = thread_data_current();
+    curr_thread_data->wait_node.next = NULL;
+
+    /* Get thread data */
+    thread_data = thread_data_list_lookup(handle);
+
+    rt_mutex_take(thread_data->wait_list_lock, RT_WAITING_FOREVER);
+    if (!thread_data->thread_wait_list)
+        thread_data->thread_wait_list = &curr_thread_data->wait_node;
+    else {
+        /* Add to end of waiting list */
+        os_thread_wait_node *p = thread_data->thread_wait_list;
+        while (p->next)
+            p = p->next;
+        p->next = &curr_thread_data->wait_node;
+    }
+    rt_mutex_release(thread_data->wait_list_lock);
+
+    /* Wait the sem */
+    rt_sem_take(curr_thread_data->wait_node.sem, RT_WAITING_FOREVER);
+    return BHT_OK;
+}
+
+static void
+os_thread_cleanup(void)
+{
+    os_thread_data *thread_data = thread_data_current();
+    os_thread_wait_list thread_wait_list;
+    rt_mutex_t wait_list_lock;
+    rt_sem_t wait_node_sem;
+
+    // bh_assert(thread_data != NULL);
+    wait_list_lock = thread_data->wait_list_lock;
+    thread_wait_list = thread_data->thread_wait_list;
+    wait_node_sem = thread_data->wait_node.sem;
+
+    rt_mutex_take(wait_list_lock, RT_WAITING_FOREVER);
+    if (thread_wait_list) {
+        /* Signal each joining thread */
+        os_thread_wait_list head = thread_wait_list;
+        while (head) {
+            os_thread_wait_list next = head->next;
+            rt_sem_release(head->sem);
+            head = next;
+        }
+    }
+    rt_mutex_release(wait_list_lock);
+
+    /* Free sem and lock */
+    rt_sem_delete(wait_node_sem);
+    rt_mutex_delete(wait_list_lock);
+
+    thread_data_list_remove(thread_data);
+    rt_free(thread_data);
+}
+
+void
+os_thread_exit(void *retval)
+{
+    (void)retval;
+    os_thread_cleanup();
+    // vTaskDelete(NULL);
+}
+
+int
+os_thread_kill(korp_tid tid, int sig)
+{
+    return rt_thread_kill(tid, sig);
+}
diff --git a/core/shared/platform/windows/win_file.c b/core/shared/platform/windows/win_file.c
index 63dfb5b5f..408d0d00c 100644
--- a/core/shared/platform/windows/win_file.c
+++ b/core/shared/platform/windows/win_file.c
@@ -1540,6 +1540,24 @@ create_stdio_handle(HANDLE raw_stdio_handle, DWORD stdio)
     return stdio_handle;
 }
 
+bool
+os_is_stdin_handle(os_file_handle fd)
+{
+    return fd->raw.handle == GetStdHandle(STD_INPUT_HANDLE);
+}
+
+bool
+os_is_stdout_handle(os_file_handle fd)
+{
+    return fd->raw.handle == GetStdHandle(STD_OUTPUT_HANDLE);
+}
+
+bool
+os_is_stderr_handle(os_file_handle fd)
+{
+    return fd->raw.handle == GetStdHandle(STD_ERROR_HANDLE);
+}
+
 os_file_handle
 os_convert_stdin_handle(os_raw_file_handle raw_stdin)
 {
diff --git a/core/shared/platform/zephyr/platform_internal.h b/core/shared/platform/zephyr/platform_internal.h
index 3c0f55266..fa8fbd861 100644
--- a/core/shared/platform/zephyr/platform_internal.h
+++ b/core/shared/platform/zephyr/platform_internal.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-FileCopyrightText: 2024 Siemens AG (For Zephyr usermode changes)
  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  */
 
@@ -18,7 +19,6 @@
 #include <misc/printk.h>
 #endif
 #else /* else of KERNEL_VERSION_NUMBER < 0x030200 */
-#include <zephyr/kernel.h>
 #include <zephyr/sys/printk.h>
 #endif /* end of KERNEL_VERSION_NUMBER < 0x030200 */
 
@@ -37,12 +37,14 @@
 #endif
 
 #if KERNEL_VERSION_NUMBER < 0x030200 /* version 3.2.0 */
+#include <zephyr.h>
 #include <net/net_pkt.h>
 #include <net/net_if.h>
 #include <net/net_ip.h>
 #include <net/net_core.h>
 #include <net/net_context.h>
 #else /* else of KERNEL_VERSION_NUMBER < 0x030200 */
+#include <zephyr/kernel.h>
 #include <zephyr/net/net_pkt.h>
 #include <zephyr/net/net_if.h>
 #include <zephyr/net/net_ip.h>
@@ -50,6 +52,11 @@
 #include <zephyr/net/net_context.h>
 #endif /* end of KERNEL_VERSION_NUMBER < 0x030200 */
 
+#ifdef CONFIG_USERSPACE
+#include <zephyr/sys/mutex.h>
+#include <zephyr/sys/sem.h>
+#endif /* end of CONFIG_USERSPACE */
+
 #if KERNEL_VERSION_NUMBER >= 0x030300 /* version 3.3.0 */
 #include <zephyr/cache.h>
 #endif /* end of KERNEL_VERSION_NUMBER > 0x030300 */
@@ -64,10 +71,39 @@
 #endif
 #endif
 
+#ifdef signbit /* probably since Zephyr v3.5.0 a new picolib is included */
+#define BH_HAS_SIGNBIT 1
+#endif
+
 #ifndef BH_PLATFORM_ZEPHYR
 #define BH_PLATFORM_ZEPHYR
 #endif
 
+// Synchronization primitives for usermode
+#ifdef CONFIG_USERSPACE
+#define mutex_t struct sys_mutex
+#define mutex_init(mtx) sys_mutex_init(mtx)
+#define mutex_lock(mtx, timeout) sys_mutex_lock(mtx, timeout)
+#define mutex_unlock(mtx) sys_mutex_unlock(mtx)
+
+#define sem_t struct sys_sem
+#define sem_init(sem, init_count, limit) sys_sem_init(sem, init_count, limit)
+#define sem_give(sem) sys_sem_give(sem)
+#define sem_take(sem, timeout) sys_sem_take(sem, timeout)
+#define sem_count_get(sem) sys_sem_count_get(sem)
+#else /* else of CONFIG_USERSPACE */
+#define mutex_t struct k_mutex
+#define mutex_init(mtx) k_mutex_init(mtx)
+#define mutex_lock(mtx, timeout) k_mutex_lock(mtx, timeout)
+#define mutex_unlock(mtx) k_mutex_unlock(mtx)
+
+#define sem_t struct k_sem
+#define sem_init(sem, init_count, limit) k_sem_init(sem, init_count, limit)
+#define sem_give(sem) k_sem_give(sem)
+#define sem_take(sem, timeout) k_sem_take(sem, timeout)
+#define sem_count_get(sem) k_sem_count_get(sem)
+#endif /* end of CONFIG_USERSPACE */
+
 #define BH_APPLET_PRESERVED_STACK_SIZE (2 * BH_KB)
 
 /* Default thread priority */
@@ -75,7 +111,7 @@
 
 typedef struct k_thread korp_thread;
 typedef korp_thread *korp_tid;
-typedef struct k_mutex korp_mutex;
+typedef mutex_t korp_mutex;
 typedef unsigned int korp_sem;
 
 /* korp_rwlock is used in platform_api_extension.h,
@@ -87,7 +123,7 @@ typedef struct {
 struct os_thread_wait_node;
 typedef struct os_thread_wait_node *os_thread_wait_list;
 typedef struct korp_cond {
-    struct k_mutex wait_list_lock;
+    mutex_t wait_list_lock;
     os_thread_wait_list thread_wait_list;
 } korp_cond;
 
@@ -120,11 +156,14 @@ float fmaxf(float x, float y);
 float rintf(float x);
 float fabsf(float x);
 float truncf(float x);
-int signbit(double x);
 int isnan(double x);
 double pow(double x, double y);
 double scalbn(double x, int n);
 
+#ifndef BH_HAS_SIGNBIT
+int signbit(double x);
+#endif
+
 unsigned long long int strtoull(const char *nptr, char **endptr, int base);
 double strtod(const char *nptr, char **endptr);
 float strtof(const char *nptr, char **endptr);
diff --git a/core/shared/platform/zephyr/zephyr_platform.c b/core/shared/platform/zephyr/zephyr_platform.c
index 490de34d8..b383def67 100644
--- a/core/shared/platform/zephyr/zephyr_platform.c
+++ b/core/shared/platform/zephyr/zephyr_platform.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-FileCopyrightText: 2024 Siemens AG (For Zephyr usermode changes)
  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  */
 
@@ -35,12 +36,14 @@ disable_mpu_rasr_xn(void)
 #endif /* end of CONFIG_ARM_MPU */
 #endif
 
+#ifndef CONFIG_USERSPACE
 static int
 _stdout_hook_iwasm(int c)
 {
     printk("%c", (char)c);
     return 1;
 }
+#endif
 
 int
 os_thread_sys_init();
@@ -51,9 +54,11 @@ os_thread_sys_destroy();
 int
 bh_platform_init()
 {
+#ifndef CONFIG_USERSPACE
     extern void __stdout_hook_install(int (*hook)(int));
     /* Enable printf() in Zephyr */
     __stdout_hook_install(_stdout_hook_iwasm);
+#endif
 
 #if WASM_ENABLE_AOT != 0
 #ifdef CONFIG_ARM_MPU
diff --git a/core/shared/platform/zephyr/zephyr_thread.c b/core/shared/platform/zephyr/zephyr_thread.c
index 53ca71f62..628a842d6 100644
--- a/core/shared/platform/zephyr/zephyr_thread.c
+++ b/core/shared/platform/zephyr/zephyr_thread.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-FileCopyrightText: 2024 Siemens AG (For Zephyr usermode changes)
  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  */
 
@@ -33,22 +34,22 @@
 static K_THREAD_STACK_ARRAY_DEFINE(mpu_stacks, BH_ZEPHYR_MPU_STACK_COUNT,
                                    BH_ZEPHYR_MPU_STACK_SIZE);
 static bool mpu_stack_allocated[BH_ZEPHYR_MPU_STACK_COUNT];
-static struct k_mutex mpu_stack_lock;
+static mutex_t mpu_stack_lock;
 
 static char *
 mpu_stack_alloc()
 {
     int i;
 
-    k_mutex_lock(&mpu_stack_lock, K_FOREVER);
+    mutex_lock(&mpu_stack_lock, K_FOREVER);
     for (i = 0; i < BH_ZEPHYR_MPU_STACK_COUNT; i++) {
         if (!mpu_stack_allocated[i]) {
             mpu_stack_allocated[i] = true;
-            k_mutex_unlock(&mpu_stack_lock);
+            mutex_unlock(&mpu_stack_lock);
             return (char *)mpu_stacks[i];
         }
     }
-    k_mutex_unlock(&mpu_stack_lock);
+    mutex_unlock(&mpu_stack_lock);
     return NULL;
 }
 
@@ -57,17 +58,17 @@ mpu_stack_free(char *stack)
 {
     int i;
 
-    k_mutex_lock(&mpu_stack_lock, K_FOREVER);
+    mutex_lock(&mpu_stack_lock, K_FOREVER);
     for (i = 0; i < BH_ZEPHYR_MPU_STACK_COUNT; i++) {
         if ((char *)mpu_stacks[i] == stack)
             mpu_stack_allocated[i] = false;
     }
-    k_mutex_unlock(&mpu_stack_lock);
+    mutex_unlock(&mpu_stack_lock);
 }
 #endif
 
 typedef struct os_thread_wait_node {
-    struct k_sem sem;
+    sem_t sem;
     os_thread_wait_list next;
 } os_thread_wait_node;
 
@@ -79,7 +80,7 @@ typedef struct os_thread_data {
     /* Jeff thread local root */
     void *tlr;
     /* Lock for waiting list */
-    struct k_mutex wait_list_lock;
+    mutex_t wait_list_lock;
     /* Waiting list of other threads who are joining this thread */
     os_thread_wait_list thread_wait_list;
     /* Thread stack size */
@@ -106,13 +107,13 @@ static bool is_thread_sys_inited = false;
 static os_thread_data supervisor_thread_data;
 
 /* Lock for thread data list */
-static struct k_mutex thread_data_lock;
+static mutex_t thread_data_lock;
 
 /* Thread data list */
 static os_thread_data *thread_data_list = NULL;
 
 /* Lock for thread object list */
-static struct k_mutex thread_obj_lock;
+static mutex_t thread_obj_lock;
 
 /* Thread object list */
 static os_thread_obj *thread_obj_list = NULL;
@@ -120,7 +121,7 @@ static os_thread_obj *thread_obj_list = NULL;
 static void
 thread_data_list_add(os_thread_data *thread_data)
 {
-    k_mutex_lock(&thread_data_lock, K_FOREVER);
+    mutex_lock(&thread_data_lock, K_FOREVER);
     if (!thread_data_list)
         thread_data_list = thread_data;
     else {
@@ -128,7 +129,7 @@ thread_data_list_add(os_thread_data *thread_data)
         os_thread_data *p = thread_data_list;
         while (p) {
             if (p == thread_data) {
-                k_mutex_unlock(&thread_data_lock);
+                mutex_unlock(&thread_data_lock);
                 return;
             }
             p = p->next;
@@ -138,13 +139,13 @@ thread_data_list_add(os_thread_data *thread_data)
         thread_data->next = thread_data_list;
         thread_data_list = thread_data;
     }
-    k_mutex_unlock(&thread_data_lock);
+    mutex_unlock(&thread_data_lock);
 }
 
 static void
 thread_data_list_remove(os_thread_data *thread_data)
 {
-    k_mutex_lock(&thread_data_lock, K_FOREVER);
+    mutex_lock(&thread_data_lock, K_FOREVER);
     if (thread_data_list) {
         if (thread_data_list == thread_data)
             thread_data_list = thread_data_list->next;
@@ -157,32 +158,32 @@ thread_data_list_remove(os_thread_data *thread_data)
                 p->next = p->next->next;
         }
     }
-    k_mutex_unlock(&thread_data_lock);
+    mutex_unlock(&thread_data_lock);
 }
 
 static os_thread_data *
 thread_data_list_lookup(k_tid_t tid)
 {
-    k_mutex_lock(&thread_data_lock, K_FOREVER);
+    mutex_lock(&thread_data_lock, K_FOREVER);
     if (thread_data_list) {
         os_thread_data *p = thread_data_list;
         while (p) {
             if (p->tid == tid) {
                 /* Found */
-                k_mutex_unlock(&thread_data_lock);
+                mutex_unlock(&thread_data_lock);
                 return p;
             }
             p = p->next;
         }
     }
-    k_mutex_unlock(&thread_data_lock);
+    mutex_unlock(&thread_data_lock);
     return NULL;
 }
 
 static void
 thread_obj_list_add(os_thread_obj *thread_obj)
 {
-    k_mutex_lock(&thread_obj_lock, K_FOREVER);
+    mutex_lock(&thread_obj_lock, K_FOREVER);
     if (!thread_obj_list)
         thread_obj_list = thread_obj;
     else {
@@ -190,14 +191,14 @@ thread_obj_list_add(os_thread_obj *thread_obj)
         thread_obj->next = thread_obj_list;
         thread_obj_list = thread_obj;
     }
-    k_mutex_unlock(&thread_obj_lock);
+    mutex_unlock(&thread_obj_lock);
 }
 
 static void
 thread_obj_list_reclaim()
 {
     os_thread_obj *p, *p_prev;
-    k_mutex_lock(&thread_obj_lock, K_FOREVER);
+    mutex_lock(&thread_obj_lock, K_FOREVER);
     p_prev = NULL;
     p = thread_obj_list;
     while (p) {
@@ -218,7 +219,7 @@ thread_obj_list_reclaim()
             p = p->next;
         }
     }
-    k_mutex_unlock(&thread_obj_lock);
+    mutex_unlock(&thread_obj_lock);
 }
 
 int
@@ -228,10 +229,10 @@ os_thread_sys_init()
         return BHT_OK;
 
 #if BH_ENABLE_ZEPHYR_MPU_STACK != 0
-    k_mutex_init(&mpu_stack_lock);
+    mutex_init(&mpu_stack_lock);
 #endif
-    k_mutex_init(&thread_data_lock);
-    k_mutex_init(&thread_obj_lock);
+    mutex_init(&thread_data_lock);
+    mutex_init(&thread_obj_lock);
 
     /* Initialize supervisor thread data */
     memset(&supervisor_thread_data, 0, sizeof(supervisor_thread_data));
@@ -264,19 +265,19 @@ os_thread_cleanup(void)
     os_thread_data *thread_data = thread_data_current();
 
     bh_assert(thread_data != NULL);
-    k_mutex_lock(&thread_data->wait_list_lock, K_FOREVER);
+    mutex_lock(&thread_data->wait_list_lock, K_FOREVER);
     if (thread_data->thread_wait_list) {
         /* Signal each joining thread */
         os_thread_wait_list head = thread_data->thread_wait_list;
         while (head) {
             os_thread_wait_list next = head->next;
-            k_sem_give(&head->sem);
+            sem_give(&head->sem);
             /* head will be freed by joining thread */
             head = next;
         }
         thread_data->thread_wait_list = NULL;
     }
-    k_mutex_unlock(&thread_data->wait_list_lock);
+    mutex_unlock(&thread_data->wait_list_lock);
 
     thread_data_list_remove(thread_data);
     /* Set flag to true for the next thread creating to
@@ -341,7 +342,7 @@ os_thread_create_with_prio(korp_tid *p_tid, thread_start_routine_t start,
     }
 
     memset(thread_data, 0, thread_data_size);
-    k_mutex_init(&thread_data->wait_list_lock);
+    mutex_init(&thread_data->wait_list_lock);
     thread_data->stack_size = stack_size;
     thread_data->tid = tid;
 
@@ -360,6 +361,8 @@ os_thread_create_with_prio(korp_tid *p_tid, thread_start_routine_t start,
 
     bh_assert(tid == thread_data->tid);
 
+    k_thread_name_set(tid, "wasm-zephyr");
+
     /* Set thread custom data */
     thread_data_list_add(thread_data);
     thread_obj_list_add((os_thread_obj *)tid);
@@ -394,14 +397,14 @@ os_thread_join(korp_tid thread, void **value_ptr)
     if (!(node = BH_MALLOC(sizeof(os_thread_wait_node))))
         return BHT_ERROR;
 
-    k_sem_init(&node->sem, 0, 1);
+    sem_init(&node->sem, 0, 1);
     node->next = NULL;
 
     /* Get thread data */
     thread_data = thread_data_list_lookup(thread);
     bh_assert(thread_data != NULL);
 
-    k_mutex_lock(&thread_data->wait_list_lock, K_FOREVER);
+    mutex_lock(&thread_data->wait_list_lock, K_FOREVER);
     if (!thread_data->thread_wait_list)
         thread_data->thread_wait_list = node;
     else {
@@ -411,10 +414,10 @@ os_thread_join(korp_tid thread, void **value_ptr)
             p = p->next;
         p->next = node;
     }
-    k_mutex_unlock(&thread_data->wait_list_lock);
+    mutex_unlock(&thread_data->wait_list_lock);
 
     /* Wait the sem */
-    k_sem_take(&node->sem, K_FOREVER);
+    sem_take(&node->sem, K_FOREVER);
 
     /* Wait some time for the thread to be actually terminated */
     k_sleep(Z_TIMEOUT_MS(100));
@@ -427,14 +430,14 @@ os_thread_join(korp_tid thread, void **value_ptr)
 int
 os_mutex_init(korp_mutex *mutex)
 {
-    k_mutex_init(mutex);
+    mutex_init(mutex);
     return BHT_OK;
 }
 
 int
 os_recursive_mutex_init(korp_mutex *mutex)
 {
-    k_mutex_init(mutex);
+    mutex_init(mutex);
     return BHT_OK;
 }
 
@@ -448,16 +451,16 @@ os_mutex_destroy(korp_mutex *mutex)
 int
 os_mutex_lock(korp_mutex *mutex)
 {
-    return k_mutex_lock(mutex, K_FOREVER);
+    return mutex_lock(mutex, K_FOREVER);
 }
 
 int
 os_mutex_unlock(korp_mutex *mutex)
 {
 #if KERNEL_VERSION_NUMBER >= 0x020200 /* version 2.2.0 */
-    return k_mutex_unlock(mutex);
+    return mutex_unlock(mutex);
 #else
-    k_mutex_unlock(mutex);
+    mutex_unlock(mutex);
     return 0;
 #endif
 }
@@ -465,7 +468,7 @@ os_mutex_unlock(korp_mutex *mutex)
 int
 os_cond_init(korp_cond *cond)
 {
-    k_mutex_init(&cond->wait_list_lock);
+    mutex_init(&cond->wait_list_lock);
     cond->thread_wait_list = NULL;
     return BHT_OK;
 }
@@ -486,10 +489,10 @@ os_cond_wait_internal(korp_cond *cond, korp_mutex *mutex, bool timed, int mills)
     if (!(node = BH_MALLOC(sizeof(os_thread_wait_node))))
         return BHT_ERROR;
 
-    k_sem_init(&node->sem, 0, 1);
+    sem_init(&node->sem, 0, 1);
     node->next = NULL;
 
-    k_mutex_lock(&cond->wait_list_lock, K_FOREVER);
+    mutex_lock(&cond->wait_list_lock, K_FOREVER);
     if (!cond->thread_wait_list)
         cond->thread_wait_list = node;
     else {
@@ -499,15 +502,15 @@ os_cond_wait_internal(korp_cond *cond, korp_mutex *mutex, bool timed, int mills)
             p = p->next;
         p->next = node;
     }
-    k_mutex_unlock(&cond->wait_list_lock);
+    mutex_unlock(&cond->wait_list_lock);
 
     /* Unlock mutex, wait sem and lock mutex again */
-    k_mutex_unlock(mutex);
-    k_sem_take(&node->sem, timed ? Z_TIMEOUT_MS(mills) : K_FOREVER);
-    k_mutex_lock(mutex, K_FOREVER);
+    mutex_unlock(mutex);
+    sem_take(&node->sem, timed ? Z_TIMEOUT_MS(mills) : K_FOREVER);
+    mutex_lock(mutex, K_FOREVER);
 
     /* Remove wait node from wait list */
-    k_mutex_lock(&cond->wait_list_lock, K_FOREVER);
+    mutex_lock(&cond->wait_list_lock, K_FOREVER);
     if (cond->thread_wait_list == node)
         cond->thread_wait_list = node->next;
     else {
@@ -518,7 +521,7 @@ os_cond_wait_internal(korp_cond *cond, korp_mutex *mutex, bool timed, int mills)
         p->next = node->next;
     }
     BH_FREE(node);
-    k_mutex_unlock(&cond->wait_list_lock);
+    mutex_unlock(&cond->wait_list_lock);
 
     return BHT_OK;
 }
@@ -556,10 +559,10 @@ int
 os_cond_signal(korp_cond *cond)
 {
     /* Signal the head wait node of wait list */
-    k_mutex_lock(&cond->wait_list_lock, K_FOREVER);
+    mutex_lock(&cond->wait_list_lock, K_FOREVER);
     if (cond->thread_wait_list)
-        k_sem_give(&cond->thread_wait_list->sem);
-    k_mutex_unlock(&cond->wait_list_lock);
+        sem_give(&cond->thread_wait_list->sem);
+    mutex_unlock(&cond->wait_list_lock);
 
     return BHT_OK;
 }
@@ -567,7 +570,7 @@ os_cond_signal(korp_cond *cond)
 uint8 *
 os_thread_get_stack_boundary()
 {
-#if defined(CONFIG_THREAD_STACK_INFO)
+#if defined(CONFIG_THREAD_STACK_INFO) && !defined(CONFIG_USERSPACE)
     korp_tid thread = k_current_get();
     return (uint8 *)thread->stack_info.start;
 #else
@@ -598,13 +601,13 @@ int
 os_cond_broadcast(korp_cond *cond)
 {
     os_thread_wait_node *node;
-    k_mutex_lock(&cond->wait_list_lock, K_FOREVER);
+    mutex_lock(&cond->wait_list_lock, K_FOREVER);
     node = cond->thread_wait_list;
     while (node) {
         os_thread_wait_node *next = node->next;
-        k_sem_give(&node->sem);
+        sem_give(&node->sem);
         node = next;
     }
-    k_mutex_unlock(&cond->wait_list_lock);
+    mutex_unlock(&cond->wait_list_lock);
     return BHT_OK;
 }
diff --git a/core/shared/utils/bh_atomic.h b/core/shared/utils/bh_atomic.h
index 4f7d9bc83..5148497f8 100644
--- a/core/shared/utils/bh_atomic.h
+++ b/core/shared/utils/bh_atomic.h
@@ -6,6 +6,7 @@
 #ifndef _BH_ATOMIC_H
 #define _BH_ATOMIC_H
 
+#include "bh_platform.h"
 #include "gnuc.h"
 
 #ifdef __cplusplus
diff --git a/core/shared/utils/bh_leb128.c b/core/shared/utils/bh_leb128.c
new file mode 100644
index 000000000..8e4b13dce
--- /dev/null
+++ b/core/shared/utils/bh_leb128.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "bh_leb128.h"
+
+bh_leb_read_status_t
+bh_leb_read(const uint8 *buf, const uint8 *buf_end, uint32 maxbits, bool sign,
+            uint64 *p_result, size_t *p_offset)
+{
+    uint64 result = 0;
+    uint32 shift = 0;
+    uint32 offset = 0, bcnt = 0;
+    uint64 byte;
+
+    while (true) {
+        /* uN or SN must not exceed ceil(N/7) bytes */
+        if (bcnt + 1 > (maxbits + 6) / 7) {
+            return BH_LEB_READ_TOO_LONG;
+        }
+
+        if ((uintptr_t)buf + offset + 1 < (uintptr_t)buf
+            || (uintptr_t)buf + offset + 1 > (uintptr_t)buf_end) {
+            return BH_LEB_READ_UNEXPECTED_END;
+        }
+        byte = buf[offset];
+        offset += 1;
+        result |= ((byte & 0x7f) << shift);
+        shift += 7;
+        bcnt += 1;
+        if ((byte & 0x80) == 0) {
+            break;
+        }
+    }
+
+    if (!sign && maxbits == 32 && shift >= maxbits) {
+        /* The top bits set represent values > 32 bits */
+        if (((uint8)byte) & 0xf0)
+            return BH_LEB_READ_OVERFLOW;
+    }
+    else if (sign && maxbits == 32) {
+        if (shift < maxbits) {
+            /* Sign extend, second-highest bit is the sign bit */
+            if ((uint8)byte & 0x40)
+                result |= (~((uint64)0)) << shift;
+        }
+        else {
+            /* The top bits should be a sign-extension of the sign bit */
+            bool sign_bit_set = ((uint8)byte) & 0x8;
+            int top_bits = ((uint8)byte) & 0xf0;
+            if ((sign_bit_set && top_bits != 0x70)
+                || (!sign_bit_set && top_bits != 0))
+                return BH_LEB_READ_OVERFLOW;
+        }
+    }
+    else if (sign && maxbits == 64) {
+        if (shift < maxbits) {
+            /* Sign extend, second-highest bit is the sign bit */
+            if ((uint8)byte & 0x40)
+                result |= (~((uint64)0)) << shift;
+        }
+        else {
+            /* The top bits should be a sign-extension of the sign bit */
+            bool sign_bit_set = ((uint8)byte) & 0x1;
+            int top_bits = ((uint8)byte) & 0xfe;
+
+            if ((sign_bit_set && top_bits != 0x7e)
+                || (!sign_bit_set && top_bits != 0))
+                return BH_LEB_READ_OVERFLOW;
+        }
+    }
+
+    *p_offset = offset;
+    *p_result = result;
+    return BH_LEB_READ_SUCCESS;
+}
\ No newline at end of file
diff --git a/core/shared/utils/bh_leb128.h b/core/shared/utils/bh_leb128.h
new file mode 100644
index 000000000..ce73b4b88
--- /dev/null
+++ b/core/shared/utils/bh_leb128.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _BH_LEB128_H
+#define _BH_LEB128_H
+
+#include "bh_platform.h"
+
+typedef enum {
+    BH_LEB_READ_SUCCESS,
+    BH_LEB_READ_TOO_LONG,
+    BH_LEB_READ_OVERFLOW,
+    BH_LEB_READ_UNEXPECTED_END,
+} bh_leb_read_status_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bh_leb_read_status_t
+bh_leb_read(const uint8 *buf, const uint8 *buf_end, uint32 maxbits, bool sign,
+            uint64 *p_result, size_t *p_offset);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/core/version.h b/core/version.h
index a1626f83c..4e2cbaefc 100644
--- a/core/version.h
+++ b/core/version.h
@@ -7,5 +7,5 @@
 #define _WAMR_VERSION_H_
 #define WAMR_VERSION_MAJOR 2
 #define WAMR_VERSION_MINOR 1
-#define WAMR_VERSION_PATCH 1
+#define WAMR_VERSION_PATCH 2
 #endif
diff --git a/doc/build_wamr.md b/doc/build_wamr.md
index 181f32ca0..2adb17f6a 100644
--- a/doc/build_wamr.md
+++ b/doc/build_wamr.md
@@ -28,7 +28,7 @@ The script `runtime_lib.cmake` defines a number of variables for configuring the
   - For ARM and THUMB, the format is \<arch>\[\<sub-arch>]\[_VFP], where \<sub-arch> is the ARM sub-architecture and the "_VFP" suffix means using VFP coprocessor registers s0-s15 (d0-d7) for passing arguments or returning results in standard procedure-call. Both \<sub-arch> and "_VFP" are optional, e.g. ARMV7, ARMV7_VFP, THUMBV7, THUMBV7_VFP and so on.
   - For AARCH64, the format is\<arch>[\<sub-arch>], VFP is enabled by default. \<sub-arch> is optional, e.g. AARCH64, AARCH64V8, AARCH64V8.1 and so on.
   - For RISCV64, the format is \<arch\>[_abi], where "_abi" is optional, currently the supported formats are RISCV64, RISCV64_LP64D and RISCV64_LP64: RISCV64 and RISCV64_LP64D are identical, using [LP64D](https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#named-abis) as abi (LP64 with hardware floating-point calling convention for FLEN=64). And RISCV64_LP64 uses [LP64](https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#named-abis) as abi (Integer calling-convention only, and hardware floating-point calling convention is not used).
-  - For RISCV32, the format is \<arch\>[_abi], where "_abi" is optional, currently the supported formats are RISCV32, RISCV32_ILP32D and RISCV32_ILP32: RISCV32 and RISCV32_ILP32D are identical, using [ILP32D](https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#named-abis) as abi (ILP32 with hardware floating-point calling convention for FLEN=64). And RISCV32_ILP32 uses [ILP32](https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#named-abis) as abi (Integer calling-convention only, and hardware floating-point calling convention is not used).
+  - For RISCV32, the format is \<arch\>[_abi], where "_abi" is optional, currently the supported formats are RISCV32, RISCV32_ILP32D, RISCV32_ILP32F and RISCV32_ILP32: RISCV32 and RISCV32_ILP32D are identical, using [ILP32D](https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#named-abis) as abi (ILP32 with hardware floating-point calling convention for FLEN=64). RISCV32_ILP32F uses [ILP32F](https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#named-abis) as abi (ILP32 with hardware floating-point calling convention for FLEN=32). And RISCV32_ILP32 uses [ILP32](https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#named-abis) as abi (Integer calling-convention only, and hardware floating-point calling convention is not used).
 
 ```bash
 cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM
diff --git a/product-mini/platforms/alios-things/aos.mk b/product-mini/platforms/alios-things/aos.mk
index 947a4a91a..7d98ca0b8 100644
--- a/product-mini/platforms/alios-things/aos.mk
+++ b/product-mini/platforms/alios-things/aos.mk
@@ -102,6 +102,7 @@ $(NAME)_SOURCES := ${SHARED_ROOT}/platform/alios/alios_platform.c \
                    ${SHARED_ROOT}/utils/bh_common.c \
                    ${SHARED_ROOT}/utils/bh_hashmap.c \
                    ${SHARED_ROOT}/utils/bh_list.c \
+                   ${SHARED_ROOT}/utils/bh_leb128.c \
                    ${SHARED_ROOT}/utils/bh_log.c \
                    ${SHARED_ROOT}/utils/bh_queue.c \
                    ${SHARED_ROOT}/utils/bh_vector.c \
diff --git a/product-mini/platforms/nuttx/CMakeLists.txt b/product-mini/platforms/nuttx/CMakeLists.txt
index e9fe5a9e3..ac6c47b91 100644
--- a/product-mini/platforms/nuttx/CMakeLists.txt
+++ b/product-mini/platforms/nuttx/CMakeLists.txt
@@ -18,9 +18,9 @@ elseif(CONFIG_ARCH_X86_64)
   set(WAMR_BUILD_TARGET X86_64)
 elseif(CONFIG_ARCH_XTENSA)
   set(WAMR_BUILD_TARGET XTENSA)
-elseif(CONFIG_ARCH_RV64GC OR CONFIG_ARCH_RV64)
+elseif(CONFIG_ARCH_RV64)
   set(WAMR_BUILD_TARGET RISCV64)
-elseif(CONFIG_ARCH_RV32IM OR CONFIG_ARCH_RV32)
+elseif(CONFIG_ARCH_RV32)
   set(WAMR_BUILD_TARGET RISCV32)
 elseif(CONFIG_ARCH_SIM)
   if(CONFIG_SIM_M32 OR CONFIG_HOST_X86)
diff --git a/product-mini/platforms/nuttx/wamr.mk b/product-mini/platforms/nuttx/wamr.mk
index 75bd69bef..38553e863 100644
--- a/product-mini/platforms/nuttx/wamr.mk
+++ b/product-mini/platforms/nuttx/wamr.mk
@@ -21,12 +21,6 @@ else ifeq ($(CONFIG_ARCH_X86_64),y)
 WAMR_BUILD_TARGET := X86_64
 else ifeq ($(CONFIG_ARCH_XTENSA),y)
 WAMR_BUILD_TARGET := XTENSA
-# RV64GC and RV32IM used in older
-# version NuttX
-else ifeq ($(CONFIG_ARCH_RV64GC),y)
-WAMR_BUILD_TARGET := RISCV64
-else ifeq ($(CONFIG_ARCH_RV32IM),y)
-WAMR_BUILD_TARGET := RISCV32
 else ifeq ($(CONFIG_ARCH_RV64),y)
 WAMR_BUILD_TARGET := RISCV64
 else ifeq ($(CONFIG_ARCH_RV32),y)
@@ -107,10 +101,10 @@ else ifeq (${WAMR_BUILD_TARGET}, RISCV32)
 
 ifeq (${CONFIG_ARCH_DPFPU},y)
   CFLAGS += -DBUILD_TARGET_RISCV32_ILP32D
-else ifneq (${CONFIG_ARCH_FPU},y)
-  CFLAGS += -DBUILD_TARGET_RISCV32_ILP32
+else ifeq (${CONFIG_ARCH_FPU},y)
+  CFLAGS += -DBUILD_TARGET_RISCV32_ILP32F
 else
-  $(error riscv32 ilp32f is unsupported)
+  CFLAGS += -DBUILD_TARGET_RISCV32_ILP32
 endif
 
   INVOKE_NATIVE += invokeNative_riscv.S
@@ -441,6 +435,7 @@ CSRCS += nuttx_platform.c \
          bh_common.c \
          bh_hashmap.c \
          bh_list.c \
+         bh_leb128.c \
          bh_log.c \
          bh_queue.c \
          bh_vector.c \
diff --git a/product-mini/platforms/rt-thread/iwasm.c b/product-mini/platforms/rt-thread/iwasm.c
index 9a21301d8..7d15a041d 100644
--- a/product-mini/platforms/rt-thread/iwasm.c
+++ b/product-mini/platforms/rt-thread/iwasm.c
@@ -11,6 +11,10 @@
 #include <dfs_file.h>
 #include <dfs_fs.h>
 
+#if WASM_ENABLE_LIBC_WASI != 0
+#include "../common/libc_wasi.c"
+#endif
+
 #ifdef WAMR_ENABLE_RTT_EXPORT
 
 #ifdef WAMR_RTT_EXPORT_VPRINTF
@@ -160,6 +164,15 @@ static NativeSymbol native_export_symbols[] = {
 
 #endif /* WAMR_ENABLE_RTT_EXPORT */
 
+static void *
+app_instance_func(wasm_module_inst_t module_inst, const char *func_name,
+                  int app_argc, char **app_argv)
+{
+    wasm_application_execute_func(module_inst, func_name, app_argc - 1,
+                                  app_argv + 1);
+    return wasm_runtime_get_exception(module_inst);
+}
+
 /**
  * run WASM module instance.
  * @param module_inst instance of wasm module
@@ -170,12 +183,8 @@ static NativeSymbol native_export_symbols[] = {
 static void *
 app_instance_main(wasm_module_inst_t module_inst, int app_argc, char **app_argv)
 {
-    const char *exception;
-
     wasm_application_execute_main(module_inst, app_argc, app_argv);
-    if ((exception = wasm_runtime_get_exception(module_inst)))
-        rt_kprintf("%s\n", exception);
-    return NULL;
+    return wasm_runtime_get_exception(module_inst);
 }
 
 rt_uint8_t *
@@ -214,28 +223,36 @@ void
 iwasm_help(void)
 {
 #ifdef WAMR_ENABLE_IWASM_PARAMS
-    rt_kputs("wrong input: iwasm [-t] [-m] [-s] <*.wasm> <wasm_args ...>\n"
-             "             iwasm [-h]\n");
-    rt_kputs("\t -h: show this tips.\n");
-    rt_kputs("\t -t: show time taking to run this app.\n");
-    rt_kputs("\t -m: show memory taking to run this app\n");
-    rt_kputs("\t wasm file name and exec params must behind of all vm-param\n");
+    rt_kputs("Usage: iwasm [-options] wasm_file [args...]\n");
+    rt_kputs("options:\n");
+    rt_kputs("  -t                       Show time taking to run this app.\n");
+    rt_kputs("  -m                       Show memory taking to run this app\n");
+    rt_kputs("  -f|--function name       Specify a function name of the module "
+             "to run rather than main\n");
+    rt_kputs("  --max-threads=n          Set maximum thread number per "
+             "cluster, default is 4\n");
 #else
-    rt_kputs("wrong input: iwasm <*.wasm> <wasm_args ...>\n");
+    rt_kputs("Usage: iwasm wasm_file [args...]\n");
 #endif /* WAMR_ENABLE_PARAMS */
 }
 
 int
 iwasm(int argc, char **argv)
 {
+    const char *exception = NULL;
+    const char *func_name = NULL;
     rt_uint8_t *wasm_file_buf = NULL;
     rt_uint32_t wasm_file_size;
-    rt_uint32_t stack_size = 4 * 1024, heap_size = 4 * 1024;
+    rt_uint32_t stack_size = 64 * 1024, heap_size = 256 * 1024;
     wasm_module_t wasm_module = NULL;
     wasm_module_inst_t wasm_module_inst = NULL;
     RuntimeInitArgs init_args;
     static char error_buf[128] = { 0 };
     /* avoid stack overflow */
+#if WASM_ENABLE_LIBC_WASI != 0
+    libc_wasi_parse_context_t wasi_parse_ctx;
+    memset(&wasi_parse_ctx, 0, sizeof(wasi_parse_ctx));
+#endif
 
 #ifdef WAMR_ENABLE_IWASM_PARAMS
     int i_arg_begin;
@@ -260,6 +277,17 @@ iwasm(int argc, char **argv)
             iwasm_help();
             return 0;
         }
+        else if (argv[i_arg_begin][1] == 'f') {
+            func_name = argv[++i_arg_begin];
+        }
+        else if (!strncmp(argv[i_arg_begin], "--max-threads=", 14)) {
+            if (argv[0][14] != '\0')
+                wasm_runtime_set_max_thread_num(atoi(argv[0] + 14));
+            else {
+                iwasm_help();
+                return 0;
+            }
+        }
         else if (argv[i_arg_begin][1] == 0x00) {
             continue;
         }
@@ -303,8 +331,8 @@ iwasm(int argc, char **argv)
     rt_thread_t tid;
     if (show_stack) {
         tid = rt_thread_self();
-        printf("thread stack addr: %p, size: %u, sp: %p\n", tid->stack_addr,
-               tid->stack_size, tid->sp);
+        rt_kprintf("thread stack addr: %p, size: %u, sp: %p\n", tid->stack_addr,
+                   tid->stack_size, tid->sp);
     }
 #endif /* WAMR_ENABLE_PARAMS */
 
@@ -326,6 +354,10 @@ iwasm(int argc, char **argv)
         rt_kprintf("%s\n", error_buf);
         goto fail2;
     }
+#if WASM_ENABLE_LIBC_WASI != 0
+    libc_wasi_init(wasm_module, argc, argv, &wasi_parse_ctx);
+#endif
+
     rt_memset(error_buf, 0x00, sizeof(error_buf));
     wasm_module_inst = wasm_runtime_instantiate(
         wasm_module, stack_size, heap_size, error_buf, sizeof(error_buf));
@@ -341,13 +373,31 @@ iwasm(int argc, char **argv)
     }
 #endif /* WAMR_ENABLE_PARAMS */
 
-    app_instance_main(wasm_module_inst, argc - i_arg_begin, &argv[i_arg_begin]);
+    if (func_name) {
+        exception = app_instance_func(wasm_module_inst, func_name,
+                                      argc - i_arg_begin, &argv[i_arg_begin]);
+    }
+    else {
+        exception = app_instance_main(wasm_module_inst, argc - i_arg_begin,
+                                      &argv[i_arg_begin]);
+        rt_kprintf("finshed run app_instance_main\n");
+    }
+
+    if (exception)
+        rt_kprintf("%s\n", exception);
+
+#if WASM_ENABLE_LIBC_WASI != 0
+    if (!exception) {
+        /* propagate wasi exit code. */
+        wasm_runtime_get_wasi_exit_code(wasm_module_inst);
+    }
+#endif
 
 #ifdef WAMR_ENABLE_IWASM_PARAMS
     if (show_time_exec) {
         ticks_exec = rt_tick_get() - ticks_exec;
-        printf("[iwasm] execute ticks took: %u [ticks/s = %u]\n", ticks_exec,
-               RT_TICK_PER_SECOND);
+        rt_kprintf("[iwasm] execute ticks took: %u [ticks/s = %u]\n",
+                   ticks_exec, RT_TICK_PER_SECOND);
     }
 #if defined(RT_USING_HEAP) && defined(RT_USING_MEMHEAP_AS_HEAP)
     if (show_mem) {
@@ -361,8 +411,8 @@ iwasm(int argc, char **argv)
     }
 #endif
     if (show_stack) {
-        printf("[iwasm] thread stack addr: %p, size: %u, sp: %p\n",
-               tid->stack_addr, tid->stack_size, tid->sp);
+        rt_kprintf("[iwasm] thread stack addr: %p, size: %u, sp: %p\n",
+                   tid->stack_addr, tid->stack_size, tid->sp);
     }
 
 #endif /* WAMR_ENABLE_PARAMS */
diff --git a/tests/unit/shared-utils/bh_leb128_test.cc b/tests/unit/shared-utils/bh_leb128_test.cc
new file mode 100644
index 000000000..f53864646
--- /dev/null
+++ b/tests/unit/shared-utils/bh_leb128_test.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "bh_leb128.h"
+#include "gtest/gtest.h"
+
+#include <vector>
+#include <type_traits>
+
+template<typename T>
+void
+run_read_leb_test(std::vector<uint8_t> data,
+                  bh_leb_read_status_t expected_status, T expected_value)
+{
+    size_t offset = 0;
+    uint64 value;
+    bh_leb_read_status_t status =
+        bh_leb_read(data.data(), data.data() + data.size(), sizeof(T) * 8,
+                    std::is_signed<T>::value, &value, &offset);
+    ASSERT_EQ(expected_status, status);
+    if (status == BH_LEB_READ_SUCCESS) {
+        ASSERT_EQ(data.size(), offset);
+        ASSERT_EQ(expected_value, (T)value);
+    }
+}
+
+TEST(bh_leb128_test_suite, read_leb_u32)
+{
+    run_read_leb_test<uint32>({ 0 }, BH_LEB_READ_SUCCESS,
+                              0); // min value
+    run_read_leb_test<uint32>({ 2 }, BH_LEB_READ_SUCCESS,
+                              2); // single-byte value
+    run_read_leb_test<uint32>({ 127 }, BH_LEB_READ_SUCCESS,
+                              127); // max single-byte value
+    run_read_leb_test<uint32>({ 128, 1 }, BH_LEB_READ_SUCCESS,
+                              128); // min value with continuation bit
+    run_read_leb_test<uint32>({ 160, 138, 32 }, BH_LEB_READ_SUCCESS,
+                              525600); // arbitrary value
+    run_read_leb_test<uint32>({ 255, 255, 255, 255, 15 }, BH_LEB_READ_SUCCESS,
+                              UINT32_MAX); // max value
+    run_read_leb_test<uint32>({ 255, 255, 255, 255, 16 }, BH_LEB_READ_OVERFLOW,
+                              UINT32_MAX); // overflow
+    run_read_leb_test<uint32>({ 255, 255, 255, 255, 128 }, BH_LEB_READ_TOO_LONG,
+                              0);
+    run_read_leb_test<uint32>({ 128 }, BH_LEB_READ_UNEXPECTED_END, 0);
+}
+
+TEST(bh_leb128_test_suite, read_leb_i64)
+{
+    run_read_leb_test<int64>({ 184, 188, 195, 159, 237, 209, 128, 2 },
+                             BH_LEB_READ_SUCCESS,
+                             1128712371232312); // arbitrary value
+    run_read_leb_test<int64>(
+        { 128, 128, 128, 128, 128, 128, 128, 128, 128, 127 },
+        BH_LEB_READ_SUCCESS,
+        (uint64)INT64_MIN); // min value
+    run_read_leb_test<int64>({ 255, 255, 255, 255, 255, 255, 255, 255, 255, 0 },
+                             BH_LEB_READ_SUCCESS,
+                             INT64_MAX); // max value
+}
\ No newline at end of file
diff --git a/tests/wamr-test-suites/spec-test-script/all.py b/tests/wamr-test-suites/spec-test-script/all.py
index dadf394da..005874eee 100644
--- a/tests/wamr-test-suites/spec-test-script/all.py
+++ b/tests/wamr-test-suites/spec-test-script/all.py
@@ -14,7 +14,7 @@ import time
 
 """
 The script itself has to be put under the same directory with the "spec".
-To run a single non-GC and non-memory64 case with interpreter mode:
+To run a single non-GC case with interpreter mode:
   cd workspace
   python3 runtest.py --wast2wasm wabt/bin/wat2wasm --interpreter iwasm \
     spec/test/core/xxx.wast
@@ -22,7 +22,7 @@ To run a single non-GC case with aot mode:
   cd workspace
   python3 runtest.py --aot --wast2wasm wabt/bin/wat2wasm --interpreter iwasm \
     --aot-compiler wamrc spec/test/core/xxx.wast
-To run a single GC case or single memory64 case:
+To run a single GC case case:
   cd workspace
   python3 runtest.py --wast2wasm spec/interpreter/wasm --interpreter iwasm \
     --aot-compiler wamrc --gc spec/test/core/xxx.wast
@@ -66,6 +66,7 @@ AVAILABLE_TARGETS = [
     "RISCV64_LP64D",
     "THUMBV7",
     "THUMBV7_VFP",
+    "XTENSA",
 ]
 
 def ignore_the_case(
@@ -78,6 +79,7 @@ def ignore_the_case(
     simd_flag=False,
     gc_flag=False,
     memory64_flag=False,
+    multi_memory_flag=False,
     xip_flag=False,
     eh_flag=False,
     qemu_flag=False,
@@ -93,6 +95,10 @@ def ignore_the_case(
     if "i386" == target and case_name in ["float_exprs", "conversions"]:
         return True
 
+    # esp32s3 qemu doesn't have PSRAM emulation
+    if qemu_flag and target == 'xtensa' and case_name in ["memory_size"]:
+        return True
+
     if gc_flag:
         if case_name in ["array_init_elem", "array_init_data"]:
             return True
@@ -129,7 +135,7 @@ def ignore_the_case(
     return False
 
 
-def preflight_check(aot_flag, eh_flag):
+def preflight_check(aot_flag, aot_compiler, eh_flag):
     if not pathlib.Path(SPEC_TEST_DIR).resolve().exists():
         print(f"Can not find {SPEC_TEST_DIR}")
         return False
@@ -138,8 +144,8 @@ def preflight_check(aot_flag, eh_flag):
         print(f"Can not find {WAST2WASM_CMD}")
         return False
 
-    if aot_flag and not pathlib.Path(WAMRC_CMD).resolve().exists():
-        print(f"Can not find {WAMRC_CMD}")
+    if aot_flag and not pathlib.Path(aot_compiler).resolve().exists():
+        print(f"Can not find {aot_compiler}")
         return False
 
     return True
@@ -149,6 +155,7 @@ def test_case(
     case_path,
     target,
     aot_flag=False,
+    aot_compiler=WAMRC_CMD,
     sgx_flag=False,
     multi_module_flag=False,
     multi_thread_flag=False,
@@ -159,6 +166,7 @@ def test_case(
     verbose_flag=True,
     gc_flag=False,
     memory64_flag=False,
+    multi_memory_flag=False,
     qemu_flag=False,
     qemu_firmware="",
     log="",
@@ -177,7 +185,7 @@ def test_case(
     if no_pty:
         CMD.append("--no-pty")
     CMD.append("--aot-compiler")
-    CMD.append(WAMRC_CMD)
+    CMD.append(aot_compiler)
 
     if aot_flag:
         CMD.append("--aot")
@@ -217,6 +225,9 @@ def test_case(
     if memory64_flag:
         CMD.append("--memory64")
 
+    if multi_memory_flag:
+        CMD.append("--multi-memory")
+
     if log != "":
         CMD.append("--log-dir")
         CMD.append(log)
@@ -245,7 +256,7 @@ def test_case(
                 if verbose_flag:
                     print(output, end="")
                 else:
-                    if len(case_last_words) == 16:
+                    if len(case_last_words) == 1024:
                         case_last_words.pop(0)
                     case_last_words.append(output)
 
@@ -274,6 +285,7 @@ def test_case(
 def test_suite(
     target,
     aot_flag=False,
+    aot_compiler=WAMRC_CMD,
     sgx_flag=False,
     multi_module_flag=False,
     multi_thread_flag=False,
@@ -284,6 +296,7 @@ def test_suite(
     verbose_flag=True,
     gc_flag=False,
     memory64_flag=False,
+    multi_memory_flag=False,
     parl_flag=False,
     qemu_flag=False,
     qemu_firmware="",
@@ -309,6 +322,10 @@ def test_suite(
         eh_case_list_include = [test for test in eh_case_list if test.stem in ["throw", "tag", "try_catch", "rethrow", "try_delegate"]]
         case_list.extend(eh_case_list_include)
 
+    if multi_memory_flag:
+        multi_memory_list = sorted(suite_path.glob("multi-memory/*.wast"))
+        case_list.extend(multi_memory_list)
+
     # ignore based on command line options
     filtered_case_list = []
     for case_path in case_list:
@@ -323,6 +340,7 @@ def test_suite(
             simd_flag,
             gc_flag,
             memory64_flag,
+            multi_memory_flag,
             xip_flag,
             eh_flag,
             qemu_flag,
@@ -348,6 +366,7 @@ def test_suite(
                         str(case_path),
                         target,
                         aot_flag,
+                        aot_compiler,
                         sgx_flag,
                         multi_module_flag,
                         multi_thread_flag,
@@ -358,6 +377,7 @@ def test_suite(
                         verbose_flag,
                         gc_flag,
                         memory64_flag,
+                        multi_memory_flag,
                         qemu_flag,
                         qemu_firmware,
                         log,
@@ -389,6 +409,7 @@ def test_suite(
                     str(case_path),
                     target,
                     aot_flag,
+                    aot_compiler,
                     sgx_flag,
                     multi_module_flag,
                     multi_thread_flag,
@@ -399,6 +420,7 @@ def test_suite(
                     verbose_flag,
                     gc_flag,
                     memory64_flag,
+                    multi_memory_flag,
                     qemu_flag,
                     qemu_firmware,
                     log,
@@ -470,6 +492,12 @@ def main():
         dest="aot_flag",
         help="Running with AOT mode",
     )
+    parser.add_argument(
+        "--aot-compiler",
+        default=WAMRC_CMD,
+        dest="aot_compiler",
+        help="AOT compiler",
+    )
     parser.add_argument(
         "-x",
         action="store_true",
@@ -531,6 +559,13 @@ def main():
         dest="memory64_flag",
         help="Running with memory64 feature",
     )
+    parser.add_argument(
+        "--multi-memory",
+        action="store_true",
+        default=False,
+        dest="multi_memory_flag",
+        help="Running with multi-memory feature",
+    )
     parser.add_argument(
         "cases",
         metavar="path_to__case",
@@ -550,20 +585,22 @@ def main():
     if options.target == "x86_32":
         options.target = "i386"
 
-    if not preflight_check(options.aot_flag, options.eh_flag):
+    if not preflight_check(options.aot_flag, options.aot_compiler, options.eh_flag):
         return False
 
     if not options.cases:
         if options.parl_flag:
             # several cases might share the same workspace/tempfile at the same time
             # so, disable it while running parallelly
-            options.clean_up_flag = False
+            if options.multi_module_flag:
+                options.clean_up_flag = False
             options.verbose_flag = False
 
         start = time.time_ns()
         ret = test_suite(
             options.target,
             options.aot_flag,
+            options.aot_compiler,
             options.sgx_flag,
             options.multi_module_flag,
             options.multi_thread_flag,
@@ -574,6 +611,7 @@ def main():
             options.verbose_flag,
             options.gc_flag,
             options.memory64_flag,
+            options.multi_memory_flag,
             options.parl_flag,
             options.qemu_flag,
             options.qemu_firmware,
@@ -591,6 +629,7 @@ def main():
                     case,
                     options.target,
                     options.aot_flag,
+                    options.aot_compiler,
                     options.sgx_flag,
                     options.multi_module_flag,
                     options.multi_thread_flag,
@@ -601,6 +640,7 @@ def main():
                     options.verbose_flag,
                     options.gc_flag,
                     options.memory64_flag,
+                    options.multi_memory_flag,
                     options.qemu_flag,
                     options.qemu_firmware,
                     options.log,
diff --git a/tests/wamr-test-suites/spec-test-script/multi_memory_ignore_cases.patch b/tests/wamr-test-suites/spec-test-script/multi_memory_ignore_cases.patch
new file mode 100644
index 000000000..38b33175a
--- /dev/null
+++ b/tests/wamr-test-suites/spec-test-script/multi_memory_ignore_cases.patch
@@ -0,0 +1,1022 @@
+diff --git a/test/core/elem.wast b/test/core/elem.wast
+index 575ecef8..6eecab93 100644
+--- a/test/core/elem.wast
++++ b/test/core/elem.wast
+@@ -571,9 +571,11 @@
+   (func $const-i32-d (type $out-i32) (i32.const 68))
+ )
+ 
++(;
+ (assert_return (invoke $module1 "call-7") (i32.const 67))
+ (assert_return (invoke $module1 "call-8") (i32.const 68))
+ (assert_return (invoke $module1 "call-9") (i32.const 66))
++;)
+ 
+ (module $module3
+   (type $out-i32 (func (result i32)))
+@@ -584,6 +586,8 @@
+   (func $const-i32-f (type $out-i32) (i32.const 70))
+ )
+ 
++(;
+ (assert_return (invoke $module1 "call-7") (i32.const 67))
+ (assert_return (invoke $module1 "call-8") (i32.const 69))
+ (assert_return (invoke $module1 "call-9") (i32.const 70))
++;)
+diff --git a/test/core/imports.wast b/test/core/imports.wast
+index 94c1af5c..bb1704fc 100644
+--- a/test/core/imports.wast
++++ b/test/core/imports.wast
+@@ -86,7 +86,7 @@
+ (assert_return (invoke "print64" (i64.const 24)))
+ 
+ (assert_invalid
+-  (module 
++  (module
+     (type (func (result i32)))
+     (import "test" "func" (func (type 1)))
+   )
+@@ -559,6 +559,7 @@
+ (assert_return (invoke "grow" (i32.const 1)) (i32.const -1))
+ (assert_return (invoke "grow" (i32.const 0)) (i32.const 2))
+ 
++(;
+ (module $Mgm
+   (memory (export "memory") 1) ;; initial size is 1
+   (func (export "grow") (result i32) (memory.grow (i32.const 1)))
+@@ -567,7 +568,7 @@
+ (assert_return (invoke $Mgm "grow") (i32.const 1)) ;; now size is 2
+ (module $Mgim1
+   ;; imported memory limits should match, because external memory size is 2 now
+-  (memory (export "memory") (import "grown-memory" "memory") 2) 
++  (memory (export "memory") (import "grown-memory" "memory") 2)
+   (func (export "grow") (result i32) (memory.grow (i32.const 1)))
+ )
+ (register "grown-imported-memory" $Mgim1)
+@@ -578,7 +579,7 @@
+   (func (export "size") (result i32) (memory.size))
+ )
+ (assert_return (invoke $Mgim2 "size") (i32.const 3))
+-
++;)
+ 
+ ;; Syntax errors
+ 
+@@ -650,6 +651,7 @@
+   "import after memory"
+ )
+ 
++(;
+ ;; This module is required to validate, regardless of whether it can be
+ ;; linked. Overloading is not possible in wasm itself, but it is possible
+ ;; in modules from which wasm can import.
+@@ -676,3 +678,4 @@
+   )
+   "unknown import"
+ )
++;)
+\ No newline at end of file
+diff --git a/test/core/linking.wast b/test/core/linking.wast
+index 994e0f49..8fbcc021 100644
+--- a/test/core/linking.wast
++++ b/test/core/linking.wast
+@@ -19,11 +19,11 @@
+ (assert_return (invoke $Nf "call") (i32.const 3))
+ (assert_return (invoke $Nf "call Mf.call") (i32.const 2))
+ 
+-(module
++(module $M1
+   (import "spectest" "print_i32" (func $f (param i32)))
+   (export "print" (func $f))
+ )
+-(register "reexport_f")
++(register "reexport_f" $M1)
+ (assert_unlinkable
+   (module (import "reexport_f" "print" (func (param i64))))
+   "incompatible import type"
+@@ -35,7 +35,6 @@
+ 
+ 
+ ;; Globals
+-
+ (module $Mg
+   (global $glob (export "glob") i32 (i32.const 42))
+   (func (export "get") (result i32) (global.get $glob))
+@@ -47,6 +46,7 @@
+ )
+ (register "Mg" $Mg)
+ 
++(; only sharing initial values
+ (module $Ng
+   (global $x (import "Mg" "glob") i32)
+   (global $mut_glob (import "Mg" "mut_glob") (mut i32))
+@@ -81,7 +81,7 @@
+ (assert_return (get $Ng "Mg.mut_glob") (i32.const 241))
+ (assert_return (invoke $Mg "get_mut") (i32.const 241))
+ (assert_return (invoke $Ng "Mg.get_mut") (i32.const 241))
+-
++;)
+ 
+ (assert_unlinkable
+   (module (import "Mg" "mut_glob" (global i32)))
+@@ -130,7 +130,7 @@
+ 
+ 
+ ;; Tables
+-
++(; no such support
+ (module $Mt
+   (type (func (result i32)))
+   (type (func))
+@@ -307,10 +307,11 @@
+   (module (table (import "Mtable_ex" "t-extern") 1 funcref))
+   "incompatible import type"
+ )
++;)
+ 
+ 
+ ;; Memories
+-
++(; no such support
+ (module $Mm
+   (memory (export "mem") 1 5)
+   (data (i32.const 10) "\00\01\02\03\04\05\06\07\08\09")
+@@ -451,3 +452,4 @@
+ 
+ (assert_return (invoke $Ms "get memory[0]") (i32.const 104))  ;; 'h'
+ (assert_return (invoke $Ms "get table[0]") (i32.const 0xdead))
++;)
+\ No newline at end of file
+diff --git a/test/core/load.wast b/test/core/load.wast
+index 9fe48e2b..3e9c2f8c 100644
+--- a/test/core/load.wast
++++ b/test/core/load.wast
+@@ -29,6 +29,8 @@
+ (register "M")
+ 
+ (module
++  (func $readM1 (import "M" "read") (param i32) (result i32))
++  (export "readM1" (func $readM1))
+   (memory $mem1 (import "M" "mem") 2)
+   (memory $mem2 3)
+ 
+@@ -43,11 +45,12 @@
+   )
+ )
+ 
+-(assert_return (invoke $M "read" (i32.const 20)) (i32.const 1))
+-(assert_return (invoke $M "read" (i32.const 21)) (i32.const 2))
+-(assert_return (invoke $M "read" (i32.const 22)) (i32.const 3))
+-(assert_return (invoke $M "read" (i32.const 23)) (i32.const 4))
+-(assert_return (invoke $M "read" (i32.const 24)) (i32.const 5))
++;; To invoke the function in M as a submodule, not as an independent module
++(assert_return (invoke "readM1" (i32.const 20)) (i32.const 1))
++(assert_return (invoke "readM1" (i32.const 21)) (i32.const 2))
++(assert_return (invoke "readM1" (i32.const 22)) (i32.const 3))
++(assert_return (invoke "readM1" (i32.const 23)) (i32.const 4))
++(assert_return (invoke "readM1" (i32.const 24)) (i32.const 5))
+ 
+ (assert_return (invoke "read1" (i32.const 20)) (i32.const 1))
+ (assert_return (invoke "read1" (i32.const 21)) (i32.const 2))
+diff --git a/test/core/memory_grow.wast b/test/core/memory_grow.wast
+index 4b6dbc83..dc46c029 100644
+--- a/test/core/memory_grow.wast
++++ b/test/core/memory_grow.wast
+@@ -106,15 +106,15 @@
+ 
+ ;; Multiple memories
+ 
+-(module
++(module $MemroygrowM
+   (memory (export "mem1") 2 5)
+   (memory (export "mem2") 0)
+ )
+-(register "M")
++(register "MemroygrowM" $MemorygrowM)
+ 
+ (module
+-  (memory $mem1 (import "M" "mem1") 1 6)
+-  (memory $mem2 (import "M" "mem2") 0)
++  (memory $mem1 (import "MemroygrowM" "mem1") 1 6)
++  (memory $mem2 (import "MemroygrowM" "mem2") 0)
+   (memory $mem3 3)
+   (memory $mem4 4 5)
+ 
+diff --git a/test/core/memory_size.wast b/test/core/memory_size.wast
+index a1d6ea2d..b58c75d0 100644
+--- a/test/core/memory_size.wast
++++ b/test/core/memory_size.wast
+@@ -65,15 +65,15 @@
+ 
+ ;; Multiple memories
+ 
+-(module
++(module $MemmorysizeM
+   (memory (export "mem1") 2 4)
+   (memory (export "mem2") 0)
+ )
+-(register "M")
++(register "MemmorysizeM" $MemmorysizeM)
+ 
+ (module
+-  (memory $mem1 (import "M" "mem1") 1 5)
+-  (memory $mem2 (import "M" "mem2") 0)
++  (memory $mem1 (import "MemmorysizeM" "mem1") 1 5)
++  (memory $mem2 (import "MemmorysizeM" "mem2") 0)
+   (memory $mem3 3)
+   (memory $mem4 4 5)
+ 
+diff --git a/test/core/multi-memory/imports2.wast b/test/core/multi-memory/imports2.wast
+index 314bc131..e1060599 100644
+--- a/test/core/multi-memory/imports2.wast
++++ b/test/core/multi-memory/imports2.wast
+@@ -1,13 +1,13 @@
+-(module
++(module $imports2test
+   (memory (export "z") 0 0)
+   (memory (export "memory-2-inf") 2)
+   (memory (export "memory-2-4") 2 4)
+ )
+ 
+-(register "test")
++(register "imports2test" $imports2test)
+ 
+ (module
+-  (import "test" "z" (memory 0))
++  (import "imports2test" "z" (memory 0))
+   (memory $m (import "spectest" "memory") 1 2)
+   (data (memory 1) (i32.const 10) "\10")
+ 
+@@ -31,9 +31,9 @@
+ (assert_trap (invoke "load" (i32.const 1000000)) "out of bounds memory access")
+ 
+ (module
+-  (import "test" "memory-2-inf" (memory 2))
+-  (import "test" "memory-2-inf" (memory 1))
+-  (import "test" "memory-2-inf" (memory 0))
++  (import "imports2test" "memory-2-inf" (memory 2))
++  (import "imports2test" "memory-2-inf" (memory 1))
++  (import "imports2test" "memory-2-inf" (memory 0))
+ )
+ 
+ (module
+@@ -46,7 +46,7 @@
+ )
+ 
+ (assert_unlinkable
+-  (module (import "test" "unknown" (memory 1)))
++  (module (import "imports2test" "unknown" (memory 1)))
+   "unknown import"
+ )
+ (assert_unlinkable
+@@ -55,11 +55,11 @@
+ )
+ 
+ (assert_unlinkable
+-  (module (import "test" "memory-2-inf" (memory 3)))
++  (module (import "imports2test" "memory-2-inf" (memory 3)))
+   "incompatible import type"
+ )
+ (assert_unlinkable
+-  (module (import "test" "memory-2-inf" (memory 2 3)))
++  (module (import "imports2test" "memory-2-inf" (memory 2 3)))
+   "incompatible import type"
+ )
+ (assert_unlinkable
+diff --git a/test/core/multi-memory/imports4.wast b/test/core/multi-memory/imports4.wast
+index 411b1c0f..0a819454 100644
+--- a/test/core/multi-memory/imports4.wast
++++ b/test/core/multi-memory/imports4.wast
+@@ -1,12 +1,12 @@
+-(module
++(module $imports4test
+   (memory (export "memory-2-inf") 2)
+   (memory (export "memory-2-4") 2 4)
+ )
+ 
+-(register "test")
++(register "imports4test")
+ 
+ (module
+-  (import "test" "memory-2-4" (memory 1))
++  (import "imports4test" "memory-2-4" (memory 1))
+   (memory $m (import "spectest" "memory") 0 3)  ;; actual has max size 2
+   (func (export "grow") (param i32) (result i32) (memory.grow $m (local.get 0)))
+ )
+@@ -16,6 +16,8 @@
+ (assert_return (invoke "grow" (i32.const 1)) (i32.const -1))
+ (assert_return (invoke "grow" (i32.const 0)) (i32.const 2))
+ 
++;; TODO: Current implementation call grow on one submodule instance can't really change its definition
++(;
+ (module $Mgm
+   (memory 0)
+   (memory 0)
+@@ -45,3 +47,4 @@
+   (func (export "size") (result i32) (memory.size $m))
+ )
+ (assert_return (invoke $Mgim2 "size") (i32.const 3))
++;)
+\ No newline at end of file
+diff --git a/test/core/multi-memory/linking0.wast b/test/core/multi-memory/linking0.wast
+index b09c69f6..d57d484e 100644
+--- a/test/core/multi-memory/linking0.wast
++++ b/test/core/multi-memory/linking0.wast
+@@ -24,8 +24,8 @@
+   )
+   "unknown import"
+ )
+-(assert_trap (invoke $Mt "call" (i32.const 7)) "uninitialized element")
+-
++;; can't call function in submodule when module can't be instantiated
++;; (assert_trap (invoke "call" (i32.const 7)) "uninitialized element")
+ 
+ (assert_trap
+   (module
+@@ -39,4 +39,5 @@
+   )
+   "out of bounds memory access"
+ )
+-(assert_return (invoke $Mt "call" (i32.const 7)) (i32.const 0))
++;; can't call function in submodule when module can't be instantiated
++;; (assert_return (invoke "call" (i32.const 7)) (i32.const 0))
+diff --git a/test/core/multi-memory/linking1.wast b/test/core/multi-memory/linking1.wast
+index 39eabb00..49c87ce8 100644
+--- a/test/core/multi-memory/linking1.wast
++++ b/test/core/multi-memory/linking1.wast
+@@ -1,4 +1,4 @@
+-(module $Mm
++(module $linking1Mm
+   (memory $mem0 (export "mem0") 0 0)
+   (memory $mem1 (export "mem1") 1 5)
+   (memory $mem2 (export "mem2") 0 0)
+@@ -9,11 +9,11 @@
+     (i32.load8_u $mem1 (local.get 0))
+   )
+ )
+-(register "Mm" $Mm)
++(register "linking1Mm" $linking1Mm)
+ 
+-(module $Nm
+-  (func $loadM (import "Mm" "load") (param i32) (result i32))
+-  (memory (import "Mm" "mem0") 0)
++(module $linking1Nm
++  (func $loadM (import "linking1Mm" "load") (param i32) (result i32))
++  (memory (import "linking1Mm" "mem0") 0)
+ 
+   (memory $m 1)
+   (data (memory 1) (i32.const 10) "\f0\f1\f2\f3\f4\f5")
+@@ -24,12 +24,14 @@
+   )
+ )
+ 
+-(assert_return (invoke $Mm "load" (i32.const 12)) (i32.const 2))
+-(assert_return (invoke $Nm "Mm.load" (i32.const 12)) (i32.const 2))
+-(assert_return (invoke $Nm "load" (i32.const 12)) (i32.const 0xf2))
++(assert_return (invoke $linking1Mm "load" (i32.const 12)) (i32.const 2))
++(assert_return (invoke $linking1Nm "Mm.load" (i32.const 12)) (i32.const 2))
++(assert_return (invoke $linking1Nm "load" (i32.const 12)) (i32.const 0xf2))
+ 
+-(module $Om
+-  (memory (import "Mm" "mem1") 1)
++(module $linking1Om
++  (func $loadM (import "linking1Mm" "load") (param i32) (result i32))
++  (export "Mm.load" (func $loadM))
++  (memory (import "linking1Mm" "mem1") 1)
+   (data (i32.const 5) "\a0\a1\a2\a3\a4\a5\a6\a7")
+ 
+   (func (export "load") (param $a i32) (result i32)
+@@ -37,19 +39,20 @@
+   )
+ )
+ 
+-(assert_return (invoke $Mm "load" (i32.const 12)) (i32.const 0xa7))
+-(assert_return (invoke $Nm "Mm.load" (i32.const 12)) (i32.const 0xa7))
+-(assert_return (invoke $Nm "load" (i32.const 12)) (i32.const 0xf2))
+-(assert_return (invoke $Om "load" (i32.const 12)) (i32.const 0xa7))
++;; To invoke the function in Mm as a submodule, not as an independent module
++(assert_return (invoke $linking1Om "Mm.load" (i32.const 12)) (i32.const 0xa7))
++;; (assert_return (invoke $Nm "Mm.load" (i32.const 12)) (i32.const 0xa7))
++;; (assert_return (invoke $Nm "load" (i32.const 12)) (i32.const 0xf2))
++(assert_return (invoke $linking1Om "load" (i32.const 12)) (i32.const 0xa7))
+ 
+ (module
+-  (memory (import "Mm" "mem1") 0)
++  (memory (import "linking1Mm" "mem1") 0)
+   (data (i32.const 0xffff) "a")
+ )
+ 
+ (assert_trap
+   (module
+-    (memory (import "Mm" "mem0") 0)
++    (memory (import "linking1Mm" "mem0") 0)
+     (data (i32.const 0xffff) "a")
+   )
+   "out of bounds memory access"
+@@ -57,7 +60,7 @@
+ 
+ (assert_trap
+   (module
+-    (memory (import "Mm" "mem1") 0)
++    (memory (import "linking1Mm" "mem1") 0)
+     (data (i32.const 0x10000) "a")
+   )
+   "out of bounds memory access"
+diff --git a/test/core/multi-memory/linking2.wast b/test/core/multi-memory/linking2.wast
+index 26bf3cca..5eae4643 100644
+--- a/test/core/multi-memory/linking2.wast
++++ b/test/core/multi-memory/linking2.wast
+@@ -1,4 +1,4 @@
+-(module $Mm
++(module $linking2Mm
+   (memory $mem0 (export "mem0") 0 0)
+   (memory $mem1 (export "mem1") 1 5)
+   (memory $mem2 (export "mem2") 0 0)
+@@ -9,22 +9,22 @@
+     (i32.load8_u $mem1 (local.get 0))
+   )
+ )
+-(register "Mm" $Mm)
++(register "linking2Mm" $linking2Mm)
+ 
+-(module $Pm
+-  (memory (import "Mm" "mem1") 1 8)
++(module
++  (memory (import "linking2Mm" "mem1") 1 8)
+ 
+   (func (export "grow") (param $a i32) (result i32)
+     (memory.grow (local.get 0))
+   )
+ )
+ 
+-(assert_return (invoke $Pm "grow" (i32.const 0)) (i32.const 1))
+-(assert_return (invoke $Pm "grow" (i32.const 2)) (i32.const 1))
+-(assert_return (invoke $Pm "grow" (i32.const 0)) (i32.const 3))
+-(assert_return (invoke $Pm "grow" (i32.const 1)) (i32.const 3))
+-(assert_return (invoke $Pm "grow" (i32.const 1)) (i32.const 4))
+-(assert_return (invoke $Pm "grow" (i32.const 0)) (i32.const 5))
+-(assert_return (invoke $Pm "grow" (i32.const 1)) (i32.const -1))
+-(assert_return (invoke $Pm "grow" (i32.const 0)) (i32.const 5))
++(assert_return (invoke "grow" (i32.const 0)) (i32.const 1))
++(assert_return (invoke "grow" (i32.const 2)) (i32.const 1))
++(assert_return (invoke "grow" (i32.const 0)) (i32.const 3))
++(assert_return (invoke "grow" (i32.const 1)) (i32.const 3))
++(assert_return (invoke "grow" (i32.const 1)) (i32.const 4))
++(assert_return (invoke "grow" (i32.const 0)) (i32.const 5))
++(assert_return (invoke "grow" (i32.const 1)) (i32.const -1))
++(assert_return (invoke "grow" (i32.const 0)) (i32.const 5))
+ 
+diff --git a/test/core/multi-memory/linking3.wast b/test/core/multi-memory/linking3.wast
+index e23fbe4e..d3efe95a 100644
+--- a/test/core/multi-memory/linking3.wast
++++ b/test/core/multi-memory/linking3.wast
+@@ -33,8 +33,9 @@
+   )
+   "out of bounds memory access"
+ )
+-(assert_return (invoke $Mm "load" (i32.const 0)) (i32.const 97))
+-(assert_return (invoke $Mm "load" (i32.const 327670)) (i32.const 0))
++;; can't call function in submodule when module can't be instantiated
++;; (assert_return (invoke $Mm "load" (i32.const 0)) (i32.const 97))
++;; (assert_return (invoke $Mm "load" (i32.const 327670)) (i32.const 0))
+ 
+ (assert_trap
+   (module
+@@ -46,7 +47,8 @@
+   )
+   "out of bounds table access"
+ )
+-(assert_return (invoke $Mm "load" (i32.const 0)) (i32.const 97))
++;; can't call function in submodule when module can't be instantiated
++;; (assert_return (invoke $Mm "load" (i32.const 0)) (i32.const 97))
+ 
+ ;; Store is modified if the start function traps.
+ (module $Ms
+@@ -79,5 +81,6 @@
+   "unreachable"
+ )
+ 
+-(assert_return (invoke $Ms "get memory[0]") (i32.const 104))  ;; 'h'
+-(assert_return (invoke $Ms "get table[0]") (i32.const 0xdead))
++;; can't call function in submodule when module can't be instantiated
++;; (assert_return (invoke $Ms "get memory[0]") (i32.const 104))  ;; 'h'
++;; (assert_return (invoke $Ms "get table[0]") (i32.const 0xdead))
+diff --git a/test/core/multi-memory/load1.wast b/test/core/multi-memory/load1.wast
+index be309c39..6a0faf0d 100644
+--- a/test/core/multi-memory/load1.wast
++++ b/test/core/multi-memory/load1.wast
+@@ -8,6 +8,8 @@
+ (register "M")
+ 
+ (module
++  (func $readM1 (import "M" "read") (param i32) (result i32))
++  (export "readM1" (func $readM1))
+   (memory $mem1 (import "M" "mem") 2)
+   (memory $mem2 3)
+ 
+@@ -22,11 +24,12 @@
+   )
+ )
+ 
+-(assert_return (invoke $M "read" (i32.const 20)) (i32.const 1))
+-(assert_return (invoke $M "read" (i32.const 21)) (i32.const 2))
+-(assert_return (invoke $M "read" (i32.const 22)) (i32.const 3))
+-(assert_return (invoke $M "read" (i32.const 23)) (i32.const 4))
+-(assert_return (invoke $M "read" (i32.const 24)) (i32.const 5))
++;; To invoke the function in M as a submodule, not as an independent module
++(assert_return (invoke "readM1" (i32.const 20)) (i32.const 1))
++(assert_return (invoke "readM1" (i32.const 21)) (i32.const 2))
++(assert_return (invoke "readM1" (i32.const 22)) (i32.const 3))
++(assert_return (invoke "readM1" (i32.const 23)) (i32.const 4))
++(assert_return (invoke "readM1" (i32.const 24)) (i32.const 5))
+ 
+ (assert_return (invoke "read1" (i32.const 20)) (i32.const 1))
+ (assert_return (invoke "read1" (i32.const 21)) (i32.const 2))
+diff --git a/test/core/multi-memory/store1.wast b/test/core/multi-memory/store1.wast
+index 10cf2c42..eafe6cc9 100644
+--- a/test/core/multi-memory/store1.wast
++++ b/test/core/multi-memory/store1.wast
+@@ -10,6 +10,9 @@
+ )
+ (register "M1")
+ 
++(invoke "store" (i32.const 0) (i64.const 1))
++(assert_return (invoke "load" (i32.const 0)) (i64.const 1))
++
+ (module $M2
+   (memory (export "mem") 1)
+ 
+@@ -22,10 +25,8 @@
+ )
+ (register "M2")
+ 
+-(invoke $M1 "store" (i32.const 0) (i64.const 1))
+-(invoke $M2 "store" (i32.const 0) (i64.const 2))
+-(assert_return (invoke $M1 "load" (i32.const 0)) (i64.const 1))
+-(assert_return (invoke $M2 "load" (i32.const 0)) (i64.const 2))
++(invoke "store" (i32.const 0) (i64.const 2))
++(assert_return (invoke "load" (i32.const 0)) (i64.const 2))
+ 
+ (module
+   (memory $mem1 (import "M1" "mem") 1)
+diff --git a/test/core/ref_func.wast b/test/core/ref_func.wast
+index adb5cb78..6396013b 100644
+--- a/test/core/ref_func.wast
++++ b/test/core/ref_func.wast
+@@ -4,7 +4,7 @@
+ (register "M")
+ 
+ (module
+-  (func $f (import "M" "f") (param i32) (result i32))
++  (func $f (param $x i32) (result i32) (local.get $x))
+   (func $g (param $x i32) (result i32)
+     (i32.add (local.get $x) (i32.const 1))
+   )
+diff --git a/test/core/store.wast b/test/core/store.wast
+index 86f6263a..65a0d4ee 100644
+--- a/test/core/store.wast
++++ b/test/core/store.wast
+@@ -35,7 +35,10 @@
+     (i64.store (local.get 0) (local.get 1))
+   )
+ )
+-(register "M1")
++(register "M1" $M1)
++
++(invoke "store" (i32.const 0) (i64.const 1))
++(assert_return (invoke "load" (i32.const 0)) (i64.const 1))
+ 
+ (module $M2
+   (memory (export "mem") 1)
+@@ -47,12 +50,10 @@
+     (i64.store (local.get 0) (local.get 1))
+   )
+ )
+-(register "M2")
++(register "M2" $M2)
+ 
+-(invoke $M1 "store" (i32.const 0) (i64.const 1))
+-(invoke $M2 "store" (i32.const 0) (i64.const 2))
+-(assert_return (invoke $M1 "load" (i32.const 0)) (i64.const 1))
+-(assert_return (invoke $M2 "load" (i32.const 0)) (i64.const 2))
++(invoke "store" (i32.const 0) (i64.const 2))
++(assert_return (invoke "load" (i32.const 0)) (i64.const 2))
+ 
+ (module
+   (memory $mem1 (import "M1" "mem") 1)
+diff --git a/test/core/table_copy.wast b/test/core/table_copy.wast
+index 380e84ee..59230cfb 100644
+--- a/test/core/table_copy.wast
++++ b/test/core/table_copy.wast
+@@ -14,11 +14,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t0) (i32.const 2) func 3 1 4 1)
+@@ -106,11 +106,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t0) (i32.const 2) func 3 1 4 1)
+@@ -198,11 +198,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t0) (i32.const 2) func 3 1 4 1)
+@@ -290,11 +290,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t0) (i32.const 2) func 3 1 4 1)
+@@ -382,11 +382,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t0) (i32.const 2) func 3 1 4 1)
+@@ -474,11 +474,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t0) (i32.const 2) func 3 1 4 1)
+@@ -566,11 +566,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t0) (i32.const 2) func 3 1 4 1)
+@@ -658,11 +658,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t0) (i32.const 2) func 3 1 4 1)
+@@ -750,11 +750,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t0) (i32.const 2) func 3 1 4 1)
+@@ -842,11 +842,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t1) (i32.const 2) func 3 1 4 1)
+@@ -934,11 +934,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t1) (i32.const 2) func 3 1 4 1)
+@@ -1026,11 +1026,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t1) (i32.const 2) func 3 1 4 1)
+@@ -1118,11 +1118,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t1) (i32.const 2) func 3 1 4 1)
+@@ -1210,11 +1210,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t1) (i32.const 2) func 3 1 4 1)
+@@ -1302,11 +1302,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t1) (i32.const 2) func 3 1 4 1)
+@@ -1394,11 +1394,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t1) (i32.const 2) func 3 1 4 1)
+@@ -1486,11 +1486,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t1) (i32.const 2) func 3 1 4 1)
+@@ -1578,11 +1578,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t1) (i32.const 2) func 3 1 4 1)
+diff --git a/test/core/table_init.wast b/test/core/table_init.wast
+index 0b2d26f7..3c595e5b 100644
+--- a/test/core/table_init.wast
++++ b/test/core/table_init.wast
+@@ -14,11 +14,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t0) (i32.const 2) func 3 1 4 1)
+@@ -72,11 +72,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t0) (i32.const 2) func 3 1 4 1)
+@@ -130,11 +130,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t0) (i32.const 2) func 3 1 4 1)
+@@ -196,11 +196,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t1) (i32.const 2) func 3 1 4 1)
+@@ -254,11 +254,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t1) (i32.const 2) func 3 1 4 1)
+@@ -312,11 +312,11 @@
+ 
+ (module
+   (type (func (result i32)))  ;; type #0
+-  (import "a" "ef0" (func (result i32)))    ;; index 0
+-  (import "a" "ef1" (func (result i32)))
+-  (import "a" "ef2" (func (result i32)))
+-  (import "a" "ef3" (func (result i32)))
+-  (import "a" "ef4" (func (result i32)))    ;; index 4
++  (func (result i32) (i32.const 0))    ;; index 0
++  (func (result i32) (i32.const 1))
++  (func (result i32) (i32.const 2))
++  (func (result i32) (i32.const 3))
++  (func (result i32) (i32.const 4))    ;; index 4
+   (table $t0 30 30 funcref)
+   (table $t1 30 30 funcref)
+   (elem (table $t1) (i32.const 2) func 3 1 4 1)
diff --git a/tests/wamr-test-suites/spec-test-script/runtest.py b/tests/wamr-test-suites/spec-test-script/runtest.py
index adb1ae47c..97820eaad 100755
--- a/tests/wamr-test-suites/spec-test-script/runtest.py
+++ b/tests/wamr-test-suites/spec-test-script/runtest.py
@@ -55,9 +55,18 @@ aot_target_options_map = {
     "riscv32": ["--target=riscv32", "--target-abi=ilp32", "--cpu=generic-rv32", "--cpu-features=+m,+a,+c"],
     "riscv32_ilp32f": ["--target=riscv32", "--target-abi=ilp32f", "--cpu=generic-rv32", "--cpu-features=+m,+a,+c,+f"],
     "riscv32_ilp32d": ["--target=riscv32", "--target-abi=ilp32d", "--cpu=generic-rv32", "--cpu-features=+m,+a,+c,+f,+d"],
-    "riscv64": ["--target=riscv64", "--target-abi=lp64", "--cpu=generic-rv64", "--cpu-features=+m,+a,+c"],
-    "riscv64_lp64f": ["--target=riscv64", "--target-abi=lp64f", "--cpu=generic-rv64", "--cpu-features=+m,+a,+c,+f"],
-    "riscv64_lp64d": ["--target=riscv64", "--target-abi=lp64d", "--cpu=generic-rv64", "--cpu-features=+m,+a,+c,+f,+d"],
+    # RISCV64 requires -mcmodel=medany, which can be set by --size-level=1
+    "riscv64": ["--target=riscv64", "--target-abi=lp64", "--cpu=generic-rv64", "--cpu-features=+m,+a,+c", "--size-level=1"],
+    "riscv64_lp64f": ["--target=riscv64", "--target-abi=lp64f", "--cpu=generic-rv64", "--cpu-features=+m,+a,+c,+f", "--size-level=1"],
+    "riscv64_lp64d": ["--target=riscv64", "--target-abi=lp64d", "--cpu=generic-rv64", "--cpu-features=+m,+a,+c,+f,+d", "--size-level=1"],
+    "xtensa": ["--target=xtensa"],
+}
+
+# AOT compilation options mapping for XIP mode
+aot_target_options_map_xip = {
+    # avoid l32r relocations for xtensa
+    "xtensa": ["--mllvm=-mtext-section-literals"],
+    "riscv32_ilp32f": ["--enable-builtin-intrinsics=i64.common,f64.common,f32.const,f64.const,f64xi32,f64xi64,f64_promote_f32,f32_demote_f64"],
 }
 
 def debug(data):
@@ -271,6 +280,8 @@ parser.add_argument('--rundir',
         help="change to the directory before running tests")
 parser.add_argument('--start-timeout', default=30, type=int,
         help="default timeout for initial prompt")
+parser.add_argument('--start-fail-timeout', default=2, type=int,
+        help="default timeout for initial prompt (when expected to fail)")
 parser.add_argument('--test-timeout', default=20, type=int,
         help="default timeout for each individual test action")
 parser.add_argument('--no-pty', action='store_true',
@@ -316,6 +327,9 @@ parser.add_argument('--gc', default=False, action='store_true',
 parser.add_argument('--memory64', default=False, action='store_true',
         help='Test with Memory64')
 
+parser.add_argument('--multi-memory', default=False, action='store_true',
+        help='Test with multi-memory(with multi-module auto enabled)')
+
 parser.add_argument('--qemu', default=False, action='store_true',
         help="Enable QEMU")
 
@@ -829,6 +843,12 @@ def test_assert_return(r, opts, form):
         if ' ' in func:
             func = func.replace(' ', '\\')
 
+        # Note: 'as-memory.grow-first' doesn't actually grow memory.
+        # (thus not in this list)
+        if opts.qemu and opts.target == 'xtensa' and func in {'as-memory.grow-value', 'as-memory.grow-size', 'as-memory.grow-last', 'as-memory.grow-everywhere'}:
+            log("ignoring memory.grow test")
+            return
+
         if m.group(2) == '':
             args = []
         else:
@@ -1080,6 +1100,8 @@ def compile_wast_to_wasm(form, wast_tempfile, wasm_tempfile, opts):
         cmd = [opts.wast2wasm, "--enable-threads", "--no-check", "--enable-exceptions", "--enable-tail-call", wast_tempfile, "-o", wasm_tempfile ]
     elif opts.memory64:
         cmd = [opts.wast2wasm, "--enable-memory64", "--no-check", wast_tempfile, "-o", wasm_tempfile ]
+    elif opts.multi_memory:
+        cmd = [opts.wast2wasm, "--enable-multi-memory", "--no-check", wast_tempfile, "-o", wasm_tempfile ]
     else:
         cmd = [opts.wast2wasm, "--enable-threads", "--no-check",
                wast_tempfile, "-o", wasm_tempfile ]
@@ -1111,8 +1133,9 @@ def compile_wasm_to_aot(wasm_tempfile, aot_tempfile, runner, opts, r, output = '
         cmd.append("--disable-simd")
 
     if opts.xip:
-        cmd.append("--enable-indirect-mode")
-        cmd.append("--disable-llvm-intrinsics")
+        cmd.append("--xip")
+        if test_target in aot_target_options_map_xip:
+            cmd += aot_target_options_map_xip[test_target]
 
     if opts.multi_thread:
         cmd.append("--enable-multi-thread")
@@ -1139,10 +1162,6 @@ def compile_wasm_to_aot(wasm_tempfile, aot_tempfile, runner, opts, r, output = '
     if opts.qemu or opts.memory64:
         cmd.append("--bounds-checks=1")
 
-    # RISCV64 requires -mcmodel=medany, which can be set by --size-level=1
-    if test_target.startswith("riscv64"):
-        cmd.append("--size-level=1")
-
     cmd += ["-o", aot_tempfile, wasm_tempfile]
 
     log("Running: %s" % " ".join(cmd))
@@ -1158,10 +1177,28 @@ def run_wasm_with_repl(wasm_tempfile, aot_tempfile, opts, r):
     tmpfile = aot_tempfile if test_aot else wasm_tempfile
     log("Starting interpreter for module '%s'" % tmpfile)
 
-    cmd_iwasm = [opts.interpreter, "--heap-size=0", "-v=5" if opts.verbose else "-v=0", "--repl", tmpfile]
+    if opts.qemu:
+        tmpfile = f"/tmp/{os.path.basename(tmpfile)}"
 
+    cmd_iwasm = [opts.interpreter, "--heap-size=0", "--repl"]
     if opts.multi_module:
-        cmd_iwasm.insert(1, "--module-path=" + (tempfile.gettempdir() if not opts.qemu else "/tmp" ))
+        cmd_iwasm.append("--module-path=" + (tempfile.gettempdir() if not opts.qemu else "/tmp" ))
+    if opts.gc:
+        # our tail-call implementation is known broken.
+        # work it around by using a huge stack.
+        # cf. https://github.com/bytecodealliance/wasm-micro-runtime/issues/2231
+        cmd_iwasm.append("--stack-size=10485760")  # 10MB (!)
+    else:
+        if opts.aot:
+            # Note: aot w/o gc doesn't require the interpreter stack at all.
+            # Note: 1 is the minimum value we can specify because 0 means
+            # the default.
+            cmd_iwasm.append("--stack-size=1")
+        else:
+            cmd_iwasm.append("--stack-size=131072")  # 128KB
+    if opts.verbose:
+        cmd_iwasm.append("-v=5")
+    cmd_iwasm.append(tmpfile)
 
     if opts.qemu:
         if opts.qemu_firmware == '':
@@ -1179,6 +1216,8 @@ def run_wasm_with_repl(wasm_tempfile, aot_tempfile, opts, r):
         elif opts.target.startswith("riscv64"):
             cmd = "qemu-system-riscv64 -semihosting -M virt,aclint=on -cpu rv64 -smp 1 -nographic -bios none -kernel".split()
             cmd.append(opts.qemu_firmware)
+        elif opts.target.startswith("xtensa"):
+            cmd = f"qemu-system-xtensa -semihosting -nographic -serial mon:stdio -machine esp32s3 -drive file={opts.qemu_firmware},if=mtd,format=raw".split()
         else:
             raise Exception("Unknwon target for QEMU: %s" % opts.target)
 
@@ -1224,7 +1263,7 @@ def test_assert_with_exception(form, wast_tempfile, wasm_tempfile, aot_tempfile,
     if test_aot:
         r = compile_wasm_to_aot(wasm_tempfile, aot_tempfile, True, opts, r)
         try:
-            assert_prompt(r, ['Compile success'], opts.start_timeout, True)
+            assert_prompt(r, ['Compile success'], opts.start_fail_timeout, True)
         except:
             _, exc, _ = sys.exc_info()
             if (r.buf.find(expected) >= 0):
@@ -1245,7 +1284,7 @@ def test_assert_with_exception(form, wast_tempfile, wasm_tempfile, aot_tempfile,
     if loadable:
         # Wait for the initial prompt
         try:
-            assert_prompt(r, ['webassembly> '], opts.start_timeout, True)
+            assert_prompt(r, ['webassembly> '], opts.start_fail_timeout, True)
         except:
             _, exc, _ = sys.exc_info()
             if (r.buf.find(expected) >= 0):
@@ -1278,6 +1317,12 @@ if __name__ == "__main__":
     wasm_tempfile = create_tmp_file(".wasm")
     if test_aot:
         aot_tempfile = create_tmp_file(".aot")
+        # could be potientially compiled to aot
+        # with the future following call test_assert_xxx,
+        # add them to temp_file_repo now even if no actual following file,
+        # it will be simple ignore during final deletion if not exist
+        prefix = wasm_tempfile.split(".wasm")[0]
+        temp_file_repo.append(prefix + ".aot")
 
     ret_code = 0
     try:
@@ -1325,7 +1370,7 @@ if __name__ == "__main__":
                     if test_aot:
                         r = compile_wasm_to_aot(wasm_tempfile, aot_tempfile, True, opts, r)
                         try:
-                            assert_prompt(r, ['Compile success'], opts.start_timeout, True)
+                            assert_prompt(r, ['Compile success'], opts.start_fail_timeout, True)
                         except:
                             _, exc, _ = sys.exc_info()
                             if (r.buf.find(error_msg) >= 0):
@@ -1400,6 +1445,12 @@ if __name__ == "__main__":
 
                         if test_aot:
                             r = compile_wasm_to_aot(temp_files[1], temp_files[2], True, opts, r)
+                            # could be potientially compiled to aot
+                            # with the future following call test_assert_xxx,
+                            # add them to temp_file_repo now even if no actual following file,
+                            # it will be simple ignore during final deletion if not exist
+                            prefix = temp_files[1].split(".wasm")[0]
+                            temp_file_repo.append(prefix + ".aot")
                             try:
                                 assert_prompt(r, ['Compile success'], opts.start_timeout, False)
                             except:
diff --git a/tests/wamr-test-suites/test_wamr.sh b/tests/wamr-test-suites/test_wamr.sh
index b9890eed1..8254cc712 100755
--- a/tests/wamr-test-suites/test_wamr.sh
+++ b/tests/wamr-test-suites/test_wamr.sh
@@ -25,6 +25,7 @@ function help()
     echo "-S enable SIMD feature"
     echo "-G enable GC feature"
     echo "-W enable memory64 feature"
+    echo "-E enable multi memory feature"
     echo "-X enable XIP feature"
     echo "-e enable exception handling"
     echo "-x test SGX"
@@ -39,6 +40,7 @@ function help()
     echo "-C enable code coverage collect"
     echo "-j set the platform to test"
     echo "-T set sanitizer to use in tests(ubsan|tsan|asan)"
+    echo "-A use the specified wamrc command instead of building it"
     echo "-r [requirement name] [N [N ...]] specify a requirement name followed by one or more"
     echo "                                  subrequirement IDs, if no subrequirement is specificed,"
     echo "                                  it will run all subrequirements. When this optin is used,"
@@ -58,6 +60,7 @@ COLLECT_CODE_COVERAGE=0
 ENABLE_SIMD=0
 ENABLE_GC=0
 ENABLE_MEMORY64=0
+ENABLE_MULTI_MEMORY=0
 ENABLE_XIP=0
 ENABLE_EH=0
 ENABLE_DEBUG_VERSION=0
@@ -75,15 +78,16 @@ fi
 PARALLELISM=0
 ENABLE_QEMU=0
 QEMU_FIRMWARE=""
+WAMRC_CMD=""
 # prod/testsuite-all branch
 WASI_TESTSUITE_COMMIT="ee807fc551978490bf1c277059aabfa1e589a6c2"
 TARGET_LIST=("AARCH64" "AARCH64_VFP" "ARMV7" "ARMV7_VFP" "THUMBV7" "THUMBV7_VFP" \
-             "RISCV32" "RISCV32_ILP32F" "RISCV32_ILP32D" "RISCV64" "RISCV64_LP64F" "RISCV64_LP64D")
+             "RISCV32" "RISCV32_ILP32F" "RISCV32_ILP32D" "RISCV64" "RISCV64_LP64F" "RISCV64_LP64D" "XTENSA")
 REQUIREMENT_NAME=""
 # Initialize an empty array for subrequirement IDs
 SUBREQUIREMENT_IDS=()
 
-while getopts ":s:cabgvt:m:MCpSXexwWPGQF:j:T:r:" opt
+while getopts ":s:cabgvt:m:MCpSXexwWEPGQF:j:T:r:A:" opt
 do
     OPT_PARSED="TRUE"
     case $opt in
@@ -92,8 +96,8 @@ do
         # get next suite if there are multiple vaule in -s
         eval "nxarg=\${$((OPTIND))}"
         # just get test cases, loop until the next symbol '-'
-        # IN  ====>  -s spec wasi unit -t fast-classic
-        # GET ====>  spec wasi unit
+        # IN  ====>  -s spec unit -t fast-classic
+        # GET ====>  spec unit
         while [[ "${nxarg}" != -* && ${nxarg} ]];
         do
             TEST_CASE_ARR+=(${nxarg})
@@ -146,6 +150,11 @@ do
         echo "enable wasm64(memory64) feature"
         ENABLE_MEMORY64=1
         ;;
+        E)
+        echo "enable multi memory feature(auto enable multi module)"
+        ENABLE_MULTI_MEMORY=1
+        ENABLE_MULTI_MODULE=1
+        ;;
         C)
         echo "enable code coverage"
         COLLECT_CODE_COVERAGE=1
@@ -214,6 +223,10 @@ do
         echo "Only Test requirement name: ${REQUIREMENT_NAME}"
         [[ ${#SUBREQUIREMENT_IDS[@]} -ne 0 ]] && echo "Choose subrequirement IDs: ${SUBREQUIREMENT_IDS[@]}"
         ;;
+        A)
+        echo "Using wamrc ${OPTARG}"
+        WAMRC_CMD=${OPTARG}
+        ;;
         ?)
         help
         exit 1
@@ -251,7 +264,7 @@ else
     readonly IWASM_CMD="${WAMR_DIR}/product-mini/platforms/${PLATFORM}/build/iwasm"
 fi
 
-readonly WAMRC_CMD="${WAMR_DIR}/wamr-compiler/build/wamrc"
+readonly WAMRC_CMD_DEFAULT="${WAMR_DIR}/wamr-compiler/build/wamrc"
 
 readonly CLASSIC_INTERP_COMPILE_FLAGS="\
     -DWAMR_BUILD_TARGET=${TARGET} \
@@ -403,12 +416,13 @@ function setup_wabt()
             git clone --recursive https://github.com/WebAssembly/wabt
         fi
         echo "upate wabt"
-        cd wabt
-        git fetch origin
-        git reset --hard origin/main
-        git checkout tags/${WABT_VERSION} -B ${WABT_VERSION}
-        cd ..
-        make -C wabt gcc-release -j 4 || exit 1
+        cd wabt \
+        && git fetch origin \
+        && git reset --hard origin/main \
+        && git checkout tags/${WABT_VERSION} -B ${WABT_VERSION} \
+        && git submodule update --init \
+        && cd .. \
+        && make -C wabt gcc-release -j 4 || exit 1
     fi
 }
 
@@ -489,6 +503,20 @@ function spec_test()
         git reset --hard 48e69f394869c55b7bbe14ac963c09f4605490b6
         git checkout 044d0d2e77bdcbe891f7e0b9dd2ac01d56435f0b -- test/core/elem.wast test/core/data.wast
         git apply ../../spec-test-script/memory64_ignore_cases.patch || exit 1
+    elif [[ ${ENABLE_MULTI_MEMORY} == 1 ]]; then
+        echo "checkout spec for multi memory proposal"
+
+        # check spec test cases for multi memory
+        git clone -b main --single-branch https://github.com/WebAssembly/multi-memory.git spec
+        pushd spec
+
+        # Reset to commit: "Merge pull request #48 from backes/specify-memcpy-immediate-order"
+        git reset --hard 48e69f394869c55b7bbe14ac963c09f4605490b6
+        git checkout 044d0d2e77bdcbe891f7e0b9dd2ac01d56435f0b -- test/core/elem.wast
+        git apply ../../spec-test-script/multi_memory_ignore_cases.patch || exit 1
+        if [[ ${RUNNING_MODE} == "aot" ]]; then
+            git apply ../../spec-test-script/multi_module_aot_ignore_cases.patch || exit 1
+        fi
     else
         echo "checkout spec for default proposal"
 
@@ -550,6 +578,7 @@ function spec_test()
     # require warmc only in aot mode
     if [[ $1 == 'aot' ]]; then
         ARGS_FOR_SPEC_TEST+="-t "
+        ARGS_FOR_SPEC_TEST+="--aot-compiler ${WAMRC_CMD} "
     fi
 
     if [[ ${PARALLELISM} == 1 ]]; then
@@ -564,6 +593,13 @@ function spec_test()
         ARGS_FOR_SPEC_TEST+="--memory64 "
     fi
 
+    # multi memory is only enabled in interp and aot mode
+    if [[ 1 == ${ENABLE_MULTI_MEMORY} ]]; then
+        if [[ $1 == 'classic-interp' || $1 == 'aot' ]]; then
+            ARGS_FOR_SPEC_TEST+="--multi-memory "
+        fi
+    fi
+
     if [[ ${ENABLE_QEMU} == 1 ]]; then
         ARGS_FOR_SPEC_TEST+="--qemu "
         ARGS_FOR_SPEC_TEST+="--qemu-firmware ${QEMU_FIRMWARE} "
@@ -588,22 +624,6 @@ function spec_test()
     echo -e "\nFinish spec tests" | tee -a ${REPORT_DIR}/spec_test_report.txt
 }
 
-function wasi_test()
-{
-    echo "Now start wasi tests"
-    touch ${REPORT_DIR}/wasi_test_report.txt
-
-    cd ${WORK_DIR}/../../wasi
-    [[ $1 != "aot" ]] && \
-        python wasi_test.py --interpreter ${IWASM_CMD} ${SGX_OPT}\
-                            | tee ${REPORT_DIR}/wasi_test_report.txt \
-    || \
-        python wasi_test.py --aot --aot-compiler ${WAMRC_CMD} ${SGX_OPT}\
-                            --interpreter ${IWASM_CMD} \
-                            | tee ${REPORT_DIR}/wasi_test_report.txt
-    echo "Finish wasi tests"
-}
-
 function wamr_compiler_test()
 {
     if [[ $1 != "aot" ]]; then
@@ -801,9 +821,14 @@ function build_wamrc()
         return
     fi
 
+    BUILD_LLVM_SH=build_llvm.sh
+    if [ ${TARGET} = "XTENSA" ]; then
+        BUILD_LLVM_SH=build_llvm_xtensa.sh
+    fi
+
     echo "Build wamrc for spec test under aot compile type"
     cd ${WAMR_DIR}/wamr-compiler \
-        && ./build_llvm.sh \
+        && ./${BUILD_LLVM_SH} \
         && if [ -d build ]; then rm -r build/*; else mkdir build; fi \
         && cd build \
         && cmake .. -DCOLLECT_CODE_COVERAGE=${COLLECT_CODE_COVERAGE} \
@@ -855,6 +880,14 @@ function do_execute_in_running_mode()
 {
     local RUNNING_MODE="$1"
 
+    if [[ ${ENABLE_MULTI_MEMORY} -eq 1 ]]; then
+        if [[ "${RUNNING_MODE}" != "classic-interp" \
+                && "${RUNNING_MODE}" != "aot" ]]; then
+            echo "support multi-memory in classic-interp mode and aot mode"
+            return 0
+        fi
+    fi
+
     if [[ ${ENABLE_MEMORY64} -eq 1 ]]; then
         if [[ "${RUNNING_MODE}" != "classic-interp" \
                 && "${RUNNING_MODE}" != "aot" ]]; then
@@ -944,6 +977,12 @@ function trigger()
         EXTRA_COMPILE_FLAGS+=" -DWAMR_BUILD_MEMORY64=0"
     fi
 
+    if [[ ${ENABLE_MULTI_MEMORY} == 1 ]];then
+        EXTRA_COMPILE_FLAGS+=" -DWAMR_BUILD_MULTI_MEMORY=1"
+    else
+        EXTRA_COMPILE_FLAGS+=" -DWAMR_BUILD_MULTI_MEMORY=0"
+    fi
+
     if [[ ${ENABLE_MULTI_THREAD} == 1 ]];then
         EXTRA_COMPILE_FLAGS+=" -DWAMR_BUILD_LIB_PTHREAD=1"
     fi
@@ -1068,7 +1107,10 @@ function trigger()
                 if [[ ${ENABLE_QEMU} == 0 ]]; then
                     build_iwasm_with_cfg $BUILD_FLAGS
                 fi
-                build_wamrc
+                if [ -z "${WAMRC_CMD}" ]; then
+                   build_wamrc
+                   WAMRC_CMD=${WAMRC_CMD_DEFAULT}
+                fi
                 for suite in "${TEST_CASE_ARR[@]}"; do
                     $suite"_test" aot
                 done
diff --git a/wamr-compiler/CMakeLists.txt b/wamr-compiler/CMakeLists.txt
index 245b4a031..2ab0462b2 100644
--- a/wamr-compiler/CMakeLists.txt
+++ b/wamr-compiler/CMakeLists.txt
@@ -121,6 +121,8 @@ elseif (WAMR_BUILD_TARGET STREQUAL "RISCV64_LP64")
   add_definitions(-DBUILD_TARGET_RISCV64_LP64)
 elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32" OR WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32D")
   add_definitions(-DBUILD_TARGET_RISCV32_ILP32D)
+elseif (WAMR_BUILD_TARGET STREQUAL  "RISCV32_ILP32F")
+  add_definitions(-DBUILD_TARGET_RISCV32_ILP32F)
 elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32")
   add_definitions(-DBUILD_TARGET_RISCV32_ILP32)
 else ()
diff --git a/wamr-compiler/main.c b/wamr-compiler/main.c
index ae24ee5bc..b3e731e53 100644
--- a/wamr-compiler/main.c
+++ b/wamr-compiler/main.c
@@ -9,6 +9,8 @@
 #include "wasm_export.h"
 #include "aot_export.h"
 
+#include <llvm-c/Support.h>
+
 #if BH_HAS_DLFCN
 #include <dlfcn.h>
 
@@ -126,6 +128,10 @@ print_help()
     printf("                            Use --cpu-features=+help to list all the features supported\n");
     printf("  --opt-level=n             Set the optimization level (0 to 3, default is 3)\n");
     printf("  --size-level=n            Set the code size level (0 to 3, default is 3)\n");
+    printf("                              0 - Large code model\n");
+    printf("                              1 - Medium code model\n");
+    printf("                              2 - Kernel code model\n");
+    printf("                              3 - Small code model\n");
     printf("  -sgx                      Generate code for SGX platform (Intel Software Guard Extensions)\n");
     printf("  --bounds-checks=1/0       Enable or disable the bounds checks for memory access:\n");
     printf("                              by default it is disabled in all 64-bit platforms except SGX and\n");
@@ -195,6 +201,7 @@ print_help()
 #if WASM_ENABLE_LINUX_PERF != 0
     printf("  --enable-linux-perf       Enable linux perf support\n");
 #endif
+    printf("  --mllvm=<option>          Add the LLVM command line option\n");
     printf("  -v=n                      Set log verbose level (0 to 5, default is 2), larger with more log\n");
     printf("  --version                 Show version information\n");
     printf("Examples: wamrc -o test.aot test.wasm\n");
@@ -315,6 +322,8 @@ int
 main(int argc, char *argv[])
 {
     char *wasm_file_name = NULL, *out_file_name = NULL;
+    char **llvm_options = NULL;
+    size_t llvm_options_count = 0;
     uint8 *wasm_file = NULL;
     uint32 wasm_file_size;
     wasm_module_t wasm_module = NULL;
@@ -550,6 +559,24 @@ main(int argc, char *argv[])
             enable_linux_perf = true;
         }
 #endif
+        else if (!strncmp(argv[0], "--mllvm=", 8)) {
+            void *np;
+            if (argv[0][8] == '\0')
+                PRINT_HELP_AND_EXIT();
+            if (llvm_options_count == 0)
+                llvm_options_count += 2;
+            else
+                llvm_options_count++;
+            np = realloc(llvm_options, llvm_options_count * sizeof(char *));
+            if (np == NULL) {
+                printf("Memory allocation failure\n");
+                goto fail0;
+            }
+            llvm_options = np;
+            if (llvm_options_count == 2)
+                llvm_options[llvm_options_count - 2] = "wamrc";
+            llvm_options[llvm_options_count - 1] = argv[0] + 8;
+        }
         else if (!strcmp(argv[0], "--version")) {
             uint32 major, minor, patch;
             wasm_runtime_get_version(&major, &minor, &patch);
@@ -576,7 +603,7 @@ main(int argc, char *argv[])
         }
 #if defined(_WIN32) || defined(_WIN32_) || defined(__APPLE__) \
     || defined(__MACH__)
-        if (!option.target_abi) {
+        if (!option.target_arch && !option.target_abi) {
             LOG_VERBOSE("Set size level to 1 for Windows or MacOS AOT file");
             option.size_level = 1;
         }
@@ -625,6 +652,10 @@ main(int argc, char *argv[])
         native_lib_list, native_lib_count, native_handle_list);
 #endif
 
+    if (llvm_options_count > 0)
+        LLVMParseCommandLineOptions(llvm_options_count,
+                                    (const char **)llvm_options, "wamrc");
+
     bh_print_time("Begin to load wasm file");
 
     if (use_dummy_wasm) {
@@ -738,6 +769,7 @@ fail0:
     if (option.custom_sections) {
         free(option.custom_sections);
     }
+    free(llvm_options);
 
     bh_print_time("wamrc return");
     return exit_status;