mirror of
				https://github.com/bytecodealliance/wasm-micro-runtime.git
				synced 2025-10-26 02:41:16 +00:00 
			
		
		
		
	 c2d7fa30df
			
		
	
	
		c2d7fa30df
		
			
		
	
	
	
	
		
			
			In LLVM 18 and later, instcombine performs only one iteration.
It performs an extra "verify fixpoint" operation when instcombine
is specified in certain ways, including how we do so here.
The problem is that the verification raises a fatal error when it
finds we didn't reach a fixpoint:
    LLVM ERROR: Instruction Combining did not reach a fixpoint
    after 1 iterations
While it should be rare, it's quite normal not to reach a fixpoint.
This commit fixes the issue by simply disabling the verification.
cf. 41895843b5
		
	
			
		
			
				
	
	
		
			436 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			436 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|  * Copyright (C) 2019 Intel Corporation. All rights reserved.
 | |
|  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
|  */
 | |
| 
 | |
| #include <llvm/Passes/StandardInstrumentations.h>
 | |
| #include <llvm/Support/Error.h>
 | |
| #if LLVM_VERSION_MAJOR < 17
 | |
| #include <llvm/ADT/None.h>
 | |
| #include <llvm/ADT/Optional.h>
 | |
| #include <llvm/ADT/Triple.h>
 | |
| #endif
 | |
| #include <llvm/ADT/SmallVector.h>
 | |
| #include <llvm/ADT/Twine.h>
 | |
| #include <llvm/Analysis/TargetTransformInfo.h>
 | |
| #include <llvm/CodeGen/TargetPassConfig.h>
 | |
| #include <llvm/ExecutionEngine/ExecutionEngine.h>
 | |
| #include <llvm/MC/MCSubtargetInfo.h>
 | |
| #include <llvm/Support/TargetSelect.h>
 | |
| #include <llvm/Target/TargetMachine.h>
 | |
| #include <llvm-c/Core.h>
 | |
| #include <llvm-c/ExecutionEngine.h>
 | |
| #if LLVM_VERSION_MAJOR < 17
 | |
| #include <llvm-c/Initialization.h>
 | |
| #endif
 | |
| #include <llvm/ExecutionEngine/GenericValue.h>
 | |
| #include <llvm/ExecutionEngine/JITEventListener.h>
 | |
| #include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
 | |
| #include <llvm/ExecutionEngine/Orc/LLJIT.h>
 | |
| #include <llvm/IR/DerivedTypes.h>
 | |
| #include <llvm/IR/Module.h>
 | |
| #include <llvm/IR/Instructions.h>
 | |
| #include <llvm/IR/IntrinsicInst.h>
 | |
| #include <llvm/IR/PassManager.h>
 | |
| #include <llvm/Support/CommandLine.h>
 | |
| #include <llvm/Support/ErrorHandling.h>
 | |
| #if LLVM_VERSION_MAJOR >= 17
 | |
| #include <llvm/Support/PGOOptions.h>
 | |
| #include <llvm/Support/VirtualFileSystem.h>
 | |
| #endif
 | |
| #include <llvm/Target/CodeGenCWrappers.h>
 | |
| #include <llvm/Target/TargetMachine.h>
 | |
| #include <llvm/Target/TargetOptions.h>
 | |
| #if LLVM_VERSION_MAJOR >= 17
 | |
| #include <llvm/TargetParser/Triple.h>
 | |
| #endif
 | |
| #include <llvm/Transforms/Utils/LowerMemIntrinsics.h>
 | |
| #include <llvm/Transforms/Vectorize/LoopVectorize.h>
 | |
| #include <llvm/Transforms/Vectorize/LoadStoreVectorizer.h>
 | |
| #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
 | |
| #include <llvm/Transforms/Vectorize/VectorCombine.h>
 | |
| #include <llvm/Transforms/Scalar/LoopRotation.h>
 | |
| #include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
 | |
| #include <llvm/Transforms/Scalar/LICM.h>
 | |
| #include <llvm/Transforms/Scalar/GVN.h>
 | |
| #include <llvm/Passes/PassBuilder.h>
 | |
| #include <llvm/Analysis/TargetLibraryInfo.h>
 | |
| #if LLVM_VERSION_MAJOR >= 12
 | |
| #include <llvm/Analysis/AliasAnalysis.h>
 | |
| #endif
 | |
| #include <llvm/ProfileData/InstrProf.h>
 | |
| 
 | |
| #include <cstring>
 | |
| #include "../aot/aot_runtime.h"
 | |
| #include "aot_llvm.h"
 | |
| 
 | |
| using namespace llvm;
 | |
| using namespace llvm::orc;
 | |
| 
 | |
| #if LLVM_VERSION_MAJOR >= 17
 | |
| namespace llvm {
 | |
| template<typename T>
 | |
| using Optional = std::optional<T>;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| LLVM_C_EXTERN_C_BEGIN
 | |
| 
 | |
| bool
 | |
| aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str);
 | |
| 
 | |
| void
 | |
| aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module);
 | |
| 
 | |
| LLVM_C_EXTERN_C_END
 | |
| 
 | |
| ExitOnError ExitOnErr;
 | |
| 
 | |
| class ExpandMemoryOpPass : public PassInfoMixin<ExpandMemoryOpPass>
 | |
| {
 | |
|   public:
 | |
|     PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 | |
| };
 | |
| 
 | |
| PreservedAnalyses
 | |
| ExpandMemoryOpPass::run(Function &F, FunctionAnalysisManager &AM)
 | |
| {
 | |
|     SmallVector<MemIntrinsic *, 16> MemCalls;
 | |
| 
 | |
|     /* Iterate over all instructions in the function, looking for memcpy,
 | |
|      * memmove, and memset.  When we find one, expand it into a loop. */
 | |
| 
 | |
|     for (auto &BB : F) {
 | |
|         for (auto &Inst : BB) {
 | |
|             if (auto *Memcpy = dyn_cast_or_null<MemCpyInst>(&Inst)) {
 | |
|                 MemCalls.push_back(Memcpy);
 | |
|             }
 | |
|             else if (auto *Memmove = dyn_cast_or_null<MemMoveInst>(&Inst)) {
 | |
|                 MemCalls.push_back(Memmove);
 | |
|             }
 | |
|             else if (auto *Memset = dyn_cast_or_null<MemSetInst>(&Inst)) {
 | |
|                 MemCalls.push_back(Memset);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     for (MemIntrinsic *MemCall : MemCalls) {
 | |
|         if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
 | |
|             Function *ParentFunc = Memcpy->getParent()->getParent();
 | |
|             const TargetTransformInfo &TTI =
 | |
|                 AM.getResult<TargetIRAnalysis>(*ParentFunc);
 | |
|             expandMemCpyAsLoop(Memcpy, TTI);
 | |
|             Memcpy->eraseFromParent();
 | |
|         }
 | |
|         else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
 | |
| #if LLVM_VERSION_MAJOR >= 17
 | |
|             Function *ParentFunc = Memmove->getParent()->getParent();
 | |
|             const TargetTransformInfo &TTI =
 | |
|                 AM.getResult<TargetIRAnalysis>(*ParentFunc);
 | |
|             expandMemMoveAsLoop(Memmove, TTI);
 | |
| #else
 | |
|             expandMemMoveAsLoop(Memmove);
 | |
| #endif
 | |
|             Memmove->eraseFromParent();
 | |
|         }
 | |
|         else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
 | |
|             expandMemSetAsLoop(Memset);
 | |
|             Memset->eraseFromParent();
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     PreservedAnalyses PA;
 | |
|     PA.preserveSet<CFGAnalyses>();
 | |
| 
 | |
|     return PA;
 | |
| }
 | |
| 
 | |
/*
 * Tell whether SIMD code may be generated for the given target.
 *
 * When WASM_ENABLE_SIMD is 0 the arguments are ignored and true is
 * returned. Otherwise a target machine is selected for the arch/cpu
 * pair and its subtarget features are checked: "+sse4.1" on x86_64,
 * "+neon" on aarch64; arc is always accepted and every other
 * architecture is rejected.
 *
 * @param arch_c_str architecture name used to build the target triple
 * @param cpu_c_str CPU name handed to the target selection
 *
 * @return true if the target is accepted, false otherwise (including
 *         NULL arguments or a failed target selection)
 */
bool
aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str)
{
#if WASM_ENABLE_SIMD == 0
    /* SIMD support is compiled out: nothing to verify */
    (void)arch_c_str;
    (void)cpu_c_str;
    return true;
#else
    if (arch_c_str == nullptr || cpu_c_str == nullptr)
        return false;

    llvm::SmallVector<std::string, 1> attrs;
    llvm::Triple triple(arch_c_str, "", "");
    std::unique_ptr<llvm::TargetMachine> machine(
        llvm::EngineBuilder().selectTarget(triple, "",
                                           std::string(cpu_c_str), attrs));
    if (!machine)
        return false;

    const llvm::Triple::ArchType arch = machine->getTargetTriple().getArch();
    const llvm::MCSubtargetInfo *sti = machine->getMCSubtargetInfo();
    if (!sti)
        return false;

    switch (arch) {
        case llvm::Triple::x86_64:
            return sti->checkFeatures("+sse4.1");
        case llvm::Triple::aarch64:
            return sti->checkFeatures("+neon");
        case llvm::Triple::arc:
            return true;
        default:
            return false;
    }
#endif /* end of WASM_ENABLE_SIMD == 0 */
}
 | |
| 
 | |
| void
 | |
| aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
 | |
| {
 | |
|     TargetMachine *TM =
 | |
|         reinterpret_cast<TargetMachine *>(comp_ctx->target_machine);
 | |
|     PipelineTuningOptions PTO;
 | |
|     PTO.LoopVectorization = true;
 | |
|     PTO.SLPVectorization = true;
 | |
|     PTO.LoopUnrolling = true;
 | |
| 
 | |
| #if LLVM_VERSION_MAJOR >= 16
 | |
|     Optional<PGOOptions> PGO = std::nullopt;
 | |
| #else
 | |
|     Optional<PGOOptions> PGO = llvm::None;
 | |
| #endif
 | |
| 
 | |
|     if (comp_ctx->enable_llvm_pgo) {
 | |
|         /* Disable static counter allocation for value profiler,
 | |
|            it will be allocated by runtime */
 | |
|         const char *argv[] = { "", "-vp-static-alloc=false" };
 | |
|         cl::ParseCommandLineOptions(2, argv);
 | |
| #if LLVM_VERSION_MAJOR < 17
 | |
|         PGO = PGOOptions("", "", "", PGOOptions::IRInstr);
 | |
| #else
 | |
|         auto FS = vfs::getRealFileSystem();
 | |
|         PGO = PGOOptions("", "", "", "", FS, PGOOptions::IRInstr);
 | |
| #endif
 | |
|     }
 | |
|     else if (comp_ctx->use_prof_file) {
 | |
| #if LLVM_VERSION_MAJOR < 17
 | |
|         PGO = PGOOptions(comp_ctx->use_prof_file, "", "", PGOOptions::IRUse);
 | |
| #else
 | |
|         auto FS = vfs::getRealFileSystem();
 | |
|         PGO = PGOOptions(comp_ctx->use_prof_file, "", "", "", FS,
 | |
|                          PGOOptions::IRUse);
 | |
| #endif
 | |
|     }
 | |
| 
 | |
| #ifdef DEBUG_PASS
 | |
|     PassInstrumentationCallbacks PIC;
 | |
|     PassBuilder PB(TM, PTO, PGO, &PIC);
 | |
| #else
 | |
| #if LLVM_VERSION_MAJOR == 12
 | |
|     PassBuilder PB(false, TM, PTO, PGO);
 | |
| #else
 | |
|     PassBuilder PB(TM, PTO, PGO);
 | |
| #endif
 | |
| #endif
 | |
| 
 | |
|     /* Register all the basic analyses with the managers */
 | |
|     LoopAnalysisManager LAM;
 | |
|     FunctionAnalysisManager FAM;
 | |
|     CGSCCAnalysisManager CGAM;
 | |
|     ModuleAnalysisManager MAM;
 | |
| 
 | |
|     /* Register the target library analysis directly and give it a
 | |
|        customized preset TLI */
 | |
|     std::unique_ptr<TargetLibraryInfoImpl> TLII(
 | |
|         new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
 | |
|     FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
 | |
| 
 | |
|     /* Register the AA manager first so that our version is the one used */
 | |
|     AAManager AA = PB.buildDefaultAAPipeline();
 | |
|     FAM.registerPass([&] { return std::move(AA); });
 | |
| 
 | |
| #ifdef DEBUG_PASS
 | |
|     StandardInstrumentations SI(true, false);
 | |
|     SI.registerCallbacks(PIC, &FAM);
 | |
| #endif
 | |
| 
 | |
|     PB.registerFunctionAnalyses(FAM);
 | |
|     PB.registerLoopAnalyses(LAM);
 | |
|     PB.registerModuleAnalyses(MAM);
 | |
|     PB.registerCGSCCAnalyses(CGAM);
 | |
|     PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
 | |
| 
 | |
| #if LLVM_VERSION_MAJOR <= 13
 | |
|     PassBuilder::OptimizationLevel OL;
 | |
| 
 | |
|     switch (comp_ctx->opt_level) {
 | |
|         case 0:
 | |
|             OL = PassBuilder::OptimizationLevel::O0;
 | |
|             break;
 | |
|         case 1:
 | |
|             OL = PassBuilder::OptimizationLevel::O1;
 | |
|             break;
 | |
|         case 2:
 | |
|             OL = PassBuilder::OptimizationLevel::O2;
 | |
|             break;
 | |
|         case 3:
 | |
|         default:
 | |
|             OL = PassBuilder::OptimizationLevel::O3;
 | |
|             break;
 | |
|     }
 | |
| #else
 | |
|     OptimizationLevel OL;
 | |
| 
 | |
|     switch (comp_ctx->opt_level) {
 | |
|         case 0:
 | |
|             OL = OptimizationLevel::O0;
 | |
|             break;
 | |
|         case 1:
 | |
|             OL = OptimizationLevel::O1;
 | |
|             break;
 | |
|         case 2:
 | |
|             OL = OptimizationLevel::O2;
 | |
|             break;
 | |
|         case 3:
 | |
|         default:
 | |
|             OL = OptimizationLevel::O3;
 | |
|             break;
 | |
|     }
 | |
| #endif /* end of LLVM_VERSION_MAJOR */
 | |
| 
 | |
|     bool disable_llvm_lto = comp_ctx->disable_llvm_lto;
 | |
| #if WASM_ENABLE_SPEC_TEST != 0
 | |
|     disable_llvm_lto = true;
 | |
| #endif
 | |
| 
 | |
|     Module *M = reinterpret_cast<Module *>(module);
 | |
|     if (disable_llvm_lto) {
 | |
|         for (Function &F : *M) {
 | |
|             F.addFnAttr("disable-tail-calls", "true");
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     ModulePassManager MPM;
 | |
| 
 | |
|     if (comp_ctx->is_jit_mode) {
 | |
| #if LLVM_VERSION_MAJOR >= 18
 | |
| #define INSTCOMBINE "instcombine<no-verify-fixpoint>"
 | |
| #else
 | |
| #define INSTCOMBINE "instcombine"
 | |
| #endif
 | |
|         const char *Passes =
 | |
|             "loop-vectorize,slp-vectorizer,"
 | |
|             "load-store-vectorizer,vector-combine,"
 | |
|             "mem2reg," INSTCOMBINE ",simplifycfg,jump-threading,indvars";
 | |
|         ExitOnErr(PB.parsePassPipeline(MPM, Passes));
 | |
|     }
 | |
|     else {
 | |
|         FunctionPassManager FPM;
 | |
| 
 | |
|         /* Apply Vectorize related passes for AOT mode */
 | |
|         FPM.addPass(LoopVectorizePass());
 | |
|         FPM.addPass(SLPVectorizerPass());
 | |
|         FPM.addPass(LoadStoreVectorizerPass());
 | |
|         FPM.addPass(VectorCombinePass());
 | |
| 
 | |
|         if (comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) {
 | |
|             /* LICM pass: loop invariant code motion, attempting to remove
 | |
|                as much code from the body of a loop as possible. Experiments
 | |
|                show it is good to enable it when pgo is enabled. */
 | |
| #if LLVM_VERSION_MAJOR >= 15
 | |
|             LICMOptions licm_opt;
 | |
|             FPM.addPass(
 | |
|                 createFunctionToLoopPassAdaptor(LICMPass(licm_opt), true));
 | |
| #else
 | |
|             FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), true));
 | |
| #endif
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|         FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
 | |
|         FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass()));
 | |
|         */
 | |
| 
 | |
|         MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
 | |
| 
 | |
|         if (comp_ctx->llvm_passes) {
 | |
|             ExitOnErr(PB.parsePassPipeline(MPM, comp_ctx->llvm_passes));
 | |
|         }
 | |
| 
 | |
|         if (
 | |
| #if LLVM_VERSION_MAJOR <= 13
 | |
|             PassBuilder::OptimizationLevel::O0 == OL
 | |
| #else
 | |
|             OptimizationLevel::O0 == OL
 | |
| #endif
 | |
|         ) {
 | |
|             MPM.addPass(PB.buildO0DefaultPipeline(OL));
 | |
|         }
 | |
|         else {
 | |
|             if (!disable_llvm_lto) {
 | |
|                 /* Apply LTO for AOT mode */
 | |
|                 if (comp_ctx->comp_data->func_count >= 10
 | |
|                     || comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file)
 | |
|                     /* Add the pre-link optimizations if the func count
 | |
|                        is large enough or PGO is enabled */
 | |
|                     MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL));
 | |
|                 else
 | |
|                     MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL));
 | |
|             }
 | |
|             else {
 | |
|                 MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         /* Run specific passes for AOT indirect mode in last since general
 | |
|             optimization may create some intrinsic function calls like
 | |
|             llvm.memset, so let's remove these function calls here. */
 | |
|         if (comp_ctx->is_indirect_mode) {
 | |
|             FunctionPassManager FPM1;
 | |
|             FPM1.addPass(ExpandMemoryOpPass());
 | |
|             MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM1)));
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     MPM.run(*M, MAM);
 | |
| }
 | |
| 
 | |
| char *
 | |
| aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size)
 | |
| {
 | |
|     std::vector<std::string> NameStrs;
 | |
|     std::string Result;
 | |
|     char buf[32], *compressed_str;
 | |
|     uint32 compressed_str_len, i;
 | |
| 
 | |
|     for (i = 0; i < comp_ctx->func_ctx_count; i++) {
 | |
|         snprintf(buf, sizeof(buf), "%s%d", AOT_FUNC_PREFIX, i);
 | |
|         std::string str(buf);
 | |
|         NameStrs.push_back(str);
 | |
|     }
 | |
| 
 | |
| #if LLVM_VERSION_MAJOR < 18
 | |
| #define collectGlobalObjectNameStrings collectPGOFuncNameStrings
 | |
| #endif
 | |
|     if (collectGlobalObjectNameStrings(NameStrs, true, Result)) {
 | |
|         aot_set_last_error("collect pgo func name strings failed");
 | |
|         return NULL;
 | |
|     }
 | |
| 
 | |
|     compressed_str_len = (uint32)Result.size();
 | |
|     if (!(compressed_str = (char *)wasm_runtime_malloc(compressed_str_len))) {
 | |
|         aot_set_last_error("allocate memory failed");
 | |
|         return NULL;
 | |
|     }
 | |
| 
 | |
|     bh_memcpy_s(compressed_str, compressed_str_len, Result.c_str(),
 | |
|                 compressed_str_len);
 | |
|     *p_size = compressed_str_len;
 | |
|     return compressed_str;
 | |
| }
 |