10b57cec5SDimitry Andric //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass munges the code in the input function to better prepare it for 100b57cec5SDimitry Andric // SelectionDAG-based code generation. This works around limitations in it's 110b57cec5SDimitry Andric // basic-block-at-a-time approach. It should eventually be removed. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "llvm/ADT/APInt.h" 160b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 170b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 180b57cec5SDimitry Andric #include "llvm/ADT/MapVector.h" 190b57cec5SDimitry Andric #include "llvm/ADT/PointerIntPair.h" 200b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 210b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 220b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 230b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 240b57cec5SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h" 250b57cec5SDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h" 260b57cec5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h" 270b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 280b57cec5SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h" 290b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 
300b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 310b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 320b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h" 330b57cec5SDimitry Andric #include "llvm/CodeGen/Analysis.h" 3481ad6265SDimitry Andric #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" 350b57cec5SDimitry Andric #include "llvm/CodeGen/ISDOpcodes.h" 360b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAGNodes.h" 370b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h" 380b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 390b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 400b57cec5SDimitry Andric #include "llvm/CodeGen/ValueTypes.h" 410b57cec5SDimitry Andric #include "llvm/Config/llvm-config.h" 420b57cec5SDimitry Andric #include "llvm/IR/Argument.h" 430b57cec5SDimitry Andric #include "llvm/IR/Attributes.h" 440b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 450b57cec5SDimitry Andric #include "llvm/IR/Constant.h" 460b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 470b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 48fe6060f1SDimitry Andric #include "llvm/IR/DebugInfo.h" 490b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 500b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 510b57cec5SDimitry Andric #include "llvm/IR/Function.h" 520b57cec5SDimitry Andric #include "llvm/IR/GetElementPtrTypeIterator.h" 530b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 540b57cec5SDimitry Andric #include "llvm/IR/GlobalVariable.h" 550b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 560b57cec5SDimitry Andric #include "llvm/IR/InlineAsm.h" 570b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h" 580b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 590b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 600b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 610b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h" 
62480093f4SDimitry Andric #include "llvm/IR/IntrinsicsAArch64.h" 630b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h" 640b57cec5SDimitry Andric #include "llvm/IR/MDBuilder.h" 650b57cec5SDimitry Andric #include "llvm/IR/Module.h" 660b57cec5SDimitry Andric #include "llvm/IR/Operator.h" 670b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h" 680b57cec5SDimitry Andric #include "llvm/IR/Statepoint.h" 690b57cec5SDimitry Andric #include "llvm/IR/Type.h" 700b57cec5SDimitry Andric #include "llvm/IR/Use.h" 710b57cec5SDimitry Andric #include "llvm/IR/User.h" 720b57cec5SDimitry Andric #include "llvm/IR/Value.h" 730b57cec5SDimitry Andric #include "llvm/IR/ValueHandle.h" 740b57cec5SDimitry Andric #include "llvm/IR/ValueMap.h" 75480093f4SDimitry Andric #include "llvm/InitializePasses.h" 760b57cec5SDimitry Andric #include "llvm/Pass.h" 770b57cec5SDimitry Andric #include "llvm/Support/BlockFrequency.h" 780b57cec5SDimitry Andric #include "llvm/Support/BranchProbability.h" 790b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 800b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 810b57cec5SDimitry Andric #include "llvm/Support/Compiler.h" 820b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 830b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 840b57cec5SDimitry Andric #include "llvm/Support/MachineValueType.h" 850b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 860b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 870b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h" 880b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h" 890b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 900b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BypassSlowDivision.h" 91480093f4SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 920b57cec5SDimitry Andric #include "llvm/Transforms/Utils/SimplifyLibCalls.h" 93480093f4SDimitry Andric #include "llvm/Transforms/Utils/SizeOpts.h" 
940b57cec5SDimitry Andric #include <algorithm> 950b57cec5SDimitry Andric #include <cassert> 960b57cec5SDimitry Andric #include <cstdint> 970b57cec5SDimitry Andric #include <iterator> 980b57cec5SDimitry Andric #include <limits> 990b57cec5SDimitry Andric #include <memory> 1000b57cec5SDimitry Andric #include <utility> 1010b57cec5SDimitry Andric #include <vector> 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric using namespace llvm; 1040b57cec5SDimitry Andric using namespace llvm::PatternMatch; 1050b57cec5SDimitry Andric 1060b57cec5SDimitry Andric #define DEBUG_TYPE "codegenprepare" 1070b57cec5SDimitry Andric 1080b57cec5SDimitry Andric STATISTIC(NumBlocksElim, "Number of blocks eliminated"); 1090b57cec5SDimitry Andric STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); 1100b57cec5SDimitry Andric STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); 1110b57cec5SDimitry Andric STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " 1120b57cec5SDimitry Andric "sunken Cmps"); 1130b57cec5SDimitry Andric STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " 1140b57cec5SDimitry Andric "of sunken Casts"); 1150b57cec5SDimitry Andric STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " 1160b57cec5SDimitry Andric "computations were sunk"); 1170b57cec5SDimitry Andric STATISTIC(NumMemoryInstsPhiCreated, 1180b57cec5SDimitry Andric "Number of phis created when address " 1190b57cec5SDimitry Andric "computations were sunk to memory instructions"); 1200b57cec5SDimitry Andric STATISTIC(NumMemoryInstsSelectCreated, 1210b57cec5SDimitry Andric "Number of select created when address " 1220b57cec5SDimitry Andric "computations were sunk to memory instructions"); 1230b57cec5SDimitry Andric STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); 1240b57cec5SDimitry Andric STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); 1250b57cec5SDimitry Andric 
STATISTIC(NumAndsAdded, 1260b57cec5SDimitry Andric "Number of and mask instructions added to form ext loads"); 1270b57cec5SDimitry Andric STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized"); 1280b57cec5SDimitry Andric STATISTIC(NumRetsDup, "Number of return instructions duplicated"); 1290b57cec5SDimitry Andric STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); 1300b57cec5SDimitry Andric STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); 1310b57cec5SDimitry Andric STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); 1320b57cec5SDimitry Andric 1330b57cec5SDimitry Andric static cl::opt<bool> DisableBranchOpts( 1340b57cec5SDimitry Andric "disable-cgp-branch-opts", cl::Hidden, cl::init(false), 1350b57cec5SDimitry Andric cl::desc("Disable branch optimizations in CodeGenPrepare")); 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric static cl::opt<bool> 1380b57cec5SDimitry Andric DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), 1390b57cec5SDimitry Andric cl::desc("Disable GC optimizations in CodeGenPrepare")); 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric static cl::opt<bool> DisableSelectToBranch( 1420b57cec5SDimitry Andric "disable-cgp-select2branch", cl::Hidden, cl::init(false), 1430b57cec5SDimitry Andric cl::desc("Disable select to branch conversion.")); 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric static cl::opt<bool> AddrSinkUsingGEPs( 1460b57cec5SDimitry Andric "addr-sink-using-gep", cl::Hidden, cl::init(true), 1470b57cec5SDimitry Andric cl::desc("Address sinking in CGP using GEPs.")); 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric static cl::opt<bool> EnableAndCmpSinking( 1500b57cec5SDimitry Andric "enable-andcmp-sinking", cl::Hidden, cl::init(true), 1510b57cec5SDimitry Andric cl::desc("Enable sinkinig and/cmp into branches.")); 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric static cl::opt<bool> DisableStoreExtract( 
1540b57cec5SDimitry Andric "disable-cgp-store-extract", cl::Hidden, cl::init(false), 1550b57cec5SDimitry Andric cl::desc("Disable store(extract) optimizations in CodeGenPrepare")); 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric static cl::opt<bool> StressStoreExtract( 1580b57cec5SDimitry Andric "stress-cgp-store-extract", cl::Hidden, cl::init(false), 1590b57cec5SDimitry Andric cl::desc("Stress test store(extract) optimizations in CodeGenPrepare")); 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric static cl::opt<bool> DisableExtLdPromotion( 1620b57cec5SDimitry Andric "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), 1630b57cec5SDimitry Andric cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " 1640b57cec5SDimitry Andric "CodeGenPrepare")); 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric static cl::opt<bool> StressExtLdPromotion( 1670b57cec5SDimitry Andric "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), 1680b57cec5SDimitry Andric cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " 1690b57cec5SDimitry Andric "optimization in CodeGenPrepare")); 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric static cl::opt<bool> DisablePreheaderProtect( 1720b57cec5SDimitry Andric "disable-preheader-prot", cl::Hidden, cl::init(false), 1730b57cec5SDimitry Andric cl::desc("Disable protection against removing loop preheaders")); 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric static cl::opt<bool> ProfileGuidedSectionPrefix( 17681ad6265SDimitry Andric "profile-guided-section-prefix", cl::Hidden, cl::init(true), 1770b57cec5SDimitry Andric cl::desc("Use profile info to add section prefix for hot/cold functions")); 1780b57cec5SDimitry Andric 1795ffd83dbSDimitry Andric static cl::opt<bool> ProfileUnknownInSpecialSection( 18081ad6265SDimitry Andric "profile-unknown-in-special-section", cl::Hidden, 1815ffd83dbSDimitry Andric cl::desc("In profiling mode like sampleFDO, if a function doesn't have " 
1825ffd83dbSDimitry Andric "profile, we cannot tell the function is cold for sure because " 1835ffd83dbSDimitry Andric "it may be a function newly added without ever being sampled. " 1845ffd83dbSDimitry Andric "With the flag enabled, compiler can put such profile unknown " 1855ffd83dbSDimitry Andric "functions into a special section, so runtime system can choose " 1865ffd83dbSDimitry Andric "to handle it in a different way than .text section, to save " 1875ffd83dbSDimitry Andric "RAM for example. ")); 1885ffd83dbSDimitry Andric 18981ad6265SDimitry Andric static cl::opt<bool> BBSectionsGuidedSectionPrefix( 19081ad6265SDimitry Andric "bbsections-guided-section-prefix", cl::Hidden, cl::init(true), 19181ad6265SDimitry Andric cl::desc("Use the basic-block-sections profile to determine the text " 19281ad6265SDimitry Andric "section prefix for hot functions. Functions with " 19381ad6265SDimitry Andric "basic-block-sections profile will be placed in `.text.hot` " 19481ad6265SDimitry Andric "regardless of their FDO profile info. 
Other functions won't be " 19581ad6265SDimitry Andric "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " 19681ad6265SDimitry Andric "profiles.")); 19781ad6265SDimitry Andric 1980b57cec5SDimitry Andric static cl::opt<unsigned> FreqRatioToSkipMerge( 1990b57cec5SDimitry Andric "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), 2000b57cec5SDimitry Andric cl::desc("Skip merging empty blocks if (frequency of empty block) / " 2010b57cec5SDimitry Andric "(frequency of destination block) is greater than this ratio")); 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric static cl::opt<bool> ForceSplitStore( 2040b57cec5SDimitry Andric "force-split-store", cl::Hidden, cl::init(false), 2050b57cec5SDimitry Andric cl::desc("Force store splitting no matter what the target query says.")); 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric static cl::opt<bool> 2080b57cec5SDimitry Andric EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, 2090b57cec5SDimitry Andric cl::desc("Enable merging of redundant sexts when one is dominating" 2100b57cec5SDimitry Andric " the other."), cl::init(true)); 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric static cl::opt<bool> DisableComplexAddrModes( 2130b57cec5SDimitry Andric "disable-complex-addr-modes", cl::Hidden, cl::init(false), 2140b57cec5SDimitry Andric cl::desc("Disables combining addressing modes with different parts " 2150b57cec5SDimitry Andric "in optimizeMemoryInst.")); 2160b57cec5SDimitry Andric 2170b57cec5SDimitry Andric static cl::opt<bool> 2180b57cec5SDimitry Andric AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), 2190b57cec5SDimitry Andric cl::desc("Allow creation of Phis in Address sinking.")); 2200b57cec5SDimitry Andric 2210b57cec5SDimitry Andric static cl::opt<bool> 2220b57cec5SDimitry Andric AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), 2230b57cec5SDimitry Andric cl::desc("Allow creation of selects in Address sinking.")); 2240b57cec5SDimitry 
Andric 2250b57cec5SDimitry Andric static cl::opt<bool> AddrSinkCombineBaseReg( 2260b57cec5SDimitry Andric "addr-sink-combine-base-reg", cl::Hidden, cl::init(true), 2270b57cec5SDimitry Andric cl::desc("Allow combining of BaseReg field in Address sinking.")); 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric static cl::opt<bool> AddrSinkCombineBaseGV( 2300b57cec5SDimitry Andric "addr-sink-combine-base-gv", cl::Hidden, cl::init(true), 2310b57cec5SDimitry Andric cl::desc("Allow combining of BaseGV field in Address sinking.")); 2320b57cec5SDimitry Andric 2330b57cec5SDimitry Andric static cl::opt<bool> AddrSinkCombineBaseOffs( 2340b57cec5SDimitry Andric "addr-sink-combine-base-offs", cl::Hidden, cl::init(true), 2350b57cec5SDimitry Andric cl::desc("Allow combining of BaseOffs field in Address sinking.")); 2360b57cec5SDimitry Andric 2370b57cec5SDimitry Andric static cl::opt<bool> AddrSinkCombineScaledReg( 2380b57cec5SDimitry Andric "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), 2390b57cec5SDimitry Andric cl::desc("Allow combining of ScaledReg field in Address sinking.")); 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric static cl::opt<bool> 2420b57cec5SDimitry Andric EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, 2430b57cec5SDimitry Andric cl::init(true), 2440b57cec5SDimitry Andric cl::desc("Enable splitting large offset of GEP.")); 2450b57cec5SDimitry Andric 246480093f4SDimitry Andric static cl::opt<bool> EnableICMP_EQToICMP_ST( 247480093f4SDimitry Andric "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), 248480093f4SDimitry Andric cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion.")); 249480093f4SDimitry Andric 2505ffd83dbSDimitry Andric static cl::opt<bool> 2515ffd83dbSDimitry Andric VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), 2525ffd83dbSDimitry Andric cl::desc("Enable BFI update verification for " 2535ffd83dbSDimitry Andric "CodeGenPrepare.")); 2545ffd83dbSDimitry Andric 2555ffd83dbSDimitry Andric 
static cl::opt<bool> OptimizePhiTypes( 2565ffd83dbSDimitry Andric "cgp-optimize-phi-types", cl::Hidden, cl::init(false), 2575ffd83dbSDimitry Andric cl::desc("Enable converting phi types in CodeGenPrepare")); 2585ffd83dbSDimitry Andric 2590b57cec5SDimitry Andric namespace { 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric enum ExtType { 2620b57cec5SDimitry Andric ZeroExtension, // Zero extension has been seen. 2630b57cec5SDimitry Andric SignExtension, // Sign extension has been seen. 2640b57cec5SDimitry Andric BothExtension // This extension type is used if we saw sext after 2650b57cec5SDimitry Andric // ZeroExtension had been set, or if we saw zext after 2660b57cec5SDimitry Andric // SignExtension had been set. It makes the type 2670b57cec5SDimitry Andric // information of a promoted instruction invalid. 2680b57cec5SDimitry Andric }; 2690b57cec5SDimitry Andric 2700b57cec5SDimitry Andric using SetOfInstrs = SmallPtrSet<Instruction *, 16>; 2710b57cec5SDimitry Andric using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>; 2720b57cec5SDimitry Andric using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>; 2730b57cec5SDimitry Andric using SExts = SmallVector<Instruction *, 16>; 2740b57cec5SDimitry Andric using ValueToSExts = DenseMap<Value *, SExts>; 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric class TypePromotionTransaction; 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric class CodeGenPrepare : public FunctionPass { 2790b57cec5SDimitry Andric const TargetMachine *TM = nullptr; 2800b57cec5SDimitry Andric const TargetSubtargetInfo *SubtargetInfo; 2810b57cec5SDimitry Andric const TargetLowering *TLI = nullptr; 2820b57cec5SDimitry Andric const TargetRegisterInfo *TRI; 2830b57cec5SDimitry Andric const TargetTransformInfo *TTI = nullptr; 28481ad6265SDimitry Andric const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; 2850b57cec5SDimitry Andric const TargetLibraryInfo *TLInfo; 2860b57cec5SDimitry Andric const LoopInfo *LI; 
2870b57cec5SDimitry Andric std::unique_ptr<BlockFrequencyInfo> BFI; 2880b57cec5SDimitry Andric std::unique_ptr<BranchProbabilityInfo> BPI; 289480093f4SDimitry Andric ProfileSummaryInfo *PSI; 2900b57cec5SDimitry Andric 2910b57cec5SDimitry Andric /// As we scan instructions optimizing them, this is the next instruction 2920b57cec5SDimitry Andric /// to optimize. Transforms that can invalidate this should update it. 2930b57cec5SDimitry Andric BasicBlock::iterator CurInstIterator; 2940b57cec5SDimitry Andric 2950b57cec5SDimitry Andric /// Keeps track of non-local addresses that have been sunk into a block. 2960b57cec5SDimitry Andric /// This allows us to avoid inserting duplicate code for blocks with 2970b57cec5SDimitry Andric /// multiple load/stores of the same address. The usage of WeakTrackingVH 2980b57cec5SDimitry Andric /// enables SunkAddrs to be treated as a cache whose entries can be 2990b57cec5SDimitry Andric /// invalidated if a sunken address computation has been erased. 3000b57cec5SDimitry Andric ValueMap<Value*, WeakTrackingVH> SunkAddrs; 3010b57cec5SDimitry Andric 3020b57cec5SDimitry Andric /// Keeps track of all instructions inserted for the current function. 3030b57cec5SDimitry Andric SetOfInstrs InsertedInsts; 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric /// Keeps track of the type of the related instruction before their 3060b57cec5SDimitry Andric /// promotion for the current function. 3070b57cec5SDimitry Andric InstrToOrigTy PromotedInsts; 3080b57cec5SDimitry Andric 3090b57cec5SDimitry Andric /// Keep track of instructions removed during promotion. 3100b57cec5SDimitry Andric SetOfInstrs RemovedInsts; 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric /// Keep track of sext chains based on their initial value. 
3130b57cec5SDimitry Andric DenseMap<Value *, Instruction *> SeenChainsForSExt; 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric /// Keep track of GEPs accessing the same data structures such as structs or 3160b57cec5SDimitry Andric /// arrays that are candidates to be split later because of their large 3170b57cec5SDimitry Andric /// size. 3180b57cec5SDimitry Andric MapVector< 3190b57cec5SDimitry Andric AssertingVH<Value>, 3200b57cec5SDimitry Andric SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>> 3210b57cec5SDimitry Andric LargeOffsetGEPMap; 3220b57cec5SDimitry Andric 3230b57cec5SDimitry Andric /// Keep track of new GEP base after splitting the GEPs having large offset. 3240b57cec5SDimitry Andric SmallSet<AssertingVH<Value>, 2> NewGEPBases; 3250b57cec5SDimitry Andric 3260b57cec5SDimitry Andric /// Map serial numbers to Large offset GEPs. 3270b57cec5SDimitry Andric DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID; 3280b57cec5SDimitry Andric 3290b57cec5SDimitry Andric /// Keep track of SExt promoted. 3300b57cec5SDimitry Andric ValueToSExts ValToSExtendedUses; 3310b57cec5SDimitry Andric 332480093f4SDimitry Andric /// True if the function has the OptSize attribute. 3330b57cec5SDimitry Andric bool OptSize; 3340b57cec5SDimitry Andric 3350b57cec5SDimitry Andric /// DataLayout for the Function being processed. 3360b57cec5SDimitry Andric const DataLayout *DL = nullptr; 3370b57cec5SDimitry Andric 3380b57cec5SDimitry Andric /// Building the dominator tree can be expensive, so we only build it 3390b57cec5SDimitry Andric /// lazily and update it when required. 
3400b57cec5SDimitry Andric std::unique_ptr<DominatorTree> DT; 3410b57cec5SDimitry Andric 3420b57cec5SDimitry Andric public: 3430b57cec5SDimitry Andric static char ID; // Pass identification, replacement for typeid 3440b57cec5SDimitry Andric 3450b57cec5SDimitry Andric CodeGenPrepare() : FunctionPass(ID) { 3460b57cec5SDimitry Andric initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric 3490b57cec5SDimitry Andric bool runOnFunction(Function &F) override; 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric StringRef getPassName() const override { return "CodeGen Prepare"; } 3520b57cec5SDimitry Andric 3530b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 3540b57cec5SDimitry Andric // FIXME: When we can selectively preserve passes, preserve the domtree. 3550b57cec5SDimitry Andric AU.addRequired<ProfileSummaryInfoWrapperPass>(); 3560b57cec5SDimitry Andric AU.addRequired<TargetLibraryInfoWrapperPass>(); 3575ffd83dbSDimitry Andric AU.addRequired<TargetPassConfig>(); 3580b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 3590b57cec5SDimitry Andric AU.addRequired<LoopInfoWrapperPass>(); 36081ad6265SDimitry Andric AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>(); 3610b57cec5SDimitry Andric } 3620b57cec5SDimitry Andric 3630b57cec5SDimitry Andric private: 3640b57cec5SDimitry Andric template <typename F> 3650b57cec5SDimitry Andric void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) { 3660b57cec5SDimitry Andric // Substituting can cause recursive simplifications, which can invalidate 3670b57cec5SDimitry Andric // our iterator. Use a WeakTrackingVH to hold onto it in case this 3680b57cec5SDimitry Andric // happens. 
3690b57cec5SDimitry Andric Value *CurValue = &*CurInstIterator; 3700b57cec5SDimitry Andric WeakTrackingVH IterHandle(CurValue); 3710b57cec5SDimitry Andric 3720b57cec5SDimitry Andric f(); 3730b57cec5SDimitry Andric 3740b57cec5SDimitry Andric // If the iterator instruction was recursively deleted, start over at the 3750b57cec5SDimitry Andric // start of the block. 3760b57cec5SDimitry Andric if (IterHandle != CurValue) { 3770b57cec5SDimitry Andric CurInstIterator = BB->begin(); 3780b57cec5SDimitry Andric SunkAddrs.clear(); 3790b57cec5SDimitry Andric } 3800b57cec5SDimitry Andric } 3810b57cec5SDimitry Andric 3820b57cec5SDimitry Andric // Get the DominatorTree, building if necessary. 3830b57cec5SDimitry Andric DominatorTree &getDT(Function &F) { 3840b57cec5SDimitry Andric if (!DT) 3858bcb0991SDimitry Andric DT = std::make_unique<DominatorTree>(F); 3860b57cec5SDimitry Andric return *DT; 3870b57cec5SDimitry Andric } 3880b57cec5SDimitry Andric 389e8d8bef9SDimitry Andric void removeAllAssertingVHReferences(Value *V); 390fe6060f1SDimitry Andric bool eliminateAssumptions(Function &F); 3910b57cec5SDimitry Andric bool eliminateFallThrough(Function &F); 3920b57cec5SDimitry Andric bool eliminateMostlyEmptyBlocks(Function &F); 3930b57cec5SDimitry Andric BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); 3940b57cec5SDimitry Andric bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; 3950b57cec5SDimitry Andric void eliminateMostlyEmptyBlock(BasicBlock *BB); 3960b57cec5SDimitry Andric bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, 3970b57cec5SDimitry Andric bool isPreheader); 398e8d8bef9SDimitry Andric bool makeBitReverse(Instruction &I); 3990b57cec5SDimitry Andric bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); 4000b57cec5SDimitry Andric bool optimizeInst(Instruction *I, bool &ModifiedDT); 4010b57cec5SDimitry Andric bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 4020b57cec5SDimitry Andric Type *AccessTy, 
unsigned AddrSpace); 4035ffd83dbSDimitry Andric bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr); 4040b57cec5SDimitry Andric bool optimizeInlineAsmInst(CallInst *CS); 4050b57cec5SDimitry Andric bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); 4060b57cec5SDimitry Andric bool optimizeExt(Instruction *&I); 4070b57cec5SDimitry Andric bool optimizeExtUses(Instruction *I); 4080b57cec5SDimitry Andric bool optimizeLoadExt(LoadInst *Load); 4090b57cec5SDimitry Andric bool optimizeShiftInst(BinaryOperator *BO); 4105ffd83dbSDimitry Andric bool optimizeFunnelShift(IntrinsicInst *Fsh); 4110b57cec5SDimitry Andric bool optimizeSelectInst(SelectInst *SI); 4120b57cec5SDimitry Andric bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); 41381ad6265SDimitry Andric bool optimizeSwitchType(SwitchInst *SI); 41481ad6265SDimitry Andric bool optimizeSwitchPhiConstants(SwitchInst *SI); 4150b57cec5SDimitry Andric bool optimizeSwitchInst(SwitchInst *SI); 4160b57cec5SDimitry Andric bool optimizeExtractElementInst(Instruction *Inst); 4170b57cec5SDimitry Andric bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT); 418480093f4SDimitry Andric bool fixupDbgValue(Instruction *I); 4190b57cec5SDimitry Andric bool placeDbgValues(Function &F); 420fe6060f1SDimitry Andric bool placePseudoProbes(Function &F); 4210b57cec5SDimitry Andric bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts, 4220b57cec5SDimitry Andric LoadInst *&LI, Instruction *&Inst, bool HasPromoted); 4230b57cec5SDimitry Andric bool tryToPromoteExts(TypePromotionTransaction &TPT, 4240b57cec5SDimitry Andric const SmallVectorImpl<Instruction *> &Exts, 4250b57cec5SDimitry Andric SmallVectorImpl<Instruction *> &ProfitablyMovedExts, 4260b57cec5SDimitry Andric unsigned CreatedInstsCost = 0); 4270b57cec5SDimitry Andric bool mergeSExts(Function &F); 4280b57cec5SDimitry Andric bool splitLargeGEPOffsets(); 4295ffd83dbSDimitry Andric bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> 
&Visited, 4305ffd83dbSDimitry Andric SmallPtrSetImpl<Instruction *> &DeletedInstrs); 4315ffd83dbSDimitry Andric bool optimizePhiTypes(Function &F); 4320b57cec5SDimitry Andric bool performAddressTypePromotion( 4330b57cec5SDimitry Andric Instruction *&Inst, 4340b57cec5SDimitry Andric bool AllowPromotionWithoutCommonHeader, 4350b57cec5SDimitry Andric bool HasPromoted, TypePromotionTransaction &TPT, 4360b57cec5SDimitry Andric SmallVectorImpl<Instruction *> &SpeculativelyMovedExts); 4370b57cec5SDimitry Andric bool splitBranchCondition(Function &F, bool &ModifiedDT); 4385ffd83dbSDimitry Andric bool simplifyOffsetableRelocate(GCStatepointInst &I); 4390b57cec5SDimitry Andric 4400b57cec5SDimitry Andric bool tryToSinkFreeOperands(Instruction *I); 4415ffd83dbSDimitry Andric bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, 4425ffd83dbSDimitry Andric Value *Arg1, CmpInst *Cmp, 4430b57cec5SDimitry Andric Intrinsic::ID IID); 4440b57cec5SDimitry Andric bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT); 4450b57cec5SDimitry Andric bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT); 4460b57cec5SDimitry Andric bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT); 4475ffd83dbSDimitry Andric void verifyBFIUpdates(Function &F); 4480b57cec5SDimitry Andric }; 4490b57cec5SDimitry Andric 4500b57cec5SDimitry Andric } // end anonymous namespace 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric char CodeGenPrepare::ID = 0; 4530b57cec5SDimitry Andric 4540b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, 4550b57cec5SDimitry Andric "Optimize for code generation", false, false) 45681ad6265SDimitry Andric INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) 457e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 4580b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 459e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 460e8d8bef9SDimitry Andric 
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 461e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 4620b57cec5SDimitry Andric INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, 4630b57cec5SDimitry Andric "Optimize for code generation", false, false) 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); } 4660b57cec5SDimitry Andric 4670b57cec5SDimitry Andric bool CodeGenPrepare::runOnFunction(Function &F) { 4680b57cec5SDimitry Andric if (skipFunction(F)) 4690b57cec5SDimitry Andric return false; 4700b57cec5SDimitry Andric 4710b57cec5SDimitry Andric DL = &F.getParent()->getDataLayout(); 4720b57cec5SDimitry Andric 4730b57cec5SDimitry Andric bool EverMadeChange = false; 4740b57cec5SDimitry Andric // Clear per function information. 4750b57cec5SDimitry Andric InsertedInsts.clear(); 4760b57cec5SDimitry Andric PromotedInsts.clear(); 4770b57cec5SDimitry Andric 4785ffd83dbSDimitry Andric TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); 4790b57cec5SDimitry Andric SubtargetInfo = TM->getSubtargetImpl(F); 4800b57cec5SDimitry Andric TLI = SubtargetInfo->getTargetLowering(); 4810b57cec5SDimitry Andric TRI = SubtargetInfo->getRegisterInfo(); 4828bcb0991SDimitry Andric TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 4830b57cec5SDimitry Andric TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 4840b57cec5SDimitry Andric LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 4850b57cec5SDimitry Andric BPI.reset(new BranchProbabilityInfo(F, *LI)); 4860b57cec5SDimitry Andric BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); 487480093f4SDimitry Andric PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 48881ad6265SDimitry Andric BBSectionsProfileReader = 48981ad6265SDimitry Andric getAnalysisIfAvailable<BasicBlockSectionsProfileReader>(); 4900b57cec5SDimitry Andric OptSize = F.hasOptSize(); 49181ad6265SDimitry Andric // Use the 
basic-block-sections profile to promote hot functions to .text.hot if requested. 49281ad6265SDimitry Andric if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader && 49381ad6265SDimitry Andric BBSectionsProfileReader->isFunctionHot(F.getName())) { 49481ad6265SDimitry Andric F.setSectionPrefix("hot"); 49581ad6265SDimitry Andric } else if (ProfileGuidedSectionPrefix) { 496e8d8bef9SDimitry Andric // The hot attribute overwrites profile count based hotness while profile 497e8d8bef9SDimitry Andric // counts based hotness overwrite the cold attribute. 498e8d8bef9SDimitry Andric // This is a conservative behabvior. 499e8d8bef9SDimitry Andric if (F.hasFnAttribute(Attribute::Hot) || 500e8d8bef9SDimitry Andric PSI->isFunctionHotInCallGraph(&F, *BFI)) 501e8d8bef9SDimitry Andric F.setSectionPrefix("hot"); 502e8d8bef9SDimitry Andric // If PSI shows this function is not hot, we will placed the function 503e8d8bef9SDimitry Andric // into unlikely section if (1) PSI shows this is a cold function, or 504e8d8bef9SDimitry Andric // (2) the function has a attribute of cold. 505e8d8bef9SDimitry Andric else if (PSI->isFunctionColdInCallGraph(&F, *BFI) || 506e8d8bef9SDimitry Andric F.hasFnAttribute(Attribute::Cold)) 507e8d8bef9SDimitry Andric F.setSectionPrefix("unlikely"); 5085ffd83dbSDimitry Andric else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() && 5095ffd83dbSDimitry Andric PSI->isFunctionHotnessUnknown(F)) 510e8d8bef9SDimitry Andric F.setSectionPrefix("unknown"); 5110b57cec5SDimitry Andric } 5120b57cec5SDimitry Andric 5130b57cec5SDimitry Andric /// This optimization identifies DIV instructions that can be 5140b57cec5SDimitry Andric /// profitably bypassed and carried out with a shorter, faster divide. 
5155ffd83dbSDimitry Andric if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) { 5160b57cec5SDimitry Andric const DenseMap<unsigned int, unsigned int> &BypassWidths = 5170b57cec5SDimitry Andric TLI->getBypassSlowDivWidths(); 5180b57cec5SDimitry Andric BasicBlock* BB = &*F.begin(); 5190b57cec5SDimitry Andric while (BB != nullptr) { 5200b57cec5SDimitry Andric // bypassSlowDivision may create new BBs, but we don't want to reapply the 5210b57cec5SDimitry Andric // optimization to those blocks. 5220b57cec5SDimitry Andric BasicBlock* Next = BB->getNextNode(); 523480093f4SDimitry Andric // F.hasOptSize is already checked in the outer if statement. 524480093f4SDimitry Andric if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) 5250b57cec5SDimitry Andric EverMadeChange |= bypassSlowDivision(BB, BypassWidths); 5260b57cec5SDimitry Andric BB = Next; 5270b57cec5SDimitry Andric } 5280b57cec5SDimitry Andric } 5290b57cec5SDimitry Andric 530fe6060f1SDimitry Andric // Get rid of @llvm.assume builtins before attempting to eliminate empty 531fe6060f1SDimitry Andric // blocks, since there might be blocks that only contain @llvm.assume calls 532fe6060f1SDimitry Andric // (plus arguments that we can get rid of). 533fe6060f1SDimitry Andric EverMadeChange |= eliminateAssumptions(F); 534fe6060f1SDimitry Andric 5350b57cec5SDimitry Andric // Eliminate blocks that contain only PHI nodes and an 5360b57cec5SDimitry Andric // unconditional branch. 5370b57cec5SDimitry Andric EverMadeChange |= eliminateMostlyEmptyBlocks(F); 5380b57cec5SDimitry Andric 5390b57cec5SDimitry Andric bool ModifiedDT = false; 5400b57cec5SDimitry Andric if (!DisableBranchOpts) 5410b57cec5SDimitry Andric EverMadeChange |= splitBranchCondition(F, ModifiedDT); 5420b57cec5SDimitry Andric 5430b57cec5SDimitry Andric // Split some critical edges where one of the sources is an indirect branch, 5440b57cec5SDimitry Andric // to help generate sane code for PHIs involving such edges. 
54581ad6265SDimitry Andric EverMadeChange |= 54681ad6265SDimitry Andric SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true); 5470b57cec5SDimitry Andric 5480b57cec5SDimitry Andric bool MadeChange = true; 5490b57cec5SDimitry Andric while (MadeChange) { 5500b57cec5SDimitry Andric MadeChange = false; 5510b57cec5SDimitry Andric DT.reset(); 552349cc55cSDimitry Andric for (BasicBlock &BB : llvm::make_early_inc_range(F)) { 5530b57cec5SDimitry Andric bool ModifiedDTOnIteration = false; 554349cc55cSDimitry Andric MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration); 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric // Restart BB iteration if the dominator tree of the Function was changed 5570b57cec5SDimitry Andric if (ModifiedDTOnIteration) 5580b57cec5SDimitry Andric break; 5590b57cec5SDimitry Andric } 5600b57cec5SDimitry Andric if (EnableTypePromotionMerge && !ValToSExtendedUses.empty()) 5610b57cec5SDimitry Andric MadeChange |= mergeSExts(F); 5620b57cec5SDimitry Andric if (!LargeOffsetGEPMap.empty()) 5630b57cec5SDimitry Andric MadeChange |= splitLargeGEPOffsets(); 5645ffd83dbSDimitry Andric MadeChange |= optimizePhiTypes(F); 5655ffd83dbSDimitry Andric 5665ffd83dbSDimitry Andric if (MadeChange) 5675ffd83dbSDimitry Andric eliminateFallThrough(F); 5680b57cec5SDimitry Andric 5690b57cec5SDimitry Andric // Really free removed instructions during promotion. 
5700b57cec5SDimitry Andric for (Instruction *I : RemovedInsts) 5710b57cec5SDimitry Andric I->deleteValue(); 5720b57cec5SDimitry Andric 5730b57cec5SDimitry Andric EverMadeChange |= MadeChange; 5740b57cec5SDimitry Andric SeenChainsForSExt.clear(); 5750b57cec5SDimitry Andric ValToSExtendedUses.clear(); 5760b57cec5SDimitry Andric RemovedInsts.clear(); 5770b57cec5SDimitry Andric LargeOffsetGEPMap.clear(); 5780b57cec5SDimitry Andric LargeOffsetGEPID.clear(); 5790b57cec5SDimitry Andric } 5800b57cec5SDimitry Andric 581e8d8bef9SDimitry Andric NewGEPBases.clear(); 5820b57cec5SDimitry Andric SunkAddrs.clear(); 5830b57cec5SDimitry Andric 5840b57cec5SDimitry Andric if (!DisableBranchOpts) { 5850b57cec5SDimitry Andric MadeChange = false; 5860b57cec5SDimitry Andric // Use a set vector to get deterministic iteration order. The order the 5870b57cec5SDimitry Andric // blocks are removed may affect whether or not PHI nodes in successors 5880b57cec5SDimitry Andric // are removed. 5890b57cec5SDimitry Andric SmallSetVector<BasicBlock*, 8> WorkList; 5900b57cec5SDimitry Andric for (BasicBlock &BB : F) { 591e8d8bef9SDimitry Andric SmallVector<BasicBlock *, 2> Successors(successors(&BB)); 5920b57cec5SDimitry Andric MadeChange |= ConstantFoldTerminator(&BB, true); 5930b57cec5SDimitry Andric if (!MadeChange) continue; 5940b57cec5SDimitry Andric 595fe6060f1SDimitry Andric for (BasicBlock *Succ : Successors) 596fe6060f1SDimitry Andric if (pred_empty(Succ)) 597fe6060f1SDimitry Andric WorkList.insert(Succ); 5980b57cec5SDimitry Andric } 5990b57cec5SDimitry Andric 6000b57cec5SDimitry Andric // Delete the dead blocks and any of their dead successors. 
6010b57cec5SDimitry Andric MadeChange |= !WorkList.empty(); 6020b57cec5SDimitry Andric while (!WorkList.empty()) { 6030b57cec5SDimitry Andric BasicBlock *BB = WorkList.pop_back_val(); 604e8d8bef9SDimitry Andric SmallVector<BasicBlock*, 2> Successors(successors(BB)); 6050b57cec5SDimitry Andric 6060b57cec5SDimitry Andric DeleteDeadBlock(BB); 6070b57cec5SDimitry Andric 608fe6060f1SDimitry Andric for (BasicBlock *Succ : Successors) 609fe6060f1SDimitry Andric if (pred_empty(Succ)) 610fe6060f1SDimitry Andric WorkList.insert(Succ); 6110b57cec5SDimitry Andric } 6120b57cec5SDimitry Andric 6130b57cec5SDimitry Andric // Merge pairs of basic blocks with unconditional branches, connected by 6140b57cec5SDimitry Andric // a single edge. 6150b57cec5SDimitry Andric if (EverMadeChange || MadeChange) 6160b57cec5SDimitry Andric MadeChange |= eliminateFallThrough(F); 6170b57cec5SDimitry Andric 6180b57cec5SDimitry Andric EverMadeChange |= MadeChange; 6190b57cec5SDimitry Andric } 6200b57cec5SDimitry Andric 6210b57cec5SDimitry Andric if (!DisableGCOpts) { 6225ffd83dbSDimitry Andric SmallVector<GCStatepointInst *, 2> Statepoints; 6230b57cec5SDimitry Andric for (BasicBlock &BB : F) 6240b57cec5SDimitry Andric for (Instruction &I : BB) 6255ffd83dbSDimitry Andric if (auto *SP = dyn_cast<GCStatepointInst>(&I)) 6265ffd83dbSDimitry Andric Statepoints.push_back(SP); 6270b57cec5SDimitry Andric for (auto &I : Statepoints) 6280b57cec5SDimitry Andric EverMadeChange |= simplifyOffsetableRelocate(*I); 6290b57cec5SDimitry Andric } 6300b57cec5SDimitry Andric 6310b57cec5SDimitry Andric // Do this last to clean up use-before-def scenarios introduced by other 6320b57cec5SDimitry Andric // preparatory transforms. 
6330b57cec5SDimitry Andric EverMadeChange |= placeDbgValues(F); 634fe6060f1SDimitry Andric EverMadeChange |= placePseudoProbes(F); 6350b57cec5SDimitry Andric 6365ffd83dbSDimitry Andric #ifndef NDEBUG 6375ffd83dbSDimitry Andric if (VerifyBFIUpdates) 6385ffd83dbSDimitry Andric verifyBFIUpdates(F); 6395ffd83dbSDimitry Andric #endif 6405ffd83dbSDimitry Andric 6410b57cec5SDimitry Andric return EverMadeChange; 6420b57cec5SDimitry Andric } 6430b57cec5SDimitry Andric 644fe6060f1SDimitry Andric bool CodeGenPrepare::eliminateAssumptions(Function &F) { 645fe6060f1SDimitry Andric bool MadeChange = false; 646fe6060f1SDimitry Andric for (BasicBlock &BB : F) { 647fe6060f1SDimitry Andric CurInstIterator = BB.begin(); 648fe6060f1SDimitry Andric while (CurInstIterator != BB.end()) { 649fe6060f1SDimitry Andric Instruction *I = &*(CurInstIterator++); 650fe6060f1SDimitry Andric if (auto *Assume = dyn_cast<AssumeInst>(I)) { 651fe6060f1SDimitry Andric MadeChange = true; 652fe6060f1SDimitry Andric Value *Operand = Assume->getOperand(0); 653fe6060f1SDimitry Andric Assume->eraseFromParent(); 654fe6060f1SDimitry Andric 655fe6060f1SDimitry Andric resetIteratorIfInvalidatedWhileCalling(&BB, [&]() { 656fe6060f1SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr); 657fe6060f1SDimitry Andric }); 658fe6060f1SDimitry Andric } 659fe6060f1SDimitry Andric } 660fe6060f1SDimitry Andric } 661fe6060f1SDimitry Andric return MadeChange; 662fe6060f1SDimitry Andric } 663fe6060f1SDimitry Andric 664e8d8bef9SDimitry Andric /// An instruction is about to be deleted, so remove all references to it in our 665e8d8bef9SDimitry Andric /// GEP-tracking data strcutures. 
666e8d8bef9SDimitry Andric void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) { 667e8d8bef9SDimitry Andric LargeOffsetGEPMap.erase(V); 668e8d8bef9SDimitry Andric NewGEPBases.erase(V); 669e8d8bef9SDimitry Andric 670e8d8bef9SDimitry Andric auto GEP = dyn_cast<GetElementPtrInst>(V); 671e8d8bef9SDimitry Andric if (!GEP) 672e8d8bef9SDimitry Andric return; 673e8d8bef9SDimitry Andric 674e8d8bef9SDimitry Andric LargeOffsetGEPID.erase(GEP); 675e8d8bef9SDimitry Andric 676e8d8bef9SDimitry Andric auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand()); 677e8d8bef9SDimitry Andric if (VecI == LargeOffsetGEPMap.end()) 678e8d8bef9SDimitry Andric return; 679e8d8bef9SDimitry Andric 680e8d8bef9SDimitry Andric auto &GEPVector = VecI->second; 681349cc55cSDimitry Andric llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; }); 682e8d8bef9SDimitry Andric 683e8d8bef9SDimitry Andric if (GEPVector.empty()) 684e8d8bef9SDimitry Andric LargeOffsetGEPMap.erase(VecI); 685e8d8bef9SDimitry Andric } 686e8d8bef9SDimitry Andric 6875ffd83dbSDimitry Andric // Verify BFI has been updated correctly by recomputing BFI and comparing them. 6885ffd83dbSDimitry Andric void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) { 6895ffd83dbSDimitry Andric DominatorTree NewDT(F); 6905ffd83dbSDimitry Andric LoopInfo NewLI(NewDT); 6915ffd83dbSDimitry Andric BranchProbabilityInfo NewBPI(F, NewLI, TLInfo); 6925ffd83dbSDimitry Andric BlockFrequencyInfo NewBFI(F, NewBPI, NewLI); 6935ffd83dbSDimitry Andric NewBFI.verifyMatch(*BFI); 6945ffd83dbSDimitry Andric } 6955ffd83dbSDimitry Andric 6960b57cec5SDimitry Andric /// Merge basic blocks which are connected by a single edge, where one of the 6970b57cec5SDimitry Andric /// basic blocks has a single successor pointing to the other basic block, 6980b57cec5SDimitry Andric /// which has a single predecessor. 
6990b57cec5SDimitry Andric bool CodeGenPrepare::eliminateFallThrough(Function &F) { 7000b57cec5SDimitry Andric bool Changed = false; 7010b57cec5SDimitry Andric // Scan all of the blocks in the function, except for the entry block. 7020b57cec5SDimitry Andric // Use a temporary array to avoid iterator being invalidated when 7030b57cec5SDimitry Andric // deleting blocks. 7040b57cec5SDimitry Andric SmallVector<WeakTrackingVH, 16> Blocks; 705e8d8bef9SDimitry Andric for (auto &Block : llvm::drop_begin(F)) 7060b57cec5SDimitry Andric Blocks.push_back(&Block); 7070b57cec5SDimitry Andric 708e8d8bef9SDimitry Andric SmallSet<WeakTrackingVH, 16> Preds; 7090b57cec5SDimitry Andric for (auto &Block : Blocks) { 7100b57cec5SDimitry Andric auto *BB = cast_or_null<BasicBlock>(Block); 7110b57cec5SDimitry Andric if (!BB) 7120b57cec5SDimitry Andric continue; 7130b57cec5SDimitry Andric // If the destination block has a single pred, then this is a trivial 7140b57cec5SDimitry Andric // edge, just collapse it. 7150b57cec5SDimitry Andric BasicBlock *SinglePred = BB->getSinglePredecessor(); 7160b57cec5SDimitry Andric 7170b57cec5SDimitry Andric // Don't merge if BB's address is taken. 7180b57cec5SDimitry Andric if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; 7190b57cec5SDimitry Andric 7200b57cec5SDimitry Andric BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); 7210b57cec5SDimitry Andric if (Term && !Term->isConditional()) { 7220b57cec5SDimitry Andric Changed = true; 7230b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n"); 7240b57cec5SDimitry Andric 7250b57cec5SDimitry Andric // Merge BB into SinglePred and delete it. 
7260b57cec5SDimitry Andric MergeBlockIntoPredecessor(BB); 727e8d8bef9SDimitry Andric Preds.insert(SinglePred); 7280b57cec5SDimitry Andric } 7290b57cec5SDimitry Andric } 730e8d8bef9SDimitry Andric 731e8d8bef9SDimitry Andric // (Repeatedly) merging blocks into their predecessors can create redundant 732e8d8bef9SDimitry Andric // debug intrinsics. 733fcaf7f86SDimitry Andric for (const auto &Pred : Preds) 734e8d8bef9SDimitry Andric if (auto *BB = cast_or_null<BasicBlock>(Pred)) 735e8d8bef9SDimitry Andric RemoveRedundantDbgInstrs(BB); 736e8d8bef9SDimitry Andric 7370b57cec5SDimitry Andric return Changed; 7380b57cec5SDimitry Andric } 7390b57cec5SDimitry Andric 7400b57cec5SDimitry Andric /// Find a destination block from BB if BB is mergeable empty block. 7410b57cec5SDimitry Andric BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) { 7420b57cec5SDimitry Andric // If this block doesn't end with an uncond branch, ignore it. 7430b57cec5SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); 7440b57cec5SDimitry Andric if (!BI || !BI->isUnconditional()) 7450b57cec5SDimitry Andric return nullptr; 7460b57cec5SDimitry Andric 7470b57cec5SDimitry Andric // If the instruction before the branch (skipping debug info) isn't a phi 7480b57cec5SDimitry Andric // node, then other stuff is happening here. 7490b57cec5SDimitry Andric BasicBlock::iterator BBI = BI->getIterator(); 7500b57cec5SDimitry Andric if (BBI != BB->begin()) { 7510b57cec5SDimitry Andric --BBI; 7520b57cec5SDimitry Andric while (isa<DbgInfoIntrinsic>(BBI)) { 7530b57cec5SDimitry Andric if (BBI == BB->begin()) 7540b57cec5SDimitry Andric break; 7550b57cec5SDimitry Andric --BBI; 7560b57cec5SDimitry Andric } 7570b57cec5SDimitry Andric if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) 7580b57cec5SDimitry Andric return nullptr; 7590b57cec5SDimitry Andric } 7600b57cec5SDimitry Andric 7610b57cec5SDimitry Andric // Do not break infinite loops. 
7620b57cec5SDimitry Andric BasicBlock *DestBB = BI->getSuccessor(0); 7630b57cec5SDimitry Andric if (DestBB == BB) 7640b57cec5SDimitry Andric return nullptr; 7650b57cec5SDimitry Andric 7660b57cec5SDimitry Andric if (!canMergeBlocks(BB, DestBB)) 7670b57cec5SDimitry Andric DestBB = nullptr; 7680b57cec5SDimitry Andric 7690b57cec5SDimitry Andric return DestBB; 7700b57cec5SDimitry Andric } 7710b57cec5SDimitry Andric 7720b57cec5SDimitry Andric /// Eliminate blocks that contain only PHI nodes, debug info directives, and an 7730b57cec5SDimitry Andric /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split 7740b57cec5SDimitry Andric /// edges in ways that are non-optimal for isel. Start by eliminating these 7750b57cec5SDimitry Andric /// blocks so we can split them the way we want them. 7760b57cec5SDimitry Andric bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { 7770b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 16> Preheaders; 7780b57cec5SDimitry Andric SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end()); 7790b57cec5SDimitry Andric while (!LoopList.empty()) { 7800b57cec5SDimitry Andric Loop *L = LoopList.pop_back_val(); 781e8d8bef9SDimitry Andric llvm::append_range(LoopList, *L); 7820b57cec5SDimitry Andric if (BasicBlock *Preheader = L->getLoopPreheader()) 7830b57cec5SDimitry Andric Preheaders.insert(Preheader); 7840b57cec5SDimitry Andric } 7850b57cec5SDimitry Andric 7860b57cec5SDimitry Andric bool MadeChange = false; 7870b57cec5SDimitry Andric // Copy blocks into a temporary array to avoid iterator invalidation issues 7880b57cec5SDimitry Andric // as we remove them. 7890b57cec5SDimitry Andric // Note that this intentionally skips the entry block. 
7900b57cec5SDimitry Andric SmallVector<WeakTrackingVH, 16> Blocks; 791e8d8bef9SDimitry Andric for (auto &Block : llvm::drop_begin(F)) 7920b57cec5SDimitry Andric Blocks.push_back(&Block); 7930b57cec5SDimitry Andric 7940b57cec5SDimitry Andric for (auto &Block : Blocks) { 7950b57cec5SDimitry Andric BasicBlock *BB = cast_or_null<BasicBlock>(Block); 7960b57cec5SDimitry Andric if (!BB) 7970b57cec5SDimitry Andric continue; 7980b57cec5SDimitry Andric BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB); 7990b57cec5SDimitry Andric if (!DestBB || 8000b57cec5SDimitry Andric !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB))) 8010b57cec5SDimitry Andric continue; 8020b57cec5SDimitry Andric 8030b57cec5SDimitry Andric eliminateMostlyEmptyBlock(BB); 8040b57cec5SDimitry Andric MadeChange = true; 8050b57cec5SDimitry Andric } 8060b57cec5SDimitry Andric return MadeChange; 8070b57cec5SDimitry Andric } 8080b57cec5SDimitry Andric 8090b57cec5SDimitry Andric bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, 8100b57cec5SDimitry Andric BasicBlock *DestBB, 8110b57cec5SDimitry Andric bool isPreheader) { 8120b57cec5SDimitry Andric // Do not delete loop preheaders if doing so would create a critical edge. 8130b57cec5SDimitry Andric // Loop preheaders can be good locations to spill registers. If the 8140b57cec5SDimitry Andric // preheader is deleted and we create a critical edge, registers may be 8150b57cec5SDimitry Andric // spilled in the loop body instead. 8160b57cec5SDimitry Andric if (!DisablePreheaderProtect && isPreheader && 8170b57cec5SDimitry Andric !(BB->getSinglePredecessor() && 8180b57cec5SDimitry Andric BB->getSinglePredecessor()->getSingleSuccessor())) 8190b57cec5SDimitry Andric return false; 8200b57cec5SDimitry Andric 8210b57cec5SDimitry Andric // Skip merging if the block's successor is also a successor to any callbr 8220b57cec5SDimitry Andric // that leads to this block. 8230b57cec5SDimitry Andric // FIXME: Is this really needed? 
Is this a correctness issue? 824fe6060f1SDimitry Andric for (BasicBlock *Pred : predecessors(BB)) { 825fe6060f1SDimitry Andric if (auto *CBI = dyn_cast<CallBrInst>((Pred)->getTerminator())) 8260b57cec5SDimitry Andric for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i) 8270b57cec5SDimitry Andric if (DestBB == CBI->getSuccessor(i)) 8280b57cec5SDimitry Andric return false; 8290b57cec5SDimitry Andric } 8300b57cec5SDimitry Andric 8310b57cec5SDimitry Andric // Try to skip merging if the unique predecessor of BB is terminated by a 8320b57cec5SDimitry Andric // switch or indirect branch instruction, and BB is used as an incoming block 8330b57cec5SDimitry Andric // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to 8340b57cec5SDimitry Andric // add COPY instructions in the predecessor of BB instead of BB (if it is not 8350b57cec5SDimitry Andric // merged). Note that the critical edge created by merging such blocks wont be 8360b57cec5SDimitry Andric // split in MachineSink because the jump table is not analyzable. By keeping 8370b57cec5SDimitry Andric // such empty block (BB), ISel will place COPY instructions in BB, not in the 8380b57cec5SDimitry Andric // predecessor of BB. 
8390b57cec5SDimitry Andric BasicBlock *Pred = BB->getUniquePredecessor(); 8400b57cec5SDimitry Andric if (!Pred || 8410b57cec5SDimitry Andric !(isa<SwitchInst>(Pred->getTerminator()) || 8420b57cec5SDimitry Andric isa<IndirectBrInst>(Pred->getTerminator()))) 8430b57cec5SDimitry Andric return true; 8440b57cec5SDimitry Andric 8450b57cec5SDimitry Andric if (BB->getTerminator() != BB->getFirstNonPHIOrDbg()) 8460b57cec5SDimitry Andric return true; 8470b57cec5SDimitry Andric 8480b57cec5SDimitry Andric // We use a simple cost heuristic which determine skipping merging is 8490b57cec5SDimitry Andric // profitable if the cost of skipping merging is less than the cost of 8500b57cec5SDimitry Andric // merging : Cost(skipping merging) < Cost(merging BB), where the 8510b57cec5SDimitry Andric // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and 8520b57cec5SDimitry Andric // the Cost(merging BB) is Freq(Pred) * Cost(Copy). 8530b57cec5SDimitry Andric // Assuming Cost(Copy) == Cost(Branch), we could simplify it to : 8540b57cec5SDimitry Andric // Freq(Pred) / Freq(BB) > 2. 8550b57cec5SDimitry Andric // Note that if there are multiple empty blocks sharing the same incoming 8560b57cec5SDimitry Andric // value for the PHIs in the DestBB, we consider them together. In such 8570b57cec5SDimitry Andric // case, Cost(merging BB) will be the sum of their frequencies. 8580b57cec5SDimitry Andric 8590b57cec5SDimitry Andric if (!isa<PHINode>(DestBB->begin())) 8600b57cec5SDimitry Andric return true; 8610b57cec5SDimitry Andric 8620b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs; 8630b57cec5SDimitry Andric 8640b57cec5SDimitry Andric // Find all other incoming blocks from which incoming values of all PHIs in 8650b57cec5SDimitry Andric // DestBB are the same as the ones from BB. 
866fe6060f1SDimitry Andric for (BasicBlock *DestBBPred : predecessors(DestBB)) { 8670b57cec5SDimitry Andric if (DestBBPred == BB) 8680b57cec5SDimitry Andric continue; 8690b57cec5SDimitry Andric 8700b57cec5SDimitry Andric if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) { 8710b57cec5SDimitry Andric return DestPN.getIncomingValueForBlock(BB) == 8720b57cec5SDimitry Andric DestPN.getIncomingValueForBlock(DestBBPred); 8730b57cec5SDimitry Andric })) 8740b57cec5SDimitry Andric SameIncomingValueBBs.insert(DestBBPred); 8750b57cec5SDimitry Andric } 8760b57cec5SDimitry Andric 8770b57cec5SDimitry Andric // See if all BB's incoming values are same as the value from Pred. In this 8780b57cec5SDimitry Andric // case, no reason to skip merging because COPYs are expected to be place in 8790b57cec5SDimitry Andric // Pred already. 8800b57cec5SDimitry Andric if (SameIncomingValueBBs.count(Pred)) 8810b57cec5SDimitry Andric return true; 8820b57cec5SDimitry Andric 8830b57cec5SDimitry Andric BlockFrequency PredFreq = BFI->getBlockFreq(Pred); 8840b57cec5SDimitry Andric BlockFrequency BBFreq = BFI->getBlockFreq(BB); 8850b57cec5SDimitry Andric 8865ffd83dbSDimitry Andric for (auto *SameValueBB : SameIncomingValueBBs) 8870b57cec5SDimitry Andric if (SameValueBB->getUniquePredecessor() == Pred && 8880b57cec5SDimitry Andric DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB)) 8890b57cec5SDimitry Andric BBFreq += BFI->getBlockFreq(SameValueBB); 8900b57cec5SDimitry Andric 8910b57cec5SDimitry Andric return PredFreq.getFrequency() <= 8920b57cec5SDimitry Andric BBFreq.getFrequency() * FreqRatioToSkipMerge; 8930b57cec5SDimitry Andric } 8940b57cec5SDimitry Andric 8950b57cec5SDimitry Andric /// Return true if we can merge BB into DestBB if there is a single 8960b57cec5SDimitry Andric /// unconditional branch between them, and BB contains no other non-phi 8970b57cec5SDimitry Andric /// instructions. 
8980b57cec5SDimitry Andric bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, 8990b57cec5SDimitry Andric const BasicBlock *DestBB) const { 9000b57cec5SDimitry Andric // We only want to eliminate blocks whose phi nodes are used by phi nodes in 9010b57cec5SDimitry Andric // the successor. If there are more complex condition (e.g. preheaders), 9020b57cec5SDimitry Andric // don't mess around with them. 9030b57cec5SDimitry Andric for (const PHINode &PN : BB->phis()) { 9040b57cec5SDimitry Andric for (const User *U : PN.users()) { 9050b57cec5SDimitry Andric const Instruction *UI = cast<Instruction>(U); 9060b57cec5SDimitry Andric if (UI->getParent() != DestBB || !isa<PHINode>(UI)) 9070b57cec5SDimitry Andric return false; 9080b57cec5SDimitry Andric // If User is inside DestBB block and it is a PHINode then check 9090b57cec5SDimitry Andric // incoming value. If incoming value is not from BB then this is 9100b57cec5SDimitry Andric // a complex condition (e.g. preheaders) we want to avoid here. 9110b57cec5SDimitry Andric if (UI->getParent() == DestBB) { 9120b57cec5SDimitry Andric if (const PHINode *UPN = dyn_cast<PHINode>(UI)) 9130b57cec5SDimitry Andric for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { 9140b57cec5SDimitry Andric Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I)); 9150b57cec5SDimitry Andric if (Insn && Insn->getParent() == BB && 9160b57cec5SDimitry Andric Insn->getParent() != UPN->getIncomingBlock(I)) 9170b57cec5SDimitry Andric return false; 9180b57cec5SDimitry Andric } 9190b57cec5SDimitry Andric } 9200b57cec5SDimitry Andric } 9210b57cec5SDimitry Andric } 9220b57cec5SDimitry Andric 9230b57cec5SDimitry Andric // If BB and DestBB contain any common predecessors, then the phi nodes in BB 9240b57cec5SDimitry Andric // and DestBB may have conflicting incoming values for the block. If so, we 9250b57cec5SDimitry Andric // can't merge the block. 
9260b57cec5SDimitry Andric const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin()); 9270b57cec5SDimitry Andric if (!DestBBPN) return true; // no conflict. 9280b57cec5SDimitry Andric 9290b57cec5SDimitry Andric // Collect the preds of BB. 9300b57cec5SDimitry Andric SmallPtrSet<const BasicBlock*, 16> BBPreds; 9310b57cec5SDimitry Andric if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 9320b57cec5SDimitry Andric // It is faster to get preds from a PHI than with pred_iterator. 9330b57cec5SDimitry Andric for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 9340b57cec5SDimitry Andric BBPreds.insert(BBPN->getIncomingBlock(i)); 9350b57cec5SDimitry Andric } else { 9360b57cec5SDimitry Andric BBPreds.insert(pred_begin(BB), pred_end(BB)); 9370b57cec5SDimitry Andric } 9380b57cec5SDimitry Andric 9390b57cec5SDimitry Andric // Walk the preds of DestBB. 9400b57cec5SDimitry Andric for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { 9410b57cec5SDimitry Andric BasicBlock *Pred = DestBBPN->getIncomingBlock(i); 9420b57cec5SDimitry Andric if (BBPreds.count(Pred)) { // Common predecessor? 9430b57cec5SDimitry Andric for (const PHINode &PN : DestBB->phis()) { 9440b57cec5SDimitry Andric const Value *V1 = PN.getIncomingValueForBlock(Pred); 9450b57cec5SDimitry Andric const Value *V2 = PN.getIncomingValueForBlock(BB); 9460b57cec5SDimitry Andric 9470b57cec5SDimitry Andric // If V2 is a phi node in BB, look up what the mapped value will be. 9480b57cec5SDimitry Andric if (const PHINode *V2PN = dyn_cast<PHINode>(V2)) 9490b57cec5SDimitry Andric if (V2PN->getParent() == BB) 9500b57cec5SDimitry Andric V2 = V2PN->getIncomingValueForBlock(Pred); 9510b57cec5SDimitry Andric 9520b57cec5SDimitry Andric // If there is a conflict, bail out. 
9530b57cec5SDimitry Andric if (V1 != V2) return false; 9540b57cec5SDimitry Andric } 9550b57cec5SDimitry Andric } 9560b57cec5SDimitry Andric } 9570b57cec5SDimitry Andric 9580b57cec5SDimitry Andric return true; 9590b57cec5SDimitry Andric } 9600b57cec5SDimitry Andric 9610b57cec5SDimitry Andric /// Eliminate a basic block that has only phi's and an unconditional branch in 9620b57cec5SDimitry Andric /// it. 9630b57cec5SDimitry Andric void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { 9640b57cec5SDimitry Andric BranchInst *BI = cast<BranchInst>(BB->getTerminator()); 9650b57cec5SDimitry Andric BasicBlock *DestBB = BI->getSuccessor(0); 9660b57cec5SDimitry Andric 9670b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" 9680b57cec5SDimitry Andric << *BB << *DestBB); 9690b57cec5SDimitry Andric 9700b57cec5SDimitry Andric // If the destination block has a single pred, then this is a trivial edge, 9710b57cec5SDimitry Andric // just collapse it. 9720b57cec5SDimitry Andric if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { 9730b57cec5SDimitry Andric if (SinglePred != DestBB) { 9740b57cec5SDimitry Andric assert(SinglePred == BB && 9750b57cec5SDimitry Andric "Single predecessor not the same as predecessor"); 9760b57cec5SDimitry Andric // Merge DestBB into SinglePred/BB and delete it. 9770b57cec5SDimitry Andric MergeBlockIntoPredecessor(DestBB); 9780b57cec5SDimitry Andric // Note: BB(=SinglePred) will not be deleted on this path. 9790b57cec5SDimitry Andric // DestBB(=its single successor) is the one that was deleted. 9800b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n"); 9810b57cec5SDimitry Andric return; 9820b57cec5SDimitry Andric } 9830b57cec5SDimitry Andric } 9840b57cec5SDimitry Andric 9850b57cec5SDimitry Andric // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB 9860b57cec5SDimitry Andric // to handle the new incoming edges it is about to have. 
9870b57cec5SDimitry Andric for (PHINode &PN : DestBB->phis()) { 9880b57cec5SDimitry Andric // Remove the incoming value for BB, and remember it. 9890b57cec5SDimitry Andric Value *InVal = PN.removeIncomingValue(BB, false); 9900b57cec5SDimitry Andric 9910b57cec5SDimitry Andric // Two options: either the InVal is a phi node defined in BB or it is some 9920b57cec5SDimitry Andric // value that dominates BB. 9930b57cec5SDimitry Andric PHINode *InValPhi = dyn_cast<PHINode>(InVal); 9940b57cec5SDimitry Andric if (InValPhi && InValPhi->getParent() == BB) { 9950b57cec5SDimitry Andric // Add all of the input values of the input PHI as inputs of this phi. 9960b57cec5SDimitry Andric for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) 9970b57cec5SDimitry Andric PN.addIncoming(InValPhi->getIncomingValue(i), 9980b57cec5SDimitry Andric InValPhi->getIncomingBlock(i)); 9990b57cec5SDimitry Andric } else { 10000b57cec5SDimitry Andric // Otherwise, add one instance of the dominating value for each edge that 10010b57cec5SDimitry Andric // we will be adding. 10020b57cec5SDimitry Andric if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 10030b57cec5SDimitry Andric for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 10040b57cec5SDimitry Andric PN.addIncoming(InVal, BBPN->getIncomingBlock(i)); 10050b57cec5SDimitry Andric } else { 1006fe6060f1SDimitry Andric for (BasicBlock *Pred : predecessors(BB)) 1007fe6060f1SDimitry Andric PN.addIncoming(InVal, Pred); 10080b57cec5SDimitry Andric } 10090b57cec5SDimitry Andric } 10100b57cec5SDimitry Andric } 10110b57cec5SDimitry Andric 10120b57cec5SDimitry Andric // The PHIs are now updated, change everything that refers to BB to use 10130b57cec5SDimitry Andric // DestBB and remove BB. 
10140b57cec5SDimitry Andric BB->replaceAllUsesWith(DestBB); 10150b57cec5SDimitry Andric BB->eraseFromParent(); 10160b57cec5SDimitry Andric ++NumBlocksElim; 10170b57cec5SDimitry Andric 10180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 10190b57cec5SDimitry Andric } 10200b57cec5SDimitry Andric 10210b57cec5SDimitry Andric // Computes a map of base pointer relocation instructions to corresponding 10220b57cec5SDimitry Andric // derived pointer relocation instructions given a vector of all relocate calls 10230b57cec5SDimitry Andric static void computeBaseDerivedRelocateMap( 10240b57cec5SDimitry Andric const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls, 10250b57cec5SDimitry Andric DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> 10260b57cec5SDimitry Andric &RelocateInstMap) { 10270b57cec5SDimitry Andric // Collect information in two maps: one primarily for locating the base object 10280b57cec5SDimitry Andric // while filling the second map; the second map is the final structure holding 10290b57cec5SDimitry Andric // a mapping between Base and corresponding Derived relocate calls 10300b57cec5SDimitry Andric DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap; 10310b57cec5SDimitry Andric for (auto *ThisRelocate : AllRelocateCalls) { 10320b57cec5SDimitry Andric auto K = std::make_pair(ThisRelocate->getBasePtrIndex(), 10330b57cec5SDimitry Andric ThisRelocate->getDerivedPtrIndex()); 10340b57cec5SDimitry Andric RelocateIdxMap.insert(std::make_pair(K, ThisRelocate)); 10350b57cec5SDimitry Andric } 10360b57cec5SDimitry Andric for (auto &Item : RelocateIdxMap) { 10370b57cec5SDimitry Andric std::pair<unsigned, unsigned> Key = Item.first; 10380b57cec5SDimitry Andric if (Key.first == Key.second) 10390b57cec5SDimitry Andric // Base relocation: nothing to insert 10400b57cec5SDimitry Andric continue; 10410b57cec5SDimitry Andric 10420b57cec5SDimitry Andric GCRelocateInst *I = Item.second; 10430b57cec5SDimitry Andric 
auto BaseKey = std::make_pair(Key.first, Key.first); 10440b57cec5SDimitry Andric 10450b57cec5SDimitry Andric // We're iterating over RelocateIdxMap so we cannot modify it. 10460b57cec5SDimitry Andric auto MaybeBase = RelocateIdxMap.find(BaseKey); 10470b57cec5SDimitry Andric if (MaybeBase == RelocateIdxMap.end()) 10480b57cec5SDimitry Andric // TODO: We might want to insert a new base object relocate and gep off 10490b57cec5SDimitry Andric // that, if there are enough derived object relocates. 10500b57cec5SDimitry Andric continue; 10510b57cec5SDimitry Andric 10520b57cec5SDimitry Andric RelocateInstMap[MaybeBase->second].push_back(I); 10530b57cec5SDimitry Andric } 10540b57cec5SDimitry Andric } 10550b57cec5SDimitry Andric 10560b57cec5SDimitry Andric // Accepts a GEP and extracts the operands into a vector provided they're all 10570b57cec5SDimitry Andric // small integer constants 10580b57cec5SDimitry Andric static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, 10590b57cec5SDimitry Andric SmallVectorImpl<Value *> &OffsetV) { 10600b57cec5SDimitry Andric for (unsigned i = 1; i < GEP->getNumOperands(); i++) { 10610b57cec5SDimitry Andric // Only accept small constant integer operands 10625ffd83dbSDimitry Andric auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i)); 10630b57cec5SDimitry Andric if (!Op || Op->getZExtValue() > 20) 10640b57cec5SDimitry Andric return false; 10650b57cec5SDimitry Andric } 10660b57cec5SDimitry Andric 10670b57cec5SDimitry Andric for (unsigned i = 1; i < GEP->getNumOperands(); i++) 10680b57cec5SDimitry Andric OffsetV.push_back(GEP->getOperand(i)); 10690b57cec5SDimitry Andric return true; 10700b57cec5SDimitry Andric } 10710b57cec5SDimitry Andric 10720b57cec5SDimitry Andric // Takes a RelocatedBase (base pointer relocation instruction) and Targets to 10730b57cec5SDimitry Andric // replace, computes a replacement, and affects it. 
10740b57cec5SDimitry Andric static bool 10750b57cec5SDimitry Andric simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, 10760b57cec5SDimitry Andric const SmallVectorImpl<GCRelocateInst *> &Targets) { 10770b57cec5SDimitry Andric bool MadeChange = false; 10780b57cec5SDimitry Andric // We must ensure the relocation of derived pointer is defined after 10790b57cec5SDimitry Andric // relocation of base pointer. If we find a relocation corresponding to base 10800b57cec5SDimitry Andric // defined earlier than relocation of base then we move relocation of base 10810b57cec5SDimitry Andric // right before found relocation. We consider only relocation in the same 10820b57cec5SDimitry Andric // basic block as relocation of base. Relocations from other basic block will 10830b57cec5SDimitry Andric // be skipped by optimization and we do not care about them. 10840b57cec5SDimitry Andric for (auto R = RelocatedBase->getParent()->getFirstInsertionPt(); 10850b57cec5SDimitry Andric &*R != RelocatedBase; ++R) 10865ffd83dbSDimitry Andric if (auto *RI = dyn_cast<GCRelocateInst>(R)) 10870b57cec5SDimitry Andric if (RI->getStatepoint() == RelocatedBase->getStatepoint()) 10880b57cec5SDimitry Andric if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) { 10890b57cec5SDimitry Andric RelocatedBase->moveBefore(RI); 10900b57cec5SDimitry Andric break; 10910b57cec5SDimitry Andric } 10920b57cec5SDimitry Andric 10930b57cec5SDimitry Andric for (GCRelocateInst *ToReplace : Targets) { 10940b57cec5SDimitry Andric assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() && 10950b57cec5SDimitry Andric "Not relocating a derived object of the original base object"); 10960b57cec5SDimitry Andric if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) { 10970b57cec5SDimitry Andric // A duplicate relocate call. TODO: coalesce duplicates. 
10980b57cec5SDimitry Andric continue; 10990b57cec5SDimitry Andric } 11000b57cec5SDimitry Andric 11010b57cec5SDimitry Andric if (RelocatedBase->getParent() != ToReplace->getParent()) { 11020b57cec5SDimitry Andric // Base and derived relocates are in different basic blocks. 11030b57cec5SDimitry Andric // In this case transform is only valid when base dominates derived 11040b57cec5SDimitry Andric // relocate. However it would be too expensive to check dominance 11050b57cec5SDimitry Andric // for each such relocate, so we skip the whole transformation. 11060b57cec5SDimitry Andric continue; 11070b57cec5SDimitry Andric } 11080b57cec5SDimitry Andric 11090b57cec5SDimitry Andric Value *Base = ToReplace->getBasePtr(); 11105ffd83dbSDimitry Andric auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr()); 11110b57cec5SDimitry Andric if (!Derived || Derived->getPointerOperand() != Base) 11120b57cec5SDimitry Andric continue; 11130b57cec5SDimitry Andric 11140b57cec5SDimitry Andric SmallVector<Value *, 2> OffsetV; 11150b57cec5SDimitry Andric if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV)) 11160b57cec5SDimitry Andric continue; 11170b57cec5SDimitry Andric 11180b57cec5SDimitry Andric // Create a Builder and replace the target callsite with a gep 11190b57cec5SDimitry Andric assert(RelocatedBase->getNextNode() && 11200b57cec5SDimitry Andric "Should always have one since it's not a terminator"); 11210b57cec5SDimitry Andric 11220b57cec5SDimitry Andric // Insert after RelocatedBase 11230b57cec5SDimitry Andric IRBuilder<> Builder(RelocatedBase->getNextNode()); 11240b57cec5SDimitry Andric Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); 11250b57cec5SDimitry Andric 11260b57cec5SDimitry Andric // If gc_relocate does not match the actual type, cast it to the right type. 11270b57cec5SDimitry Andric // In theory, there must be a bitcast after gc_relocate if the type does not 11280b57cec5SDimitry Andric // match, and we should reuse it to get the derived pointer. 
But it could be 11290b57cec5SDimitry Andric // cases like this: 11300b57cec5SDimitry Andric // bb1: 11310b57cec5SDimitry Andric // ... 11320b57cec5SDimitry Andric // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) 11330b57cec5SDimitry Andric // br label %merge 11340b57cec5SDimitry Andric // 11350b57cec5SDimitry Andric // bb2: 11360b57cec5SDimitry Andric // ... 11370b57cec5SDimitry Andric // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) 11380b57cec5SDimitry Andric // br label %merge 11390b57cec5SDimitry Andric // 11400b57cec5SDimitry Andric // merge: 11410b57cec5SDimitry Andric // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ] 11420b57cec5SDimitry Andric // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)* 11430b57cec5SDimitry Andric // 11440b57cec5SDimitry Andric // In this case, we can not find the bitcast any more. So we insert a new bitcast 11450b57cec5SDimitry Andric // no matter there is already one or not. In this way, we can handle all cases, and 11460b57cec5SDimitry Andric // the extra bitcast should be optimized away in later passes. 11470b57cec5SDimitry Andric Value *ActualRelocatedBase = RelocatedBase; 11480b57cec5SDimitry Andric if (RelocatedBase->getType() != Base->getType()) { 11490b57cec5SDimitry Andric ActualRelocatedBase = 11500b57cec5SDimitry Andric Builder.CreateBitCast(RelocatedBase, Base->getType()); 11510b57cec5SDimitry Andric } 11520b57cec5SDimitry Andric Value *Replacement = Builder.CreateGEP( 11530b57cec5SDimitry Andric Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV)); 11540b57cec5SDimitry Andric Replacement->takeName(ToReplace); 11550b57cec5SDimitry Andric // If the newly generated derived pointer's type does not match the original derived 11560b57cec5SDimitry Andric // pointer's type, cast the new derived pointer to match it. Same reasoning as above. 
11570b57cec5SDimitry Andric Value *ActualReplacement = Replacement; 11580b57cec5SDimitry Andric if (Replacement->getType() != ToReplace->getType()) { 11590b57cec5SDimitry Andric ActualReplacement = 11600b57cec5SDimitry Andric Builder.CreateBitCast(Replacement, ToReplace->getType()); 11610b57cec5SDimitry Andric } 11620b57cec5SDimitry Andric ToReplace->replaceAllUsesWith(ActualReplacement); 11630b57cec5SDimitry Andric ToReplace->eraseFromParent(); 11640b57cec5SDimitry Andric 11650b57cec5SDimitry Andric MadeChange = true; 11660b57cec5SDimitry Andric } 11670b57cec5SDimitry Andric return MadeChange; 11680b57cec5SDimitry Andric } 11690b57cec5SDimitry Andric 11700b57cec5SDimitry Andric // Turns this: 11710b57cec5SDimitry Andric // 11720b57cec5SDimitry Andric // %base = ... 11730b57cec5SDimitry Andric // %ptr = gep %base + 15 11740b57cec5SDimitry Andric // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) 11750b57cec5SDimitry Andric // %base' = relocate(%tok, i32 4, i32 4) 11760b57cec5SDimitry Andric // %ptr' = relocate(%tok, i32 4, i32 5) 11770b57cec5SDimitry Andric // %val = load %ptr' 11780b57cec5SDimitry Andric // 11790b57cec5SDimitry Andric // into this: 11800b57cec5SDimitry Andric // 11810b57cec5SDimitry Andric // %base = ... 
11820b57cec5SDimitry Andric // %ptr = gep %base + 15 11830b57cec5SDimitry Andric // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) 11840b57cec5SDimitry Andric // %base' = gc.relocate(%tok, i32 4, i32 4) 11850b57cec5SDimitry Andric // %ptr' = gep %base' + 15 11860b57cec5SDimitry Andric // %val = load %ptr' 11875ffd83dbSDimitry Andric bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) { 11880b57cec5SDimitry Andric bool MadeChange = false; 11890b57cec5SDimitry Andric SmallVector<GCRelocateInst *, 2> AllRelocateCalls; 11900b57cec5SDimitry Andric for (auto *U : I.users()) 11910b57cec5SDimitry Andric if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U)) 11920b57cec5SDimitry Andric // Collect all the relocate calls associated with a statepoint 11930b57cec5SDimitry Andric AllRelocateCalls.push_back(Relocate); 11940b57cec5SDimitry Andric 11950b57cec5SDimitry Andric // We need at least one base pointer relocation + one derived pointer 11960b57cec5SDimitry Andric // relocation to mangle 11970b57cec5SDimitry Andric if (AllRelocateCalls.size() < 2) 11980b57cec5SDimitry Andric return false; 11990b57cec5SDimitry Andric 12000b57cec5SDimitry Andric // RelocateInstMap is a mapping from the base relocate instruction to the 12010b57cec5SDimitry Andric // corresponding derived relocate instructions 12020b57cec5SDimitry Andric DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap; 12030b57cec5SDimitry Andric computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap); 12040b57cec5SDimitry Andric if (RelocateInstMap.empty()) 12050b57cec5SDimitry Andric return false; 12060b57cec5SDimitry Andric 12070b57cec5SDimitry Andric for (auto &Item : RelocateInstMap) 12080b57cec5SDimitry Andric // Item.first is the RelocatedBase to offset against 12090b57cec5SDimitry Andric // Item.second is the vector of Targets to replace 12100b57cec5SDimitry Andric MadeChange = simplifyRelocatesOffABase(Item.first, Item.second); 12110b57cec5SDimitry 
Andric return MadeChange; 12120b57cec5SDimitry Andric } 12130b57cec5SDimitry Andric 12140b57cec5SDimitry Andric /// Sink the specified cast instruction into its user blocks. 12150b57cec5SDimitry Andric static bool SinkCast(CastInst *CI) { 12160b57cec5SDimitry Andric BasicBlock *DefBB = CI->getParent(); 12170b57cec5SDimitry Andric 12180b57cec5SDimitry Andric /// InsertedCasts - Only insert a cast in each block once. 12190b57cec5SDimitry Andric DenseMap<BasicBlock*, CastInst*> InsertedCasts; 12200b57cec5SDimitry Andric 12210b57cec5SDimitry Andric bool MadeChange = false; 12220b57cec5SDimitry Andric for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 12230b57cec5SDimitry Andric UI != E; ) { 12240b57cec5SDimitry Andric Use &TheUse = UI.getUse(); 12250b57cec5SDimitry Andric Instruction *User = cast<Instruction>(*UI); 12260b57cec5SDimitry Andric 12270b57cec5SDimitry Andric // Figure out which BB this cast is used in. For PHI's this is the 12280b57cec5SDimitry Andric // appropriate predecessor block. 12290b57cec5SDimitry Andric BasicBlock *UserBB = User->getParent(); 12300b57cec5SDimitry Andric if (PHINode *PN = dyn_cast<PHINode>(User)) { 12310b57cec5SDimitry Andric UserBB = PN->getIncomingBlock(TheUse); 12320b57cec5SDimitry Andric } 12330b57cec5SDimitry Andric 12340b57cec5SDimitry Andric // Preincrement use iterator so we don't invalidate it. 12350b57cec5SDimitry Andric ++UI; 12360b57cec5SDimitry Andric 12370b57cec5SDimitry Andric // The first insertion point of a block containing an EH pad is after the 12380b57cec5SDimitry Andric // pad. If the pad is the user, we cannot sink the cast past the pad. 12390b57cec5SDimitry Andric if (User->isEHPad()) 12400b57cec5SDimitry Andric continue; 12410b57cec5SDimitry Andric 12420b57cec5SDimitry Andric // If the block selected to receive the cast is an EH pad that does not 12430b57cec5SDimitry Andric // allow non-PHI instructions before the terminator, we can't sink the 12440b57cec5SDimitry Andric // cast. 
12450b57cec5SDimitry Andric if (UserBB->getTerminator()->isEHPad()) 12460b57cec5SDimitry Andric continue; 12470b57cec5SDimitry Andric 12480b57cec5SDimitry Andric // If this user is in the same block as the cast, don't change the cast. 12490b57cec5SDimitry Andric if (UserBB == DefBB) continue; 12500b57cec5SDimitry Andric 12510b57cec5SDimitry Andric // If we have already inserted a cast into this block, use it. 12520b57cec5SDimitry Andric CastInst *&InsertedCast = InsertedCasts[UserBB]; 12530b57cec5SDimitry Andric 12540b57cec5SDimitry Andric if (!InsertedCast) { 12550b57cec5SDimitry Andric BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 12560b57cec5SDimitry Andric assert(InsertPt != UserBB->end()); 12570b57cec5SDimitry Andric InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), 12580b57cec5SDimitry Andric CI->getType(), "", &*InsertPt); 12590b57cec5SDimitry Andric InsertedCast->setDebugLoc(CI->getDebugLoc()); 12600b57cec5SDimitry Andric } 12610b57cec5SDimitry Andric 12620b57cec5SDimitry Andric // Replace a use of the cast with a use of the new cast. 12630b57cec5SDimitry Andric TheUse = InsertedCast; 12640b57cec5SDimitry Andric MadeChange = true; 12650b57cec5SDimitry Andric ++NumCastUses; 12660b57cec5SDimitry Andric } 12670b57cec5SDimitry Andric 12680b57cec5SDimitry Andric // If we removed all uses, nuke the cast. 12690b57cec5SDimitry Andric if (CI->use_empty()) { 12700b57cec5SDimitry Andric salvageDebugInfo(*CI); 12710b57cec5SDimitry Andric CI->eraseFromParent(); 12720b57cec5SDimitry Andric MadeChange = true; 12730b57cec5SDimitry Andric } 12740b57cec5SDimitry Andric 12750b57cec5SDimitry Andric return MadeChange; 12760b57cec5SDimitry Andric } 12770b57cec5SDimitry Andric 12780b57cec5SDimitry Andric /// If the specified cast instruction is a noop copy (e.g. 
it's casting from 12790b57cec5SDimitry Andric /// one pointer type to another, i32->i8 on PPC), sink it into user blocks to 12800b57cec5SDimitry Andric /// reduce the number of virtual registers that must be created and coalesced. 12810b57cec5SDimitry Andric /// 12820b57cec5SDimitry Andric /// Return true if any changes are made. 12830b57cec5SDimitry Andric static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, 12840b57cec5SDimitry Andric const DataLayout &DL) { 12850b57cec5SDimitry Andric // Sink only "cheap" (or nop) address-space casts. This is a weaker condition 12860b57cec5SDimitry Andric // than sinking only nop casts, but is helpful on some platforms. 12870b57cec5SDimitry Andric if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) { 12880b57cec5SDimitry Andric if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(), 12890b57cec5SDimitry Andric ASC->getDestAddressSpace())) 12900b57cec5SDimitry Andric return false; 12910b57cec5SDimitry Andric } 12920b57cec5SDimitry Andric 12930b57cec5SDimitry Andric // If this is a noop copy, 12940b57cec5SDimitry Andric EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType()); 12950b57cec5SDimitry Andric EVT DstVT = TLI.getValueType(DL, CI->getType()); 12960b57cec5SDimitry Andric 12970b57cec5SDimitry Andric // This is an fp<->int conversion? 12980b57cec5SDimitry Andric if (SrcVT.isInteger() != DstVT.isInteger()) 12990b57cec5SDimitry Andric return false; 13000b57cec5SDimitry Andric 13010b57cec5SDimitry Andric // If this is an extension, it will be a zero or sign extension, which 13020b57cec5SDimitry Andric // isn't a noop. 13030b57cec5SDimitry Andric if (SrcVT.bitsLT(DstVT)) return false; 13040b57cec5SDimitry Andric 13050b57cec5SDimitry Andric // If these values will be promoted, find out what they will be promoted 13060b57cec5SDimitry Andric // to. This helps us consider truncates on PPC as noop copies when they 13070b57cec5SDimitry Andric // are. 
13080b57cec5SDimitry Andric if (TLI.getTypeAction(CI->getContext(), SrcVT) == 13090b57cec5SDimitry Andric TargetLowering::TypePromoteInteger) 13100b57cec5SDimitry Andric SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); 13110b57cec5SDimitry Andric if (TLI.getTypeAction(CI->getContext(), DstVT) == 13120b57cec5SDimitry Andric TargetLowering::TypePromoteInteger) 13130b57cec5SDimitry Andric DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); 13140b57cec5SDimitry Andric 13150b57cec5SDimitry Andric // If, after promotion, these are the same types, this is a noop copy. 13160b57cec5SDimitry Andric if (SrcVT != DstVT) 13170b57cec5SDimitry Andric return false; 13180b57cec5SDimitry Andric 13190b57cec5SDimitry Andric return SinkCast(CI); 13200b57cec5SDimitry Andric } 13210b57cec5SDimitry Andric 1322fe6060f1SDimitry Andric // Match a simple increment by constant operation. Note that if a sub is 1323fe6060f1SDimitry Andric // matched, the step is negated (as if the step had been canonicalized to 1324fe6060f1SDimitry Andric // an add, even though we leave the instruction alone.) 
1325fe6060f1SDimitry Andric bool matchIncrement(const Instruction* IVInc, Instruction *&LHS, 1326fe6060f1SDimitry Andric Constant *&Step) { 1327fe6060f1SDimitry Andric if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) || 1328fe6060f1SDimitry Andric match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>( 1329fe6060f1SDimitry Andric m_Instruction(LHS), m_Constant(Step))))) 1330fe6060f1SDimitry Andric return true; 1331fe6060f1SDimitry Andric if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) || 1332fe6060f1SDimitry Andric match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>( 1333fe6060f1SDimitry Andric m_Instruction(LHS), m_Constant(Step))))) { 1334fe6060f1SDimitry Andric Step = ConstantExpr::getNeg(Step); 1335fe6060f1SDimitry Andric return true; 1336fe6060f1SDimitry Andric } 1337fe6060f1SDimitry Andric return false; 1338fe6060f1SDimitry Andric } 1339fe6060f1SDimitry Andric 1340fe6060f1SDimitry Andric /// If given \p PN is an inductive variable with value IVInc coming from the 1341fe6060f1SDimitry Andric /// backedge, and on each iteration it gets increased by Step, return pair 1342fe6060f1SDimitry Andric /// <IVInc, Step>. Otherwise, return None. 
1343fe6060f1SDimitry Andric static Optional<std::pair<Instruction *, Constant *> > 1344fe6060f1SDimitry Andric getIVIncrement(const PHINode *PN, const LoopInfo *LI) { 1345fe6060f1SDimitry Andric const Loop *L = LI->getLoopFor(PN->getParent()); 1346fe6060f1SDimitry Andric if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch()) 1347fe6060f1SDimitry Andric return None; 1348fe6060f1SDimitry Andric auto *IVInc = 1349fe6060f1SDimitry Andric dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch())); 1350fe6060f1SDimitry Andric if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L) 1351fe6060f1SDimitry Andric return None; 1352fe6060f1SDimitry Andric Instruction *LHS = nullptr; 1353fe6060f1SDimitry Andric Constant *Step = nullptr; 1354fe6060f1SDimitry Andric if (matchIncrement(IVInc, LHS, Step) && LHS == PN) 1355fe6060f1SDimitry Andric return std::make_pair(IVInc, Step); 1356fe6060f1SDimitry Andric return None; 1357fe6060f1SDimitry Andric } 1358fe6060f1SDimitry Andric 1359fe6060f1SDimitry Andric static bool isIVIncrement(const Value *V, const LoopInfo *LI) { 1360fe6060f1SDimitry Andric auto *I = dyn_cast<Instruction>(V); 1361fe6060f1SDimitry Andric if (!I) 1362fe6060f1SDimitry Andric return false; 1363fe6060f1SDimitry Andric Instruction *LHS = nullptr; 1364fe6060f1SDimitry Andric Constant *Step = nullptr; 1365fe6060f1SDimitry Andric if (!matchIncrement(I, LHS, Step)) 1366fe6060f1SDimitry Andric return false; 1367fe6060f1SDimitry Andric if (auto *PN = dyn_cast<PHINode>(LHS)) 1368fe6060f1SDimitry Andric if (auto IVInc = getIVIncrement(PN, LI)) 1369fe6060f1SDimitry Andric return IVInc->first == I; 1370fe6060f1SDimitry Andric return false; 1371fe6060f1SDimitry Andric } 1372fe6060f1SDimitry Andric 13730b57cec5SDimitry Andric bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, 13745ffd83dbSDimitry Andric Value *Arg0, Value *Arg1, 13750b57cec5SDimitry Andric CmpInst *Cmp, 13760b57cec5SDimitry Andric Intrinsic::ID IID) { 1377fe6060f1SDimitry 
Andric auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) { 1378fe6060f1SDimitry Andric if (!isIVIncrement(BO, LI)) 1379fe6060f1SDimitry Andric return false; 1380fe6060f1SDimitry Andric const Loop *L = LI->getLoopFor(BO->getParent()); 1381fe6060f1SDimitry Andric assert(L && "L should not be null after isIVIncrement()"); 1382fe6060f1SDimitry Andric // Do not risk on moving increment into a child loop. 1383fe6060f1SDimitry Andric if (LI->getLoopFor(Cmp->getParent()) != L) 1384fe6060f1SDimitry Andric return false; 1385fe6060f1SDimitry Andric 1386fe6060f1SDimitry Andric // Finally, we need to ensure that the insert point will dominate all 1387fe6060f1SDimitry Andric // existing uses of the increment. 1388fe6060f1SDimitry Andric 1389fe6060f1SDimitry Andric auto &DT = getDT(*BO->getParent()->getParent()); 1390fe6060f1SDimitry Andric if (DT.dominates(Cmp->getParent(), BO->getParent())) 1391fe6060f1SDimitry Andric // If we're moving up the dom tree, all uses are trivially dominated. 1392fe6060f1SDimitry Andric // (This is the common case for code produced by LSR.) 1393fe6060f1SDimitry Andric return true; 1394fe6060f1SDimitry Andric 1395fe6060f1SDimitry Andric // Otherwise, special case the single use in the phi recurrence. 1396fe6060f1SDimitry Andric return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch()); 1397fe6060f1SDimitry Andric }; 1398fe6060f1SDimitry Andric if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) { 13990b57cec5SDimitry Andric // We used to use a dominator tree here to allow multi-block optimization. 14000b57cec5SDimitry Andric // But that was problematic because: 14010b57cec5SDimitry Andric // 1. It could cause a perf regression by hoisting the math op into the 14020b57cec5SDimitry Andric // critical path. 14030b57cec5SDimitry Andric // 2. It could cause a perf regression by creating a value that was live 14040b57cec5SDimitry Andric // across multiple blocks and increasing register pressure. 
14050b57cec5SDimitry Andric // 3. Use of a dominator tree could cause large compile-time regression. 14060b57cec5SDimitry Andric // This is because we recompute the DT on every change in the main CGP 14070b57cec5SDimitry Andric // run-loop. The recomputing is probably unnecessary in many cases, so if 14080b57cec5SDimitry Andric // that was fixed, using a DT here would be ok. 1409fe6060f1SDimitry Andric // 1410fe6060f1SDimitry Andric // There is one important particular case we still want to handle: if BO is 1411fe6060f1SDimitry Andric // the IV increment. Important properties that make it profitable: 1412fe6060f1SDimitry Andric // - We can speculate IV increment anywhere in the loop (as long as the 1413fe6060f1SDimitry Andric // indvar Phi is its only user); 1414fe6060f1SDimitry Andric // - Upon computing Cmp, we effectively compute something equivalent to the 1415fe6060f1SDimitry Andric // IV increment (despite it loops differently in the IR). So moving it up 1416fe6060f1SDimitry Andric // to the cmp point does not really increase register pressure. 14170b57cec5SDimitry Andric return false; 14180b57cec5SDimitry Andric } 14190b57cec5SDimitry Andric 14200b57cec5SDimitry Andric // We allow matching the canonical IR (add X, C) back to (usubo X, -C). 14210b57cec5SDimitry Andric if (BO->getOpcode() == Instruction::Add && 14220b57cec5SDimitry Andric IID == Intrinsic::usub_with_overflow) { 14230b57cec5SDimitry Andric assert(isa<Constant>(Arg1) && "Unexpected input for usubo"); 14240b57cec5SDimitry Andric Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1)); 14250b57cec5SDimitry Andric } 14260b57cec5SDimitry Andric 14270b57cec5SDimitry Andric // Insert at the first instruction of the pair. 
14280b57cec5SDimitry Andric Instruction *InsertPt = nullptr; 14290b57cec5SDimitry Andric for (Instruction &Iter : *Cmp->getParent()) { 14305ffd83dbSDimitry Andric // If BO is an XOR, it is not guaranteed that it comes after both inputs to 14315ffd83dbSDimitry Andric // the overflow intrinsic are defined. 14325ffd83dbSDimitry Andric if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) { 14330b57cec5SDimitry Andric InsertPt = &Iter; 14340b57cec5SDimitry Andric break; 14350b57cec5SDimitry Andric } 14360b57cec5SDimitry Andric } 14370b57cec5SDimitry Andric assert(InsertPt != nullptr && "Parent block did not contain cmp or binop"); 14380b57cec5SDimitry Andric 14390b57cec5SDimitry Andric IRBuilder<> Builder(InsertPt); 14400b57cec5SDimitry Andric Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1); 14415ffd83dbSDimitry Andric if (BO->getOpcode() != Instruction::Xor) { 14420b57cec5SDimitry Andric Value *Math = Builder.CreateExtractValue(MathOV, 0, "math"); 14430b57cec5SDimitry Andric BO->replaceAllUsesWith(Math); 14445ffd83dbSDimitry Andric } else 14455ffd83dbSDimitry Andric assert(BO->hasOneUse() && 14465ffd83dbSDimitry Andric "Patterns with XOr should use the BO only in the compare"); 14475ffd83dbSDimitry Andric Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov"); 14480b57cec5SDimitry Andric Cmp->replaceAllUsesWith(OV); 14490b57cec5SDimitry Andric Cmp->eraseFromParent(); 14505ffd83dbSDimitry Andric BO->eraseFromParent(); 14510b57cec5SDimitry Andric return true; 14520b57cec5SDimitry Andric } 14530b57cec5SDimitry Andric 14540b57cec5SDimitry Andric /// Match special-case patterns that check for unsigned add overflow. 
14550b57cec5SDimitry Andric static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, 14560b57cec5SDimitry Andric BinaryOperator *&Add) { 14570b57cec5SDimitry Andric // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val) 14580b57cec5SDimitry Andric // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero) 14590b57cec5SDimitry Andric Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1); 14600b57cec5SDimitry Andric 14610b57cec5SDimitry Andric // We are not expecting non-canonical/degenerate code. Just bail out. 14620b57cec5SDimitry Andric if (isa<Constant>(A)) 14630b57cec5SDimitry Andric return false; 14640b57cec5SDimitry Andric 14650b57cec5SDimitry Andric ICmpInst::Predicate Pred = Cmp->getPredicate(); 14660b57cec5SDimitry Andric if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes())) 14670b57cec5SDimitry Andric B = ConstantInt::get(B->getType(), 1); 14680b57cec5SDimitry Andric else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) 14690b57cec5SDimitry Andric B = ConstantInt::get(B->getType(), -1); 14700b57cec5SDimitry Andric else 14710b57cec5SDimitry Andric return false; 14720b57cec5SDimitry Andric 14730b57cec5SDimitry Andric // Check the users of the variable operand of the compare looking for an add 14740b57cec5SDimitry Andric // with the adjusted constant. 14750b57cec5SDimitry Andric for (User *U : A->users()) { 14760b57cec5SDimitry Andric if (match(U, m_Add(m_Specific(A), m_Specific(B)))) { 14770b57cec5SDimitry Andric Add = cast<BinaryOperator>(U); 14780b57cec5SDimitry Andric return true; 14790b57cec5SDimitry Andric } 14800b57cec5SDimitry Andric } 14810b57cec5SDimitry Andric return false; 14820b57cec5SDimitry Andric } 14830b57cec5SDimitry Andric 14840b57cec5SDimitry Andric /// Try to combine the compare into a call to the llvm.uadd.with.overflow 14850b57cec5SDimitry Andric /// intrinsic. Return true if any changes were made. 
14860b57cec5SDimitry Andric bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp, 14870b57cec5SDimitry Andric bool &ModifiedDT) { 14880b57cec5SDimitry Andric Value *A, *B; 14890b57cec5SDimitry Andric BinaryOperator *Add; 14905ffd83dbSDimitry Andric if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) { 14910b57cec5SDimitry Andric if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add)) 14920b57cec5SDimitry Andric return false; 14935ffd83dbSDimitry Andric // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases. 14945ffd83dbSDimitry Andric A = Add->getOperand(0); 14955ffd83dbSDimitry Andric B = Add->getOperand(1); 14965ffd83dbSDimitry Andric } 14970b57cec5SDimitry Andric 14980b57cec5SDimitry Andric if (!TLI->shouldFormOverflowOp(ISD::UADDO, 14995ffd83dbSDimitry Andric TLI->getValueType(*DL, Add->getType()), 15005ffd83dbSDimitry Andric Add->hasNUsesOrMore(2))) 15010b57cec5SDimitry Andric return false; 15020b57cec5SDimitry Andric 15030b57cec5SDimitry Andric // We don't want to move around uses of condition values this late, so we 15040b57cec5SDimitry Andric // check if it is legal to create the call to the intrinsic in the basic 15050b57cec5SDimitry Andric // block containing the icmp. 15060b57cec5SDimitry Andric if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse()) 15070b57cec5SDimitry Andric return false; 15080b57cec5SDimitry Andric 15095ffd83dbSDimitry Andric if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp, 15105ffd83dbSDimitry Andric Intrinsic::uadd_with_overflow)) 15110b57cec5SDimitry Andric return false; 15120b57cec5SDimitry Andric 15130b57cec5SDimitry Andric // Reset callers - do not crash by iterating over a dead instruction. 
15140b57cec5SDimitry Andric ModifiedDT = true; 15150b57cec5SDimitry Andric return true; 15160b57cec5SDimitry Andric } 15170b57cec5SDimitry Andric 15180b57cec5SDimitry Andric bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp, 15190b57cec5SDimitry Andric bool &ModifiedDT) { 15200b57cec5SDimitry Andric // We are not expecting non-canonical/degenerate code. Just bail out. 15210b57cec5SDimitry Andric Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1); 15220b57cec5SDimitry Andric if (isa<Constant>(A) && isa<Constant>(B)) 15230b57cec5SDimitry Andric return false; 15240b57cec5SDimitry Andric 15250b57cec5SDimitry Andric // Convert (A u> B) to (A u< B) to simplify pattern matching. 15260b57cec5SDimitry Andric ICmpInst::Predicate Pred = Cmp->getPredicate(); 15270b57cec5SDimitry Andric if (Pred == ICmpInst::ICMP_UGT) { 15280b57cec5SDimitry Andric std::swap(A, B); 15290b57cec5SDimitry Andric Pred = ICmpInst::ICMP_ULT; 15300b57cec5SDimitry Andric } 15310b57cec5SDimitry Andric // Convert special-case: (A == 0) is the same as (A u< 1). 15320b57cec5SDimitry Andric if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) { 15330b57cec5SDimitry Andric B = ConstantInt::get(B->getType(), 1); 15340b57cec5SDimitry Andric Pred = ICmpInst::ICMP_ULT; 15350b57cec5SDimitry Andric } 15360b57cec5SDimitry Andric // Convert special-case: (A != 0) is the same as (0 u< A). 15370b57cec5SDimitry Andric if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) { 15380b57cec5SDimitry Andric std::swap(A, B); 15390b57cec5SDimitry Andric Pred = ICmpInst::ICMP_ULT; 15400b57cec5SDimitry Andric } 15410b57cec5SDimitry Andric if (Pred != ICmpInst::ICMP_ULT) 15420b57cec5SDimitry Andric return false; 15430b57cec5SDimitry Andric 15440b57cec5SDimitry Andric // Walk the users of a variable operand of a compare looking for a subtract or 15450b57cec5SDimitry Andric // add with that same operand. 
Also match the 2nd operand of the compare to 15460b57cec5SDimitry Andric // the add/sub, but that may be a negated constant operand of an add. 15470b57cec5SDimitry Andric Value *CmpVariableOperand = isa<Constant>(A) ? B : A; 15480b57cec5SDimitry Andric BinaryOperator *Sub = nullptr; 15490b57cec5SDimitry Andric for (User *U : CmpVariableOperand->users()) { 15500b57cec5SDimitry Andric // A - B, A u< B --> usubo(A, B) 15510b57cec5SDimitry Andric if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) { 15520b57cec5SDimitry Andric Sub = cast<BinaryOperator>(U); 15530b57cec5SDimitry Andric break; 15540b57cec5SDimitry Andric } 15550b57cec5SDimitry Andric 15560b57cec5SDimitry Andric // A + (-C), A u< C (canonicalized form of (sub A, C)) 15570b57cec5SDimitry Andric const APInt *CmpC, *AddC; 15580b57cec5SDimitry Andric if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) && 15590b57cec5SDimitry Andric match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) { 15600b57cec5SDimitry Andric Sub = cast<BinaryOperator>(U); 15610b57cec5SDimitry Andric break; 15620b57cec5SDimitry Andric } 15630b57cec5SDimitry Andric } 15640b57cec5SDimitry Andric if (!Sub) 15650b57cec5SDimitry Andric return false; 15660b57cec5SDimitry Andric 15670b57cec5SDimitry Andric if (!TLI->shouldFormOverflowOp(ISD::USUBO, 15685ffd83dbSDimitry Andric TLI->getValueType(*DL, Sub->getType()), 15695ffd83dbSDimitry Andric Sub->hasNUsesOrMore(2))) 15700b57cec5SDimitry Andric return false; 15710b57cec5SDimitry Andric 15725ffd83dbSDimitry Andric if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1), 15735ffd83dbSDimitry Andric Cmp, Intrinsic::usub_with_overflow)) 15740b57cec5SDimitry Andric return false; 15750b57cec5SDimitry Andric 15760b57cec5SDimitry Andric // Reset callers - do not crash by iterating over a dead instruction. 
15770b57cec5SDimitry Andric ModifiedDT = true; 15780b57cec5SDimitry Andric return true; 15790b57cec5SDimitry Andric } 15800b57cec5SDimitry Andric 15810b57cec5SDimitry Andric /// Sink the given CmpInst into user blocks to reduce the number of virtual 15820b57cec5SDimitry Andric /// registers that must be created and coalesced. This is a clear win except on 15830b57cec5SDimitry Andric /// targets with multiple condition code registers (PowerPC), where it might 15840b57cec5SDimitry Andric /// lose; some adjustment may be wanted there. 15850b57cec5SDimitry Andric /// 15860b57cec5SDimitry Andric /// Return true if any changes are made. 15870b57cec5SDimitry Andric static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { 15880b57cec5SDimitry Andric if (TLI.hasMultipleConditionRegisters()) 15890b57cec5SDimitry Andric return false; 15900b57cec5SDimitry Andric 15910b57cec5SDimitry Andric // Avoid sinking soft-FP comparisons, since this can move them into a loop. 15920b57cec5SDimitry Andric if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp)) 15930b57cec5SDimitry Andric return false; 15940b57cec5SDimitry Andric 15950b57cec5SDimitry Andric // Only insert a cmp in each block once. 15960b57cec5SDimitry Andric DenseMap<BasicBlock*, CmpInst*> InsertedCmps; 15970b57cec5SDimitry Andric 15980b57cec5SDimitry Andric bool MadeChange = false; 15990b57cec5SDimitry Andric for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end(); 16000b57cec5SDimitry Andric UI != E; ) { 16010b57cec5SDimitry Andric Use &TheUse = UI.getUse(); 16020b57cec5SDimitry Andric Instruction *User = cast<Instruction>(*UI); 16030b57cec5SDimitry Andric 16040b57cec5SDimitry Andric // Preincrement use iterator so we don't invalidate it. 16050b57cec5SDimitry Andric ++UI; 16060b57cec5SDimitry Andric 16070b57cec5SDimitry Andric // Don't bother for PHI nodes. 
16080b57cec5SDimitry Andric if (isa<PHINode>(User)) 16090b57cec5SDimitry Andric continue; 16100b57cec5SDimitry Andric 16110b57cec5SDimitry Andric // Figure out which BB this cmp is used in. 16120b57cec5SDimitry Andric BasicBlock *UserBB = User->getParent(); 16130b57cec5SDimitry Andric BasicBlock *DefBB = Cmp->getParent(); 16140b57cec5SDimitry Andric 16150b57cec5SDimitry Andric // If this user is in the same block as the cmp, don't change the cmp. 16160b57cec5SDimitry Andric if (UserBB == DefBB) continue; 16170b57cec5SDimitry Andric 16180b57cec5SDimitry Andric // If we have already inserted a cmp into this block, use it. 16190b57cec5SDimitry Andric CmpInst *&InsertedCmp = InsertedCmps[UserBB]; 16200b57cec5SDimitry Andric 16210b57cec5SDimitry Andric if (!InsertedCmp) { 16220b57cec5SDimitry Andric BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 16230b57cec5SDimitry Andric assert(InsertPt != UserBB->end()); 16240b57cec5SDimitry Andric InsertedCmp = 16250b57cec5SDimitry Andric CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), 16260b57cec5SDimitry Andric Cmp->getOperand(0), Cmp->getOperand(1), "", 16270b57cec5SDimitry Andric &*InsertPt); 16280b57cec5SDimitry Andric // Propagate the debug info. 16290b57cec5SDimitry Andric InsertedCmp->setDebugLoc(Cmp->getDebugLoc()); 16300b57cec5SDimitry Andric } 16310b57cec5SDimitry Andric 16320b57cec5SDimitry Andric // Replace a use of the cmp with a use of the new cmp. 16330b57cec5SDimitry Andric TheUse = InsertedCmp; 16340b57cec5SDimitry Andric MadeChange = true; 16350b57cec5SDimitry Andric ++NumCmpUses; 16360b57cec5SDimitry Andric } 16370b57cec5SDimitry Andric 16380b57cec5SDimitry Andric // If we removed all uses, nuke the cmp. 
16390b57cec5SDimitry Andric if (Cmp->use_empty()) { 16400b57cec5SDimitry Andric Cmp->eraseFromParent(); 16410b57cec5SDimitry Andric MadeChange = true; 16420b57cec5SDimitry Andric } 16430b57cec5SDimitry Andric 16440b57cec5SDimitry Andric return MadeChange; 16450b57cec5SDimitry Andric } 16460b57cec5SDimitry Andric 1647480093f4SDimitry Andric /// For pattern like: 1648480093f4SDimitry Andric /// 1649480093f4SDimitry Andric /// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB) 1650480093f4SDimitry Andric /// ... 1651480093f4SDimitry Andric /// DomBB: 1652480093f4SDimitry Andric /// ... 1653480093f4SDimitry Andric /// br DomCond, TrueBB, CmpBB 1654480093f4SDimitry Andric /// CmpBB: (with DomBB being the single predecessor) 1655480093f4SDimitry Andric /// ... 1656480093f4SDimitry Andric /// Cmp = icmp eq CmpOp0, CmpOp1 1657480093f4SDimitry Andric /// ... 1658480093f4SDimitry Andric /// 1659480093f4SDimitry Andric /// It would use two comparison on targets that lowering of icmp sgt/slt is 1660480093f4SDimitry Andric /// different from lowering of icmp eq (PowerPC). This function try to convert 1661480093f4SDimitry Andric /// 'Cmp = icmp eq CmpOp0, CmpOp1' to ' Cmp = icmp slt/sgt CmpOp0, CmpOp1'. 1662480093f4SDimitry Andric /// After that, DomCond and Cmp can use the same comparison so reduce one 1663480093f4SDimitry Andric /// comparison. 1664480093f4SDimitry Andric /// 1665480093f4SDimitry Andric /// Return true if any changes are made. 
1666480093f4SDimitry Andric static bool foldICmpWithDominatingICmp(CmpInst *Cmp, 1667480093f4SDimitry Andric const TargetLowering &TLI) { 1668480093f4SDimitry Andric if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp()) 1669480093f4SDimitry Andric return false; 1670480093f4SDimitry Andric 1671480093f4SDimitry Andric ICmpInst::Predicate Pred = Cmp->getPredicate(); 1672480093f4SDimitry Andric if (Pred != ICmpInst::ICMP_EQ) 1673480093f4SDimitry Andric return false; 1674480093f4SDimitry Andric 1675480093f4SDimitry Andric // If icmp eq has users other than BranchInst and SelectInst, converting it to 1676480093f4SDimitry Andric // icmp slt/sgt would introduce more redundant LLVM IR. 1677480093f4SDimitry Andric for (User *U : Cmp->users()) { 1678480093f4SDimitry Andric if (isa<BranchInst>(U)) 1679480093f4SDimitry Andric continue; 1680480093f4SDimitry Andric if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp) 1681480093f4SDimitry Andric continue; 1682480093f4SDimitry Andric return false; 1683480093f4SDimitry Andric } 1684480093f4SDimitry Andric 1685480093f4SDimitry Andric // This is a cheap/incomplete check for dominance - just match a single 1686480093f4SDimitry Andric // predecessor with a conditional branch. 1687480093f4SDimitry Andric BasicBlock *CmpBB = Cmp->getParent(); 1688480093f4SDimitry Andric BasicBlock *DomBB = CmpBB->getSinglePredecessor(); 1689480093f4SDimitry Andric if (!DomBB) 1690480093f4SDimitry Andric return false; 1691480093f4SDimitry Andric 1692480093f4SDimitry Andric // We want to ensure that the only way control gets to the comparison of 1693480093f4SDimitry Andric // interest is that a less/greater than comparison on the same operands is 1694480093f4SDimitry Andric // false. 
1695480093f4SDimitry Andric Value *DomCond; 1696480093f4SDimitry Andric BasicBlock *TrueBB, *FalseBB; 1697480093f4SDimitry Andric if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB))) 1698480093f4SDimitry Andric return false; 1699480093f4SDimitry Andric if (CmpBB != FalseBB) 1700480093f4SDimitry Andric return false; 1701480093f4SDimitry Andric 1702480093f4SDimitry Andric Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1); 1703480093f4SDimitry Andric ICmpInst::Predicate DomPred; 1704480093f4SDimitry Andric if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1)))) 1705480093f4SDimitry Andric return false; 1706480093f4SDimitry Andric if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT) 1707480093f4SDimitry Andric return false; 1708480093f4SDimitry Andric 1709480093f4SDimitry Andric // Convert the equality comparison to the opposite of the dominating 1710480093f4SDimitry Andric // comparison and swap the direction for all branch/select users. 1711480093f4SDimitry Andric // We have conceptually converted: 1712480093f4SDimitry Andric // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>; 1713480093f4SDimitry Andric // to 1714480093f4SDimitry Andric // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>; 1715480093f4SDimitry Andric // And similarly for branches. 
1716480093f4SDimitry Andric for (User *U : Cmp->users()) { 1717480093f4SDimitry Andric if (auto *BI = dyn_cast<BranchInst>(U)) { 1718480093f4SDimitry Andric assert(BI->isConditional() && "Must be conditional"); 1719480093f4SDimitry Andric BI->swapSuccessors(); 1720480093f4SDimitry Andric continue; 1721480093f4SDimitry Andric } 1722480093f4SDimitry Andric if (auto *SI = dyn_cast<SelectInst>(U)) { 1723480093f4SDimitry Andric // Swap operands 1724480093f4SDimitry Andric SI->swapValues(); 1725480093f4SDimitry Andric SI->swapProfMetadata(); 1726480093f4SDimitry Andric continue; 1727480093f4SDimitry Andric } 1728480093f4SDimitry Andric llvm_unreachable("Must be a branch or a select"); 1729480093f4SDimitry Andric } 1730480093f4SDimitry Andric Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred)); 1731480093f4SDimitry Andric return true; 1732480093f4SDimitry Andric } 1733480093f4SDimitry Andric 17340b57cec5SDimitry Andric bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) { 17350b57cec5SDimitry Andric if (sinkCmpExpression(Cmp, *TLI)) 17360b57cec5SDimitry Andric return true; 17370b57cec5SDimitry Andric 17380b57cec5SDimitry Andric if (combineToUAddWithOverflow(Cmp, ModifiedDT)) 17390b57cec5SDimitry Andric return true; 17400b57cec5SDimitry Andric 17410b57cec5SDimitry Andric if (combineToUSubWithOverflow(Cmp, ModifiedDT)) 17420b57cec5SDimitry Andric return true; 17430b57cec5SDimitry Andric 1744480093f4SDimitry Andric if (foldICmpWithDominatingICmp(Cmp, *TLI)) 1745480093f4SDimitry Andric return true; 1746480093f4SDimitry Andric 17470b57cec5SDimitry Andric return false; 17480b57cec5SDimitry Andric } 17490b57cec5SDimitry Andric 17500b57cec5SDimitry Andric /// Duplicate and sink the given 'and' instruction into user blocks where it is 17510b57cec5SDimitry Andric /// used in a compare to allow isel to generate better code for targets where 17520b57cec5SDimitry Andric /// this operation can be combined. 
17530b57cec5SDimitry Andric /// 17540b57cec5SDimitry Andric /// Return true if any changes are made. 17550b57cec5SDimitry Andric static bool sinkAndCmp0Expression(Instruction *AndI, 17560b57cec5SDimitry Andric const TargetLowering &TLI, 17570b57cec5SDimitry Andric SetOfInstrs &InsertedInsts) { 17580b57cec5SDimitry Andric // Double-check that we're not trying to optimize an instruction that was 17590b57cec5SDimitry Andric // already optimized by some other part of this pass. 17600b57cec5SDimitry Andric assert(!InsertedInsts.count(AndI) && 17610b57cec5SDimitry Andric "Attempting to optimize already optimized and instruction"); 17620b57cec5SDimitry Andric (void) InsertedInsts; 17630b57cec5SDimitry Andric 17640b57cec5SDimitry Andric // Nothing to do for single use in same basic block. 17650b57cec5SDimitry Andric if (AndI->hasOneUse() && 17660b57cec5SDimitry Andric AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent()) 17670b57cec5SDimitry Andric return false; 17680b57cec5SDimitry Andric 17690b57cec5SDimitry Andric // Try to avoid cases where sinking/duplicating is likely to increase register 17700b57cec5SDimitry Andric // pressure. 17710b57cec5SDimitry Andric if (!isa<ConstantInt>(AndI->getOperand(0)) && 17720b57cec5SDimitry Andric !isa<ConstantInt>(AndI->getOperand(1)) && 17730b57cec5SDimitry Andric AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse()) 17740b57cec5SDimitry Andric return false; 17750b57cec5SDimitry Andric 17760b57cec5SDimitry Andric for (auto *U : AndI->users()) { 17770b57cec5SDimitry Andric Instruction *User = cast<Instruction>(U); 17780b57cec5SDimitry Andric 17790b57cec5SDimitry Andric // Only sink 'and' feeding icmp with 0. 
17800b57cec5SDimitry Andric if (!isa<ICmpInst>(User)) 17810b57cec5SDimitry Andric return false; 17820b57cec5SDimitry Andric 17830b57cec5SDimitry Andric auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1)); 17840b57cec5SDimitry Andric if (!CmpC || !CmpC->isZero()) 17850b57cec5SDimitry Andric return false; 17860b57cec5SDimitry Andric } 17870b57cec5SDimitry Andric 17880b57cec5SDimitry Andric if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI)) 17890b57cec5SDimitry Andric return false; 17900b57cec5SDimitry Andric 17910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n"); 17920b57cec5SDimitry Andric LLVM_DEBUG(AndI->getParent()->dump()); 17930b57cec5SDimitry Andric 17940b57cec5SDimitry Andric // Push the 'and' into the same block as the icmp 0. There should only be 17950b57cec5SDimitry Andric // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any 17960b57cec5SDimitry Andric // others, so we don't need to keep track of which BBs we insert into. 17970b57cec5SDimitry Andric for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end(); 17980b57cec5SDimitry Andric UI != E; ) { 17990b57cec5SDimitry Andric Use &TheUse = UI.getUse(); 18000b57cec5SDimitry Andric Instruction *User = cast<Instruction>(*UI); 18010b57cec5SDimitry Andric 18020b57cec5SDimitry Andric // Preincrement use iterator so we don't invalidate it. 18030b57cec5SDimitry Andric ++UI; 18040b57cec5SDimitry Andric 18050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n"); 18060b57cec5SDimitry Andric 18070b57cec5SDimitry Andric // Keep the 'and' in the same place if the use is already in the same block. 18080b57cec5SDimitry Andric Instruction *InsertPt = 18090b57cec5SDimitry Andric User->getParent() == AndI->getParent() ? 
AndI : User; 18100b57cec5SDimitry Andric Instruction *InsertedAnd = 18110b57cec5SDimitry Andric BinaryOperator::Create(Instruction::And, AndI->getOperand(0), 18120b57cec5SDimitry Andric AndI->getOperand(1), "", InsertPt); 18130b57cec5SDimitry Andric // Propagate the debug info. 18140b57cec5SDimitry Andric InsertedAnd->setDebugLoc(AndI->getDebugLoc()); 18150b57cec5SDimitry Andric 18160b57cec5SDimitry Andric // Replace a use of the 'and' with a use of the new 'and'. 18170b57cec5SDimitry Andric TheUse = InsertedAnd; 18180b57cec5SDimitry Andric ++NumAndUses; 18190b57cec5SDimitry Andric LLVM_DEBUG(User->getParent()->dump()); 18200b57cec5SDimitry Andric } 18210b57cec5SDimitry Andric 18220b57cec5SDimitry Andric // We removed all uses, nuke the and. 18230b57cec5SDimitry Andric AndI->eraseFromParent(); 18240b57cec5SDimitry Andric return true; 18250b57cec5SDimitry Andric } 18260b57cec5SDimitry Andric 18270b57cec5SDimitry Andric /// Check if the candidates could be combined with a shift instruction, which 18280b57cec5SDimitry Andric /// includes: 18290b57cec5SDimitry Andric /// 1. Truncate instruction 18300b57cec5SDimitry Andric /// 2. 
And instruction and the imm is a mask of the low bits: 18310b57cec5SDimitry Andric /// imm & (imm+1) == 0 18320b57cec5SDimitry Andric static bool isExtractBitsCandidateUse(Instruction *User) { 18330b57cec5SDimitry Andric if (!isa<TruncInst>(User)) { 18340b57cec5SDimitry Andric if (User->getOpcode() != Instruction::And || 18350b57cec5SDimitry Andric !isa<ConstantInt>(User->getOperand(1))) 18360b57cec5SDimitry Andric return false; 18370b57cec5SDimitry Andric 18380b57cec5SDimitry Andric const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue(); 18390b57cec5SDimitry Andric 18400b57cec5SDimitry Andric if ((Cimm & (Cimm + 1)).getBoolValue()) 18410b57cec5SDimitry Andric return false; 18420b57cec5SDimitry Andric } 18430b57cec5SDimitry Andric return true; 18440b57cec5SDimitry Andric } 18450b57cec5SDimitry Andric 18460b57cec5SDimitry Andric /// Sink both shift and truncate instruction to the use of truncate's BB. 18470b57cec5SDimitry Andric static bool 18480b57cec5SDimitry Andric SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, 18490b57cec5SDimitry Andric DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, 18500b57cec5SDimitry Andric const TargetLowering &TLI, const DataLayout &DL) { 18510b57cec5SDimitry Andric BasicBlock *UserBB = User->getParent(); 18520b57cec5SDimitry Andric DenseMap<BasicBlock *, CastInst *> InsertedTruncs; 18538bcb0991SDimitry Andric auto *TruncI = cast<TruncInst>(User); 18540b57cec5SDimitry Andric bool MadeChange = false; 18550b57cec5SDimitry Andric 18560b57cec5SDimitry Andric for (Value::user_iterator TruncUI = TruncI->user_begin(), 18570b57cec5SDimitry Andric TruncE = TruncI->user_end(); 18580b57cec5SDimitry Andric TruncUI != TruncE;) { 18590b57cec5SDimitry Andric 18600b57cec5SDimitry Andric Use &TruncTheUse = TruncUI.getUse(); 18610b57cec5SDimitry Andric Instruction *TruncUser = cast<Instruction>(*TruncUI); 18620b57cec5SDimitry Andric // Preincrement use iterator so we don't invalidate it. 
18630b57cec5SDimitry Andric 18640b57cec5SDimitry Andric ++TruncUI; 18650b57cec5SDimitry Andric 18660b57cec5SDimitry Andric int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); 18670b57cec5SDimitry Andric if (!ISDOpcode) 18680b57cec5SDimitry Andric continue; 18690b57cec5SDimitry Andric 18700b57cec5SDimitry Andric // If the use is actually a legal node, there will not be an 18710b57cec5SDimitry Andric // implicit truncate. 18720b57cec5SDimitry Andric // FIXME: always querying the result type is just an 18730b57cec5SDimitry Andric // approximation; some nodes' legality is determined by the 18740b57cec5SDimitry Andric // operand or other means. There's no good way to find out though. 18750b57cec5SDimitry Andric if (TLI.isOperationLegalOrCustom( 18760b57cec5SDimitry Andric ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true))) 18770b57cec5SDimitry Andric continue; 18780b57cec5SDimitry Andric 18790b57cec5SDimitry Andric // Don't bother for PHI nodes. 18800b57cec5SDimitry Andric if (isa<PHINode>(TruncUser)) 18810b57cec5SDimitry Andric continue; 18820b57cec5SDimitry Andric 18830b57cec5SDimitry Andric BasicBlock *TruncUserBB = TruncUser->getParent(); 18840b57cec5SDimitry Andric 18850b57cec5SDimitry Andric if (UserBB == TruncUserBB) 18860b57cec5SDimitry Andric continue; 18870b57cec5SDimitry Andric 18880b57cec5SDimitry Andric BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; 18890b57cec5SDimitry Andric CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; 18900b57cec5SDimitry Andric 18910b57cec5SDimitry Andric if (!InsertedShift && !InsertedTrunc) { 18920b57cec5SDimitry Andric BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); 18930b57cec5SDimitry Andric assert(InsertPt != TruncUserBB->end()); 18940b57cec5SDimitry Andric // Sink the shift 18950b57cec5SDimitry Andric if (ShiftI->getOpcode() == Instruction::AShr) 18960b57cec5SDimitry Andric InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, 
18970b57cec5SDimitry Andric "", &*InsertPt); 18980b57cec5SDimitry Andric else 18990b57cec5SDimitry Andric InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, 19000b57cec5SDimitry Andric "", &*InsertPt); 19010b57cec5SDimitry Andric InsertedShift->setDebugLoc(ShiftI->getDebugLoc()); 19020b57cec5SDimitry Andric 19030b57cec5SDimitry Andric // Sink the trunc 19040b57cec5SDimitry Andric BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); 19050b57cec5SDimitry Andric TruncInsertPt++; 19060b57cec5SDimitry Andric assert(TruncInsertPt != TruncUserBB->end()); 19070b57cec5SDimitry Andric 19080b57cec5SDimitry Andric InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, 19090b57cec5SDimitry Andric TruncI->getType(), "", &*TruncInsertPt); 19100b57cec5SDimitry Andric InsertedTrunc->setDebugLoc(TruncI->getDebugLoc()); 19110b57cec5SDimitry Andric 19120b57cec5SDimitry Andric MadeChange = true; 19130b57cec5SDimitry Andric 19140b57cec5SDimitry Andric TruncTheUse = InsertedTrunc; 19150b57cec5SDimitry Andric } 19160b57cec5SDimitry Andric } 19170b57cec5SDimitry Andric return MadeChange; 19180b57cec5SDimitry Andric } 19190b57cec5SDimitry Andric 19200b57cec5SDimitry Andric /// Sink the shift *right* instruction into user blocks if the uses could 19210b57cec5SDimitry Andric /// potentially be combined with this shift instruction and generate BitExtract 19220b57cec5SDimitry Andric /// instruction. It will only be applied if the architecture supports BitExtract 19230b57cec5SDimitry Andric /// instruction. 
Here is an example: 19240b57cec5SDimitry Andric /// BB1: 19250b57cec5SDimitry Andric /// %x.extract.shift = lshr i64 %arg1, 32 19260b57cec5SDimitry Andric /// BB2: 19270b57cec5SDimitry Andric /// %x.extract.trunc = trunc i64 %x.extract.shift to i16 19280b57cec5SDimitry Andric /// ==> 19290b57cec5SDimitry Andric /// 19300b57cec5SDimitry Andric /// BB2: 19310b57cec5SDimitry Andric /// %x.extract.shift.1 = lshr i64 %arg1, 32 19320b57cec5SDimitry Andric /// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 19330b57cec5SDimitry Andric /// 19340b57cec5SDimitry Andric /// CodeGen will recognize the pattern in BB2 and generate BitExtract 19350b57cec5SDimitry Andric /// instruction. 19360b57cec5SDimitry Andric /// Return true if any changes are made. 19370b57cec5SDimitry Andric static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, 19380b57cec5SDimitry Andric const TargetLowering &TLI, 19390b57cec5SDimitry Andric const DataLayout &DL) { 19400b57cec5SDimitry Andric BasicBlock *DefBB = ShiftI->getParent(); 19410b57cec5SDimitry Andric 19420b57cec5SDimitry Andric /// Only insert instructions in each block once. 19430b57cec5SDimitry Andric DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts; 19440b57cec5SDimitry Andric 19450b57cec5SDimitry Andric bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType())); 19460b57cec5SDimitry Andric 19470b57cec5SDimitry Andric bool MadeChange = false; 19480b57cec5SDimitry Andric for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); 19490b57cec5SDimitry Andric UI != E;) { 19500b57cec5SDimitry Andric Use &TheUse = UI.getUse(); 19510b57cec5SDimitry Andric Instruction *User = cast<Instruction>(*UI); 19520b57cec5SDimitry Andric // Preincrement use iterator so we don't invalidate it. 19530b57cec5SDimitry Andric ++UI; 19540b57cec5SDimitry Andric 19550b57cec5SDimitry Andric // Don't bother for PHI nodes. 
19560b57cec5SDimitry Andric if (isa<PHINode>(User)) 19570b57cec5SDimitry Andric continue; 19580b57cec5SDimitry Andric 19590b57cec5SDimitry Andric if (!isExtractBitsCandidateUse(User)) 19600b57cec5SDimitry Andric continue; 19610b57cec5SDimitry Andric 19620b57cec5SDimitry Andric BasicBlock *UserBB = User->getParent(); 19630b57cec5SDimitry Andric 19640b57cec5SDimitry Andric if (UserBB == DefBB) { 19650b57cec5SDimitry Andric // If the shift and truncate instruction are in the same BB. The use of 19660b57cec5SDimitry Andric // the truncate(TruncUse) may still introduce another truncate if not 19670b57cec5SDimitry Andric // legal. In this case, we would like to sink both shift and truncate 19680b57cec5SDimitry Andric // instruction to the BB of TruncUse. 19690b57cec5SDimitry Andric // for example: 19700b57cec5SDimitry Andric // BB1: 19710b57cec5SDimitry Andric // i64 shift.result = lshr i64 opnd, imm 19720b57cec5SDimitry Andric // trunc.result = trunc shift.result to i16 19730b57cec5SDimitry Andric // 19740b57cec5SDimitry Andric // BB2: 19750b57cec5SDimitry Andric // ----> We will have an implicit truncate here if the architecture does 19760b57cec5SDimitry Andric // not have i16 compare. 19770b57cec5SDimitry Andric // cmp i16 trunc.result, opnd2 19780b57cec5SDimitry Andric // 19790b57cec5SDimitry Andric if (isa<TruncInst>(User) && shiftIsLegal 19800b57cec5SDimitry Andric // If the type of the truncate is legal, no truncate will be 19810b57cec5SDimitry Andric // introduced in other basic blocks. 19820b57cec5SDimitry Andric && 19830b57cec5SDimitry Andric (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType())))) 19840b57cec5SDimitry Andric MadeChange = 19850b57cec5SDimitry Andric SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL); 19860b57cec5SDimitry Andric 19870b57cec5SDimitry Andric continue; 19880b57cec5SDimitry Andric } 19890b57cec5SDimitry Andric // If we have already inserted a shift into this block, use it. 
19900b57cec5SDimitry Andric BinaryOperator *&InsertedShift = InsertedShifts[UserBB]; 19910b57cec5SDimitry Andric 19920b57cec5SDimitry Andric if (!InsertedShift) { 19930b57cec5SDimitry Andric BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 19940b57cec5SDimitry Andric assert(InsertPt != UserBB->end()); 19950b57cec5SDimitry Andric 19960b57cec5SDimitry Andric if (ShiftI->getOpcode() == Instruction::AShr) 19970b57cec5SDimitry Andric InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, 19980b57cec5SDimitry Andric "", &*InsertPt); 19990b57cec5SDimitry Andric else 20000b57cec5SDimitry Andric InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, 20010b57cec5SDimitry Andric "", &*InsertPt); 20020b57cec5SDimitry Andric InsertedShift->setDebugLoc(ShiftI->getDebugLoc()); 20030b57cec5SDimitry Andric 20040b57cec5SDimitry Andric MadeChange = true; 20050b57cec5SDimitry Andric } 20060b57cec5SDimitry Andric 20070b57cec5SDimitry Andric // Replace a use of the shift with a use of the new shift. 20080b57cec5SDimitry Andric TheUse = InsertedShift; 20090b57cec5SDimitry Andric } 20100b57cec5SDimitry Andric 20110b57cec5SDimitry Andric // If we removed all uses, or there are none, nuke the shift. 20120b57cec5SDimitry Andric if (ShiftI->use_empty()) { 20130b57cec5SDimitry Andric salvageDebugInfo(*ShiftI); 20140b57cec5SDimitry Andric ShiftI->eraseFromParent(); 20150b57cec5SDimitry Andric MadeChange = true; 20160b57cec5SDimitry Andric } 20170b57cec5SDimitry Andric 20180b57cec5SDimitry Andric return MadeChange; 20190b57cec5SDimitry Andric } 20200b57cec5SDimitry Andric 20210b57cec5SDimitry Andric /// If counting leading or trailing zeros is an expensive operation and a zero 20220b57cec5SDimitry Andric /// input is defined, add a check for zero to avoid calling the intrinsic. 
20230b57cec5SDimitry Andric /// 20240b57cec5SDimitry Andric /// We want to transform: 20250b57cec5SDimitry Andric /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) 20260b57cec5SDimitry Andric /// 20270b57cec5SDimitry Andric /// into: 20280b57cec5SDimitry Andric /// entry: 20290b57cec5SDimitry Andric /// %cmpz = icmp eq i64 %A, 0 20300b57cec5SDimitry Andric /// br i1 %cmpz, label %cond.end, label %cond.false 20310b57cec5SDimitry Andric /// cond.false: 20320b57cec5SDimitry Andric /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) 20330b57cec5SDimitry Andric /// br label %cond.end 20340b57cec5SDimitry Andric /// cond.end: 20350b57cec5SDimitry Andric /// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] 20360b57cec5SDimitry Andric /// 20370b57cec5SDimitry Andric /// If the transform is performed, return true and set ModifiedDT to true. 20380b57cec5SDimitry Andric static bool despeculateCountZeros(IntrinsicInst *CountZeros, 20390b57cec5SDimitry Andric const TargetLowering *TLI, 20400b57cec5SDimitry Andric const DataLayout *DL, 20410b57cec5SDimitry Andric bool &ModifiedDT) { 20420b57cec5SDimitry Andric // If a zero input is undefined, it doesn't make sense to despeculate that. 20430b57cec5SDimitry Andric if (match(CountZeros->getOperand(1), m_One())) 20440b57cec5SDimitry Andric return false; 20450b57cec5SDimitry Andric 20460b57cec5SDimitry Andric // If it's cheap to speculate, there's nothing to do. 20470b57cec5SDimitry Andric auto IntrinsicID = CountZeros->getIntrinsicID(); 20480b57cec5SDimitry Andric if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) || 20490b57cec5SDimitry Andric (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz())) 20500b57cec5SDimitry Andric return false; 20510b57cec5SDimitry Andric 20520b57cec5SDimitry Andric // Only handle legal scalar cases. Anything else requires too much work. 
20530b57cec5SDimitry Andric Type *Ty = CountZeros->getType(); 2054349cc55cSDimitry Andric unsigned SizeInBits = Ty->getScalarSizeInBits(); 20550b57cec5SDimitry Andric if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits()) 20560b57cec5SDimitry Andric return false; 20570b57cec5SDimitry Andric 2058fe6060f1SDimitry Andric // Bail if the value is never zero. 205981ad6265SDimitry Andric Use &Op = CountZeros->getOperandUse(0); 206081ad6265SDimitry Andric if (isKnownNonZero(Op, *DL)) 2061fe6060f1SDimitry Andric return false; 2062fe6060f1SDimitry Andric 20630b57cec5SDimitry Andric // The intrinsic will be sunk behind a compare against zero and branch. 20640b57cec5SDimitry Andric BasicBlock *StartBlock = CountZeros->getParent(); 20650b57cec5SDimitry Andric BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false"); 20660b57cec5SDimitry Andric 20670b57cec5SDimitry Andric // Create another block after the count zero intrinsic. A PHI will be added 20680b57cec5SDimitry Andric // in this block to select the result of the intrinsic or the bit-width 20690b57cec5SDimitry Andric // constant if the input to the intrinsic is zero. 20700b57cec5SDimitry Andric BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros)); 20710b57cec5SDimitry Andric BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end"); 20720b57cec5SDimitry Andric 20730b57cec5SDimitry Andric // Set up a builder to create a compare, conditional branch, and PHI. 20740b57cec5SDimitry Andric IRBuilder<> Builder(CountZeros->getContext()); 20750b57cec5SDimitry Andric Builder.SetInsertPoint(StartBlock->getTerminator()); 20760b57cec5SDimitry Andric Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc()); 20770b57cec5SDimitry Andric 20780b57cec5SDimitry Andric // Replace the unconditional branch that was created by the first split with 20790b57cec5SDimitry Andric // a compare against zero and a conditional branch. 
20800b57cec5SDimitry Andric Value *Zero = Constant::getNullValue(Ty); 208181ad6265SDimitry Andric // Avoid introducing branch on poison. This also replaces the ctz operand. 208281ad6265SDimitry Andric if (!isGuaranteedNotToBeUndefOrPoison(Op)) 208381ad6265SDimitry Andric Op = Builder.CreateFreeze(Op, Op->getName() + ".fr"); 208481ad6265SDimitry Andric Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz"); 20850b57cec5SDimitry Andric Builder.CreateCondBr(Cmp, EndBlock, CallBlock); 20860b57cec5SDimitry Andric StartBlock->getTerminator()->eraseFromParent(); 20870b57cec5SDimitry Andric 20880b57cec5SDimitry Andric // Create a PHI in the end block to select either the output of the intrinsic 20890b57cec5SDimitry Andric // or the bit width of the operand. 20900b57cec5SDimitry Andric Builder.SetInsertPoint(&EndBlock->front()); 20910b57cec5SDimitry Andric PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz"); 20920b57cec5SDimitry Andric CountZeros->replaceAllUsesWith(PN); 20930b57cec5SDimitry Andric Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits)); 20940b57cec5SDimitry Andric PN->addIncoming(BitWidth, StartBlock); 20950b57cec5SDimitry Andric PN->addIncoming(CountZeros, CallBlock); 20960b57cec5SDimitry Andric 20970b57cec5SDimitry Andric // We are explicitly handling the zero case, so we can set the intrinsic's 20980b57cec5SDimitry Andric // undefined zero argument to 'true'. This will also prevent reprocessing the 20990b57cec5SDimitry Andric // intrinsic; we only despeculate when a zero input is defined. 
21000b57cec5SDimitry Andric CountZeros->setArgOperand(1, Builder.getTrue()); 21010b57cec5SDimitry Andric ModifiedDT = true; 21020b57cec5SDimitry Andric return true; 21030b57cec5SDimitry Andric } 21040b57cec5SDimitry Andric 21050b57cec5SDimitry Andric bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { 21060b57cec5SDimitry Andric BasicBlock *BB = CI->getParent(); 21070b57cec5SDimitry Andric 21080b57cec5SDimitry Andric // Lower inline assembly if we can. 21090b57cec5SDimitry Andric // If we found an inline asm expession, and if the target knows how to 21100b57cec5SDimitry Andric // lower it to normal LLVM code, do so now. 21115ffd83dbSDimitry Andric if (CI->isInlineAsm()) { 21120b57cec5SDimitry Andric if (TLI->ExpandInlineAsm(CI)) { 21130b57cec5SDimitry Andric // Avoid invalidating the iterator. 21140b57cec5SDimitry Andric CurInstIterator = BB->begin(); 21150b57cec5SDimitry Andric // Avoid processing instructions out of order, which could cause 21160b57cec5SDimitry Andric // reuse before a value is defined. 21170b57cec5SDimitry Andric SunkAddrs.clear(); 21180b57cec5SDimitry Andric return true; 21190b57cec5SDimitry Andric } 21200b57cec5SDimitry Andric // Sink address computing for memory operands into the block. 
21210b57cec5SDimitry Andric if (optimizeInlineAsmInst(CI)) 21220b57cec5SDimitry Andric return true; 21230b57cec5SDimitry Andric } 21240b57cec5SDimitry Andric 21250b57cec5SDimitry Andric // Align the pointer arguments to this call if the target thinks it's a good 21260b57cec5SDimitry Andric // idea 212781ad6265SDimitry Andric unsigned MinSize; 212881ad6265SDimitry Andric Align PrefAlign; 21295ffd83dbSDimitry Andric if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { 2130349cc55cSDimitry Andric for (auto &Arg : CI->args()) { 21310b57cec5SDimitry Andric // We want to align both objects whose address is used directly and 21320b57cec5SDimitry Andric // objects whose address is used in casts and GEPs, though it only makes 21330b57cec5SDimitry Andric // sense for GEPs if the offset is a multiple of the desired alignment and 21340b57cec5SDimitry Andric // if size - offset meets the size threshold. 21350b57cec5SDimitry Andric if (!Arg->getType()->isPointerTy()) 21360b57cec5SDimitry Andric continue; 21370b57cec5SDimitry Andric APInt Offset(DL->getIndexSizeInBits( 21380b57cec5SDimitry Andric cast<PointerType>(Arg->getType())->getAddressSpace()), 21390b57cec5SDimitry Andric 0); 21400b57cec5SDimitry Andric Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); 21410b57cec5SDimitry Andric uint64_t Offset2 = Offset.getLimitedValue(); 214281ad6265SDimitry Andric if (!isAligned(PrefAlign, Offset2)) 21430b57cec5SDimitry Andric continue; 21440b57cec5SDimitry Andric AllocaInst *AI; 214581ad6265SDimitry Andric if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign && 21460b57cec5SDimitry Andric DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) 214781ad6265SDimitry Andric AI->setAlignment(PrefAlign); 21480b57cec5SDimitry Andric // Global variables can only be aligned if they are defined in this 21490b57cec5SDimitry Andric // object (i.e. 
they are uniquely initialized in this object), and 21500b57cec5SDimitry Andric // over-aligning global variables that have an explicit section is 21510b57cec5SDimitry Andric // forbidden. 21520b57cec5SDimitry Andric GlobalVariable *GV; 21530b57cec5SDimitry Andric if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() && 21540b57cec5SDimitry Andric GV->getPointerAlignment(*DL) < PrefAlign && 21550b57cec5SDimitry Andric DL->getTypeAllocSize(GV->getValueType()) >= 21560b57cec5SDimitry Andric MinSize + Offset2) 215781ad6265SDimitry Andric GV->setAlignment(PrefAlign); 21580b57cec5SDimitry Andric } 21590b57cec5SDimitry Andric // If this is a memcpy (or similar) then we may be able to improve the 21600b57cec5SDimitry Andric // alignment 21610b57cec5SDimitry Andric if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { 21625ffd83dbSDimitry Andric Align DestAlign = getKnownAlignment(MI->getDest(), *DL); 21635ffd83dbSDimitry Andric MaybeAlign MIDestAlign = MI->getDestAlign(); 21645ffd83dbSDimitry Andric if (!MIDestAlign || DestAlign > *MIDestAlign) 21650b57cec5SDimitry Andric MI->setDestAlignment(DestAlign); 21660b57cec5SDimitry Andric if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { 21675ffd83dbSDimitry Andric MaybeAlign MTISrcAlign = MTI->getSourceAlign(); 21685ffd83dbSDimitry Andric Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL); 21695ffd83dbSDimitry Andric if (!MTISrcAlign || SrcAlign > *MTISrcAlign) 21700b57cec5SDimitry Andric MTI->setSourceAlignment(SrcAlign); 21710b57cec5SDimitry Andric } 21720b57cec5SDimitry Andric } 21730b57cec5SDimitry Andric } 21740b57cec5SDimitry Andric 21750b57cec5SDimitry Andric // If we have a cold call site, try to sink addressing computation into the 21760b57cec5SDimitry Andric // cold block. This interacts with our handling for loads and stores to 21770b57cec5SDimitry Andric // ensure that we can fold all uses of a potential addressing computation 21780b57cec5SDimitry Andric // into their uses. 
TODO: generalize this to work over profiling data 21795ffd83dbSDimitry Andric if (CI->hasFnAttr(Attribute::Cold) && 21805ffd83dbSDimitry Andric !OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) 2181349cc55cSDimitry Andric for (auto &Arg : CI->args()) { 21820b57cec5SDimitry Andric if (!Arg->getType()->isPointerTy()) 21830b57cec5SDimitry Andric continue; 21840b57cec5SDimitry Andric unsigned AS = Arg->getType()->getPointerAddressSpace(); 21850b57cec5SDimitry Andric return optimizeMemoryInst(CI, Arg, Arg->getType(), AS); 21860b57cec5SDimitry Andric } 21870b57cec5SDimitry Andric 21880b57cec5SDimitry Andric IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); 21890b57cec5SDimitry Andric if (II) { 21900b57cec5SDimitry Andric switch (II->getIntrinsicID()) { 21910b57cec5SDimitry Andric default: break; 2192fe6060f1SDimitry Andric case Intrinsic::assume: 2193fe6060f1SDimitry Andric llvm_unreachable("llvm.assume should have been removed already"); 21940b57cec5SDimitry Andric case Intrinsic::experimental_widenable_condition: { 21950b57cec5SDimitry Andric // Give up on future widening oppurtunties so that we can fold away dead 21960b57cec5SDimitry Andric // paths and merge blocks before going into block-local instruction 21970b57cec5SDimitry Andric // selection. 
21980b57cec5SDimitry Andric if (II->use_empty()) { 21990b57cec5SDimitry Andric II->eraseFromParent(); 22000b57cec5SDimitry Andric return true; 22010b57cec5SDimitry Andric } 22020b57cec5SDimitry Andric Constant *RetVal = ConstantInt::getTrue(II->getContext()); 22030b57cec5SDimitry Andric resetIteratorIfInvalidatedWhileCalling(BB, [&]() { 22040b57cec5SDimitry Andric replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); 22050b57cec5SDimitry Andric }); 22060b57cec5SDimitry Andric return true; 22070b57cec5SDimitry Andric } 22088bcb0991SDimitry Andric case Intrinsic::objectsize: 22098bcb0991SDimitry Andric llvm_unreachable("llvm.objectsize.* should have been lowered already"); 22108bcb0991SDimitry Andric case Intrinsic::is_constant: 22118bcb0991SDimitry Andric llvm_unreachable("llvm.is.constant.* should have been lowered already"); 22120b57cec5SDimitry Andric case Intrinsic::aarch64_stlxr: 22130b57cec5SDimitry Andric case Intrinsic::aarch64_stxr: { 22140b57cec5SDimitry Andric ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); 22150b57cec5SDimitry Andric if (!ExtVal || !ExtVal->hasOneUse() || 22160b57cec5SDimitry Andric ExtVal->getParent() == CI->getParent()) 22170b57cec5SDimitry Andric return false; 22180b57cec5SDimitry Andric // Sink a zext feeding stlxr/stxr before it, so it can be folded into it. 22190b57cec5SDimitry Andric ExtVal->moveBefore(CI); 22200b57cec5SDimitry Andric // Mark this instruction as "inserted by CGP", so that other 22210b57cec5SDimitry Andric // optimizations don't touch it. 
22220b57cec5SDimitry Andric InsertedInsts.insert(ExtVal); 22230b57cec5SDimitry Andric return true; 22240b57cec5SDimitry Andric } 22250b57cec5SDimitry Andric 22260b57cec5SDimitry Andric case Intrinsic::launder_invariant_group: 22270b57cec5SDimitry Andric case Intrinsic::strip_invariant_group: { 22280b57cec5SDimitry Andric Value *ArgVal = II->getArgOperand(0); 22290b57cec5SDimitry Andric auto it = LargeOffsetGEPMap.find(II); 22300b57cec5SDimitry Andric if (it != LargeOffsetGEPMap.end()) { 22310b57cec5SDimitry Andric // Merge entries in LargeOffsetGEPMap to reflect the RAUW. 22320b57cec5SDimitry Andric // Make sure not to have to deal with iterator invalidation 22330b57cec5SDimitry Andric // after possibly adding ArgVal to LargeOffsetGEPMap. 22340b57cec5SDimitry Andric auto GEPs = std::move(it->second); 22350b57cec5SDimitry Andric LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end()); 22360b57cec5SDimitry Andric LargeOffsetGEPMap.erase(II); 22370b57cec5SDimitry Andric } 22380b57cec5SDimitry Andric 22390b57cec5SDimitry Andric II->replaceAllUsesWith(ArgVal); 22400b57cec5SDimitry Andric II->eraseFromParent(); 22410b57cec5SDimitry Andric return true; 22420b57cec5SDimitry Andric } 22430b57cec5SDimitry Andric case Intrinsic::cttz: 22440b57cec5SDimitry Andric case Intrinsic::ctlz: 22450b57cec5SDimitry Andric // If counting zeros is expensive, try to avoid it. 
22460b57cec5SDimitry Andric return despeculateCountZeros(II, TLI, DL, ModifiedDT); 22475ffd83dbSDimitry Andric case Intrinsic::fshl: 22485ffd83dbSDimitry Andric case Intrinsic::fshr: 22495ffd83dbSDimitry Andric return optimizeFunnelShift(II); 2250480093f4SDimitry Andric case Intrinsic::dbg_value: 2251480093f4SDimitry Andric return fixupDbgValue(II); 22525ffd83dbSDimitry Andric case Intrinsic::vscale: { 22535ffd83dbSDimitry Andric // If datalayout has no special restrictions on vector data layout, 22545ffd83dbSDimitry Andric // replace `llvm.vscale` by an equivalent constant expression 22555ffd83dbSDimitry Andric // to benefit from cheap constant propagation. 22565ffd83dbSDimitry Andric Type *ScalableVectorTy = 22575ffd83dbSDimitry Andric VectorType::get(Type::getInt8Ty(II->getContext()), 1, true); 22585ffd83dbSDimitry Andric if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinSize() == 8) { 22595ffd83dbSDimitry Andric auto *Null = Constant::getNullValue(ScalableVectorTy->getPointerTo()); 22605ffd83dbSDimitry Andric auto *One = ConstantInt::getSigned(II->getType(), 1); 22615ffd83dbSDimitry Andric auto *CGep = 22625ffd83dbSDimitry Andric ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One); 22635ffd83dbSDimitry Andric II->replaceAllUsesWith(ConstantExpr::getPtrToInt(CGep, II->getType())); 22645ffd83dbSDimitry Andric II->eraseFromParent(); 22655ffd83dbSDimitry Andric return true; 22665ffd83dbSDimitry Andric } 22675ffd83dbSDimitry Andric break; 22685ffd83dbSDimitry Andric } 22695ffd83dbSDimitry Andric case Intrinsic::masked_gather: 22705ffd83dbSDimitry Andric return optimizeGatherScatterInst(II, II->getArgOperand(0)); 22715ffd83dbSDimitry Andric case Intrinsic::masked_scatter: 22725ffd83dbSDimitry Andric return optimizeGatherScatterInst(II, II->getArgOperand(1)); 22730b57cec5SDimitry Andric } 22740b57cec5SDimitry Andric 22750b57cec5SDimitry Andric SmallVector<Value *, 2> PtrOps; 22760b57cec5SDimitry Andric Type *AccessTy; 22770b57cec5SDimitry Andric if 
(TLI->getAddrModeArguments(II, PtrOps, AccessTy)) 22780b57cec5SDimitry Andric while (!PtrOps.empty()) { 22790b57cec5SDimitry Andric Value *PtrVal = PtrOps.pop_back_val(); 22800b57cec5SDimitry Andric unsigned AS = PtrVal->getType()->getPointerAddressSpace(); 22810b57cec5SDimitry Andric if (optimizeMemoryInst(II, PtrVal, AccessTy, AS)) 22820b57cec5SDimitry Andric return true; 22830b57cec5SDimitry Andric } 22840b57cec5SDimitry Andric } 22850b57cec5SDimitry Andric 22860b57cec5SDimitry Andric // From here on out we're working with named functions. 22870b57cec5SDimitry Andric if (!CI->getCalledFunction()) return false; 22880b57cec5SDimitry Andric 22890b57cec5SDimitry Andric // Lower all default uses of _chk calls. This is very similar 22900b57cec5SDimitry Andric // to what InstCombineCalls does, but here we are only lowering calls 22910b57cec5SDimitry Andric // to fortified library functions (e.g. __memcpy_chk) that have the default 22920b57cec5SDimitry Andric // "don't know" as the objectsize. Anything else should be left alone. 22930b57cec5SDimitry Andric FortifiedLibCallSimplifier Simplifier(TLInfo, true); 22945ffd83dbSDimitry Andric IRBuilder<> Builder(CI); 22955ffd83dbSDimitry Andric if (Value *V = Simplifier.optimizeCall(CI, Builder)) { 22960b57cec5SDimitry Andric CI->replaceAllUsesWith(V); 22970b57cec5SDimitry Andric CI->eraseFromParent(); 22980b57cec5SDimitry Andric return true; 22990b57cec5SDimitry Andric } 23000b57cec5SDimitry Andric 23010b57cec5SDimitry Andric return false; 23020b57cec5SDimitry Andric } 23030b57cec5SDimitry Andric 23040b57cec5SDimitry Andric /// Look for opportunities to duplicate return instructions to the predecessor 23050b57cec5SDimitry Andric /// to enable tail call optimizations. 
The case it is currently looking for is: 23060b57cec5SDimitry Andric /// @code 23070b57cec5SDimitry Andric /// bb0: 23080b57cec5SDimitry Andric /// %tmp0 = tail call i32 @f0() 23090b57cec5SDimitry Andric /// br label %return 23100b57cec5SDimitry Andric /// bb1: 23110b57cec5SDimitry Andric /// %tmp1 = tail call i32 @f1() 23120b57cec5SDimitry Andric /// br label %return 23130b57cec5SDimitry Andric /// bb2: 23140b57cec5SDimitry Andric /// %tmp2 = tail call i32 @f2() 23150b57cec5SDimitry Andric /// br label %return 23160b57cec5SDimitry Andric /// return: 23170b57cec5SDimitry Andric /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] 23180b57cec5SDimitry Andric /// ret i32 %retval 23190b57cec5SDimitry Andric /// @endcode 23200b57cec5SDimitry Andric /// 23210b57cec5SDimitry Andric /// => 23220b57cec5SDimitry Andric /// 23230b57cec5SDimitry Andric /// @code 23240b57cec5SDimitry Andric /// bb0: 23250b57cec5SDimitry Andric /// %tmp0 = tail call i32 @f0() 23260b57cec5SDimitry Andric /// ret i32 %tmp0 23270b57cec5SDimitry Andric /// bb1: 23280b57cec5SDimitry Andric /// %tmp1 = tail call i32 @f1() 23290b57cec5SDimitry Andric /// ret i32 %tmp1 23300b57cec5SDimitry Andric /// bb2: 23310b57cec5SDimitry Andric /// %tmp2 = tail call i32 @f2() 23320b57cec5SDimitry Andric /// ret i32 %tmp2 23330b57cec5SDimitry Andric /// @endcode 23340b57cec5SDimitry Andric bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) { 23350b57cec5SDimitry Andric ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator()); 23360b57cec5SDimitry Andric if (!RetI) 23370b57cec5SDimitry Andric return false; 23380b57cec5SDimitry Andric 23390b57cec5SDimitry Andric PHINode *PN = nullptr; 23405ffd83dbSDimitry Andric ExtractValueInst *EVI = nullptr; 23410b57cec5SDimitry Andric BitCastInst *BCI = nullptr; 23420b57cec5SDimitry Andric Value *V = RetI->getReturnValue(); 23430b57cec5SDimitry Andric if (V) { 23440b57cec5SDimitry Andric BCI = dyn_cast<BitCastInst>(V); 
23450b57cec5SDimitry Andric if (BCI) 23460b57cec5SDimitry Andric V = BCI->getOperand(0); 23470b57cec5SDimitry Andric 23485ffd83dbSDimitry Andric EVI = dyn_cast<ExtractValueInst>(V); 23495ffd83dbSDimitry Andric if (EVI) { 23505ffd83dbSDimitry Andric V = EVI->getOperand(0); 2351e8d8bef9SDimitry Andric if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; })) 23525ffd83dbSDimitry Andric return false; 23535ffd83dbSDimitry Andric } 23545ffd83dbSDimitry Andric 23550b57cec5SDimitry Andric PN = dyn_cast<PHINode>(V); 23560b57cec5SDimitry Andric if (!PN) 23570b57cec5SDimitry Andric return false; 23580b57cec5SDimitry Andric } 23590b57cec5SDimitry Andric 23600b57cec5SDimitry Andric if (PN && PN->getParent() != BB) 23610b57cec5SDimitry Andric return false; 23620b57cec5SDimitry Andric 2363fe6060f1SDimitry Andric auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) { 2364fe6060f1SDimitry Andric const BitCastInst *BC = dyn_cast<BitCastInst>(Inst); 2365fe6060f1SDimitry Andric if (BC && BC->hasOneUse()) 2366fe6060f1SDimitry Andric Inst = BC->user_back(); 2367fe6060f1SDimitry Andric 2368fe6060f1SDimitry Andric if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) 2369fe6060f1SDimitry Andric return II->getIntrinsicID() == Intrinsic::lifetime_end; 2370fe6060f1SDimitry Andric return false; 2371fe6060f1SDimitry Andric }; 2372fe6060f1SDimitry Andric 2373fe6060f1SDimitry Andric // Make sure there are no instructions between the first instruction 2374fe6060f1SDimitry Andric // and return. 2375fe6060f1SDimitry Andric const Instruction *BI = BB->getFirstNonPHI(); 23760b57cec5SDimitry Andric // Skip over debug and the bitcast. 
2377fe6060f1SDimitry Andric while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI || 2378fe6060f1SDimitry Andric isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI)) 2379fe6060f1SDimitry Andric BI = BI->getNextNode(); 2380fe6060f1SDimitry Andric if (BI != RetI) 23810b57cec5SDimitry Andric return false; 23820b57cec5SDimitry Andric 23830b57cec5SDimitry Andric /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail 23840b57cec5SDimitry Andric /// call. 23850b57cec5SDimitry Andric const Function *F = BB->getParent(); 23868bcb0991SDimitry Andric SmallVector<BasicBlock*, 4> TailCallBBs; 23870b57cec5SDimitry Andric if (PN) { 23880b57cec5SDimitry Andric for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { 23890b57cec5SDimitry Andric // Look through bitcasts. 23900b57cec5SDimitry Andric Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts(); 23910b57cec5SDimitry Andric CallInst *CI = dyn_cast<CallInst>(IncomingVal); 23928bcb0991SDimitry Andric BasicBlock *PredBB = PN->getIncomingBlock(I); 23930b57cec5SDimitry Andric // Make sure the phi value is indeed produced by the tail call. 
23948bcb0991SDimitry Andric if (CI && CI->hasOneUse() && CI->getParent() == PredBB && 23950b57cec5SDimitry Andric TLI->mayBeEmittedAsTailCall(CI) && 23960b57cec5SDimitry Andric attributesPermitTailCall(F, CI, RetI, *TLI)) 23978bcb0991SDimitry Andric TailCallBBs.push_back(PredBB); 23980b57cec5SDimitry Andric } 23990b57cec5SDimitry Andric } else { 24000b57cec5SDimitry Andric SmallPtrSet<BasicBlock*, 4> VisitedBBs; 2401fe6060f1SDimitry Andric for (BasicBlock *Pred : predecessors(BB)) { 2402fe6060f1SDimitry Andric if (!VisitedBBs.insert(Pred).second) 24030b57cec5SDimitry Andric continue; 2404fe6060f1SDimitry Andric if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) { 2405e8d8bef9SDimitry Andric CallInst *CI = dyn_cast<CallInst>(I); 24060b57cec5SDimitry Andric if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && 24070b57cec5SDimitry Andric attributesPermitTailCall(F, CI, RetI, *TLI)) 2408fe6060f1SDimitry Andric TailCallBBs.push_back(Pred); 24090b57cec5SDimitry Andric } 24100b57cec5SDimitry Andric } 2411e8d8bef9SDimitry Andric } 24120b57cec5SDimitry Andric 24130b57cec5SDimitry Andric bool Changed = false; 24148bcb0991SDimitry Andric for (auto const &TailCallBB : TailCallBBs) { 24150b57cec5SDimitry Andric // Make sure the call instruction is followed by an unconditional branch to 24160b57cec5SDimitry Andric // the return block. 24178bcb0991SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator()); 24180b57cec5SDimitry Andric if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) 24190b57cec5SDimitry Andric continue; 24200b57cec5SDimitry Andric 24218bcb0991SDimitry Andric // Duplicate the return into TailCallBB. 
24228bcb0991SDimitry Andric (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB); 24235ffd83dbSDimitry Andric assert(!VerifyBFIUpdates || 24245ffd83dbSDimitry Andric BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB)); 24255ffd83dbSDimitry Andric BFI->setBlockFreq( 24265ffd83dbSDimitry Andric BB, 24275ffd83dbSDimitry Andric (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)).getFrequency()); 24280b57cec5SDimitry Andric ModifiedDT = Changed = true; 24290b57cec5SDimitry Andric ++NumRetsDup; 24300b57cec5SDimitry Andric } 24310b57cec5SDimitry Andric 24320b57cec5SDimitry Andric // If we eliminated all predecessors of the block, delete the block now. 2433e8d8bef9SDimitry Andric if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) 24340b57cec5SDimitry Andric BB->eraseFromParent(); 24350b57cec5SDimitry Andric 24360b57cec5SDimitry Andric return Changed; 24370b57cec5SDimitry Andric } 24380b57cec5SDimitry Andric 24390b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 24400b57cec5SDimitry Andric // Memory Optimization 24410b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 24420b57cec5SDimitry Andric 24430b57cec5SDimitry Andric namespace { 24440b57cec5SDimitry Andric 24450b57cec5SDimitry Andric /// This is an extended version of TargetLowering::AddrMode 24460b57cec5SDimitry Andric /// which holds actual Value*'s for register values. 
24470b57cec5SDimitry Andric struct ExtAddrMode : public TargetLowering::AddrMode { 24480b57cec5SDimitry Andric Value *BaseReg = nullptr; 24490b57cec5SDimitry Andric Value *ScaledReg = nullptr; 24500b57cec5SDimitry Andric Value *OriginalValue = nullptr; 24510b57cec5SDimitry Andric bool InBounds = true; 24520b57cec5SDimitry Andric 24530b57cec5SDimitry Andric enum FieldName { 24540b57cec5SDimitry Andric NoField = 0x00, 24550b57cec5SDimitry Andric BaseRegField = 0x01, 24560b57cec5SDimitry Andric BaseGVField = 0x02, 24570b57cec5SDimitry Andric BaseOffsField = 0x04, 24580b57cec5SDimitry Andric ScaledRegField = 0x08, 24590b57cec5SDimitry Andric ScaleField = 0x10, 24600b57cec5SDimitry Andric MultipleFields = 0xff 24610b57cec5SDimitry Andric }; 24620b57cec5SDimitry Andric 24630b57cec5SDimitry Andric 24640b57cec5SDimitry Andric ExtAddrMode() = default; 24650b57cec5SDimitry Andric 24660b57cec5SDimitry Andric void print(raw_ostream &OS) const; 24670b57cec5SDimitry Andric void dump() const; 24680b57cec5SDimitry Andric 24690b57cec5SDimitry Andric FieldName compare(const ExtAddrMode &other) { 24700b57cec5SDimitry Andric // First check that the types are the same on each field, as differing types 24710b57cec5SDimitry Andric // is something we can't cope with later on. 24720b57cec5SDimitry Andric if (BaseReg && other.BaseReg && 24730b57cec5SDimitry Andric BaseReg->getType() != other.BaseReg->getType()) 24740b57cec5SDimitry Andric return MultipleFields; 24750b57cec5SDimitry Andric if (BaseGV && other.BaseGV && 24760b57cec5SDimitry Andric BaseGV->getType() != other.BaseGV->getType()) 24770b57cec5SDimitry Andric return MultipleFields; 24780b57cec5SDimitry Andric if (ScaledReg && other.ScaledReg && 24790b57cec5SDimitry Andric ScaledReg->getType() != other.ScaledReg->getType()) 24800b57cec5SDimitry Andric return MultipleFields; 24810b57cec5SDimitry Andric 24820b57cec5SDimitry Andric // Conservatively reject 'inbounds' mismatches. 
24830b57cec5SDimitry Andric if (InBounds != other.InBounds) 24840b57cec5SDimitry Andric return MultipleFields; 24850b57cec5SDimitry Andric 24860b57cec5SDimitry Andric // Check each field to see if it differs. 24870b57cec5SDimitry Andric unsigned Result = NoField; 24880b57cec5SDimitry Andric if (BaseReg != other.BaseReg) 24890b57cec5SDimitry Andric Result |= BaseRegField; 24900b57cec5SDimitry Andric if (BaseGV != other.BaseGV) 24910b57cec5SDimitry Andric Result |= BaseGVField; 24920b57cec5SDimitry Andric if (BaseOffs != other.BaseOffs) 24930b57cec5SDimitry Andric Result |= BaseOffsField; 24940b57cec5SDimitry Andric if (ScaledReg != other.ScaledReg) 24950b57cec5SDimitry Andric Result |= ScaledRegField; 24960b57cec5SDimitry Andric // Don't count 0 as being a different scale, because that actually means 24970b57cec5SDimitry Andric // unscaled (which will already be counted by having no ScaledReg). 24980b57cec5SDimitry Andric if (Scale && other.Scale && Scale != other.Scale) 24990b57cec5SDimitry Andric Result |= ScaleField; 25000b57cec5SDimitry Andric 25010b57cec5SDimitry Andric if (countPopulation(Result) > 1) 25020b57cec5SDimitry Andric return MultipleFields; 25030b57cec5SDimitry Andric else 25040b57cec5SDimitry Andric return static_cast<FieldName>(Result); 25050b57cec5SDimitry Andric } 25060b57cec5SDimitry Andric 25070b57cec5SDimitry Andric // An AddrMode is trivial if it involves no calculation i.e. it is just a base 25080b57cec5SDimitry Andric // with no offset. 25090b57cec5SDimitry Andric bool isTrivial() { 25100b57cec5SDimitry Andric // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is 25110b57cec5SDimitry Andric // trivial if at most one of these terms is nonzero, except that BaseGV and 25120b57cec5SDimitry Andric // BaseReg both being zero actually means a null pointer value, which we 25130b57cec5SDimitry Andric // consider to be 'non-zero' here. 
25140b57cec5SDimitry Andric return !BaseOffs && !Scale && !(BaseGV && BaseReg); 25150b57cec5SDimitry Andric } 25160b57cec5SDimitry Andric 25170b57cec5SDimitry Andric Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) { 25180b57cec5SDimitry Andric switch (Field) { 25190b57cec5SDimitry Andric default: 25200b57cec5SDimitry Andric return nullptr; 25210b57cec5SDimitry Andric case BaseRegField: 25220b57cec5SDimitry Andric return BaseReg; 25230b57cec5SDimitry Andric case BaseGVField: 25240b57cec5SDimitry Andric return BaseGV; 25250b57cec5SDimitry Andric case ScaledRegField: 25260b57cec5SDimitry Andric return ScaledReg; 25270b57cec5SDimitry Andric case BaseOffsField: 25280b57cec5SDimitry Andric return ConstantInt::get(IntPtrTy, BaseOffs); 25290b57cec5SDimitry Andric } 25300b57cec5SDimitry Andric } 25310b57cec5SDimitry Andric 25320b57cec5SDimitry Andric void SetCombinedField(FieldName Field, Value *V, 25330b57cec5SDimitry Andric const SmallVectorImpl<ExtAddrMode> &AddrModes) { 25340b57cec5SDimitry Andric switch (Field) { 25350b57cec5SDimitry Andric default: 25360b57cec5SDimitry Andric llvm_unreachable("Unhandled fields are expected to be rejected earlier"); 25370b57cec5SDimitry Andric break; 25380b57cec5SDimitry Andric case ExtAddrMode::BaseRegField: 25390b57cec5SDimitry Andric BaseReg = V; 25400b57cec5SDimitry Andric break; 25410b57cec5SDimitry Andric case ExtAddrMode::BaseGVField: 25420b57cec5SDimitry Andric // A combined BaseGV is an Instruction, not a GlobalValue, so it goes 25430b57cec5SDimitry Andric // in the BaseReg field. 25440b57cec5SDimitry Andric assert(BaseReg == nullptr); 25450b57cec5SDimitry Andric BaseReg = V; 25460b57cec5SDimitry Andric BaseGV = nullptr; 25470b57cec5SDimitry Andric break; 25480b57cec5SDimitry Andric case ExtAddrMode::ScaledRegField: 25490b57cec5SDimitry Andric ScaledReg = V; 25500b57cec5SDimitry Andric // If we have a mix of scaled and unscaled addrmodes then we want scale 25510b57cec5SDimitry Andric // to be the scale and not zero. 
25520b57cec5SDimitry Andric if (!Scale) 25530b57cec5SDimitry Andric for (const ExtAddrMode &AM : AddrModes) 25540b57cec5SDimitry Andric if (AM.Scale) { 25550b57cec5SDimitry Andric Scale = AM.Scale; 25560b57cec5SDimitry Andric break; 25570b57cec5SDimitry Andric } 25580b57cec5SDimitry Andric break; 25590b57cec5SDimitry Andric case ExtAddrMode::BaseOffsField: 25600b57cec5SDimitry Andric // The offset is no longer a constant, so it goes in ScaledReg with a 25610b57cec5SDimitry Andric // scale of 1. 25620b57cec5SDimitry Andric assert(ScaledReg == nullptr); 25630b57cec5SDimitry Andric ScaledReg = V; 25640b57cec5SDimitry Andric Scale = 1; 25650b57cec5SDimitry Andric BaseOffs = 0; 25660b57cec5SDimitry Andric break; 25670b57cec5SDimitry Andric } 25680b57cec5SDimitry Andric } 25690b57cec5SDimitry Andric }; 25700b57cec5SDimitry Andric 25710b57cec5SDimitry Andric #ifndef NDEBUG 25720b57cec5SDimitry Andric static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { 25730b57cec5SDimitry Andric AM.print(OS); 25740b57cec5SDimitry Andric return OS; 25750b57cec5SDimitry Andric } 25760b57cec5SDimitry Andric #endif 25770b57cec5SDimitry Andric 25780b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 25790b57cec5SDimitry Andric void ExtAddrMode::print(raw_ostream &OS) const { 25800b57cec5SDimitry Andric bool NeedPlus = false; 25810b57cec5SDimitry Andric OS << "["; 25820b57cec5SDimitry Andric if (InBounds) 25830b57cec5SDimitry Andric OS << "inbounds "; 25840b57cec5SDimitry Andric if (BaseGV) { 25850b57cec5SDimitry Andric OS << (NeedPlus ? " + " : "") 25860b57cec5SDimitry Andric << "GV:"; 25870b57cec5SDimitry Andric BaseGV->printAsOperand(OS, /*PrintType=*/false); 25880b57cec5SDimitry Andric NeedPlus = true; 25890b57cec5SDimitry Andric } 25900b57cec5SDimitry Andric 25910b57cec5SDimitry Andric if (BaseOffs) { 25920b57cec5SDimitry Andric OS << (NeedPlus ? 
" + " : "") 25930b57cec5SDimitry Andric << BaseOffs; 25940b57cec5SDimitry Andric NeedPlus = true; 25950b57cec5SDimitry Andric } 25960b57cec5SDimitry Andric 25970b57cec5SDimitry Andric if (BaseReg) { 25980b57cec5SDimitry Andric OS << (NeedPlus ? " + " : "") 25990b57cec5SDimitry Andric << "Base:"; 26000b57cec5SDimitry Andric BaseReg->printAsOperand(OS, /*PrintType=*/false); 26010b57cec5SDimitry Andric NeedPlus = true; 26020b57cec5SDimitry Andric } 26030b57cec5SDimitry Andric if (Scale) { 26040b57cec5SDimitry Andric OS << (NeedPlus ? " + " : "") 26050b57cec5SDimitry Andric << Scale << "*"; 26060b57cec5SDimitry Andric ScaledReg->printAsOperand(OS, /*PrintType=*/false); 26070b57cec5SDimitry Andric } 26080b57cec5SDimitry Andric 26090b57cec5SDimitry Andric OS << ']'; 26100b57cec5SDimitry Andric } 26110b57cec5SDimitry Andric 26120b57cec5SDimitry Andric LLVM_DUMP_METHOD void ExtAddrMode::dump() const { 26130b57cec5SDimitry Andric print(dbgs()); 26140b57cec5SDimitry Andric dbgs() << '\n'; 26150b57cec5SDimitry Andric } 26160b57cec5SDimitry Andric #endif 26170b57cec5SDimitry Andric 2618*972a253aSDimitry Andric } // end anonymous namespace 2619*972a253aSDimitry Andric 26200b57cec5SDimitry Andric namespace { 26210b57cec5SDimitry Andric 26220b57cec5SDimitry Andric /// This class provides transaction based operation on the IR. 26230b57cec5SDimitry Andric /// Every change made through this class is recorded in the internal state and 26240b57cec5SDimitry Andric /// can be undone (rollback) until commit is called. 26255ffd83dbSDimitry Andric /// CGP does not check if instructions could be speculatively executed when 26265ffd83dbSDimitry Andric /// moved. Preserving the original location would pessimize the debugging 26275ffd83dbSDimitry Andric /// experience, as well as negatively impact the quality of sample PGO. 26280b57cec5SDimitry Andric class TypePromotionTransaction { 26290b57cec5SDimitry Andric /// This represents the common interface of the individual transaction. 
26300b57cec5SDimitry Andric /// Each class implements the logic for doing one specific modification on 26310b57cec5SDimitry Andric /// the IR via the TypePromotionTransaction. 26320b57cec5SDimitry Andric class TypePromotionAction { 26330b57cec5SDimitry Andric protected: 26340b57cec5SDimitry Andric /// The Instruction modified. 26350b57cec5SDimitry Andric Instruction *Inst; 26360b57cec5SDimitry Andric 26370b57cec5SDimitry Andric public: 26380b57cec5SDimitry Andric /// Constructor of the action. 26390b57cec5SDimitry Andric /// The constructor performs the related action on the IR. 26400b57cec5SDimitry Andric TypePromotionAction(Instruction *Inst) : Inst(Inst) {} 26410b57cec5SDimitry Andric 26420b57cec5SDimitry Andric virtual ~TypePromotionAction() = default; 26430b57cec5SDimitry Andric 26440b57cec5SDimitry Andric /// Undo the modification done by this action. 26450b57cec5SDimitry Andric /// When this method is called, the IR must be in the same state as it was 26460b57cec5SDimitry Andric /// before this action was applied. 26470b57cec5SDimitry Andric /// \pre Undoing the action works if and only if the IR is in the exact same 26480b57cec5SDimitry Andric /// state as it was directly after this action was applied. 26490b57cec5SDimitry Andric virtual void undo() = 0; 26500b57cec5SDimitry Andric 26510b57cec5SDimitry Andric /// Advocate every change made by this action. 26520b57cec5SDimitry Andric /// When the results on the IR of the action are to be kept, it is important 26530b57cec5SDimitry Andric /// to call this function, otherwise hidden information may be kept forever. 26540b57cec5SDimitry Andric virtual void commit() { 26550b57cec5SDimitry Andric // Nothing to be done, this action is not doing anything. 26560b57cec5SDimitry Andric } 26570b57cec5SDimitry Andric }; 26580b57cec5SDimitry Andric 26590b57cec5SDimitry Andric /// Utility to remember the position of an instruction. 
26600b57cec5SDimitry Andric class InsertionHandler { 26610b57cec5SDimitry Andric /// Position of an instruction. 26620b57cec5SDimitry Andric /// Either an instruction: 26630b57cec5SDimitry Andric /// - Is the first in a basic block: BB is used. 26640b57cec5SDimitry Andric /// - Has a previous instruction: PrevInst is used. 26650b57cec5SDimitry Andric union { 26660b57cec5SDimitry Andric Instruction *PrevInst; 26670b57cec5SDimitry Andric BasicBlock *BB; 26680b57cec5SDimitry Andric } Point; 26690b57cec5SDimitry Andric 26700b57cec5SDimitry Andric /// Remember whether or not the instruction had a previous instruction. 26710b57cec5SDimitry Andric bool HasPrevInstruction; 26720b57cec5SDimitry Andric 26730b57cec5SDimitry Andric public: 26740b57cec5SDimitry Andric /// Record the position of \p Inst. 26750b57cec5SDimitry Andric InsertionHandler(Instruction *Inst) { 26760b57cec5SDimitry Andric BasicBlock::iterator It = Inst->getIterator(); 26770b57cec5SDimitry Andric HasPrevInstruction = (It != (Inst->getParent()->begin())); 26780b57cec5SDimitry Andric if (HasPrevInstruction) 26790b57cec5SDimitry Andric Point.PrevInst = &*--It; 26800b57cec5SDimitry Andric else 26810b57cec5SDimitry Andric Point.BB = Inst->getParent(); 26820b57cec5SDimitry Andric } 26830b57cec5SDimitry Andric 26840b57cec5SDimitry Andric /// Insert \p Inst at the recorded position. 
26850b57cec5SDimitry Andric void insert(Instruction *Inst) { 26860b57cec5SDimitry Andric if (HasPrevInstruction) { 26870b57cec5SDimitry Andric if (Inst->getParent()) 26880b57cec5SDimitry Andric Inst->removeFromParent(); 26890b57cec5SDimitry Andric Inst->insertAfter(Point.PrevInst); 26900b57cec5SDimitry Andric } else { 26910b57cec5SDimitry Andric Instruction *Position = &*Point.BB->getFirstInsertionPt(); 26920b57cec5SDimitry Andric if (Inst->getParent()) 26930b57cec5SDimitry Andric Inst->moveBefore(Position); 26940b57cec5SDimitry Andric else 26950b57cec5SDimitry Andric Inst->insertBefore(Position); 26960b57cec5SDimitry Andric } 26970b57cec5SDimitry Andric } 26980b57cec5SDimitry Andric }; 26990b57cec5SDimitry Andric 27000b57cec5SDimitry Andric /// Move an instruction before another. 27010b57cec5SDimitry Andric class InstructionMoveBefore : public TypePromotionAction { 27020b57cec5SDimitry Andric /// Original position of the instruction. 27030b57cec5SDimitry Andric InsertionHandler Position; 27040b57cec5SDimitry Andric 27050b57cec5SDimitry Andric public: 27060b57cec5SDimitry Andric /// Move \p Inst before \p Before. 27070b57cec5SDimitry Andric InstructionMoveBefore(Instruction *Inst, Instruction *Before) 27080b57cec5SDimitry Andric : TypePromotionAction(Inst), Position(Inst) { 27090b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before 27100b57cec5SDimitry Andric << "\n"); 27110b57cec5SDimitry Andric Inst->moveBefore(Before); 27120b57cec5SDimitry Andric } 27130b57cec5SDimitry Andric 27140b57cec5SDimitry Andric /// Move the instruction back to its original position. 27150b57cec5SDimitry Andric void undo() override { 27160b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); 27170b57cec5SDimitry Andric Position.insert(Inst); 27180b57cec5SDimitry Andric } 27190b57cec5SDimitry Andric }; 27200b57cec5SDimitry Andric 27210b57cec5SDimitry Andric /// Set the operand of an instruction with a new value. 
27220b57cec5SDimitry Andric class OperandSetter : public TypePromotionAction { 27230b57cec5SDimitry Andric /// Original operand of the instruction. 27240b57cec5SDimitry Andric Value *Origin; 27250b57cec5SDimitry Andric 27260b57cec5SDimitry Andric /// Index of the modified instruction. 27270b57cec5SDimitry Andric unsigned Idx; 27280b57cec5SDimitry Andric 27290b57cec5SDimitry Andric public: 27300b57cec5SDimitry Andric /// Set \p Idx operand of \p Inst with \p NewVal. 27310b57cec5SDimitry Andric OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) 27320b57cec5SDimitry Andric : TypePromotionAction(Inst), Idx(Idx) { 27330b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" 27340b57cec5SDimitry Andric << "for:" << *Inst << "\n" 27350b57cec5SDimitry Andric << "with:" << *NewVal << "\n"); 27360b57cec5SDimitry Andric Origin = Inst->getOperand(Idx); 27370b57cec5SDimitry Andric Inst->setOperand(Idx, NewVal); 27380b57cec5SDimitry Andric } 27390b57cec5SDimitry Andric 27400b57cec5SDimitry Andric /// Restore the original value of the instruction. 27410b57cec5SDimitry Andric void undo() override { 27420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" 27430b57cec5SDimitry Andric << "for: " << *Inst << "\n" 27440b57cec5SDimitry Andric << "with: " << *Origin << "\n"); 27450b57cec5SDimitry Andric Inst->setOperand(Idx, Origin); 27460b57cec5SDimitry Andric } 27470b57cec5SDimitry Andric }; 27480b57cec5SDimitry Andric 27490b57cec5SDimitry Andric /// Hide the operands of an instruction. 27500b57cec5SDimitry Andric /// Do as if this instruction was not using any of its operands. 27510b57cec5SDimitry Andric class OperandsHider : public TypePromotionAction { 27520b57cec5SDimitry Andric /// The list of original operands. 
27530b57cec5SDimitry Andric SmallVector<Value *, 4> OriginalValues; 27540b57cec5SDimitry Andric 27550b57cec5SDimitry Andric public: 27560b57cec5SDimitry Andric /// Remove \p Inst from the uses of the operands of \p Inst. 27570b57cec5SDimitry Andric OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { 27580b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); 27590b57cec5SDimitry Andric unsigned NumOpnds = Inst->getNumOperands(); 27600b57cec5SDimitry Andric OriginalValues.reserve(NumOpnds); 27610b57cec5SDimitry Andric for (unsigned It = 0; It < NumOpnds; ++It) { 27620b57cec5SDimitry Andric // Save the current operand. 27630b57cec5SDimitry Andric Value *Val = Inst->getOperand(It); 27640b57cec5SDimitry Andric OriginalValues.push_back(Val); 27650b57cec5SDimitry Andric // Set a dummy one. 27660b57cec5SDimitry Andric // We could use OperandSetter here, but that would imply an overhead 27670b57cec5SDimitry Andric // that we are not willing to pay. 27680b57cec5SDimitry Andric Inst->setOperand(It, UndefValue::get(Val->getType())); 27690b57cec5SDimitry Andric } 27700b57cec5SDimitry Andric } 27710b57cec5SDimitry Andric 27720b57cec5SDimitry Andric /// Restore the original list of uses. 27730b57cec5SDimitry Andric void undo() override { 27740b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); 27750b57cec5SDimitry Andric for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) 27760b57cec5SDimitry Andric Inst->setOperand(It, OriginalValues[It]); 27770b57cec5SDimitry Andric } 27780b57cec5SDimitry Andric }; 27790b57cec5SDimitry Andric 27800b57cec5SDimitry Andric /// Build a truncate instruction. 
27810b57cec5SDimitry Andric class TruncBuilder : public TypePromotionAction { 27820b57cec5SDimitry Andric Value *Val; 27830b57cec5SDimitry Andric 27840b57cec5SDimitry Andric public: 27850b57cec5SDimitry Andric /// Build a truncate instruction of \p Opnd producing a \p Ty 27860b57cec5SDimitry Andric /// result. 27870b57cec5SDimitry Andric /// trunc Opnd to Ty. 27880b57cec5SDimitry Andric TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { 27890b57cec5SDimitry Andric IRBuilder<> Builder(Opnd); 27905ffd83dbSDimitry Andric Builder.SetCurrentDebugLocation(DebugLoc()); 27910b57cec5SDimitry Andric Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); 27920b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); 27930b57cec5SDimitry Andric } 27940b57cec5SDimitry Andric 27950b57cec5SDimitry Andric /// Get the built value. 27960b57cec5SDimitry Andric Value *getBuiltValue() { return Val; } 27970b57cec5SDimitry Andric 27980b57cec5SDimitry Andric /// Remove the built instruction. 27990b57cec5SDimitry Andric void undo() override { 28000b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n"); 28010b57cec5SDimitry Andric if (Instruction *IVal = dyn_cast<Instruction>(Val)) 28020b57cec5SDimitry Andric IVal->eraseFromParent(); 28030b57cec5SDimitry Andric } 28040b57cec5SDimitry Andric }; 28050b57cec5SDimitry Andric 28060b57cec5SDimitry Andric /// Build a sign extension instruction. 28070b57cec5SDimitry Andric class SExtBuilder : public TypePromotionAction { 28080b57cec5SDimitry Andric Value *Val; 28090b57cec5SDimitry Andric 28100b57cec5SDimitry Andric public: 28110b57cec5SDimitry Andric /// Build a sign extension instruction of \p Opnd producing a \p Ty 28120b57cec5SDimitry Andric /// result. 28130b57cec5SDimitry Andric /// sext Opnd to Ty. 
28140b57cec5SDimitry Andric SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) 28150b57cec5SDimitry Andric : TypePromotionAction(InsertPt) { 28160b57cec5SDimitry Andric IRBuilder<> Builder(InsertPt); 28170b57cec5SDimitry Andric Val = Builder.CreateSExt(Opnd, Ty, "promoted"); 28180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n"); 28190b57cec5SDimitry Andric } 28200b57cec5SDimitry Andric 28210b57cec5SDimitry Andric /// Get the built value. 28220b57cec5SDimitry Andric Value *getBuiltValue() { return Val; } 28230b57cec5SDimitry Andric 28240b57cec5SDimitry Andric /// Remove the built instruction. 28250b57cec5SDimitry Andric void undo() override { 28260b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n"); 28270b57cec5SDimitry Andric if (Instruction *IVal = dyn_cast<Instruction>(Val)) 28280b57cec5SDimitry Andric IVal->eraseFromParent(); 28290b57cec5SDimitry Andric } 28300b57cec5SDimitry Andric }; 28310b57cec5SDimitry Andric 28320b57cec5SDimitry Andric /// Build a zero extension instruction. 28330b57cec5SDimitry Andric class ZExtBuilder : public TypePromotionAction { 28340b57cec5SDimitry Andric Value *Val; 28350b57cec5SDimitry Andric 28360b57cec5SDimitry Andric public: 28370b57cec5SDimitry Andric /// Build a zero extension instruction of \p Opnd producing a \p Ty 28380b57cec5SDimitry Andric /// result. 28390b57cec5SDimitry Andric /// zext Opnd to Ty. 28400b57cec5SDimitry Andric ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) 28410b57cec5SDimitry Andric : TypePromotionAction(InsertPt) { 28420b57cec5SDimitry Andric IRBuilder<> Builder(InsertPt); 28435ffd83dbSDimitry Andric Builder.SetCurrentDebugLocation(DebugLoc()); 28440b57cec5SDimitry Andric Val = Builder.CreateZExt(Opnd, Ty, "promoted"); 28450b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); 28460b57cec5SDimitry Andric } 28470b57cec5SDimitry Andric 28480b57cec5SDimitry Andric /// Get the built value. 
28490b57cec5SDimitry Andric Value *getBuiltValue() { return Val; } 28500b57cec5SDimitry Andric 28510b57cec5SDimitry Andric /// Remove the built instruction. 28520b57cec5SDimitry Andric void undo() override { 28530b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"); 28540b57cec5SDimitry Andric if (Instruction *IVal = dyn_cast<Instruction>(Val)) 28550b57cec5SDimitry Andric IVal->eraseFromParent(); 28560b57cec5SDimitry Andric } 28570b57cec5SDimitry Andric }; 28580b57cec5SDimitry Andric 28590b57cec5SDimitry Andric /// Mutate an instruction to another type. 28600b57cec5SDimitry Andric class TypeMutator : public TypePromotionAction { 28610b57cec5SDimitry Andric /// Record the original type. 28620b57cec5SDimitry Andric Type *OrigTy; 28630b57cec5SDimitry Andric 28640b57cec5SDimitry Andric public: 28650b57cec5SDimitry Andric /// Mutate the type of \p Inst into \p NewTy. 28660b57cec5SDimitry Andric TypeMutator(Instruction *Inst, Type *NewTy) 28670b57cec5SDimitry Andric : TypePromotionAction(Inst), OrigTy(Inst->getType()) { 28680b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy 28690b57cec5SDimitry Andric << "\n"); 28700b57cec5SDimitry Andric Inst->mutateType(NewTy); 28710b57cec5SDimitry Andric } 28720b57cec5SDimitry Andric 28730b57cec5SDimitry Andric /// Mutate the instruction back to its original type. 28740b57cec5SDimitry Andric void undo() override { 28750b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy 28760b57cec5SDimitry Andric << "\n"); 28770b57cec5SDimitry Andric Inst->mutateType(OrigTy); 28780b57cec5SDimitry Andric } 28790b57cec5SDimitry Andric }; 28800b57cec5SDimitry Andric 28810b57cec5SDimitry Andric /// Replace the uses of an instruction by another instruction. 28820b57cec5SDimitry Andric class UsesReplacer : public TypePromotionAction { 28830b57cec5SDimitry Andric /// Helper structure to keep track of the replaced uses. 
28840b57cec5SDimitry Andric struct InstructionAndIdx { 28850b57cec5SDimitry Andric /// The instruction using the instruction. 28860b57cec5SDimitry Andric Instruction *Inst; 28870b57cec5SDimitry Andric 28880b57cec5SDimitry Andric /// The index where this instruction is used for Inst. 28890b57cec5SDimitry Andric unsigned Idx; 28900b57cec5SDimitry Andric 28910b57cec5SDimitry Andric InstructionAndIdx(Instruction *Inst, unsigned Idx) 28920b57cec5SDimitry Andric : Inst(Inst), Idx(Idx) {} 28930b57cec5SDimitry Andric }; 28940b57cec5SDimitry Andric 28950b57cec5SDimitry Andric /// Keep track of the original uses (pair Instruction, Index). 28960b57cec5SDimitry Andric SmallVector<InstructionAndIdx, 4> OriginalUses; 28970b57cec5SDimitry Andric /// Keep track of the debug users. 28980b57cec5SDimitry Andric SmallVector<DbgValueInst *, 1> DbgValues; 28990b57cec5SDimitry Andric 2900fe6060f1SDimitry Andric /// Keep track of the new value so that we can undo it by replacing 2901fe6060f1SDimitry Andric /// instances of the new value with the original value. 2902fe6060f1SDimitry Andric Value *New; 2903fe6060f1SDimitry Andric 29040b57cec5SDimitry Andric using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator; 29050b57cec5SDimitry Andric 29060b57cec5SDimitry Andric public: 29070b57cec5SDimitry Andric /// Replace all the use of \p Inst by \p New. 2908fe6060f1SDimitry Andric UsesReplacer(Instruction *Inst, Value *New) 2909fe6060f1SDimitry Andric : TypePromotionAction(Inst), New(New) { 29100b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New 29110b57cec5SDimitry Andric << "\n"); 29120b57cec5SDimitry Andric // Record the original uses. 
29130b57cec5SDimitry Andric for (Use &U : Inst->uses()) { 29140b57cec5SDimitry Andric Instruction *UserI = cast<Instruction>(U.getUser()); 29150b57cec5SDimitry Andric OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); 29160b57cec5SDimitry Andric } 29170b57cec5SDimitry Andric // Record the debug uses separately. They are not in the instruction's 29180b57cec5SDimitry Andric // use list, but they are replaced by RAUW. 29190b57cec5SDimitry Andric findDbgValues(DbgValues, Inst); 29200b57cec5SDimitry Andric 29210b57cec5SDimitry Andric // Now, we can replace the uses. 29220b57cec5SDimitry Andric Inst->replaceAllUsesWith(New); 29230b57cec5SDimitry Andric } 29240b57cec5SDimitry Andric 29250b57cec5SDimitry Andric /// Reassign the original uses of Inst to Inst. 29260b57cec5SDimitry Andric void undo() override { 29270b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); 2928fe6060f1SDimitry Andric for (InstructionAndIdx &Use : OriginalUses) 2929fe6060f1SDimitry Andric Use.Inst->setOperand(Use.Idx, Inst); 29300b57cec5SDimitry Andric // RAUW has replaced all original uses with references to the new value, 29310b57cec5SDimitry Andric // including the debug uses. Since we are undoing the replacements, 29320b57cec5SDimitry Andric // the original debug uses must also be reinstated to maintain the 29330b57cec5SDimitry Andric // correctness and utility of debug value instructions. 2934fe6060f1SDimitry Andric for (auto *DVI : DbgValues) 2935fe6060f1SDimitry Andric DVI->replaceVariableLocationOp(New, Inst); 29360b57cec5SDimitry Andric } 29370b57cec5SDimitry Andric }; 29380b57cec5SDimitry Andric 29390b57cec5SDimitry Andric /// Remove an instruction from the IR. 29400b57cec5SDimitry Andric class InstructionRemover : public TypePromotionAction { 29410b57cec5SDimitry Andric /// Original position of the instruction. 
29420b57cec5SDimitry Andric InsertionHandler Inserter; 29430b57cec5SDimitry Andric 29440b57cec5SDimitry Andric /// Helper structure to hide all the link to the instruction. In other 29450b57cec5SDimitry Andric /// words, this helps to do as if the instruction was removed. 29460b57cec5SDimitry Andric OperandsHider Hider; 29470b57cec5SDimitry Andric 29480b57cec5SDimitry Andric /// Keep track of the uses replaced, if any. 29490b57cec5SDimitry Andric UsesReplacer *Replacer = nullptr; 29500b57cec5SDimitry Andric 29510b57cec5SDimitry Andric /// Keep track of instructions removed. 29520b57cec5SDimitry Andric SetOfInstrs &RemovedInsts; 29530b57cec5SDimitry Andric 29540b57cec5SDimitry Andric public: 29550b57cec5SDimitry Andric /// Remove all reference of \p Inst and optionally replace all its 29560b57cec5SDimitry Andric /// uses with New. 29570b57cec5SDimitry Andric /// \p RemovedInsts Keep track of the instructions removed by this Action. 29580b57cec5SDimitry Andric /// \pre If !Inst->use_empty(), then New != nullptr 29590b57cec5SDimitry Andric InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts, 29600b57cec5SDimitry Andric Value *New = nullptr) 29610b57cec5SDimitry Andric : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst), 29620b57cec5SDimitry Andric RemovedInsts(RemovedInsts) { 29630b57cec5SDimitry Andric if (New) 29640b57cec5SDimitry Andric Replacer = new UsesReplacer(Inst, New); 29650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); 29660b57cec5SDimitry Andric RemovedInsts.insert(Inst); 29670b57cec5SDimitry Andric /// The instructions removed here will be freed after completing 29680b57cec5SDimitry Andric /// optimizeBlock() for all blocks as we need to keep track of the 29690b57cec5SDimitry Andric /// removed instructions during promotion. 
29700b57cec5SDimitry Andric Inst->removeFromParent(); 29710b57cec5SDimitry Andric } 29720b57cec5SDimitry Andric 29730b57cec5SDimitry Andric ~InstructionRemover() override { delete Replacer; } 29740b57cec5SDimitry Andric 29750b57cec5SDimitry Andric /// Resurrect the instruction and reassign it to the proper uses if 29760b57cec5SDimitry Andric /// new value was provided when build this action. 29770b57cec5SDimitry Andric void undo() override { 29780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"); 29790b57cec5SDimitry Andric Inserter.insert(Inst); 29800b57cec5SDimitry Andric if (Replacer) 29810b57cec5SDimitry Andric Replacer->undo(); 29820b57cec5SDimitry Andric Hider.undo(); 29830b57cec5SDimitry Andric RemovedInsts.erase(Inst); 29840b57cec5SDimitry Andric } 29850b57cec5SDimitry Andric }; 29860b57cec5SDimitry Andric 29870b57cec5SDimitry Andric public: 29880b57cec5SDimitry Andric /// Restoration point. 29890b57cec5SDimitry Andric /// The restoration point is a pointer to an action instead of an iterator 29900b57cec5SDimitry Andric /// because the iterator may be invalidated but not the pointer. 29910b57cec5SDimitry Andric using ConstRestorationPt = const TypePromotionAction *; 29920b57cec5SDimitry Andric 29930b57cec5SDimitry Andric TypePromotionTransaction(SetOfInstrs &RemovedInsts) 29940b57cec5SDimitry Andric : RemovedInsts(RemovedInsts) {} 29950b57cec5SDimitry Andric 29965ffd83dbSDimitry Andric /// Advocate every changes made in that transaction. Return true if any change 29975ffd83dbSDimitry Andric /// happen. 29985ffd83dbSDimitry Andric bool commit(); 29990b57cec5SDimitry Andric 30000b57cec5SDimitry Andric /// Undo all the changes made after the given point. 30010b57cec5SDimitry Andric void rollback(ConstRestorationPt Point); 30020b57cec5SDimitry Andric 30030b57cec5SDimitry Andric /// Get the current restoration point. 
30040b57cec5SDimitry Andric ConstRestorationPt getRestorationPoint() const; 30050b57cec5SDimitry Andric 30060b57cec5SDimitry Andric /// \name API for IR modification with state keeping to support rollback. 30070b57cec5SDimitry Andric /// @{ 30080b57cec5SDimitry Andric /// Same as Instruction::setOperand. 30090b57cec5SDimitry Andric void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal); 30100b57cec5SDimitry Andric 30110b57cec5SDimitry Andric /// Same as Instruction::eraseFromParent. 30120b57cec5SDimitry Andric void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr); 30130b57cec5SDimitry Andric 30140b57cec5SDimitry Andric /// Same as Value::replaceAllUsesWith. 30150b57cec5SDimitry Andric void replaceAllUsesWith(Instruction *Inst, Value *New); 30160b57cec5SDimitry Andric 30170b57cec5SDimitry Andric /// Same as Value::mutateType. 30180b57cec5SDimitry Andric void mutateType(Instruction *Inst, Type *NewTy); 30190b57cec5SDimitry Andric 30200b57cec5SDimitry Andric /// Same as IRBuilder::createTrunc. 30210b57cec5SDimitry Andric Value *createTrunc(Instruction *Opnd, Type *Ty); 30220b57cec5SDimitry Andric 30230b57cec5SDimitry Andric /// Same as IRBuilder::createSExt. 30240b57cec5SDimitry Andric Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); 30250b57cec5SDimitry Andric 30260b57cec5SDimitry Andric /// Same as IRBuilder::createZExt. 30270b57cec5SDimitry Andric Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty); 30280b57cec5SDimitry Andric 30290b57cec5SDimitry Andric /// Same as Instruction::moveBefore. 30300b57cec5SDimitry Andric void moveBefore(Instruction *Inst, Instruction *Before); 30310b57cec5SDimitry Andric /// @} 30320b57cec5SDimitry Andric 30330b57cec5SDimitry Andric private: 30340b57cec5SDimitry Andric /// The ordered list of actions made so far. 
30350b57cec5SDimitry Andric SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions; 30360b57cec5SDimitry Andric 30370b57cec5SDimitry Andric using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator; 30380b57cec5SDimitry Andric 30390b57cec5SDimitry Andric SetOfInstrs &RemovedInsts; 30400b57cec5SDimitry Andric }; 30410b57cec5SDimitry Andric 30420b57cec5SDimitry Andric } // end anonymous namespace 30430b57cec5SDimitry Andric 30440b57cec5SDimitry Andric void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, 30450b57cec5SDimitry Andric Value *NewVal) { 30468bcb0991SDimitry Andric Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>( 30470b57cec5SDimitry Andric Inst, Idx, NewVal)); 30480b57cec5SDimitry Andric } 30490b57cec5SDimitry Andric 30500b57cec5SDimitry Andric void TypePromotionTransaction::eraseInstruction(Instruction *Inst, 30510b57cec5SDimitry Andric Value *NewVal) { 30520b57cec5SDimitry Andric Actions.push_back( 30538bcb0991SDimitry Andric std::make_unique<TypePromotionTransaction::InstructionRemover>( 30540b57cec5SDimitry Andric Inst, RemovedInsts, NewVal)); 30550b57cec5SDimitry Andric } 30560b57cec5SDimitry Andric 30570b57cec5SDimitry Andric void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, 30580b57cec5SDimitry Andric Value *New) { 30590b57cec5SDimitry Andric Actions.push_back( 30608bcb0991SDimitry Andric std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); 30610b57cec5SDimitry Andric } 30620b57cec5SDimitry Andric 30630b57cec5SDimitry Andric void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { 30640b57cec5SDimitry Andric Actions.push_back( 30658bcb0991SDimitry Andric std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); 30660b57cec5SDimitry Andric } 30670b57cec5SDimitry Andric 30680b57cec5SDimitry Andric Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, 30690b57cec5SDimitry Andric Type *Ty) { 
30700b57cec5SDimitry Andric std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty)); 30710b57cec5SDimitry Andric Value *Val = Ptr->getBuiltValue(); 30720b57cec5SDimitry Andric Actions.push_back(std::move(Ptr)); 30730b57cec5SDimitry Andric return Val; 30740b57cec5SDimitry Andric } 30750b57cec5SDimitry Andric 30760b57cec5SDimitry Andric Value *TypePromotionTransaction::createSExt(Instruction *Inst, 30770b57cec5SDimitry Andric Value *Opnd, Type *Ty) { 30780b57cec5SDimitry Andric std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty)); 30790b57cec5SDimitry Andric Value *Val = Ptr->getBuiltValue(); 30800b57cec5SDimitry Andric Actions.push_back(std::move(Ptr)); 30810b57cec5SDimitry Andric return Val; 30820b57cec5SDimitry Andric } 30830b57cec5SDimitry Andric 30840b57cec5SDimitry Andric Value *TypePromotionTransaction::createZExt(Instruction *Inst, 30850b57cec5SDimitry Andric Value *Opnd, Type *Ty) { 30860b57cec5SDimitry Andric std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty)); 30870b57cec5SDimitry Andric Value *Val = Ptr->getBuiltValue(); 30880b57cec5SDimitry Andric Actions.push_back(std::move(Ptr)); 30890b57cec5SDimitry Andric return Val; 30900b57cec5SDimitry Andric } 30910b57cec5SDimitry Andric 30920b57cec5SDimitry Andric void TypePromotionTransaction::moveBefore(Instruction *Inst, 30930b57cec5SDimitry Andric Instruction *Before) { 30940b57cec5SDimitry Andric Actions.push_back( 30958bcb0991SDimitry Andric std::make_unique<TypePromotionTransaction::InstructionMoveBefore>( 30960b57cec5SDimitry Andric Inst, Before)); 30970b57cec5SDimitry Andric } 30980b57cec5SDimitry Andric 30990b57cec5SDimitry Andric TypePromotionTransaction::ConstRestorationPt 31000b57cec5SDimitry Andric TypePromotionTransaction::getRestorationPoint() const { 31010b57cec5SDimitry Andric return !Actions.empty() ? 
Actions.back().get() : nullptr; 31020b57cec5SDimitry Andric } 31030b57cec5SDimitry Andric 31045ffd83dbSDimitry Andric bool TypePromotionTransaction::commit() { 3105fe6060f1SDimitry Andric for (std::unique_ptr<TypePromotionAction> &Action : Actions) 3106fe6060f1SDimitry Andric Action->commit(); 31075ffd83dbSDimitry Andric bool Modified = !Actions.empty(); 31080b57cec5SDimitry Andric Actions.clear(); 31095ffd83dbSDimitry Andric return Modified; 31100b57cec5SDimitry Andric } 31110b57cec5SDimitry Andric 31120b57cec5SDimitry Andric void TypePromotionTransaction::rollback( 31130b57cec5SDimitry Andric TypePromotionTransaction::ConstRestorationPt Point) { 31140b57cec5SDimitry Andric while (!Actions.empty() && Point != Actions.back().get()) { 31150b57cec5SDimitry Andric std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val(); 31160b57cec5SDimitry Andric Curr->undo(); 31170b57cec5SDimitry Andric } 31180b57cec5SDimitry Andric } 31190b57cec5SDimitry Andric 31200b57cec5SDimitry Andric namespace { 31210b57cec5SDimitry Andric 31220b57cec5SDimitry Andric /// A helper class for matching addressing modes. 31230b57cec5SDimitry Andric /// 31240b57cec5SDimitry Andric /// This encapsulates the logic for matching the target-legal addressing modes. 31250b57cec5SDimitry Andric class AddressingModeMatcher { 31260b57cec5SDimitry Andric SmallVectorImpl<Instruction*> &AddrModeInsts; 31270b57cec5SDimitry Andric const TargetLowering &TLI; 31280b57cec5SDimitry Andric const TargetRegisterInfo &TRI; 31290b57cec5SDimitry Andric const DataLayout &DL; 3130fe6060f1SDimitry Andric const LoopInfo &LI; 3131fe6060f1SDimitry Andric const std::function<const DominatorTree &()> getDTFn; 31320b57cec5SDimitry Andric 31330b57cec5SDimitry Andric /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and 31340b57cec5SDimitry Andric /// the memory instruction that we're computing this address for. 
31350b57cec5SDimitry Andric Type *AccessTy; 31360b57cec5SDimitry Andric unsigned AddrSpace; 31370b57cec5SDimitry Andric Instruction *MemoryInst; 31380b57cec5SDimitry Andric 31390b57cec5SDimitry Andric /// This is the addressing mode that we're building up. This is 31400b57cec5SDimitry Andric /// part of the return value of this addressing mode matching stuff. 31410b57cec5SDimitry Andric ExtAddrMode &AddrMode; 31420b57cec5SDimitry Andric 31430b57cec5SDimitry Andric /// The instructions inserted by other CodeGenPrepare optimizations. 31440b57cec5SDimitry Andric const SetOfInstrs &InsertedInsts; 31450b57cec5SDimitry Andric 31460b57cec5SDimitry Andric /// A map from the instructions to their type before promotion. 31470b57cec5SDimitry Andric InstrToOrigTy &PromotedInsts; 31480b57cec5SDimitry Andric 31490b57cec5SDimitry Andric /// The ongoing transaction where every action should be registered. 31500b57cec5SDimitry Andric TypePromotionTransaction &TPT; 31510b57cec5SDimitry Andric 31520b57cec5SDimitry Andric // A GEP which has too large offset to be folded into the addressing mode. 31530b57cec5SDimitry Andric std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP; 31540b57cec5SDimitry Andric 31550b57cec5SDimitry Andric /// This is set to true when we should not do profitability checks. 31560b57cec5SDimitry Andric /// When true, IsProfitableToFoldIntoAddressingMode always returns true. 31570b57cec5SDimitry Andric bool IgnoreProfitability; 31580b57cec5SDimitry Andric 3159480093f4SDimitry Andric /// True if we are optimizing for size. 
3160480093f4SDimitry Andric bool OptSize; 3161480093f4SDimitry Andric 3162480093f4SDimitry Andric ProfileSummaryInfo *PSI; 3163480093f4SDimitry Andric BlockFrequencyInfo *BFI; 3164480093f4SDimitry Andric 31650b57cec5SDimitry Andric AddressingModeMatcher( 31660b57cec5SDimitry Andric SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI, 3167fe6060f1SDimitry Andric const TargetRegisterInfo &TRI, const LoopInfo &LI, 3168fe6060f1SDimitry Andric const std::function<const DominatorTree &()> getDTFn, 3169fe6060f1SDimitry Andric Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM, 3170fe6060f1SDimitry Andric const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, 3171fe6060f1SDimitry Andric TypePromotionTransaction &TPT, 3172480093f4SDimitry Andric std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP, 3173480093f4SDimitry Andric bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) 31740b57cec5SDimitry Andric : AddrModeInsts(AMI), TLI(TLI), TRI(TRI), 3175fe6060f1SDimitry Andric DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn), 3176fe6060f1SDimitry Andric AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), 3177fe6060f1SDimitry Andric InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT), 3178fe6060f1SDimitry Andric LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) { 31790b57cec5SDimitry Andric IgnoreProfitability = false; 31800b57cec5SDimitry Andric } 31810b57cec5SDimitry Andric 31820b57cec5SDimitry Andric public: 31830b57cec5SDimitry Andric /// Find the maximal addressing mode that a load/store of V can fold, 31840b57cec5SDimitry Andric /// give an access type of AccessTy. This returns a list of involved 31850b57cec5SDimitry Andric /// instructions in AddrModeInsts. 31860b57cec5SDimitry Andric /// \p InsertedInsts The instructions inserted by other CodeGenPrepare 31870b57cec5SDimitry Andric /// optimizations. 
31880b57cec5SDimitry Andric /// \p PromotedInsts maps the instructions to their type before promotion. 31890b57cec5SDimitry Andric /// \p The ongoing transaction where every action should be registered. 31900b57cec5SDimitry Andric static ExtAddrMode 31910b57cec5SDimitry Andric Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst, 31920b57cec5SDimitry Andric SmallVectorImpl<Instruction *> &AddrModeInsts, 3193fe6060f1SDimitry Andric const TargetLowering &TLI, const LoopInfo &LI, 3194fe6060f1SDimitry Andric const std::function<const DominatorTree &()> getDTFn, 3195fe6060f1SDimitry Andric const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts, 3196fe6060f1SDimitry Andric InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, 3197480093f4SDimitry Andric std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP, 3198480093f4SDimitry Andric bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { 31990b57cec5SDimitry Andric ExtAddrMode Result; 32000b57cec5SDimitry Andric 3201fe6060f1SDimitry Andric bool Success = AddressingModeMatcher( 3202fe6060f1SDimitry Andric AddrModeInsts, TLI, TRI, LI, getDTFn, AccessTy, AS, MemoryInst, Result, 3203fe6060f1SDimitry Andric InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, 3204fe6060f1SDimitry Andric BFI).matchAddr(V, 0); 32050b57cec5SDimitry Andric (void)Success; assert(Success && "Couldn't select *anything*?"); 32060b57cec5SDimitry Andric return Result; 32070b57cec5SDimitry Andric } 32080b57cec5SDimitry Andric 32090b57cec5SDimitry Andric private: 32100b57cec5SDimitry Andric bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); 32110b57cec5SDimitry Andric bool matchAddr(Value *Addr, unsigned Depth); 32120b57cec5SDimitry Andric bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth, 32130b57cec5SDimitry Andric bool *MovedAway = nullptr); 32140b57cec5SDimitry Andric bool isProfitableToFoldIntoAddressingMode(Instruction *I, 
32150b57cec5SDimitry Andric ExtAddrMode &AMBefore, 32160b57cec5SDimitry Andric ExtAddrMode &AMAfter); 32170b57cec5SDimitry Andric bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); 32180b57cec5SDimitry Andric bool isPromotionProfitable(unsigned NewCost, unsigned OldCost, 32190b57cec5SDimitry Andric Value *PromotedOperand) const; 32200b57cec5SDimitry Andric }; 32210b57cec5SDimitry Andric 32220b57cec5SDimitry Andric class PhiNodeSet; 32230b57cec5SDimitry Andric 32240b57cec5SDimitry Andric /// An iterator for PhiNodeSet. 32250b57cec5SDimitry Andric class PhiNodeSetIterator { 32260b57cec5SDimitry Andric PhiNodeSet * const Set; 32270b57cec5SDimitry Andric size_t CurrentIndex = 0; 32280b57cec5SDimitry Andric 32290b57cec5SDimitry Andric public: 32300b57cec5SDimitry Andric /// The constructor. Start should point to either a valid element, or be equal 32310b57cec5SDimitry Andric /// to the size of the underlying SmallVector of the PhiNodeSet. 32320b57cec5SDimitry Andric PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start); 32330b57cec5SDimitry Andric PHINode * operator*() const; 32340b57cec5SDimitry Andric PhiNodeSetIterator& operator++(); 32350b57cec5SDimitry Andric bool operator==(const PhiNodeSetIterator &RHS) const; 32360b57cec5SDimitry Andric bool operator!=(const PhiNodeSetIterator &RHS) const; 32370b57cec5SDimitry Andric }; 32380b57cec5SDimitry Andric 32390b57cec5SDimitry Andric /// Keeps a set of PHINodes. 32400b57cec5SDimitry Andric /// 32410b57cec5SDimitry Andric /// This is a minimal set implementation for a specific use case: 32420b57cec5SDimitry Andric /// It is very fast when there are very few elements, but also provides good 32430b57cec5SDimitry Andric /// performance when there are many. It is similar to SmallPtrSet, but also 32440b57cec5SDimitry Andric /// provides iteration by insertion order, which is deterministic and stable 32450b57cec5SDimitry Andric /// across runs. 
It is also similar to SmallSetVector, but provides removing 32460b57cec5SDimitry Andric /// elements in O(1) time. This is achieved by not actually removing the element 32470b57cec5SDimitry Andric /// from the underlying vector, so comes at the cost of using more memory, but 32480b57cec5SDimitry Andric /// that is fine, since PhiNodeSets are used as short lived objects. 32490b57cec5SDimitry Andric class PhiNodeSet { 32500b57cec5SDimitry Andric friend class PhiNodeSetIterator; 32510b57cec5SDimitry Andric 32520b57cec5SDimitry Andric using MapType = SmallDenseMap<PHINode *, size_t, 32>; 32530b57cec5SDimitry Andric using iterator = PhiNodeSetIterator; 32540b57cec5SDimitry Andric 32550b57cec5SDimitry Andric /// Keeps the elements in the order of their insertion in the underlying 32560b57cec5SDimitry Andric /// vector. To achieve constant time removal, it never deletes any element. 32570b57cec5SDimitry Andric SmallVector<PHINode *, 32> NodeList; 32580b57cec5SDimitry Andric 32590b57cec5SDimitry Andric /// Keeps the elements in the underlying set implementation. This (and not the 32600b57cec5SDimitry Andric /// NodeList defined above) is the source of truth on whether an element 32610b57cec5SDimitry Andric /// is actually in the collection. 32620b57cec5SDimitry Andric MapType NodeMap; 32630b57cec5SDimitry Andric 32640b57cec5SDimitry Andric /// Points to the first valid (not deleted) element when the set is not empty 32650b57cec5SDimitry Andric /// and the value is not zero. Equals to the size of the underlying vector 32660b57cec5SDimitry Andric /// when the set is empty. When the value is 0, as in the beginning, the 32670b57cec5SDimitry Andric /// first element may or may not be valid. 32680b57cec5SDimitry Andric size_t FirstValidElement = 0; 32690b57cec5SDimitry Andric 32700b57cec5SDimitry Andric public: 32710b57cec5SDimitry Andric /// Inserts a new element to the collection. 32720b57cec5SDimitry Andric /// \returns true if the element is actually added, i.e. 
was not in the 32730b57cec5SDimitry Andric /// collection before the operation. 32740b57cec5SDimitry Andric bool insert(PHINode *Ptr) { 32750b57cec5SDimitry Andric if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) { 32760b57cec5SDimitry Andric NodeList.push_back(Ptr); 32770b57cec5SDimitry Andric return true; 32780b57cec5SDimitry Andric } 32790b57cec5SDimitry Andric return false; 32800b57cec5SDimitry Andric } 32810b57cec5SDimitry Andric 32820b57cec5SDimitry Andric /// Removes the element from the collection. 32830b57cec5SDimitry Andric /// \returns whether the element is actually removed, i.e. was in the 32840b57cec5SDimitry Andric /// collection before the operation. 32850b57cec5SDimitry Andric bool erase(PHINode *Ptr) { 3286e8d8bef9SDimitry Andric if (NodeMap.erase(Ptr)) { 32870b57cec5SDimitry Andric SkipRemovedElements(FirstValidElement); 32880b57cec5SDimitry Andric return true; 32890b57cec5SDimitry Andric } 32900b57cec5SDimitry Andric return false; 32910b57cec5SDimitry Andric } 32920b57cec5SDimitry Andric 32930b57cec5SDimitry Andric /// Removes all elements and clears the collection. 32940b57cec5SDimitry Andric void clear() { 32950b57cec5SDimitry Andric NodeMap.clear(); 32960b57cec5SDimitry Andric NodeList.clear(); 32970b57cec5SDimitry Andric FirstValidElement = 0; 32980b57cec5SDimitry Andric } 32990b57cec5SDimitry Andric 33000b57cec5SDimitry Andric /// \returns an iterator that will iterate the elements in the order of 33010b57cec5SDimitry Andric /// insertion. 33020b57cec5SDimitry Andric iterator begin() { 33030b57cec5SDimitry Andric if (FirstValidElement == 0) 33040b57cec5SDimitry Andric SkipRemovedElements(FirstValidElement); 33050b57cec5SDimitry Andric return PhiNodeSetIterator(this, FirstValidElement); 33060b57cec5SDimitry Andric } 33070b57cec5SDimitry Andric 33080b57cec5SDimitry Andric /// \returns an iterator that points to the end of the collection. 
33090b57cec5SDimitry Andric iterator end() { return PhiNodeSetIterator(this, NodeList.size()); } 33100b57cec5SDimitry Andric 33110b57cec5SDimitry Andric /// Returns the number of elements in the collection. 33120b57cec5SDimitry Andric size_t size() const { 33130b57cec5SDimitry Andric return NodeMap.size(); 33140b57cec5SDimitry Andric } 33150b57cec5SDimitry Andric 33160b57cec5SDimitry Andric /// \returns 1 if the given element is in the collection, and 0 if otherwise. 33170b57cec5SDimitry Andric size_t count(PHINode *Ptr) const { 33180b57cec5SDimitry Andric return NodeMap.count(Ptr); 33190b57cec5SDimitry Andric } 33200b57cec5SDimitry Andric 33210b57cec5SDimitry Andric private: 33220b57cec5SDimitry Andric /// Updates the CurrentIndex so that it will point to a valid element. 33230b57cec5SDimitry Andric /// 33240b57cec5SDimitry Andric /// If the element of NodeList at CurrentIndex is valid, it does not 33250b57cec5SDimitry Andric /// change it. If there are no more valid elements, it updates CurrentIndex 33260b57cec5SDimitry Andric /// to point to the end of the NodeList. 33270b57cec5SDimitry Andric void SkipRemovedElements(size_t &CurrentIndex) { 33280b57cec5SDimitry Andric while (CurrentIndex < NodeList.size()) { 33290b57cec5SDimitry Andric auto it = NodeMap.find(NodeList[CurrentIndex]); 33300b57cec5SDimitry Andric // If the element has been deleted and added again later, NodeMap will 33310b57cec5SDimitry Andric // point to a different index, so CurrentIndex will still be invalid. 
33320b57cec5SDimitry Andric if (it != NodeMap.end() && it->second == CurrentIndex) 33330b57cec5SDimitry Andric break; 33340b57cec5SDimitry Andric ++CurrentIndex; 33350b57cec5SDimitry Andric } 33360b57cec5SDimitry Andric } 33370b57cec5SDimitry Andric }; 33380b57cec5SDimitry Andric 33390b57cec5SDimitry Andric PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start) 33400b57cec5SDimitry Andric : Set(Set), CurrentIndex(Start) {} 33410b57cec5SDimitry Andric 33420b57cec5SDimitry Andric PHINode * PhiNodeSetIterator::operator*() const { 33430b57cec5SDimitry Andric assert(CurrentIndex < Set->NodeList.size() && 33440b57cec5SDimitry Andric "PhiNodeSet access out of range"); 33450b57cec5SDimitry Andric return Set->NodeList[CurrentIndex]; 33460b57cec5SDimitry Andric } 33470b57cec5SDimitry Andric 33480b57cec5SDimitry Andric PhiNodeSetIterator& PhiNodeSetIterator::operator++() { 33490b57cec5SDimitry Andric assert(CurrentIndex < Set->NodeList.size() && 33500b57cec5SDimitry Andric "PhiNodeSet access out of range"); 33510b57cec5SDimitry Andric ++CurrentIndex; 33520b57cec5SDimitry Andric Set->SkipRemovedElements(CurrentIndex); 33530b57cec5SDimitry Andric return *this; 33540b57cec5SDimitry Andric } 33550b57cec5SDimitry Andric 33560b57cec5SDimitry Andric bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const { 33570b57cec5SDimitry Andric return CurrentIndex == RHS.CurrentIndex; 33580b57cec5SDimitry Andric } 33590b57cec5SDimitry Andric 33600b57cec5SDimitry Andric bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const { 33610b57cec5SDimitry Andric return !((*this) == RHS); 33620b57cec5SDimitry Andric } 33630b57cec5SDimitry Andric 33640b57cec5SDimitry Andric /// Keep track of simplification of Phi nodes. 33650b57cec5SDimitry Andric /// Accept the set of all phi nodes and erase phi node from this set 33660b57cec5SDimitry Andric /// if it is simplified. 
33670b57cec5SDimitry Andric class SimplificationTracker { 33680b57cec5SDimitry Andric DenseMap<Value *, Value *> Storage; 33690b57cec5SDimitry Andric const SimplifyQuery &SQ; 33700b57cec5SDimitry Andric // Tracks newly created Phi nodes. The elements are iterated by insertion 33710b57cec5SDimitry Andric // order. 33720b57cec5SDimitry Andric PhiNodeSet AllPhiNodes; 33730b57cec5SDimitry Andric // Tracks newly created Select nodes. 33740b57cec5SDimitry Andric SmallPtrSet<SelectInst *, 32> AllSelectNodes; 33750b57cec5SDimitry Andric 33760b57cec5SDimitry Andric public: 33770b57cec5SDimitry Andric SimplificationTracker(const SimplifyQuery &sq) 33780b57cec5SDimitry Andric : SQ(sq) {} 33790b57cec5SDimitry Andric 33800b57cec5SDimitry Andric Value *Get(Value *V) { 33810b57cec5SDimitry Andric do { 33820b57cec5SDimitry Andric auto SV = Storage.find(V); 33830b57cec5SDimitry Andric if (SV == Storage.end()) 33840b57cec5SDimitry Andric return V; 33850b57cec5SDimitry Andric V = SV->second; 33860b57cec5SDimitry Andric } while (true); 33870b57cec5SDimitry Andric } 33880b57cec5SDimitry Andric 33890b57cec5SDimitry Andric Value *Simplify(Value *Val) { 33900b57cec5SDimitry Andric SmallVector<Value *, 32> WorkList; 33910b57cec5SDimitry Andric SmallPtrSet<Value *, 32> Visited; 33920b57cec5SDimitry Andric WorkList.push_back(Val); 33930b57cec5SDimitry Andric while (!WorkList.empty()) { 33945ffd83dbSDimitry Andric auto *P = WorkList.pop_back_val(); 33950b57cec5SDimitry Andric if (!Visited.insert(P).second) 33960b57cec5SDimitry Andric continue; 33970b57cec5SDimitry Andric if (auto *PI = dyn_cast<Instruction>(P)) 339881ad6265SDimitry Andric if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) { 33990b57cec5SDimitry Andric for (auto *U : PI->users()) 34000b57cec5SDimitry Andric WorkList.push_back(cast<Value>(U)); 34010b57cec5SDimitry Andric Put(PI, V); 34020b57cec5SDimitry Andric PI->replaceAllUsesWith(V); 34030b57cec5SDimitry Andric if (auto *PHI = dyn_cast<PHINode>(PI)) 
34040b57cec5SDimitry Andric AllPhiNodes.erase(PHI); 34050b57cec5SDimitry Andric if (auto *Select = dyn_cast<SelectInst>(PI)) 34060b57cec5SDimitry Andric AllSelectNodes.erase(Select); 34070b57cec5SDimitry Andric PI->eraseFromParent(); 34080b57cec5SDimitry Andric } 34090b57cec5SDimitry Andric } 34100b57cec5SDimitry Andric return Get(Val); 34110b57cec5SDimitry Andric } 34120b57cec5SDimitry Andric 34130b57cec5SDimitry Andric void Put(Value *From, Value *To) { 34140b57cec5SDimitry Andric Storage.insert({ From, To }); 34150b57cec5SDimitry Andric } 34160b57cec5SDimitry Andric 34170b57cec5SDimitry Andric void ReplacePhi(PHINode *From, PHINode *To) { 34180b57cec5SDimitry Andric Value* OldReplacement = Get(From); 34190b57cec5SDimitry Andric while (OldReplacement != From) { 34200b57cec5SDimitry Andric From = To; 34210b57cec5SDimitry Andric To = dyn_cast<PHINode>(OldReplacement); 34220b57cec5SDimitry Andric OldReplacement = Get(From); 34230b57cec5SDimitry Andric } 34248bcb0991SDimitry Andric assert(To && Get(To) == To && "Replacement PHI node is already replaced."); 34250b57cec5SDimitry Andric Put(From, To); 34260b57cec5SDimitry Andric From->replaceAllUsesWith(To); 34270b57cec5SDimitry Andric AllPhiNodes.erase(From); 34280b57cec5SDimitry Andric From->eraseFromParent(); 34290b57cec5SDimitry Andric } 34300b57cec5SDimitry Andric 34310b57cec5SDimitry Andric PhiNodeSet& newPhiNodes() { return AllPhiNodes; } 34320b57cec5SDimitry Andric 34330b57cec5SDimitry Andric void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); } 34340b57cec5SDimitry Andric 34350b57cec5SDimitry Andric void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); } 34360b57cec5SDimitry Andric 34370b57cec5SDimitry Andric unsigned countNewPhiNodes() const { return AllPhiNodes.size(); } 34380b57cec5SDimitry Andric 34390b57cec5SDimitry Andric unsigned countNewSelectNodes() const { return AllSelectNodes.size(); } 34400b57cec5SDimitry Andric 34410b57cec5SDimitry Andric void destroyNewNodes(Type *CommonType) { 
34420b57cec5SDimitry Andric // For safe erasing, replace the uses with dummy value first. 344381ad6265SDimitry Andric auto *Dummy = PoisonValue::get(CommonType); 34445ffd83dbSDimitry Andric for (auto *I : AllPhiNodes) { 34450b57cec5SDimitry Andric I->replaceAllUsesWith(Dummy); 34460b57cec5SDimitry Andric I->eraseFromParent(); 34470b57cec5SDimitry Andric } 34480b57cec5SDimitry Andric AllPhiNodes.clear(); 34495ffd83dbSDimitry Andric for (auto *I : AllSelectNodes) { 34500b57cec5SDimitry Andric I->replaceAllUsesWith(Dummy); 34510b57cec5SDimitry Andric I->eraseFromParent(); 34520b57cec5SDimitry Andric } 34530b57cec5SDimitry Andric AllSelectNodes.clear(); 34540b57cec5SDimitry Andric } 34550b57cec5SDimitry Andric }; 34560b57cec5SDimitry Andric 34570b57cec5SDimitry Andric /// A helper class for combining addressing modes. 34580b57cec5SDimitry Andric class AddressingModeCombiner { 34590b57cec5SDimitry Andric typedef DenseMap<Value *, Value *> FoldAddrToValueMapping; 34600b57cec5SDimitry Andric typedef std::pair<PHINode *, PHINode *> PHIPair; 34610b57cec5SDimitry Andric 34620b57cec5SDimitry Andric private: 34630b57cec5SDimitry Andric /// The addressing modes we've collected. 34640b57cec5SDimitry Andric SmallVector<ExtAddrMode, 16> AddrModes; 34650b57cec5SDimitry Andric 34660b57cec5SDimitry Andric /// The field in which the AddrModes differ, when we have more than one. 34670b57cec5SDimitry Andric ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField; 34680b57cec5SDimitry Andric 34690b57cec5SDimitry Andric /// Are the AddrModes that we have all just equal to their original values? 34700b57cec5SDimitry Andric bool AllAddrModesTrivial = true; 34710b57cec5SDimitry Andric 34720b57cec5SDimitry Andric /// Common Type for all different fields in addressing modes. 34731fd87a68SDimitry Andric Type *CommonType = nullptr; 34740b57cec5SDimitry Andric 34750b57cec5SDimitry Andric /// SimplifyQuery for simplifyInstruction utility. 
34760b57cec5SDimitry Andric const SimplifyQuery &SQ; 34770b57cec5SDimitry Andric 34780b57cec5SDimitry Andric /// Original Address. 34790b57cec5SDimitry Andric Value *Original; 34800b57cec5SDimitry Andric 34810b57cec5SDimitry Andric public: 34820b57cec5SDimitry Andric AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue) 34831fd87a68SDimitry Andric : SQ(_SQ), Original(OriginalValue) {} 34840b57cec5SDimitry Andric 34850b57cec5SDimitry Andric /// Get the combined AddrMode 34860b57cec5SDimitry Andric const ExtAddrMode &getAddrMode() const { 34870b57cec5SDimitry Andric return AddrModes[0]; 34880b57cec5SDimitry Andric } 34890b57cec5SDimitry Andric 34900b57cec5SDimitry Andric /// Add a new AddrMode if it's compatible with the AddrModes we already 34910b57cec5SDimitry Andric /// have. 34920b57cec5SDimitry Andric /// \return True iff we succeeded in doing so. 34930b57cec5SDimitry Andric bool addNewAddrMode(ExtAddrMode &NewAddrMode) { 34940b57cec5SDimitry Andric // Take note of if we have any non-trivial AddrModes, as we need to detect 34950b57cec5SDimitry Andric // when all AddrModes are trivial as then we would introduce a phi or select 34960b57cec5SDimitry Andric // which just duplicates what's already there. 34970b57cec5SDimitry Andric AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial(); 34980b57cec5SDimitry Andric 34990b57cec5SDimitry Andric // If this is the first addrmode then everything is fine. 35000b57cec5SDimitry Andric if (AddrModes.empty()) { 35010b57cec5SDimitry Andric AddrModes.emplace_back(NewAddrMode); 35020b57cec5SDimitry Andric return true; 35030b57cec5SDimitry Andric } 35040b57cec5SDimitry Andric 35050b57cec5SDimitry Andric // Figure out how different this is from the other address modes, which we 35060b57cec5SDimitry Andric // can do just by comparing against the first one given that we only care 35070b57cec5SDimitry Andric // about the cumulative difference. 
35080b57cec5SDimitry Andric ExtAddrMode::FieldName ThisDifferentField = 35090b57cec5SDimitry Andric AddrModes[0].compare(NewAddrMode); 35100b57cec5SDimitry Andric if (DifferentField == ExtAddrMode::NoField) 35110b57cec5SDimitry Andric DifferentField = ThisDifferentField; 35120b57cec5SDimitry Andric else if (DifferentField != ThisDifferentField) 35130b57cec5SDimitry Andric DifferentField = ExtAddrMode::MultipleFields; 35140b57cec5SDimitry Andric 35150b57cec5SDimitry Andric // If NewAddrMode differs in more than one dimension we cannot handle it. 35160b57cec5SDimitry Andric bool CanHandle = DifferentField != ExtAddrMode::MultipleFields; 35170b57cec5SDimitry Andric 35180b57cec5SDimitry Andric // If Scale Field is different then we reject. 35190b57cec5SDimitry Andric CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField; 35200b57cec5SDimitry Andric 35210b57cec5SDimitry Andric // We also must reject the case when base offset is different and 35220b57cec5SDimitry Andric // scale reg is not null, we cannot handle this case due to merge of 35230b57cec5SDimitry Andric // different offsets will be used as ScaleReg. 35240b57cec5SDimitry Andric CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField || 35250b57cec5SDimitry Andric !NewAddrMode.ScaledReg); 35260b57cec5SDimitry Andric 35270b57cec5SDimitry Andric // We also must reject the case when GV is different and BaseReg installed 35280b57cec5SDimitry Andric // due to we want to use base reg as a merge of GV values. 35290b57cec5SDimitry Andric CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField || 35300b57cec5SDimitry Andric !NewAddrMode.HasBaseReg); 35310b57cec5SDimitry Andric 35320b57cec5SDimitry Andric // Even if NewAddMode is the same we still need to collect it due to 35330b57cec5SDimitry Andric // original value is different. And later we will need all original values 35340b57cec5SDimitry Andric // as anchors during finding the common Phi node. 
35350b57cec5SDimitry Andric if (CanHandle) 35360b57cec5SDimitry Andric AddrModes.emplace_back(NewAddrMode); 35370b57cec5SDimitry Andric else 35380b57cec5SDimitry Andric AddrModes.clear(); 35390b57cec5SDimitry Andric 35400b57cec5SDimitry Andric return CanHandle; 35410b57cec5SDimitry Andric } 35420b57cec5SDimitry Andric 35430b57cec5SDimitry Andric /// Combine the addressing modes we've collected into a single 35440b57cec5SDimitry Andric /// addressing mode. 35450b57cec5SDimitry Andric /// \return True iff we successfully combined them or we only had one so 35460b57cec5SDimitry Andric /// didn't need to combine them anyway. 35470b57cec5SDimitry Andric bool combineAddrModes() { 35480b57cec5SDimitry Andric // If we have no AddrModes then they can't be combined. 35490b57cec5SDimitry Andric if (AddrModes.size() == 0) 35500b57cec5SDimitry Andric return false; 35510b57cec5SDimitry Andric 35520b57cec5SDimitry Andric // A single AddrMode can trivially be combined. 35530b57cec5SDimitry Andric if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField) 35540b57cec5SDimitry Andric return true; 35550b57cec5SDimitry Andric 35560b57cec5SDimitry Andric // If the AddrModes we collected are all just equal to the value they are 35570b57cec5SDimitry Andric // derived from then combining them wouldn't do anything useful. 35580b57cec5SDimitry Andric if (AllAddrModesTrivial) 35590b57cec5SDimitry Andric return false; 35600b57cec5SDimitry Andric 35610b57cec5SDimitry Andric if (!addrModeCombiningAllowed()) 35620b57cec5SDimitry Andric return false; 35630b57cec5SDimitry Andric 35640b57cec5SDimitry Andric // Build a map between <original value, basic block where we saw it> to 35650b57cec5SDimitry Andric // value of base register. 35660b57cec5SDimitry Andric // Bail out if there is no common type. 
35670b57cec5SDimitry Andric FoldAddrToValueMapping Map; 35680b57cec5SDimitry Andric if (!initializeMap(Map)) 35690b57cec5SDimitry Andric return false; 35700b57cec5SDimitry Andric 35710b57cec5SDimitry Andric Value *CommonValue = findCommon(Map); 35720b57cec5SDimitry Andric if (CommonValue) 35730b57cec5SDimitry Andric AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes); 35740b57cec5SDimitry Andric return CommonValue != nullptr; 35750b57cec5SDimitry Andric } 35760b57cec5SDimitry Andric 35770b57cec5SDimitry Andric private: 35780b57cec5SDimitry Andric /// Initialize Map with anchor values. For address seen 35790b57cec5SDimitry Andric /// we set the value of different field saw in this address. 35800b57cec5SDimitry Andric /// At the same time we find a common type for different field we will 35810b57cec5SDimitry Andric /// use to create new Phi/Select nodes. Keep it in CommonType field. 35820b57cec5SDimitry Andric /// Return false if there is no common type found. 35830b57cec5SDimitry Andric bool initializeMap(FoldAddrToValueMapping &Map) { 35840b57cec5SDimitry Andric // Keep track of keys where the value is null. We will need to replace it 35850b57cec5SDimitry Andric // with constant null when we know the common type. 
35860b57cec5SDimitry Andric SmallVector<Value *, 2> NullValue; 35870b57cec5SDimitry Andric Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); 35880b57cec5SDimitry Andric for (auto &AM : AddrModes) { 35890b57cec5SDimitry Andric Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy); 35900b57cec5SDimitry Andric if (DV) { 35910b57cec5SDimitry Andric auto *Type = DV->getType(); 35920b57cec5SDimitry Andric if (CommonType && CommonType != Type) 35930b57cec5SDimitry Andric return false; 35940b57cec5SDimitry Andric CommonType = Type; 35950b57cec5SDimitry Andric Map[AM.OriginalValue] = DV; 35960b57cec5SDimitry Andric } else { 35970b57cec5SDimitry Andric NullValue.push_back(AM.OriginalValue); 35980b57cec5SDimitry Andric } 35990b57cec5SDimitry Andric } 36000b57cec5SDimitry Andric assert(CommonType && "At least one non-null value must be!"); 36010b57cec5SDimitry Andric for (auto *V : NullValue) 36020b57cec5SDimitry Andric Map[V] = Constant::getNullValue(CommonType); 36030b57cec5SDimitry Andric return true; 36040b57cec5SDimitry Andric } 36050b57cec5SDimitry Andric 36060b57cec5SDimitry Andric /// We have mapping between value A and other value B where B was a field in 36070b57cec5SDimitry Andric /// addressing mode represented by A. Also we have an original value C 36080b57cec5SDimitry Andric /// representing an address we start with. Traversing from C through phi and 36090b57cec5SDimitry Andric /// selects we ended up with A's in a map. This utility function tries to find 36100b57cec5SDimitry Andric /// a value V which is a field in addressing mode C and traversing through phi 36110b57cec5SDimitry Andric /// nodes and selects we will end up in corresponded values B in a map. 36120b57cec5SDimitry Andric /// The utility will create a new Phi/Selects if needed. 
36130b57cec5SDimitry Andric // The simple example looks as follows: 36140b57cec5SDimitry Andric // BB1: 36150b57cec5SDimitry Andric // p1 = b1 + 40 36160b57cec5SDimitry Andric // br cond BB2, BB3 36170b57cec5SDimitry Andric // BB2: 36180b57cec5SDimitry Andric // p2 = b2 + 40 36190b57cec5SDimitry Andric // br BB3 36200b57cec5SDimitry Andric // BB3: 36210b57cec5SDimitry Andric // p = phi [p1, BB1], [p2, BB2] 36220b57cec5SDimitry Andric // v = load p 36230b57cec5SDimitry Andric // Map is 36240b57cec5SDimitry Andric // p1 -> b1 36250b57cec5SDimitry Andric // p2 -> b2 36260b57cec5SDimitry Andric // Request is 36270b57cec5SDimitry Andric // p -> ? 36280b57cec5SDimitry Andric // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3. 36290b57cec5SDimitry Andric Value *findCommon(FoldAddrToValueMapping &Map) { 36300b57cec5SDimitry Andric // Tracks the simplification of newly created phi nodes. The reason we use 36310b57cec5SDimitry Andric // this mapping is because we will add new created Phi nodes in AddrToBase. 36320b57cec5SDimitry Andric // Simplification of Phi nodes is recursive, so some Phi node may 36330b57cec5SDimitry Andric // be simplified after we added it to AddrToBase. In reality this 36340b57cec5SDimitry Andric // simplification is possible only if original phi/selects were not 36350b57cec5SDimitry Andric // simplified yet. 36360b57cec5SDimitry Andric // Using this mapping we can find the current value in AddrToBase. 36370b57cec5SDimitry Andric SimplificationTracker ST(SQ); 36380b57cec5SDimitry Andric 36390b57cec5SDimitry Andric // First step, DFS to create PHI nodes for all intermediate blocks. 36400b57cec5SDimitry Andric // Also fill traverse order for the second step. 36410b57cec5SDimitry Andric SmallVector<Value *, 32> TraverseOrder; 36420b57cec5SDimitry Andric InsertPlaceholders(Map, TraverseOrder, ST); 36430b57cec5SDimitry Andric 36440b57cec5SDimitry Andric // Second Step, fill new nodes by merged values and simplify if possible. 
36450b57cec5SDimitry Andric FillPlaceholders(Map, TraverseOrder, ST); 36460b57cec5SDimitry Andric 36470b57cec5SDimitry Andric if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) { 36480b57cec5SDimitry Andric ST.destroyNewNodes(CommonType); 36490b57cec5SDimitry Andric return nullptr; 36500b57cec5SDimitry Andric } 36510b57cec5SDimitry Andric 36520b57cec5SDimitry Andric // Now we'd like to match New Phi nodes to existed ones. 36530b57cec5SDimitry Andric unsigned PhiNotMatchedCount = 0; 36540b57cec5SDimitry Andric if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) { 36550b57cec5SDimitry Andric ST.destroyNewNodes(CommonType); 36560b57cec5SDimitry Andric return nullptr; 36570b57cec5SDimitry Andric } 36580b57cec5SDimitry Andric 36590b57cec5SDimitry Andric auto *Result = ST.Get(Map.find(Original)->second); 36600b57cec5SDimitry Andric if (Result) { 36610b57cec5SDimitry Andric NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount; 36620b57cec5SDimitry Andric NumMemoryInstsSelectCreated += ST.countNewSelectNodes(); 36630b57cec5SDimitry Andric } 36640b57cec5SDimitry Andric return Result; 36650b57cec5SDimitry Andric } 36660b57cec5SDimitry Andric 36670b57cec5SDimitry Andric /// Try to match PHI node to Candidate. 36680b57cec5SDimitry Andric /// Matcher tracks the matched Phi nodes. 
36690b57cec5SDimitry Andric bool MatchPhiNode(PHINode *PHI, PHINode *Candidate, 36700b57cec5SDimitry Andric SmallSetVector<PHIPair, 8> &Matcher, 36710b57cec5SDimitry Andric PhiNodeSet &PhiNodesToMatch) { 36720b57cec5SDimitry Andric SmallVector<PHIPair, 8> WorkList; 36730b57cec5SDimitry Andric Matcher.insert({ PHI, Candidate }); 36740b57cec5SDimitry Andric SmallSet<PHINode *, 8> MatchedPHIs; 36750b57cec5SDimitry Andric MatchedPHIs.insert(PHI); 36760b57cec5SDimitry Andric WorkList.push_back({ PHI, Candidate }); 36770b57cec5SDimitry Andric SmallSet<PHIPair, 8> Visited; 36780b57cec5SDimitry Andric while (!WorkList.empty()) { 36790b57cec5SDimitry Andric auto Item = WorkList.pop_back_val(); 36800b57cec5SDimitry Andric if (!Visited.insert(Item).second) 36810b57cec5SDimitry Andric continue; 36820b57cec5SDimitry Andric // We iterate over all incoming values to Phi to compare them. 36830b57cec5SDimitry Andric // If values are different and both of them Phi and the first one is a 36840b57cec5SDimitry Andric // Phi we added (subject to match) and both of them is in the same basic 36850b57cec5SDimitry Andric // block then we can match our pair if values match. So we state that 36860b57cec5SDimitry Andric // these values match and add it to work list to verify that. 
3687fcaf7f86SDimitry Andric for (auto *B : Item.first->blocks()) { 36880b57cec5SDimitry Andric Value *FirstValue = Item.first->getIncomingValueForBlock(B); 36890b57cec5SDimitry Andric Value *SecondValue = Item.second->getIncomingValueForBlock(B); 36900b57cec5SDimitry Andric if (FirstValue == SecondValue) 36910b57cec5SDimitry Andric continue; 36920b57cec5SDimitry Andric 36930b57cec5SDimitry Andric PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue); 36940b57cec5SDimitry Andric PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue); 36950b57cec5SDimitry Andric 36960b57cec5SDimitry Andric // One of them is not Phi or 36970b57cec5SDimitry Andric // The first one is not Phi node from the set we'd like to match or 36980b57cec5SDimitry Andric // Phi nodes from different basic blocks then 36990b57cec5SDimitry Andric // we will not be able to match. 37000b57cec5SDimitry Andric if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) || 37010b57cec5SDimitry Andric FirstPhi->getParent() != SecondPhi->getParent()) 37020b57cec5SDimitry Andric return false; 37030b57cec5SDimitry Andric 37040b57cec5SDimitry Andric // If we already matched them then continue. 37050b57cec5SDimitry Andric if (Matcher.count({ FirstPhi, SecondPhi })) 37060b57cec5SDimitry Andric continue; 37070b57cec5SDimitry Andric // So the values are different and does not match. So we need them to 37080b57cec5SDimitry Andric // match. (But we register no more than one match per PHI node, so that 37090b57cec5SDimitry Andric // we won't later try to replace them twice.) 37108bcb0991SDimitry Andric if (MatchedPHIs.insert(FirstPhi).second) 37110b57cec5SDimitry Andric Matcher.insert({ FirstPhi, SecondPhi }); 37120b57cec5SDimitry Andric // But me must check it. 
37130b57cec5SDimitry Andric WorkList.push_back({ FirstPhi, SecondPhi }); 37140b57cec5SDimitry Andric } 37150b57cec5SDimitry Andric } 37160b57cec5SDimitry Andric return true; 37170b57cec5SDimitry Andric } 37180b57cec5SDimitry Andric 37190b57cec5SDimitry Andric /// For the given set of PHI nodes (in the SimplificationTracker) try 37200b57cec5SDimitry Andric /// to find their equivalents. 37210b57cec5SDimitry Andric /// Returns false if this matching fails and creation of new Phi is disabled. 37220b57cec5SDimitry Andric bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes, 37230b57cec5SDimitry Andric unsigned &PhiNotMatchedCount) { 37240b57cec5SDimitry Andric // Matched and PhiNodesToMatch iterate their elements in a deterministic 37250b57cec5SDimitry Andric // order, so the replacements (ReplacePhi) are also done in a deterministic 37260b57cec5SDimitry Andric // order. 37270b57cec5SDimitry Andric SmallSetVector<PHIPair, 8> Matched; 37280b57cec5SDimitry Andric SmallPtrSet<PHINode *, 8> WillNotMatch; 37290b57cec5SDimitry Andric PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes(); 37300b57cec5SDimitry Andric while (PhiNodesToMatch.size()) { 37310b57cec5SDimitry Andric PHINode *PHI = *PhiNodesToMatch.begin(); 37320b57cec5SDimitry Andric 37330b57cec5SDimitry Andric // Add us, if no Phi nodes in the basic block we do not match. 37340b57cec5SDimitry Andric WillNotMatch.clear(); 37350b57cec5SDimitry Andric WillNotMatch.insert(PHI); 37360b57cec5SDimitry Andric 37370b57cec5SDimitry Andric // Traverse all Phis until we found equivalent or fail to do that. 37380b57cec5SDimitry Andric bool IsMatched = false; 37390b57cec5SDimitry Andric for (auto &P : PHI->getParent()->phis()) { 3740349cc55cSDimitry Andric // Skip new Phi nodes. 
3741349cc55cSDimitry Andric if (PhiNodesToMatch.count(&P)) 37420b57cec5SDimitry Andric continue; 37430b57cec5SDimitry Andric if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch))) 37440b57cec5SDimitry Andric break; 37450b57cec5SDimitry Andric // If it does not match, collect all Phi nodes from matcher. 37460b57cec5SDimitry Andric // if we end up with no match, them all these Phi nodes will not match 37470b57cec5SDimitry Andric // later. 37480b57cec5SDimitry Andric for (auto M : Matched) 37490b57cec5SDimitry Andric WillNotMatch.insert(M.first); 37500b57cec5SDimitry Andric Matched.clear(); 37510b57cec5SDimitry Andric } 37520b57cec5SDimitry Andric if (IsMatched) { 37530b57cec5SDimitry Andric // Replace all matched values and erase them. 37540b57cec5SDimitry Andric for (auto MV : Matched) 37550b57cec5SDimitry Andric ST.ReplacePhi(MV.first, MV.second); 37560b57cec5SDimitry Andric Matched.clear(); 37570b57cec5SDimitry Andric continue; 37580b57cec5SDimitry Andric } 37590b57cec5SDimitry Andric // If we are not allowed to create new nodes then bail out. 37600b57cec5SDimitry Andric if (!AllowNewPhiNodes) 37610b57cec5SDimitry Andric return false; 37620b57cec5SDimitry Andric // Just remove all seen values in matcher. They will not match anything. 37630b57cec5SDimitry Andric PhiNotMatchedCount += WillNotMatch.size(); 37640b57cec5SDimitry Andric for (auto *P : WillNotMatch) 37650b57cec5SDimitry Andric PhiNodesToMatch.erase(P); 37660b57cec5SDimitry Andric } 37670b57cec5SDimitry Andric return true; 37680b57cec5SDimitry Andric } 37690b57cec5SDimitry Andric /// Fill the placeholders with values from predecessors and simplify them. 
37700b57cec5SDimitry Andric void FillPlaceholders(FoldAddrToValueMapping &Map, 37710b57cec5SDimitry Andric SmallVectorImpl<Value *> &TraverseOrder, 37720b57cec5SDimitry Andric SimplificationTracker &ST) { 37730b57cec5SDimitry Andric while (!TraverseOrder.empty()) { 37740b57cec5SDimitry Andric Value *Current = TraverseOrder.pop_back_val(); 37750b57cec5SDimitry Andric assert(Map.find(Current) != Map.end() && "No node to fill!!!"); 37760b57cec5SDimitry Andric Value *V = Map[Current]; 37770b57cec5SDimitry Andric 37780b57cec5SDimitry Andric if (SelectInst *Select = dyn_cast<SelectInst>(V)) { 37790b57cec5SDimitry Andric // CurrentValue also must be Select. 37800b57cec5SDimitry Andric auto *CurrentSelect = cast<SelectInst>(Current); 37810b57cec5SDimitry Andric auto *TrueValue = CurrentSelect->getTrueValue(); 37820b57cec5SDimitry Andric assert(Map.find(TrueValue) != Map.end() && "No True Value!"); 37830b57cec5SDimitry Andric Select->setTrueValue(ST.Get(Map[TrueValue])); 37840b57cec5SDimitry Andric auto *FalseValue = CurrentSelect->getFalseValue(); 37850b57cec5SDimitry Andric assert(Map.find(FalseValue) != Map.end() && "No False Value!"); 37860b57cec5SDimitry Andric Select->setFalseValue(ST.Get(Map[FalseValue])); 37870b57cec5SDimitry Andric } else { 37880b57cec5SDimitry Andric // Must be a Phi node then. 37898bcb0991SDimitry Andric auto *PHI = cast<PHINode>(V); 37900b57cec5SDimitry Andric // Fill the Phi node with values from predecessors. 
37915ffd83dbSDimitry Andric for (auto *B : predecessors(PHI->getParent())) { 37928bcb0991SDimitry Andric Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B); 37930b57cec5SDimitry Andric assert(Map.find(PV) != Map.end() && "No predecessor Value!"); 37940b57cec5SDimitry Andric PHI->addIncoming(ST.Get(Map[PV]), B); 37950b57cec5SDimitry Andric } 37960b57cec5SDimitry Andric } 37970b57cec5SDimitry Andric Map[Current] = ST.Simplify(V); 37980b57cec5SDimitry Andric } 37990b57cec5SDimitry Andric } 38000b57cec5SDimitry Andric 38010b57cec5SDimitry Andric /// Starting from original value recursively iterates over def-use chain up to 38020b57cec5SDimitry Andric /// known ending values represented in a map. For each traversed phi/select 38030b57cec5SDimitry Andric /// inserts a placeholder Phi or Select. 38040b57cec5SDimitry Andric /// Reports all new created Phi/Select nodes by adding them to set. 38050b57cec5SDimitry Andric /// Also reports and order in what values have been traversed. 38060b57cec5SDimitry Andric void InsertPlaceholders(FoldAddrToValueMapping &Map, 38070b57cec5SDimitry Andric SmallVectorImpl<Value *> &TraverseOrder, 38080b57cec5SDimitry Andric SimplificationTracker &ST) { 38090b57cec5SDimitry Andric SmallVector<Value *, 32> Worklist; 38100b57cec5SDimitry Andric assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) && 38110b57cec5SDimitry Andric "Address must be a Phi or Select node"); 381281ad6265SDimitry Andric auto *Dummy = PoisonValue::get(CommonType); 38130b57cec5SDimitry Andric Worklist.push_back(Original); 38140b57cec5SDimitry Andric while (!Worklist.empty()) { 38150b57cec5SDimitry Andric Value *Current = Worklist.pop_back_val(); 38160b57cec5SDimitry Andric // if it is already visited or it is an ending value then skip it. 
38170b57cec5SDimitry Andric if (Map.find(Current) != Map.end()) 38180b57cec5SDimitry Andric continue; 38190b57cec5SDimitry Andric TraverseOrder.push_back(Current); 38200b57cec5SDimitry Andric 38210b57cec5SDimitry Andric // CurrentValue must be a Phi node or select. All others must be covered 38220b57cec5SDimitry Andric // by anchors. 38230b57cec5SDimitry Andric if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) { 38240b57cec5SDimitry Andric // Is it OK to get metadata from OrigSelect?! 38250b57cec5SDimitry Andric // Create a Select placeholder with dummy value. 38260b57cec5SDimitry Andric SelectInst *Select = SelectInst::Create( 38270b57cec5SDimitry Andric CurrentSelect->getCondition(), Dummy, Dummy, 38280b57cec5SDimitry Andric CurrentSelect->getName(), CurrentSelect, CurrentSelect); 38290b57cec5SDimitry Andric Map[Current] = Select; 38300b57cec5SDimitry Andric ST.insertNewSelect(Select); 38310b57cec5SDimitry Andric // We are interested in True and False values. 38320b57cec5SDimitry Andric Worklist.push_back(CurrentSelect->getTrueValue()); 38330b57cec5SDimitry Andric Worklist.push_back(CurrentSelect->getFalseValue()); 38340b57cec5SDimitry Andric } else { 38350b57cec5SDimitry Andric // It must be a Phi node then. 
38360b57cec5SDimitry Andric PHINode *CurrentPhi = cast<PHINode>(Current); 38370b57cec5SDimitry Andric unsigned PredCount = CurrentPhi->getNumIncomingValues(); 38380b57cec5SDimitry Andric PHINode *PHI = 38390b57cec5SDimitry Andric PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi); 38400b57cec5SDimitry Andric Map[Current] = PHI; 38410b57cec5SDimitry Andric ST.insertNewPhi(PHI); 3842e8d8bef9SDimitry Andric append_range(Worklist, CurrentPhi->incoming_values()); 38430b57cec5SDimitry Andric } 38440b57cec5SDimitry Andric } 38450b57cec5SDimitry Andric } 38460b57cec5SDimitry Andric 38470b57cec5SDimitry Andric bool addrModeCombiningAllowed() { 38480b57cec5SDimitry Andric if (DisableComplexAddrModes) 38490b57cec5SDimitry Andric return false; 38500b57cec5SDimitry Andric switch (DifferentField) { 38510b57cec5SDimitry Andric default: 38520b57cec5SDimitry Andric return false; 38530b57cec5SDimitry Andric case ExtAddrMode::BaseRegField: 38540b57cec5SDimitry Andric return AddrSinkCombineBaseReg; 38550b57cec5SDimitry Andric case ExtAddrMode::BaseGVField: 38560b57cec5SDimitry Andric return AddrSinkCombineBaseGV; 38570b57cec5SDimitry Andric case ExtAddrMode::BaseOffsField: 38580b57cec5SDimitry Andric return AddrSinkCombineBaseOffs; 38590b57cec5SDimitry Andric case ExtAddrMode::ScaledRegField: 38600b57cec5SDimitry Andric return AddrSinkCombineScaledReg; 38610b57cec5SDimitry Andric } 38620b57cec5SDimitry Andric } 38630b57cec5SDimitry Andric }; 38640b57cec5SDimitry Andric } // end anonymous namespace 38650b57cec5SDimitry Andric 38660b57cec5SDimitry Andric /// Try adding ScaleReg*Scale to the current addressing mode. 38670b57cec5SDimitry Andric /// Return true and update AddrMode if this addr mode is legal for the target, 38680b57cec5SDimitry Andric /// false if not. 
38690b57cec5SDimitry Andric bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, 38700b57cec5SDimitry Andric unsigned Depth) { 38710b57cec5SDimitry Andric // If Scale is 1, then this is the same as adding ScaleReg to the addressing 38720b57cec5SDimitry Andric // mode. Just process that directly. 38730b57cec5SDimitry Andric if (Scale == 1) 38740b57cec5SDimitry Andric return matchAddr(ScaleReg, Depth); 38750b57cec5SDimitry Andric 38760b57cec5SDimitry Andric // If the scale is 0, it takes nothing to add this. 38770b57cec5SDimitry Andric if (Scale == 0) 38780b57cec5SDimitry Andric return true; 38790b57cec5SDimitry Andric 38800b57cec5SDimitry Andric // If we already have a scale of this value, we can add to it, otherwise, we 38810b57cec5SDimitry Andric // need an available scale field. 38820b57cec5SDimitry Andric if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) 38830b57cec5SDimitry Andric return false; 38840b57cec5SDimitry Andric 38850b57cec5SDimitry Andric ExtAddrMode TestAddrMode = AddrMode; 38860b57cec5SDimitry Andric 38870b57cec5SDimitry Andric // Add scale to turn X*4+X*3 -> X*7. This could also do things like 38880b57cec5SDimitry Andric // [A+B + A*7] -> [B+A*8]. 38890b57cec5SDimitry Andric TestAddrMode.Scale += Scale; 38900b57cec5SDimitry Andric TestAddrMode.ScaledReg = ScaleReg; 38910b57cec5SDimitry Andric 38920b57cec5SDimitry Andric // If the new address isn't legal, bail out. 38930b57cec5SDimitry Andric if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) 38940b57cec5SDimitry Andric return false; 38950b57cec5SDimitry Andric 38960b57cec5SDimitry Andric // It was legal, so commit it. 38970b57cec5SDimitry Andric AddrMode = TestAddrMode; 38980b57cec5SDimitry Andric 38990b57cec5SDimitry Andric // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now 39000b57cec5SDimitry Andric // to see if ScaleReg is actually X+C. 
If so, we can turn this into adding 3901fe6060f1SDimitry Andric // X*Scale + C*Scale to addr mode. If we found available IV increment, do not 3902fe6060f1SDimitry Andric // go any further: we can reuse it and cannot eliminate it. 39030b57cec5SDimitry Andric ConstantInt *CI = nullptr; Value *AddLHS = nullptr; 39040b57cec5SDimitry Andric if (isa<Instruction>(ScaleReg) && // not a constant expr. 39055ffd83dbSDimitry Andric match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) && 3906fe6060f1SDimitry Andric !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) { 39070b57cec5SDimitry Andric TestAddrMode.InBounds = false; 39080b57cec5SDimitry Andric TestAddrMode.ScaledReg = AddLHS; 39090b57cec5SDimitry Andric TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale; 39100b57cec5SDimitry Andric 39110b57cec5SDimitry Andric // If this addressing mode is legal, commit it and remember that we folded 39120b57cec5SDimitry Andric // this instruction. 39130b57cec5SDimitry Andric if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) { 39140b57cec5SDimitry Andric AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); 39150b57cec5SDimitry Andric AddrMode = TestAddrMode; 39160b57cec5SDimitry Andric return true; 39170b57cec5SDimitry Andric } 3918fe6060f1SDimitry Andric // Restore status quo. 3919fe6060f1SDimitry Andric TestAddrMode = AddrMode; 39200b57cec5SDimitry Andric } 39210b57cec5SDimitry Andric 3922fe6060f1SDimitry Andric // If this is an add recurrence with a constant step, return the increment 3923fe6060f1SDimitry Andric // instruction and the canonicalized step. 
3924fe6060f1SDimitry Andric auto GetConstantStep = [this](const Value * V) 3925fe6060f1SDimitry Andric ->Optional<std::pair<Instruction *, APInt> > { 3926fe6060f1SDimitry Andric auto *PN = dyn_cast<PHINode>(V); 3927fe6060f1SDimitry Andric if (!PN) 3928fe6060f1SDimitry Andric return None; 3929fe6060f1SDimitry Andric auto IVInc = getIVIncrement(PN, &LI); 3930fe6060f1SDimitry Andric if (!IVInc) 3931fe6060f1SDimitry Andric return None; 3932fe6060f1SDimitry Andric // TODO: The result of the intrinsics above is two-compliment. However when 3933fe6060f1SDimitry Andric // IV inc is expressed as add or sub, iv.next is potentially a poison value. 3934fe6060f1SDimitry Andric // If it has nuw or nsw flags, we need to make sure that these flags are 3935fe6060f1SDimitry Andric // inferrable at the point of memory instruction. Otherwise we are replacing 3936fe6060f1SDimitry Andric // well-defined two-compliment computation with poison. Currently, to avoid 3937fe6060f1SDimitry Andric // potentially complex analysis needed to prove this, we reject such cases. 3938fe6060f1SDimitry Andric if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first)) 3939fe6060f1SDimitry Andric if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap()) 3940fe6060f1SDimitry Andric return None; 3941fe6060f1SDimitry Andric if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second)) 3942fe6060f1SDimitry Andric return std::make_pair(IVInc->first, ConstantStep->getValue()); 3943fe6060f1SDimitry Andric return None; 3944fe6060f1SDimitry Andric }; 3945fe6060f1SDimitry Andric 3946fe6060f1SDimitry Andric // Try to account for the following special case: 3947fe6060f1SDimitry Andric // 1. ScaleReg is an inductive variable; 3948fe6060f1SDimitry Andric // 2. We use it with non-zero offset; 3949fe6060f1SDimitry Andric // 3. IV's increment is available at the point of memory instruction. 
3950fe6060f1SDimitry Andric // 3951fe6060f1SDimitry Andric // In this case, we may reuse the IV increment instead of the IV Phi to 3952fe6060f1SDimitry Andric // achieve the following advantages: 3953fe6060f1SDimitry Andric // 1. If IV step matches the offset, we will have no need in the offset; 3954fe6060f1SDimitry Andric // 2. Even if they don't match, we will reduce the overlap of living IV 3955fe6060f1SDimitry Andric // and IV increment, that will potentially lead to better register 3956fe6060f1SDimitry Andric // assignment. 3957fe6060f1SDimitry Andric if (AddrMode.BaseOffs) { 3958fe6060f1SDimitry Andric if (auto IVStep = GetConstantStep(ScaleReg)) { 3959fe6060f1SDimitry Andric Instruction *IVInc = IVStep->first; 3960fe6060f1SDimitry Andric // The following assert is important to ensure a lack of infinite loops. 3961fe6060f1SDimitry Andric // This transforms is (intentionally) the inverse of the one just above. 3962fe6060f1SDimitry Andric // If they don't agree on the definition of an increment, we'd alternate 3963fe6060f1SDimitry Andric // back and forth indefinitely. 3964fe6060f1SDimitry Andric assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep"); 3965fe6060f1SDimitry Andric APInt Step = IVStep->second; 3966fe6060f1SDimitry Andric APInt Offset = Step * AddrMode.Scale; 3967fe6060f1SDimitry Andric if (Offset.isSignedIntN(64)) { 3968fe6060f1SDimitry Andric TestAddrMode.InBounds = false; 3969fe6060f1SDimitry Andric TestAddrMode.ScaledReg = IVInc; 3970fe6060f1SDimitry Andric TestAddrMode.BaseOffs -= Offset.getLimitedValue(); 3971fe6060f1SDimitry Andric // If this addressing mode is legal, commit it.. 3972fe6060f1SDimitry Andric // (Note that we defer the (expensive) domtree base legality check 3973fe6060f1SDimitry Andric // to the very last possible point.) 
3974fe6060f1SDimitry Andric if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) && 3975fe6060f1SDimitry Andric getDTFn().dominates(IVInc, MemoryInst)) { 3976fe6060f1SDimitry Andric AddrModeInsts.push_back(cast<Instruction>(IVInc)); 3977fe6060f1SDimitry Andric AddrMode = TestAddrMode; 3978fe6060f1SDimitry Andric return true; 3979fe6060f1SDimitry Andric } 3980fe6060f1SDimitry Andric // Restore status quo. 3981fe6060f1SDimitry Andric TestAddrMode = AddrMode; 3982fe6060f1SDimitry Andric } 3983fe6060f1SDimitry Andric } 3984fe6060f1SDimitry Andric } 3985fe6060f1SDimitry Andric 3986fe6060f1SDimitry Andric // Otherwise, just return what we have. 39870b57cec5SDimitry Andric return true; 39880b57cec5SDimitry Andric } 39890b57cec5SDimitry Andric 39900b57cec5SDimitry Andric /// This is a little filter, which returns true if an addressing computation 39910b57cec5SDimitry Andric /// involving I might be folded into a load/store accessing it. 39920b57cec5SDimitry Andric /// This doesn't need to be perfect, but needs to accept at least 39930b57cec5SDimitry Andric /// the set of instructions that MatchOperationAddr can. 39940b57cec5SDimitry Andric static bool MightBeFoldableInst(Instruction *I) { 39950b57cec5SDimitry Andric switch (I->getOpcode()) { 39960b57cec5SDimitry Andric case Instruction::BitCast: 39970b57cec5SDimitry Andric case Instruction::AddrSpaceCast: 39980b57cec5SDimitry Andric // Don't touch identity bitcasts. 39990b57cec5SDimitry Andric if (I->getType() == I->getOperand(0)->getType()) 40000b57cec5SDimitry Andric return false; 40010b57cec5SDimitry Andric return I->getType()->isIntOrPtrTy(); 40020b57cec5SDimitry Andric case Instruction::PtrToInt: 40030b57cec5SDimitry Andric // PtrToInt is always a noop, as we know that the int type is pointer sized. 40040b57cec5SDimitry Andric return true; 40050b57cec5SDimitry Andric case Instruction::IntToPtr: 40060b57cec5SDimitry Andric // We know the input is intptr_t, so this is foldable. 
40070b57cec5SDimitry Andric return true; 40080b57cec5SDimitry Andric case Instruction::Add: 40090b57cec5SDimitry Andric return true; 40100b57cec5SDimitry Andric case Instruction::Mul: 40110b57cec5SDimitry Andric case Instruction::Shl: 40120b57cec5SDimitry Andric // Can only handle X*C and X << C. 40130b57cec5SDimitry Andric return isa<ConstantInt>(I->getOperand(1)); 40140b57cec5SDimitry Andric case Instruction::GetElementPtr: 40150b57cec5SDimitry Andric return true; 40160b57cec5SDimitry Andric default: 40170b57cec5SDimitry Andric return false; 40180b57cec5SDimitry Andric } 40190b57cec5SDimitry Andric } 40200b57cec5SDimitry Andric 40210b57cec5SDimitry Andric /// Check whether or not \p Val is a legal instruction for \p TLI. 40220b57cec5SDimitry Andric /// \note \p Val is assumed to be the product of some type promotion. 40230b57cec5SDimitry Andric /// Therefore if \p Val has an undefined state in \p TLI, this is assumed 40240b57cec5SDimitry Andric /// to be legal, as the non-promoted value would have had the same state. 40250b57cec5SDimitry Andric static bool isPromotedInstructionLegal(const TargetLowering &TLI, 40260b57cec5SDimitry Andric const DataLayout &DL, Value *Val) { 40270b57cec5SDimitry Andric Instruction *PromotedInst = dyn_cast<Instruction>(Val); 40280b57cec5SDimitry Andric if (!PromotedInst) 40290b57cec5SDimitry Andric return false; 40300b57cec5SDimitry Andric int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); 40310b57cec5SDimitry Andric // If the ISDOpcode is undefined, it was undefined before the promotion. 40320b57cec5SDimitry Andric if (!ISDOpcode) 40330b57cec5SDimitry Andric return true; 40340b57cec5SDimitry Andric // Otherwise, check if the promoted instruction is legal or not. 
40350b57cec5SDimitry Andric return TLI.isOperationLegalOrCustom( 40360b57cec5SDimitry Andric ISDOpcode, TLI.getValueType(DL, PromotedInst->getType())); 40370b57cec5SDimitry Andric } 40380b57cec5SDimitry Andric 40390b57cec5SDimitry Andric namespace { 40400b57cec5SDimitry Andric 40410b57cec5SDimitry Andric /// Hepler class to perform type promotion. 40420b57cec5SDimitry Andric class TypePromotionHelper { 40430b57cec5SDimitry Andric /// Utility function to add a promoted instruction \p ExtOpnd to 40440b57cec5SDimitry Andric /// \p PromotedInsts and record the type of extension we have seen. 40450b57cec5SDimitry Andric static void addPromotedInst(InstrToOrigTy &PromotedInsts, 40460b57cec5SDimitry Andric Instruction *ExtOpnd, 40470b57cec5SDimitry Andric bool IsSExt) { 40480b57cec5SDimitry Andric ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension; 40490b57cec5SDimitry Andric InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd); 40500b57cec5SDimitry Andric if (It != PromotedInsts.end()) { 40510b57cec5SDimitry Andric // If the new extension is same as original, the information in 40520b57cec5SDimitry Andric // PromotedInsts[ExtOpnd] is still correct. 40530b57cec5SDimitry Andric if (It->second.getInt() == ExtTy) 40540b57cec5SDimitry Andric return; 40550b57cec5SDimitry Andric 40560b57cec5SDimitry Andric // Now the new extension is different from old extension, we make 40570b57cec5SDimitry Andric // the type information invalid by setting extension type to 40580b57cec5SDimitry Andric // BothExtension. 40590b57cec5SDimitry Andric ExtTy = BothExtension; 40600b57cec5SDimitry Andric } 40610b57cec5SDimitry Andric PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy); 40620b57cec5SDimitry Andric } 40630b57cec5SDimitry Andric 40640b57cec5SDimitry Andric /// Utility function to query the original type of instruction \p Opnd 40650b57cec5SDimitry Andric /// with a matched extension type. 
If the extension doesn't match, we 40660b57cec5SDimitry Andric /// cannot use the information we had on the original type. 40670b57cec5SDimitry Andric /// BothExtension doesn't match any extension type. 40680b57cec5SDimitry Andric static const Type *getOrigType(const InstrToOrigTy &PromotedInsts, 40690b57cec5SDimitry Andric Instruction *Opnd, 40700b57cec5SDimitry Andric bool IsSExt) { 40710b57cec5SDimitry Andric ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension; 40720b57cec5SDimitry Andric InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); 40730b57cec5SDimitry Andric if (It != PromotedInsts.end() && It->second.getInt() == ExtTy) 40740b57cec5SDimitry Andric return It->second.getPointer(); 40750b57cec5SDimitry Andric return nullptr; 40760b57cec5SDimitry Andric } 40770b57cec5SDimitry Andric 40780b57cec5SDimitry Andric /// Utility function to check whether or not a sign or zero extension 40790b57cec5SDimitry Andric /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by 40800b57cec5SDimitry Andric /// either using the operands of \p Inst or promoting \p Inst. 40810b57cec5SDimitry Andric /// The type of the extension is defined by \p IsSExt. 40820b57cec5SDimitry Andric /// In other words, check if: 40830b57cec5SDimitry Andric /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType. 40840b57cec5SDimitry Andric /// #1 Promotion applies: 40850b57cec5SDimitry Andric /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...). 40860b57cec5SDimitry Andric /// #2 Operand reuses: 40870b57cec5SDimitry Andric /// ext opnd1 to ConsideredExtType. 40880b57cec5SDimitry Andric /// \p PromotedInsts maps the instructions to their type before promotion. 
40890b57cec5SDimitry Andric static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType, 40900b57cec5SDimitry Andric const InstrToOrigTy &PromotedInsts, bool IsSExt); 40910b57cec5SDimitry Andric 40920b57cec5SDimitry Andric /// Utility function to determine if \p OpIdx should be promoted when 40930b57cec5SDimitry Andric /// promoting \p Inst. 40940b57cec5SDimitry Andric static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { 40950b57cec5SDimitry Andric return !(isa<SelectInst>(Inst) && OpIdx == 0); 40960b57cec5SDimitry Andric } 40970b57cec5SDimitry Andric 40980b57cec5SDimitry Andric /// Utility function to promote the operand of \p Ext when this 40990b57cec5SDimitry Andric /// operand is a promotable trunc or sext or zext. 41000b57cec5SDimitry Andric /// \p PromotedInsts maps the instructions to their type before promotion. 41010b57cec5SDimitry Andric /// \p CreatedInstsCost[out] contains the cost of all instructions 41020b57cec5SDimitry Andric /// created to promote the operand of Ext. 41030b57cec5SDimitry Andric /// Newly added extensions are inserted in \p Exts. 41040b57cec5SDimitry Andric /// Newly added truncates are inserted in \p Truncs. 41050b57cec5SDimitry Andric /// Should never be called directly. 41060b57cec5SDimitry Andric /// \return The promoted value which is used instead of Ext. 41070b57cec5SDimitry Andric static Value *promoteOperandForTruncAndAnyExt( 41080b57cec5SDimitry Andric Instruction *Ext, TypePromotionTransaction &TPT, 41090b57cec5SDimitry Andric InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, 41100b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Exts, 41110b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI); 41120b57cec5SDimitry Andric 41130b57cec5SDimitry Andric /// Utility function to promote the operand of \p Ext when this 41140b57cec5SDimitry Andric /// operand is promotable and is not a supported trunc or sext. 
41150b57cec5SDimitry Andric /// \p PromotedInsts maps the instructions to their type before promotion. 41160b57cec5SDimitry Andric /// \p CreatedInstsCost[out] contains the cost of all the instructions 41170b57cec5SDimitry Andric /// created to promote the operand of Ext. 41180b57cec5SDimitry Andric /// Newly added extensions are inserted in \p Exts. 41190b57cec5SDimitry Andric /// Newly added truncates are inserted in \p Truncs. 41200b57cec5SDimitry Andric /// Should never be called directly. 41210b57cec5SDimitry Andric /// \return The promoted value which is used instead of Ext. 41220b57cec5SDimitry Andric static Value *promoteOperandForOther(Instruction *Ext, 41230b57cec5SDimitry Andric TypePromotionTransaction &TPT, 41240b57cec5SDimitry Andric InstrToOrigTy &PromotedInsts, 41250b57cec5SDimitry Andric unsigned &CreatedInstsCost, 41260b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Exts, 41270b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Truncs, 41280b57cec5SDimitry Andric const TargetLowering &TLI, bool IsSExt); 41290b57cec5SDimitry Andric 41300b57cec5SDimitry Andric /// \see promoteOperandForOther. 41310b57cec5SDimitry Andric static Value *signExtendOperandForOther( 41320b57cec5SDimitry Andric Instruction *Ext, TypePromotionTransaction &TPT, 41330b57cec5SDimitry Andric InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, 41340b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Exts, 41350b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { 41360b57cec5SDimitry Andric return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, 41370b57cec5SDimitry Andric Exts, Truncs, TLI, true); 41380b57cec5SDimitry Andric } 41390b57cec5SDimitry Andric 41400b57cec5SDimitry Andric /// \see promoteOperandForOther. 
41410b57cec5SDimitry Andric static Value *zeroExtendOperandForOther( 41420b57cec5SDimitry Andric Instruction *Ext, TypePromotionTransaction &TPT, 41430b57cec5SDimitry Andric InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, 41440b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Exts, 41450b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { 41460b57cec5SDimitry Andric return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, 41470b57cec5SDimitry Andric Exts, Truncs, TLI, false); 41480b57cec5SDimitry Andric } 41490b57cec5SDimitry Andric 41500b57cec5SDimitry Andric public: 41510b57cec5SDimitry Andric /// Type for the utility function that promotes the operand of Ext. 41520b57cec5SDimitry Andric using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT, 41530b57cec5SDimitry Andric InstrToOrigTy &PromotedInsts, 41540b57cec5SDimitry Andric unsigned &CreatedInstsCost, 41550b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Exts, 41560b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Truncs, 41570b57cec5SDimitry Andric const TargetLowering &TLI); 41580b57cec5SDimitry Andric 41590b57cec5SDimitry Andric /// Given a sign/zero extend instruction \p Ext, return the appropriate 41600b57cec5SDimitry Andric /// action to promote the operand of \p Ext instead of using Ext. 41610b57cec5SDimitry Andric /// \return NULL if no promotable action is possible with the current 41620b57cec5SDimitry Andric /// sign extension. 41630b57cec5SDimitry Andric /// \p InsertedInsts keeps track of all the instructions inserted by the 41640b57cec5SDimitry Andric /// other CodeGenPrepare optimizations. This information is important 41650b57cec5SDimitry Andric /// because we do not want to promote these instructions as CodeGenPrepare 41660b57cec5SDimitry Andric /// will reinsert them later. Thus creating an infinite loop: create/remove. 
41670b57cec5SDimitry Andric /// \p PromotedInsts maps the instructions to their type before promotion. 41680b57cec5SDimitry Andric static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts, 41690b57cec5SDimitry Andric const TargetLowering &TLI, 41700b57cec5SDimitry Andric const InstrToOrigTy &PromotedInsts); 41710b57cec5SDimitry Andric }; 41720b57cec5SDimitry Andric 41730b57cec5SDimitry Andric } // end anonymous namespace 41740b57cec5SDimitry Andric 41750b57cec5SDimitry Andric bool TypePromotionHelper::canGetThrough(const Instruction *Inst, 41760b57cec5SDimitry Andric Type *ConsideredExtType, 41770b57cec5SDimitry Andric const InstrToOrigTy &PromotedInsts, 41780b57cec5SDimitry Andric bool IsSExt) { 41790b57cec5SDimitry Andric // The promotion helper does not know how to deal with vector types yet. 41800b57cec5SDimitry Andric // To be able to fix that, we would need to fix the places where we 41810b57cec5SDimitry Andric // statically extend, e.g., constants and such. 41820b57cec5SDimitry Andric if (Inst->getType()->isVectorTy()) 41830b57cec5SDimitry Andric return false; 41840b57cec5SDimitry Andric 41850b57cec5SDimitry Andric // We can always get through zext. 41860b57cec5SDimitry Andric if (isa<ZExtInst>(Inst)) 41870b57cec5SDimitry Andric return true; 41880b57cec5SDimitry Andric 41890b57cec5SDimitry Andric // sext(sext) is ok too. 41900b57cec5SDimitry Andric if (IsSExt && isa<SExtInst>(Inst)) 41910b57cec5SDimitry Andric return true; 41920b57cec5SDimitry Andric 41930b57cec5SDimitry Andric // We can get through binary operator, if it is legal. In other words, the 41940b57cec5SDimitry Andric // binary operator must have a nuw or nsw flag. 
419504eeddc0SDimitry Andric if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst)) 419604eeddc0SDimitry Andric if (isa<OverflowingBinaryOperator>(BinOp) && 41970b57cec5SDimitry Andric ((!IsSExt && BinOp->hasNoUnsignedWrap()) || 41980b57cec5SDimitry Andric (IsSExt && BinOp->hasNoSignedWrap()))) 41990b57cec5SDimitry Andric return true; 42000b57cec5SDimitry Andric 42010b57cec5SDimitry Andric // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst)) 42020b57cec5SDimitry Andric if ((Inst->getOpcode() == Instruction::And || 42030b57cec5SDimitry Andric Inst->getOpcode() == Instruction::Or)) 42040b57cec5SDimitry Andric return true; 42050b57cec5SDimitry Andric 42060b57cec5SDimitry Andric // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst)) 42070b57cec5SDimitry Andric if (Inst->getOpcode() == Instruction::Xor) { 42080b57cec5SDimitry Andric // Make sure it is not a NOT. 420904eeddc0SDimitry Andric if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1))) 421004eeddc0SDimitry Andric if (!Cst->getValue().isAllOnes()) 42110b57cec5SDimitry Andric return true; 42120b57cec5SDimitry Andric } 42130b57cec5SDimitry Andric 42140b57cec5SDimitry Andric // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst)) 42150b57cec5SDimitry Andric // It may change a poisoned value into a regular value, like 42160b57cec5SDimitry Andric // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12 42170b57cec5SDimitry Andric // poisoned value regular value 42180b57cec5SDimitry Andric // It should be OK since undef covers valid value. 
42190b57cec5SDimitry Andric if (Inst->getOpcode() == Instruction::LShr && !IsSExt) 42200b57cec5SDimitry Andric return true; 42210b57cec5SDimitry Andric 42220b57cec5SDimitry Andric // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst) 42230b57cec5SDimitry Andric // It may change a poisoned value into a regular value, like 42240b57cec5SDimitry Andric // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12 42250b57cec5SDimitry Andric // poisoned value regular value 42260b57cec5SDimitry Andric // It should be OK since undef covers valid value. 42270b57cec5SDimitry Andric if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) { 42288bcb0991SDimitry Andric const auto *ExtInst = cast<const Instruction>(*Inst->user_begin()); 42290b57cec5SDimitry Andric if (ExtInst->hasOneUse()) { 42308bcb0991SDimitry Andric const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin()); 42310b57cec5SDimitry Andric if (AndInst && AndInst->getOpcode() == Instruction::And) { 42328bcb0991SDimitry Andric const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1)); 42330b57cec5SDimitry Andric if (Cst && 42340b57cec5SDimitry Andric Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth())) 42350b57cec5SDimitry Andric return true; 42360b57cec5SDimitry Andric } 42370b57cec5SDimitry Andric } 42380b57cec5SDimitry Andric } 42390b57cec5SDimitry Andric 42400b57cec5SDimitry Andric // Check if we can do the following simplification. 42410b57cec5SDimitry Andric // ext(trunc(opnd)) --> ext(opnd) 42420b57cec5SDimitry Andric if (!isa<TruncInst>(Inst)) 42430b57cec5SDimitry Andric return false; 42440b57cec5SDimitry Andric 42450b57cec5SDimitry Andric Value *OpndVal = Inst->getOperand(0); 42460b57cec5SDimitry Andric // Check if we can use this operand in the extension. 42470b57cec5SDimitry Andric // If the type is larger than the result type of the extension, we cannot. 
42480b57cec5SDimitry Andric if (!OpndVal->getType()->isIntegerTy() || 42490b57cec5SDimitry Andric OpndVal->getType()->getIntegerBitWidth() > 42500b57cec5SDimitry Andric ConsideredExtType->getIntegerBitWidth()) 42510b57cec5SDimitry Andric return false; 42520b57cec5SDimitry Andric 42530b57cec5SDimitry Andric // If the operand of the truncate is not an instruction, we will not have 42540b57cec5SDimitry Andric // any information on the dropped bits. 42550b57cec5SDimitry Andric // (Actually we could for constant but it is not worth the extra logic). 42560b57cec5SDimitry Andric Instruction *Opnd = dyn_cast<Instruction>(OpndVal); 42570b57cec5SDimitry Andric if (!Opnd) 42580b57cec5SDimitry Andric return false; 42590b57cec5SDimitry Andric 42600b57cec5SDimitry Andric // Check if the source of the type is narrow enough. 42610b57cec5SDimitry Andric // I.e., check that trunc just drops extended bits of the same kind of 42620b57cec5SDimitry Andric // the extension. 42630b57cec5SDimitry Andric // #1 get the type of the operand and check the kind of the extended bits. 42640b57cec5SDimitry Andric const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt); 42650b57cec5SDimitry Andric if (OpndType) 42660b57cec5SDimitry Andric ; 42670b57cec5SDimitry Andric else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd))) 42680b57cec5SDimitry Andric OpndType = Opnd->getOperand(0)->getType(); 42690b57cec5SDimitry Andric else 42700b57cec5SDimitry Andric return false; 42710b57cec5SDimitry Andric 42720b57cec5SDimitry Andric // #2 check that the truncate just drops extended bits. 
42730b57cec5SDimitry Andric return Inst->getType()->getIntegerBitWidth() >= 42740b57cec5SDimitry Andric OpndType->getIntegerBitWidth(); 42750b57cec5SDimitry Andric } 42760b57cec5SDimitry Andric 42770b57cec5SDimitry Andric TypePromotionHelper::Action TypePromotionHelper::getAction( 42780b57cec5SDimitry Andric Instruction *Ext, const SetOfInstrs &InsertedInsts, 42790b57cec5SDimitry Andric const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { 42800b57cec5SDimitry Andric assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) && 42810b57cec5SDimitry Andric "Unexpected instruction type"); 42820b57cec5SDimitry Andric Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0)); 42830b57cec5SDimitry Andric Type *ExtTy = Ext->getType(); 42840b57cec5SDimitry Andric bool IsSExt = isa<SExtInst>(Ext); 42850b57cec5SDimitry Andric // If the operand of the extension is not an instruction, we cannot 42860b57cec5SDimitry Andric // get through. 42870b57cec5SDimitry Andric // If it, check we can get through. 42880b57cec5SDimitry Andric if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt)) 42890b57cec5SDimitry Andric return nullptr; 42900b57cec5SDimitry Andric 42910b57cec5SDimitry Andric // Do not promote if the operand has been added by codegenprepare. 42920b57cec5SDimitry Andric // Otherwise, it means we are undoing an optimization that is likely to be 42930b57cec5SDimitry Andric // redone, thus causing potential infinite loop. 42940b57cec5SDimitry Andric if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd)) 42950b57cec5SDimitry Andric return nullptr; 42960b57cec5SDimitry Andric 42970b57cec5SDimitry Andric // SExt or Trunc instructions. 42980b57cec5SDimitry Andric // Return the related handler. 
42990b57cec5SDimitry Andric if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) || 43000b57cec5SDimitry Andric isa<ZExtInst>(ExtOpnd)) 43010b57cec5SDimitry Andric return promoteOperandForTruncAndAnyExt; 43020b57cec5SDimitry Andric 43030b57cec5SDimitry Andric // Regular instruction. 43040b57cec5SDimitry Andric // Abort early if we will have to insert non-free instructions. 43050b57cec5SDimitry Andric if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType())) 43060b57cec5SDimitry Andric return nullptr; 43070b57cec5SDimitry Andric return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther; 43080b57cec5SDimitry Andric } 43090b57cec5SDimitry Andric 43100b57cec5SDimitry Andric Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( 43110b57cec5SDimitry Andric Instruction *SExt, TypePromotionTransaction &TPT, 43120b57cec5SDimitry Andric InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, 43130b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Exts, 43140b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { 43150b57cec5SDimitry Andric // By construction, the operand of SExt is an instruction. Otherwise we cannot 43160b57cec5SDimitry Andric // get through it and this method should not be called. 43170b57cec5SDimitry Andric Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); 43180b57cec5SDimitry Andric Value *ExtVal = SExt; 43190b57cec5SDimitry Andric bool HasMergedNonFreeExt = false; 43200b57cec5SDimitry Andric if (isa<ZExtInst>(SExtOpnd)) { 43210b57cec5SDimitry Andric // Replace s|zext(zext(opnd)) 43220b57cec5SDimitry Andric // => zext(opnd). 
43230b57cec5SDimitry Andric HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd); 43240b57cec5SDimitry Andric Value *ZExt = 43250b57cec5SDimitry Andric TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); 43260b57cec5SDimitry Andric TPT.replaceAllUsesWith(SExt, ZExt); 43270b57cec5SDimitry Andric TPT.eraseInstruction(SExt); 43280b57cec5SDimitry Andric ExtVal = ZExt; 43290b57cec5SDimitry Andric } else { 43300b57cec5SDimitry Andric // Replace z|sext(trunc(opnd)) or sext(sext(opnd)) 43310b57cec5SDimitry Andric // => z|sext(opnd). 43320b57cec5SDimitry Andric TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); 43330b57cec5SDimitry Andric } 43340b57cec5SDimitry Andric CreatedInstsCost = 0; 43350b57cec5SDimitry Andric 43360b57cec5SDimitry Andric // Remove dead code. 43370b57cec5SDimitry Andric if (SExtOpnd->use_empty()) 43380b57cec5SDimitry Andric TPT.eraseInstruction(SExtOpnd); 43390b57cec5SDimitry Andric 43400b57cec5SDimitry Andric // Check if the extension is still needed. 43410b57cec5SDimitry Andric Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); 43420b57cec5SDimitry Andric if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { 43430b57cec5SDimitry Andric if (ExtInst) { 43440b57cec5SDimitry Andric if (Exts) 43450b57cec5SDimitry Andric Exts->push_back(ExtInst); 43460b57cec5SDimitry Andric CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt; 43470b57cec5SDimitry Andric } 43480b57cec5SDimitry Andric return ExtVal; 43490b57cec5SDimitry Andric } 43500b57cec5SDimitry Andric 43510b57cec5SDimitry Andric // At this point we have: ext ty opnd to ty. 43520b57cec5SDimitry Andric // Reassign the uses of ExtInst to the opnd and remove ExtInst. 
43530b57cec5SDimitry Andric Value *NextVal = ExtInst->getOperand(0); 43540b57cec5SDimitry Andric TPT.eraseInstruction(ExtInst, NextVal); 43550b57cec5SDimitry Andric return NextVal; 43560b57cec5SDimitry Andric } 43570b57cec5SDimitry Andric 43580b57cec5SDimitry Andric Value *TypePromotionHelper::promoteOperandForOther( 43590b57cec5SDimitry Andric Instruction *Ext, TypePromotionTransaction &TPT, 43600b57cec5SDimitry Andric InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, 43610b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Exts, 43620b57cec5SDimitry Andric SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI, 43630b57cec5SDimitry Andric bool IsSExt) { 43640b57cec5SDimitry Andric // By construction, the operand of Ext is an instruction. Otherwise we cannot 43650b57cec5SDimitry Andric // get through it and this method should not be called. 43660b57cec5SDimitry Andric Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); 43670b57cec5SDimitry Andric CreatedInstsCost = 0; 43680b57cec5SDimitry Andric if (!ExtOpnd->hasOneUse()) { 43690b57cec5SDimitry Andric // ExtOpnd will be promoted. 43700b57cec5SDimitry Andric // All its uses, but Ext, will need to use a truncated value of the 43710b57cec5SDimitry Andric // promoted version. 43720b57cec5SDimitry Andric // Create the truncate now. 43730b57cec5SDimitry Andric Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType()); 43740b57cec5SDimitry Andric if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) { 43750b57cec5SDimitry Andric // Insert it just after the definition. 
43760b57cec5SDimitry Andric ITrunc->moveAfter(ExtOpnd); 43770b57cec5SDimitry Andric if (Truncs) 43780b57cec5SDimitry Andric Truncs->push_back(ITrunc); 43790b57cec5SDimitry Andric } 43800b57cec5SDimitry Andric 43810b57cec5SDimitry Andric TPT.replaceAllUsesWith(ExtOpnd, Trunc); 43820b57cec5SDimitry Andric // Restore the operand of Ext (which has been replaced by the previous call 43830b57cec5SDimitry Andric // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. 43840b57cec5SDimitry Andric TPT.setOperand(Ext, 0, ExtOpnd); 43850b57cec5SDimitry Andric } 43860b57cec5SDimitry Andric 43870b57cec5SDimitry Andric // Get through the Instruction: 43880b57cec5SDimitry Andric // 1. Update its type. 43890b57cec5SDimitry Andric // 2. Replace the uses of Ext by Inst. 43900b57cec5SDimitry Andric // 3. Extend each operand that needs to be extended. 43910b57cec5SDimitry Andric 43920b57cec5SDimitry Andric // Remember the original type of the instruction before promotion. 43930b57cec5SDimitry Andric // This is useful to know that the high bits are sign extended bits. 43940b57cec5SDimitry Andric addPromotedInst(PromotedInsts, ExtOpnd, IsSExt); 43950b57cec5SDimitry Andric // Step #1. 43960b57cec5SDimitry Andric TPT.mutateType(ExtOpnd, Ext->getType()); 43970b57cec5SDimitry Andric // Step #2. 43980b57cec5SDimitry Andric TPT.replaceAllUsesWith(Ext, ExtOpnd); 43990b57cec5SDimitry Andric // Step #3. 
44000b57cec5SDimitry Andric Instruction *ExtForOpnd = Ext; 44010b57cec5SDimitry Andric 44020b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n"); 44030b57cec5SDimitry Andric for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx; 44040b57cec5SDimitry Andric ++OpIdx) { 44050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n'); 44060b57cec5SDimitry Andric if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() || 44070b57cec5SDimitry Andric !shouldExtOperand(ExtOpnd, OpIdx)) { 44080b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "No need to propagate\n"); 44090b57cec5SDimitry Andric continue; 44100b57cec5SDimitry Andric } 44110b57cec5SDimitry Andric // Check if we can statically extend the operand. 44120b57cec5SDimitry Andric Value *Opnd = ExtOpnd->getOperand(OpIdx); 44130b57cec5SDimitry Andric if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { 44140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Statically extend\n"); 44150b57cec5SDimitry Andric unsigned BitWidth = Ext->getType()->getIntegerBitWidth(); 44160b57cec5SDimitry Andric APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth) 44170b57cec5SDimitry Andric : Cst->getValue().zext(BitWidth); 44180b57cec5SDimitry Andric TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal)); 44190b57cec5SDimitry Andric continue; 44200b57cec5SDimitry Andric } 44210b57cec5SDimitry Andric // UndefValue are typed, so we have to statically sign extend them. 44220b57cec5SDimitry Andric if (isa<UndefValue>(Opnd)) { 44230b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Statically extend\n"); 44240b57cec5SDimitry Andric TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType())); 44250b57cec5SDimitry Andric continue; 44260b57cec5SDimitry Andric } 44270b57cec5SDimitry Andric 44280b57cec5SDimitry Andric // Otherwise we have to explicitly sign extend the operand. 
44290b57cec5SDimitry Andric // Check if Ext was reused to extend an operand. 44300b57cec5SDimitry Andric if (!ExtForOpnd) { 44310b57cec5SDimitry Andric // If yes, create a new one. 44320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "More operands to ext\n"); 44330b57cec5SDimitry Andric Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) 44340b57cec5SDimitry Andric : TPT.createZExt(Ext, Opnd, Ext->getType()); 44350b57cec5SDimitry Andric if (!isa<Instruction>(ValForExtOpnd)) { 44360b57cec5SDimitry Andric TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd); 44370b57cec5SDimitry Andric continue; 44380b57cec5SDimitry Andric } 44390b57cec5SDimitry Andric ExtForOpnd = cast<Instruction>(ValForExtOpnd); 44400b57cec5SDimitry Andric } 44410b57cec5SDimitry Andric if (Exts) 44420b57cec5SDimitry Andric Exts->push_back(ExtForOpnd); 44430b57cec5SDimitry Andric TPT.setOperand(ExtForOpnd, 0, Opnd); 44440b57cec5SDimitry Andric 44450b57cec5SDimitry Andric // Move the sign extension before the insertion point. 44460b57cec5SDimitry Andric TPT.moveBefore(ExtForOpnd, ExtOpnd); 44470b57cec5SDimitry Andric TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); 44480b57cec5SDimitry Andric CreatedInstsCost += !TLI.isExtFree(ExtForOpnd); 44490b57cec5SDimitry Andric // If more sext are required, new instructions will have to be created. 44500b57cec5SDimitry Andric ExtForOpnd = nullptr; 44510b57cec5SDimitry Andric } 44520b57cec5SDimitry Andric if (ExtForOpnd == Ext) { 44530b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Extension is useless now\n"); 44540b57cec5SDimitry Andric TPT.eraseInstruction(Ext); 44550b57cec5SDimitry Andric } 44560b57cec5SDimitry Andric return ExtOpnd; 44570b57cec5SDimitry Andric } 44580b57cec5SDimitry Andric 44590b57cec5SDimitry Andric /// Check whether or not promoting an instruction to a wider type is profitable. 44600b57cec5SDimitry Andric /// \p NewCost gives the cost of extension instructions created by the 44610b57cec5SDimitry Andric /// promotion. 
44620b57cec5SDimitry Andric /// \p OldCost gives the cost of extension instructions before the promotion 44630b57cec5SDimitry Andric /// plus the number of instructions that have been 44640b57cec5SDimitry Andric /// matched in the addressing mode the promotion. 44650b57cec5SDimitry Andric /// \p PromotedOperand is the value that has been promoted. 44660b57cec5SDimitry Andric /// \return True if the promotion is profitable, false otherwise. 44670b57cec5SDimitry Andric bool AddressingModeMatcher::isPromotionProfitable( 44680b57cec5SDimitry Andric unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { 44690b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost 44700b57cec5SDimitry Andric << '\n'); 44710b57cec5SDimitry Andric // The cost of the new extensions is greater than the cost of the 44720b57cec5SDimitry Andric // old extension plus what we folded. 44730b57cec5SDimitry Andric // This is not profitable. 44740b57cec5SDimitry Andric if (NewCost > OldCost) 44750b57cec5SDimitry Andric return false; 44760b57cec5SDimitry Andric if (NewCost < OldCost) 44770b57cec5SDimitry Andric return true; 44780b57cec5SDimitry Andric // The promotion is neutral but it may help folding the sign extension in 44790b57cec5SDimitry Andric // loads for instance. 44800b57cec5SDimitry Andric // Check that we did not create an illegal instruction. 44810b57cec5SDimitry Andric return isPromotedInstructionLegal(TLI, DL, PromotedOperand); 44820b57cec5SDimitry Andric } 44830b57cec5SDimitry Andric 44840b57cec5SDimitry Andric /// Given an instruction or constant expr, see if we can fold the operation 44850b57cec5SDimitry Andric /// into the addressing mode. If so, update the addressing mode and return 44860b57cec5SDimitry Andric /// true, otherwise return false without modifying AddrMode. 
44870b57cec5SDimitry Andric /// If \p MovedAway is not NULL, it contains the information of whether or 44880b57cec5SDimitry Andric /// not AddrInst has to be folded into the addressing mode on success. 44890b57cec5SDimitry Andric /// If \p MovedAway == true, \p AddrInst will not be part of the addressing 44900b57cec5SDimitry Andric /// because it has been moved away. 44910b57cec5SDimitry Andric /// Thus AddrInst must not be added in the matched instructions. 44920b57cec5SDimitry Andric /// This state can happen when AddrInst is a sext, since it may be moved away. 44930b57cec5SDimitry Andric /// Therefore, AddrInst may not be valid when MovedAway is true and it must 44940b57cec5SDimitry Andric /// not be referenced anymore. 44950b57cec5SDimitry Andric bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, 44960b57cec5SDimitry Andric unsigned Depth, 44970b57cec5SDimitry Andric bool *MovedAway) { 44980b57cec5SDimitry Andric // Avoid exponential behavior on extremely deep expression trees. 44990b57cec5SDimitry Andric if (Depth >= 5) return false; 45000b57cec5SDimitry Andric 45010b57cec5SDimitry Andric // By default, all matched instructions stay in place. 45020b57cec5SDimitry Andric if (MovedAway) 45030b57cec5SDimitry Andric *MovedAway = false; 45040b57cec5SDimitry Andric 45050b57cec5SDimitry Andric switch (Opcode) { 45060b57cec5SDimitry Andric case Instruction::PtrToInt: 45070b57cec5SDimitry Andric // PtrToInt is always a noop, as we know that the int type is pointer sized. 45080b57cec5SDimitry Andric return matchAddr(AddrInst->getOperand(0), Depth); 45090b57cec5SDimitry Andric case Instruction::IntToPtr: { 45100b57cec5SDimitry Andric auto AS = AddrInst->getType()->getPointerAddressSpace(); 45110b57cec5SDimitry Andric auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); 45120b57cec5SDimitry Andric // This inttoptr is a no-op if the integer type is pointer sized. 
45130b57cec5SDimitry Andric if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy) 45140b57cec5SDimitry Andric return matchAddr(AddrInst->getOperand(0), Depth); 45150b57cec5SDimitry Andric return false; 45160b57cec5SDimitry Andric } 45170b57cec5SDimitry Andric case Instruction::BitCast: 45180b57cec5SDimitry Andric // BitCast is always a noop, and we can handle it as long as it is 45190b57cec5SDimitry Andric // int->int or pointer->pointer (we don't want int<->fp or something). 45200b57cec5SDimitry Andric if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() && 45210b57cec5SDimitry Andric // Don't touch identity bitcasts. These were probably put here by LSR, 45220b57cec5SDimitry Andric // and we don't want to mess around with them. Assume it knows what it 45230b57cec5SDimitry Andric // is doing. 45240b57cec5SDimitry Andric AddrInst->getOperand(0)->getType() != AddrInst->getType()) 45250b57cec5SDimitry Andric return matchAddr(AddrInst->getOperand(0), Depth); 45260b57cec5SDimitry Andric return false; 45270b57cec5SDimitry Andric case Instruction::AddrSpaceCast: { 45280b57cec5SDimitry Andric unsigned SrcAS 45290b57cec5SDimitry Andric = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); 45300b57cec5SDimitry Andric unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); 4531e8d8bef9SDimitry Andric if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS)) 45320b57cec5SDimitry Andric return matchAddr(AddrInst->getOperand(0), Depth); 45330b57cec5SDimitry Andric return false; 45340b57cec5SDimitry Andric } 45350b57cec5SDimitry Andric case Instruction::Add: { 45360b57cec5SDimitry Andric // Check to see if we can merge in the RHS then the LHS. If so, we win. 45370b57cec5SDimitry Andric ExtAddrMode BackupAddrMode = AddrMode; 45380b57cec5SDimitry Andric unsigned OldSize = AddrModeInsts.size(); 45390b57cec5SDimitry Andric // Start a transaction at this point. 45400b57cec5SDimitry Andric // The LHS may match but not the RHS. 
45410b57cec5SDimitry Andric // Therefore, we need a higher level restoration point to undo partially 45420b57cec5SDimitry Andric // matched operation. 45430b57cec5SDimitry Andric TypePromotionTransaction::ConstRestorationPt LastKnownGood = 45440b57cec5SDimitry Andric TPT.getRestorationPoint(); 45450b57cec5SDimitry Andric 45460b57cec5SDimitry Andric AddrMode.InBounds = false; 45470b57cec5SDimitry Andric if (matchAddr(AddrInst->getOperand(1), Depth+1) && 45480b57cec5SDimitry Andric matchAddr(AddrInst->getOperand(0), Depth+1)) 45490b57cec5SDimitry Andric return true; 45500b57cec5SDimitry Andric 45510b57cec5SDimitry Andric // Restore the old addr mode info. 45520b57cec5SDimitry Andric AddrMode = BackupAddrMode; 45530b57cec5SDimitry Andric AddrModeInsts.resize(OldSize); 45540b57cec5SDimitry Andric TPT.rollback(LastKnownGood); 45550b57cec5SDimitry Andric 45560b57cec5SDimitry Andric // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. 45570b57cec5SDimitry Andric if (matchAddr(AddrInst->getOperand(0), Depth+1) && 45580b57cec5SDimitry Andric matchAddr(AddrInst->getOperand(1), Depth+1)) 45590b57cec5SDimitry Andric return true; 45600b57cec5SDimitry Andric 45610b57cec5SDimitry Andric // Otherwise we definitely can't merge the ADD in. 45620b57cec5SDimitry Andric AddrMode = BackupAddrMode; 45630b57cec5SDimitry Andric AddrModeInsts.resize(OldSize); 45640b57cec5SDimitry Andric TPT.rollback(LastKnownGood); 45650b57cec5SDimitry Andric break; 45660b57cec5SDimitry Andric } 45670b57cec5SDimitry Andric //case Instruction::Or: 45680b57cec5SDimitry Andric // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. 45690b57cec5SDimitry Andric //break; 45700b57cec5SDimitry Andric case Instruction::Mul: 45710b57cec5SDimitry Andric case Instruction::Shl: { 45720b57cec5SDimitry Andric // Can only handle X*C and X << C. 
45730b57cec5SDimitry Andric AddrMode.InBounds = false; 45740b57cec5SDimitry Andric ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); 45750b57cec5SDimitry Andric if (!RHS || RHS->getBitWidth() > 64) 45760b57cec5SDimitry Andric return false; 457781ad6265SDimitry Andric int64_t Scale = Opcode == Instruction::Shl 457881ad6265SDimitry Andric ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1) 457981ad6265SDimitry Andric : RHS->getSExtValue(); 45800b57cec5SDimitry Andric 45810b57cec5SDimitry Andric return matchScaledValue(AddrInst->getOperand(0), Scale, Depth); 45820b57cec5SDimitry Andric } 45830b57cec5SDimitry Andric case Instruction::GetElementPtr: { 45840b57cec5SDimitry Andric // Scan the GEP. We check it if it contains constant offsets and at most 45850b57cec5SDimitry Andric // one variable offset. 45860b57cec5SDimitry Andric int VariableOperand = -1; 45870b57cec5SDimitry Andric unsigned VariableScale = 0; 45880b57cec5SDimitry Andric 45890b57cec5SDimitry Andric int64_t ConstantOffset = 0; 45900b57cec5SDimitry Andric gep_type_iterator GTI = gep_type_begin(AddrInst); 45910b57cec5SDimitry Andric for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { 45920b57cec5SDimitry Andric if (StructType *STy = GTI.getStructTypeOrNull()) { 45930b57cec5SDimitry Andric const StructLayout *SL = DL.getStructLayout(STy); 45940b57cec5SDimitry Andric unsigned Idx = 45950b57cec5SDimitry Andric cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); 45960b57cec5SDimitry Andric ConstantOffset += SL->getElementOffset(Idx); 45970b57cec5SDimitry Andric } else { 45985ffd83dbSDimitry Andric TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType()); 45995ffd83dbSDimitry Andric if (TS.isNonZero()) { 46005ffd83dbSDimitry Andric // The optimisations below currently only work for fixed offsets. 
46015ffd83dbSDimitry Andric if (TS.isScalable()) 46025ffd83dbSDimitry Andric return false; 46035ffd83dbSDimitry Andric int64_t TypeSize = TS.getFixedSize(); 46045ffd83dbSDimitry Andric if (ConstantInt *CI = 46055ffd83dbSDimitry Andric dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { 46060b57cec5SDimitry Andric const APInt &CVal = CI->getValue(); 46070b57cec5SDimitry Andric if (CVal.getMinSignedBits() <= 64) { 46080b57cec5SDimitry Andric ConstantOffset += CVal.getSExtValue() * TypeSize; 46090b57cec5SDimitry Andric continue; 46100b57cec5SDimitry Andric } 46110b57cec5SDimitry Andric } 46120b57cec5SDimitry Andric // We only allow one variable index at the moment. 46130b57cec5SDimitry Andric if (VariableOperand != -1) 46140b57cec5SDimitry Andric return false; 46150b57cec5SDimitry Andric 46160b57cec5SDimitry Andric // Remember the variable index. 46170b57cec5SDimitry Andric VariableOperand = i; 46180b57cec5SDimitry Andric VariableScale = TypeSize; 46190b57cec5SDimitry Andric } 46200b57cec5SDimitry Andric } 46210b57cec5SDimitry Andric } 46220b57cec5SDimitry Andric 46230b57cec5SDimitry Andric // A common case is for the GEP to only do a constant offset. In this case, 46240b57cec5SDimitry Andric // just add it to the disp field and check validity. 46250b57cec5SDimitry Andric if (VariableOperand == -1) { 46260b57cec5SDimitry Andric AddrMode.BaseOffs += ConstantOffset; 46270b57cec5SDimitry Andric if (ConstantOffset == 0 || 46280b57cec5SDimitry Andric TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) { 46290b57cec5SDimitry Andric // Check to see if we can fold the base pointer in too. 
46300b57cec5SDimitry Andric if (matchAddr(AddrInst->getOperand(0), Depth+1)) { 46310b57cec5SDimitry Andric if (!cast<GEPOperator>(AddrInst)->isInBounds()) 46320b57cec5SDimitry Andric AddrMode.InBounds = false; 46330b57cec5SDimitry Andric return true; 46340b57cec5SDimitry Andric } 46350b57cec5SDimitry Andric } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) && 46360b57cec5SDimitry Andric TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 && 46370b57cec5SDimitry Andric ConstantOffset > 0) { 46380b57cec5SDimitry Andric // Record GEPs with non-zero offsets as candidates for splitting in the 46390b57cec5SDimitry Andric // event that the offset cannot fit into the r+i addressing mode. 46400b57cec5SDimitry Andric // Simple and common case that only one GEP is used in calculating the 46410b57cec5SDimitry Andric // address for the memory access. 46420b57cec5SDimitry Andric Value *Base = AddrInst->getOperand(0); 46430b57cec5SDimitry Andric auto *BaseI = dyn_cast<Instruction>(Base); 46440b57cec5SDimitry Andric auto *GEP = cast<GetElementPtrInst>(AddrInst); 46450b57cec5SDimitry Andric if (isa<Argument>(Base) || isa<GlobalValue>(Base) || 46460b57cec5SDimitry Andric (BaseI && !isa<CastInst>(BaseI) && 46470b57cec5SDimitry Andric !isa<GetElementPtrInst>(BaseI))) { 46480b57cec5SDimitry Andric // Make sure the parent block allows inserting non-PHI instructions 46490b57cec5SDimitry Andric // before the terminator. 46500b57cec5SDimitry Andric BasicBlock *Parent = 46510b57cec5SDimitry Andric BaseI ? 
BaseI->getParent() : &GEP->getFunction()->getEntryBlock(); 46520b57cec5SDimitry Andric if (!Parent->getTerminator()->isEHPad()) 46530b57cec5SDimitry Andric LargeOffsetGEP = std::make_pair(GEP, ConstantOffset); 46540b57cec5SDimitry Andric } 46550b57cec5SDimitry Andric } 46560b57cec5SDimitry Andric AddrMode.BaseOffs -= ConstantOffset; 46570b57cec5SDimitry Andric return false; 46580b57cec5SDimitry Andric } 46590b57cec5SDimitry Andric 46600b57cec5SDimitry Andric // Save the valid addressing mode in case we can't match. 46610b57cec5SDimitry Andric ExtAddrMode BackupAddrMode = AddrMode; 46620b57cec5SDimitry Andric unsigned OldSize = AddrModeInsts.size(); 46630b57cec5SDimitry Andric 46640b57cec5SDimitry Andric // See if the scale and offset amount is valid for this target. 46650b57cec5SDimitry Andric AddrMode.BaseOffs += ConstantOffset; 46660b57cec5SDimitry Andric if (!cast<GEPOperator>(AddrInst)->isInBounds()) 46670b57cec5SDimitry Andric AddrMode.InBounds = false; 46680b57cec5SDimitry Andric 46690b57cec5SDimitry Andric // Match the base operand of the GEP. 46700b57cec5SDimitry Andric if (!matchAddr(AddrInst->getOperand(0), Depth+1)) { 46710b57cec5SDimitry Andric // If it couldn't be matched, just stuff the value in a register. 46720b57cec5SDimitry Andric if (AddrMode.HasBaseReg) { 46730b57cec5SDimitry Andric AddrMode = BackupAddrMode; 46740b57cec5SDimitry Andric AddrModeInsts.resize(OldSize); 46750b57cec5SDimitry Andric return false; 46760b57cec5SDimitry Andric } 46770b57cec5SDimitry Andric AddrMode.HasBaseReg = true; 46780b57cec5SDimitry Andric AddrMode.BaseReg = AddrInst->getOperand(0); 46790b57cec5SDimitry Andric } 46800b57cec5SDimitry Andric 46810b57cec5SDimitry Andric // Match the remaining variable portion of the GEP. 
46820b57cec5SDimitry Andric if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, 46830b57cec5SDimitry Andric Depth)) { 46840b57cec5SDimitry Andric // If it couldn't be matched, try stuffing the base into a register 46850b57cec5SDimitry Andric // instead of matching it, and retrying the match of the scale. 46860b57cec5SDimitry Andric AddrMode = BackupAddrMode; 46870b57cec5SDimitry Andric AddrModeInsts.resize(OldSize); 46880b57cec5SDimitry Andric if (AddrMode.HasBaseReg) 46890b57cec5SDimitry Andric return false; 46900b57cec5SDimitry Andric AddrMode.HasBaseReg = true; 46910b57cec5SDimitry Andric AddrMode.BaseReg = AddrInst->getOperand(0); 46920b57cec5SDimitry Andric AddrMode.BaseOffs += ConstantOffset; 46930b57cec5SDimitry Andric if (!matchScaledValue(AddrInst->getOperand(VariableOperand), 46940b57cec5SDimitry Andric VariableScale, Depth)) { 46950b57cec5SDimitry Andric // If even that didn't work, bail. 46960b57cec5SDimitry Andric AddrMode = BackupAddrMode; 46970b57cec5SDimitry Andric AddrModeInsts.resize(OldSize); 46980b57cec5SDimitry Andric return false; 46990b57cec5SDimitry Andric } 47000b57cec5SDimitry Andric } 47010b57cec5SDimitry Andric 47020b57cec5SDimitry Andric return true; 47030b57cec5SDimitry Andric } 47040b57cec5SDimitry Andric case Instruction::SExt: 47050b57cec5SDimitry Andric case Instruction::ZExt: { 47060b57cec5SDimitry Andric Instruction *Ext = dyn_cast<Instruction>(AddrInst); 47070b57cec5SDimitry Andric if (!Ext) 47080b57cec5SDimitry Andric return false; 47090b57cec5SDimitry Andric 47100b57cec5SDimitry Andric // Try to move this ext out of the way of the addressing mode. 47110b57cec5SDimitry Andric // Ask for a method for doing so. 
47120b57cec5SDimitry Andric TypePromotionHelper::Action TPH = 47130b57cec5SDimitry Andric TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts); 47140b57cec5SDimitry Andric if (!TPH) 47150b57cec5SDimitry Andric return false; 47160b57cec5SDimitry Andric 47170b57cec5SDimitry Andric TypePromotionTransaction::ConstRestorationPt LastKnownGood = 47180b57cec5SDimitry Andric TPT.getRestorationPoint(); 47190b57cec5SDimitry Andric unsigned CreatedInstsCost = 0; 47200b57cec5SDimitry Andric unsigned ExtCost = !TLI.isExtFree(Ext); 47210b57cec5SDimitry Andric Value *PromotedOperand = 47220b57cec5SDimitry Andric TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI); 47230b57cec5SDimitry Andric // SExt has been moved away. 47240b57cec5SDimitry Andric // Thus either it will be rematched later in the recursive calls or it is 47250b57cec5SDimitry Andric // gone. Anyway, we must not fold it into the addressing mode at this point. 47260b57cec5SDimitry Andric // E.g., 47270b57cec5SDimitry Andric // op = add opnd, 1 47280b57cec5SDimitry Andric // idx = ext op 47290b57cec5SDimitry Andric // addr = gep base, idx 47300b57cec5SDimitry Andric // is now: 47310b57cec5SDimitry Andric // promotedOpnd = ext opnd <- no match here 47320b57cec5SDimitry Andric // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) 47330b57cec5SDimitry Andric // addr = gep base, op <- match 47340b57cec5SDimitry Andric if (MovedAway) 47350b57cec5SDimitry Andric *MovedAway = true; 47360b57cec5SDimitry Andric 47370b57cec5SDimitry Andric assert(PromotedOperand && 47380b57cec5SDimitry Andric "TypePromotionHelper should have filtered out those cases"); 47390b57cec5SDimitry Andric 47400b57cec5SDimitry Andric ExtAddrMode BackupAddrMode = AddrMode; 47410b57cec5SDimitry Andric unsigned OldSize = AddrModeInsts.size(); 47420b57cec5SDimitry Andric 47430b57cec5SDimitry Andric if (!matchAddr(PromotedOperand, Depth) || 47440b57cec5SDimitry Andric // The total of the new cost is equal 
to the cost of the created 47450b57cec5SDimitry Andric // instructions. 47460b57cec5SDimitry Andric // The total of the old cost is equal to the cost of the extension plus 47470b57cec5SDimitry Andric // what we have saved in the addressing mode. 47480b57cec5SDimitry Andric !isPromotionProfitable(CreatedInstsCost, 47490b57cec5SDimitry Andric ExtCost + (AddrModeInsts.size() - OldSize), 47500b57cec5SDimitry Andric PromotedOperand)) { 47510b57cec5SDimitry Andric AddrMode = BackupAddrMode; 47520b57cec5SDimitry Andric AddrModeInsts.resize(OldSize); 47530b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); 47540b57cec5SDimitry Andric TPT.rollback(LastKnownGood); 47550b57cec5SDimitry Andric return false; 47560b57cec5SDimitry Andric } 47570b57cec5SDimitry Andric return true; 47580b57cec5SDimitry Andric } 47590b57cec5SDimitry Andric } 47600b57cec5SDimitry Andric return false; 47610b57cec5SDimitry Andric } 47620b57cec5SDimitry Andric 47630b57cec5SDimitry Andric /// If we can, try to add the value of 'Addr' into the current addressing mode. 47640b57cec5SDimitry Andric /// If Addr can't be added to AddrMode this returns false and leaves AddrMode 47650b57cec5SDimitry Andric /// unmodified. This assumes that Addr is either a pointer type or intptr_t 47660b57cec5SDimitry Andric /// for the target. 47670b57cec5SDimitry Andric /// 47680b57cec5SDimitry Andric bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { 47690b57cec5SDimitry Andric // Start a transaction at this point that we will rollback if the matching 47700b57cec5SDimitry Andric // fails. 47710b57cec5SDimitry Andric TypePromotionTransaction::ConstRestorationPt LastKnownGood = 47720b57cec5SDimitry Andric TPT.getRestorationPoint(); 47730b57cec5SDimitry Andric if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { 47745ffd83dbSDimitry Andric if (CI->getValue().isSignedIntN(64)) { 47750b57cec5SDimitry Andric // Fold in immediates if legal for the target. 
47760b57cec5SDimitry Andric AddrMode.BaseOffs += CI->getSExtValue(); 47770b57cec5SDimitry Andric if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) 47780b57cec5SDimitry Andric return true; 47790b57cec5SDimitry Andric AddrMode.BaseOffs -= CI->getSExtValue(); 47805ffd83dbSDimitry Andric } 47810b57cec5SDimitry Andric } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { 47820b57cec5SDimitry Andric // If this is a global variable, try to fold it into the addressing mode. 47830b57cec5SDimitry Andric if (!AddrMode.BaseGV) { 47840b57cec5SDimitry Andric AddrMode.BaseGV = GV; 47850b57cec5SDimitry Andric if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) 47860b57cec5SDimitry Andric return true; 47870b57cec5SDimitry Andric AddrMode.BaseGV = nullptr; 47880b57cec5SDimitry Andric } 47890b57cec5SDimitry Andric } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { 47900b57cec5SDimitry Andric ExtAddrMode BackupAddrMode = AddrMode; 47910b57cec5SDimitry Andric unsigned OldSize = AddrModeInsts.size(); 47920b57cec5SDimitry Andric 47930b57cec5SDimitry Andric // Check to see if it is possible to fold this operation. 47940b57cec5SDimitry Andric bool MovedAway = false; 47950b57cec5SDimitry Andric if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { 47960b57cec5SDimitry Andric // This instruction may have been moved away. If so, there is nothing 47970b57cec5SDimitry Andric // to check here. 47980b57cec5SDimitry Andric if (MovedAway) 47990b57cec5SDimitry Andric return true; 48000b57cec5SDimitry Andric // Okay, it's possible to fold this. Check to see if it is actually 48010b57cec5SDimitry Andric // *profitable* to do so. We use a simple cost model to avoid increasing 48020b57cec5SDimitry Andric // register pressure too much. 
48030b57cec5SDimitry Andric if (I->hasOneUse() || 48040b57cec5SDimitry Andric isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { 48050b57cec5SDimitry Andric AddrModeInsts.push_back(I); 48060b57cec5SDimitry Andric return true; 48070b57cec5SDimitry Andric } 48080b57cec5SDimitry Andric 48090b57cec5SDimitry Andric // It isn't profitable to do this, roll back. 48100b57cec5SDimitry Andric AddrMode = BackupAddrMode; 48110b57cec5SDimitry Andric AddrModeInsts.resize(OldSize); 48120b57cec5SDimitry Andric TPT.rollback(LastKnownGood); 48130b57cec5SDimitry Andric } 48140b57cec5SDimitry Andric } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { 48150b57cec5SDimitry Andric if (matchOperationAddr(CE, CE->getOpcode(), Depth)) 48160b57cec5SDimitry Andric return true; 48170b57cec5SDimitry Andric TPT.rollback(LastKnownGood); 48180b57cec5SDimitry Andric } else if (isa<ConstantPointerNull>(Addr)) { 48190b57cec5SDimitry Andric // Null pointer gets folded without affecting the addressing mode. 48200b57cec5SDimitry Andric return true; 48210b57cec5SDimitry Andric } 48220b57cec5SDimitry Andric 48230b57cec5SDimitry Andric // Worse case, the target should support [reg] addressing modes. :) 48240b57cec5SDimitry Andric if (!AddrMode.HasBaseReg) { 48250b57cec5SDimitry Andric AddrMode.HasBaseReg = true; 48260b57cec5SDimitry Andric AddrMode.BaseReg = Addr; 48270b57cec5SDimitry Andric // Still check for legality in case the target supports [imm] but not [i+r]. 48280b57cec5SDimitry Andric if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) 48290b57cec5SDimitry Andric return true; 48300b57cec5SDimitry Andric AddrMode.HasBaseReg = false; 48310b57cec5SDimitry Andric AddrMode.BaseReg = nullptr; 48320b57cec5SDimitry Andric } 48330b57cec5SDimitry Andric 48340b57cec5SDimitry Andric // If the base register is already taken, see if we can do [r+r]. 
48350b57cec5SDimitry Andric if (AddrMode.Scale == 0) { 48360b57cec5SDimitry Andric AddrMode.Scale = 1; 48370b57cec5SDimitry Andric AddrMode.ScaledReg = Addr; 48380b57cec5SDimitry Andric if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) 48390b57cec5SDimitry Andric return true; 48400b57cec5SDimitry Andric AddrMode.Scale = 0; 48410b57cec5SDimitry Andric AddrMode.ScaledReg = nullptr; 48420b57cec5SDimitry Andric } 48430b57cec5SDimitry Andric // Couldn't match. 48440b57cec5SDimitry Andric TPT.rollback(LastKnownGood); 48450b57cec5SDimitry Andric return false; 48460b57cec5SDimitry Andric } 48470b57cec5SDimitry Andric 48480b57cec5SDimitry Andric /// Check to see if all uses of OpVal by the specified inline asm call are due 48490b57cec5SDimitry Andric /// to memory operands. If so, return true, otherwise return false. 48500b57cec5SDimitry Andric static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, 48510b57cec5SDimitry Andric const TargetLowering &TLI, 48520b57cec5SDimitry Andric const TargetRegisterInfo &TRI) { 48530b57cec5SDimitry Andric const Function *F = CI->getFunction(); 48540b57cec5SDimitry Andric TargetLowering::AsmOperandInfoVector TargetConstraints = 48555ffd83dbSDimitry Andric TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI); 48560b57cec5SDimitry Andric 48570eae32dcSDimitry Andric for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) { 48580b57cec5SDimitry Andric // Compute the constraint code and ConstraintType to use. 48590b57cec5SDimitry Andric TLI.ComputeConstraintToUse(OpInfo, SDValue()); 48600b57cec5SDimitry Andric 48610b57cec5SDimitry Andric // If this asm operand is our Value*, and if it isn't an indirect memory 486281ad6265SDimitry Andric // operand, we can't fold it! TODO: Also handle C_Address? 
48630b57cec5SDimitry Andric if (OpInfo.CallOperandVal == OpVal && 48640b57cec5SDimitry Andric (OpInfo.ConstraintType != TargetLowering::C_Memory || 48650b57cec5SDimitry Andric !OpInfo.isIndirect)) 48660b57cec5SDimitry Andric return false; 48670b57cec5SDimitry Andric } 48680b57cec5SDimitry Andric 48690b57cec5SDimitry Andric return true; 48700b57cec5SDimitry Andric } 48710b57cec5SDimitry Andric 48720b57cec5SDimitry Andric // Max number of memory uses to look at before aborting the search to conserve 48730b57cec5SDimitry Andric // compile time. 48740b57cec5SDimitry Andric static constexpr int MaxMemoryUsesToScan = 20; 48750b57cec5SDimitry Andric 48760b57cec5SDimitry Andric /// Recursively walk all the uses of I until we find a memory use. 48770b57cec5SDimitry Andric /// If we find an obviously non-foldable instruction, return true. 4878349cc55cSDimitry Andric /// Add accessed addresses and types to MemoryUses. 48790b57cec5SDimitry Andric static bool FindAllMemoryUses( 4880349cc55cSDimitry Andric Instruction *I, SmallVectorImpl<std::pair<Value *, Type *>> &MemoryUses, 48810b57cec5SDimitry Andric SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI, 4882480093f4SDimitry Andric const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, 4883480093f4SDimitry Andric BlockFrequencyInfo *BFI, int SeenInsts = 0) { 48840b57cec5SDimitry Andric // If we already considered this instruction, we're done. 48850b57cec5SDimitry Andric if (!ConsideredInsts.insert(I).second) 48860b57cec5SDimitry Andric return false; 48870b57cec5SDimitry Andric 48880b57cec5SDimitry Andric // If this is an obviously unfoldable instruction, bail out. 48890b57cec5SDimitry Andric if (!MightBeFoldableInst(I)) 48900b57cec5SDimitry Andric return true; 48910b57cec5SDimitry Andric 48920b57cec5SDimitry Andric // Loop over all the uses, recursively processing them. 
48930b57cec5SDimitry Andric for (Use &U : I->uses()) { 48940b57cec5SDimitry Andric // Conservatively return true if we're seeing a large number or a deep chain 48950b57cec5SDimitry Andric // of users. This avoids excessive compilation times in pathological cases. 48960b57cec5SDimitry Andric if (SeenInsts++ >= MaxMemoryUsesToScan) 48970b57cec5SDimitry Andric return true; 48980b57cec5SDimitry Andric 48990b57cec5SDimitry Andric Instruction *UserI = cast<Instruction>(U.getUser()); 49000b57cec5SDimitry Andric if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { 4901349cc55cSDimitry Andric MemoryUses.push_back({U.get(), LI->getType()}); 49020b57cec5SDimitry Andric continue; 49030b57cec5SDimitry Andric } 49040b57cec5SDimitry Andric 49050b57cec5SDimitry Andric if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { 4906349cc55cSDimitry Andric if (U.getOperandNo() != StoreInst::getPointerOperandIndex()) 49070b57cec5SDimitry Andric return true; // Storing addr, not into addr. 4908349cc55cSDimitry Andric MemoryUses.push_back({U.get(), SI->getValueOperand()->getType()}); 49090b57cec5SDimitry Andric continue; 49100b57cec5SDimitry Andric } 49110b57cec5SDimitry Andric 49120b57cec5SDimitry Andric if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) { 4913349cc55cSDimitry Andric if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex()) 49140b57cec5SDimitry Andric return true; // Storing addr, not into addr. 4915349cc55cSDimitry Andric MemoryUses.push_back({U.get(), RMW->getValOperand()->getType()}); 49160b57cec5SDimitry Andric continue; 49170b57cec5SDimitry Andric } 49180b57cec5SDimitry Andric 49190b57cec5SDimitry Andric if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) { 4920349cc55cSDimitry Andric if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex()) 49210b57cec5SDimitry Andric return true; // Storing addr, not into addr. 
4922349cc55cSDimitry Andric MemoryUses.push_back({U.get(), CmpX->getCompareOperand()->getType()}); 49230b57cec5SDimitry Andric continue; 49240b57cec5SDimitry Andric } 49250b57cec5SDimitry Andric 49260b57cec5SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(UserI)) { 49275ffd83dbSDimitry Andric if (CI->hasFnAttr(Attribute::Cold)) { 49280b57cec5SDimitry Andric // If this is a cold call, we can sink the addressing calculation into 49290b57cec5SDimitry Andric // the cold path. See optimizeCallInst 4930480093f4SDimitry Andric bool OptForSize = OptSize || 4931480093f4SDimitry Andric llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); 49325ffd83dbSDimitry Andric if (!OptForSize) 49330b57cec5SDimitry Andric continue; 49345ffd83dbSDimitry Andric } 49350b57cec5SDimitry Andric 49365ffd83dbSDimitry Andric InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand()); 49370b57cec5SDimitry Andric if (!IA) return true; 49380b57cec5SDimitry Andric 49390b57cec5SDimitry Andric // If this is a memory operand, we're cool, otherwise bail out. 49400b57cec5SDimitry Andric if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI)) 49410b57cec5SDimitry Andric return true; 49420b57cec5SDimitry Andric continue; 49430b57cec5SDimitry Andric } 49440b57cec5SDimitry Andric 4945480093f4SDimitry Andric if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, 4946480093f4SDimitry Andric PSI, BFI, SeenInsts)) 49470b57cec5SDimitry Andric return true; 49480b57cec5SDimitry Andric } 49490b57cec5SDimitry Andric 49500b57cec5SDimitry Andric return false; 49510b57cec5SDimitry Andric } 49520b57cec5SDimitry Andric 49530b57cec5SDimitry Andric /// Return true if Val is already known to be live at the use site that we're 49540b57cec5SDimitry Andric /// folding it into. If so, there is no cost to include it in the addressing 49550b57cec5SDimitry Andric /// mode. KnownLive1 and KnownLive2 are two values that we know are live at the 49560b57cec5SDimitry Andric /// instruction already. 
49570b57cec5SDimitry Andric bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, 49580b57cec5SDimitry Andric Value *KnownLive2) { 49590b57cec5SDimitry Andric // If Val is either of the known-live values, we know it is live! 49600b57cec5SDimitry Andric if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) 49610b57cec5SDimitry Andric return true; 49620b57cec5SDimitry Andric 49630b57cec5SDimitry Andric // All values other than instructions and arguments (e.g. constants) are live. 49640b57cec5SDimitry Andric if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; 49650b57cec5SDimitry Andric 49660b57cec5SDimitry Andric // If Val is a constant sized alloca in the entry block, it is live, this is 49670b57cec5SDimitry Andric // true because it is just a reference to the stack/frame pointer, which is 49680b57cec5SDimitry Andric // live for the whole function. 49690b57cec5SDimitry Andric if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) 49700b57cec5SDimitry Andric if (AI->isStaticAlloca()) 49710b57cec5SDimitry Andric return true; 49720b57cec5SDimitry Andric 49730b57cec5SDimitry Andric // Check to see if this value is already used in the memory instruction's 49740b57cec5SDimitry Andric // block. If so, it's already live into the block at the very least, so we 49750b57cec5SDimitry Andric // can reasonably fold it. 49760b57cec5SDimitry Andric return Val->isUsedInBasicBlock(MemoryInst->getParent()); 49770b57cec5SDimitry Andric } 49780b57cec5SDimitry Andric 49790b57cec5SDimitry Andric /// It is possible for the addressing mode of the machine to fold the specified 49800b57cec5SDimitry Andric /// instruction into a load or store that ultimately uses it. 49810b57cec5SDimitry Andric /// However, the specified instruction has multiple uses. 49820b57cec5SDimitry Andric /// Given this, it may actually increase register pressure to fold it 49830b57cec5SDimitry Andric /// into the load. 
For example, consider this code: 49840b57cec5SDimitry Andric /// 49850b57cec5SDimitry Andric /// X = ... 49860b57cec5SDimitry Andric /// Y = X+1 49870b57cec5SDimitry Andric /// use(Y) -> nonload/store 49880b57cec5SDimitry Andric /// Z = Y+1 49890b57cec5SDimitry Andric /// load Z 49900b57cec5SDimitry Andric /// 49910b57cec5SDimitry Andric /// In this case, Y has multiple uses, and can be folded into the load of Z 49920b57cec5SDimitry Andric /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to 49930b57cec5SDimitry Andric /// be live at the use(Y) line. If we don't fold Y into load Z, we use one 49940b57cec5SDimitry Andric /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the 49950b57cec5SDimitry Andric /// number of computations either. 49960b57cec5SDimitry Andric /// 49970b57cec5SDimitry Andric /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If 49980b57cec5SDimitry Andric /// X was live across 'load Z' for other reasons, we actually *would* want to 49990b57cec5SDimitry Andric /// fold the addressing mode in the Z case. This would make Y die earlier. 50000b57cec5SDimitry Andric bool AddressingModeMatcher:: 50010b57cec5SDimitry Andric isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, 50020b57cec5SDimitry Andric ExtAddrMode &AMAfter) { 50030b57cec5SDimitry Andric if (IgnoreProfitability) return true; 50040b57cec5SDimitry Andric 50050b57cec5SDimitry Andric // AMBefore is the addressing mode before this instruction was folded into it, 50060b57cec5SDimitry Andric // and AMAfter is the addressing mode after the instruction was folded. Get 50070b57cec5SDimitry Andric // the set of registers referenced by AMAfter and subtract out those 50080b57cec5SDimitry Andric // referenced by AMBefore: this is the set of values which folding in this 50090b57cec5SDimitry Andric // address extends the lifetime of. 
50100b57cec5SDimitry Andric // 50110b57cec5SDimitry Andric // Note that there are only two potential values being referenced here, 50120b57cec5SDimitry Andric // BaseReg and ScaleReg (global addresses are always available, as are any 50130b57cec5SDimitry Andric // folded immediates). 50140b57cec5SDimitry Andric Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; 50150b57cec5SDimitry Andric 50160b57cec5SDimitry Andric // If the BaseReg or ScaledReg was referenced by the previous addrmode, their 50170b57cec5SDimitry Andric // lifetime wasn't extended by adding this instruction. 50180b57cec5SDimitry Andric if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 50190b57cec5SDimitry Andric BaseReg = nullptr; 50200b57cec5SDimitry Andric if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 50210b57cec5SDimitry Andric ScaledReg = nullptr; 50220b57cec5SDimitry Andric 50230b57cec5SDimitry Andric // If folding this instruction (and it's subexprs) didn't extend any live 50240b57cec5SDimitry Andric // ranges, we're ok with it. 50250b57cec5SDimitry Andric if (!BaseReg && !ScaledReg) 50260b57cec5SDimitry Andric return true; 50270b57cec5SDimitry Andric 50280b57cec5SDimitry Andric // If all uses of this instruction can have the address mode sunk into them, 50290b57cec5SDimitry Andric // we can remove the addressing mode and effectively trade one live register 50300b57cec5SDimitry Andric // for another (at worst.) In this context, folding an addressing mode into 50310b57cec5SDimitry Andric // the use is just a particularly nice way of sinking it. 5032349cc55cSDimitry Andric SmallVector<std::pair<Value *, Type *>, 16> MemoryUses; 50330b57cec5SDimitry Andric SmallPtrSet<Instruction*, 16> ConsideredInsts; 5034480093f4SDimitry Andric if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, 5035480093f4SDimitry Andric PSI, BFI)) 50360b57cec5SDimitry Andric return false; // Has a non-memory, non-foldable use! 
50370b57cec5SDimitry Andric 50380b57cec5SDimitry Andric // Now that we know that all uses of this instruction are part of a chain of 50390b57cec5SDimitry Andric // computation involving only operations that could theoretically be folded 50400b57cec5SDimitry Andric // into a memory use, loop over each of these memory operation uses and see 50410b57cec5SDimitry Andric // if they could *actually* fold the instruction. The assumption is that 50420b57cec5SDimitry Andric // addressing modes are cheap and that duplicating the computation involved 50430b57cec5SDimitry Andric // many times is worthwhile, even on a fastpath. For sinking candidates 50440b57cec5SDimitry Andric // (i.e. cold call sites), this serves as a way to prevent excessive code 50450b57cec5SDimitry Andric // growth since most architectures have some reasonable small and fast way to 50460b57cec5SDimitry Andric // compute an effective address. (i.e LEA on x86) 50470b57cec5SDimitry Andric SmallVector<Instruction*, 32> MatchedAddrModeInsts; 5048349cc55cSDimitry Andric for (const std::pair<Value *, Type *> &Pair : MemoryUses) { 5049349cc55cSDimitry Andric Value *Address = Pair.first; 5050349cc55cSDimitry Andric Type *AddressAccessTy = Pair.second; 5051349cc55cSDimitry Andric unsigned AS = Address->getType()->getPointerAddressSpace(); 50520b57cec5SDimitry Andric 50530b57cec5SDimitry Andric // Do a match against the root of this address, ignoring profitability. This 50540b57cec5SDimitry Andric // will tell us if the addressing mode for the memory operation will 50550b57cec5SDimitry Andric // *actually* cover the shared instruction. 
50560b57cec5SDimitry Andric ExtAddrMode Result; 50570b57cec5SDimitry Andric std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr, 50580b57cec5SDimitry Andric 0); 50590b57cec5SDimitry Andric TypePromotionTransaction::ConstRestorationPt LastKnownGood = 50600b57cec5SDimitry Andric TPT.getRestorationPoint(); 5061fe6060f1SDimitry Andric AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn, 5062fe6060f1SDimitry Andric AddressAccessTy, AS, MemoryInst, Result, 5063fe6060f1SDimitry Andric InsertedInsts, PromotedInsts, TPT, 5064fe6060f1SDimitry Andric LargeOffsetGEP, OptSize, PSI, BFI); 50650b57cec5SDimitry Andric Matcher.IgnoreProfitability = true; 50660b57cec5SDimitry Andric bool Success = Matcher.matchAddr(Address, 0); 50670b57cec5SDimitry Andric (void)Success; assert(Success && "Couldn't select *anything*?"); 50680b57cec5SDimitry Andric 50690b57cec5SDimitry Andric // The match was to check the profitability, the changes made are not 50700b57cec5SDimitry Andric // part of the original matcher. Therefore, they should be dropped 50710b57cec5SDimitry Andric // otherwise the original matcher will not present the right state. 50720b57cec5SDimitry Andric TPT.rollback(LastKnownGood); 50730b57cec5SDimitry Andric 50740b57cec5SDimitry Andric // If the match didn't cover I, then it won't be shared by it. 50750b57cec5SDimitry Andric if (!is_contained(MatchedAddrModeInsts, I)) 50760b57cec5SDimitry Andric return false; 50770b57cec5SDimitry Andric 50780b57cec5SDimitry Andric MatchedAddrModeInsts.clear(); 50790b57cec5SDimitry Andric } 50800b57cec5SDimitry Andric 50810b57cec5SDimitry Andric return true; 50820b57cec5SDimitry Andric } 50830b57cec5SDimitry Andric 50840b57cec5SDimitry Andric /// Return true if the specified values are defined in a 50850b57cec5SDimitry Andric /// different basic block than BB. 
50860b57cec5SDimitry Andric static bool IsNonLocalValue(Value *V, BasicBlock *BB) { 50870b57cec5SDimitry Andric if (Instruction *I = dyn_cast<Instruction>(V)) 50880b57cec5SDimitry Andric return I->getParent() != BB; 50890b57cec5SDimitry Andric return false; 50900b57cec5SDimitry Andric } 50910b57cec5SDimitry Andric 50920b57cec5SDimitry Andric /// Sink addressing mode computation immediate before MemoryInst if doing so 50930b57cec5SDimitry Andric /// can be done without increasing register pressure. The need for the 50940b57cec5SDimitry Andric /// register pressure constraint means this can end up being an all or nothing 50950b57cec5SDimitry Andric /// decision for all uses of the same addressing computation. 50960b57cec5SDimitry Andric /// 50970b57cec5SDimitry Andric /// Load and Store Instructions often have addressing modes that can do 50980b57cec5SDimitry Andric /// significant amounts of computation. As such, instruction selection will try 50990b57cec5SDimitry Andric /// to get the load or store to do as much computation as possible for the 51000b57cec5SDimitry Andric /// program. The problem is that isel can only see within a single block. As 51010b57cec5SDimitry Andric /// such, we sink as much legal addressing mode work into the block as possible. 51020b57cec5SDimitry Andric /// 51030b57cec5SDimitry Andric /// This method is used to optimize both load/store and inline asms with memory 51040b57cec5SDimitry Andric /// operands. It's also used to sink addressing computations feeding into cold 51050b57cec5SDimitry Andric /// call sites into their (cold) basic block. 
51060b57cec5SDimitry Andric /// 51070b57cec5SDimitry Andric /// The motivation for handling sinking into cold blocks is that doing so can 51080b57cec5SDimitry Andric /// both enable other address mode sinking (by satisfying the register pressure 51090b57cec5SDimitry Andric /// constraint above), and reduce register pressure globally (by removing the 51100b57cec5SDimitry Andric /// addressing mode computation from the fast path entirely.). 51110b57cec5SDimitry Andric bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 51120b57cec5SDimitry Andric Type *AccessTy, unsigned AddrSpace) { 51130b57cec5SDimitry Andric Value *Repl = Addr; 51140b57cec5SDimitry Andric 51150b57cec5SDimitry Andric // Try to collapse single-value PHI nodes. This is necessary to undo 51160b57cec5SDimitry Andric // unprofitable PRE transformations. 51170b57cec5SDimitry Andric SmallVector<Value*, 8> worklist; 51180b57cec5SDimitry Andric SmallPtrSet<Value*, 16> Visited; 51190b57cec5SDimitry Andric worklist.push_back(Addr); 51200b57cec5SDimitry Andric 51210b57cec5SDimitry Andric // Use a worklist to iteratively look through PHI and select nodes, and 51220b57cec5SDimitry Andric // ensure that the addressing mode obtained from the non-PHI/select roots of 51230b57cec5SDimitry Andric // the graph are compatible. 
51240b57cec5SDimitry Andric bool PhiOrSelectSeen = false; 51250b57cec5SDimitry Andric SmallVector<Instruction*, 16> AddrModeInsts; 51260b57cec5SDimitry Andric const SimplifyQuery SQ(*DL, TLInfo); 51270b57cec5SDimitry Andric AddressingModeCombiner AddrModes(SQ, Addr); 51280b57cec5SDimitry Andric TypePromotionTransaction TPT(RemovedInsts); 51290b57cec5SDimitry Andric TypePromotionTransaction::ConstRestorationPt LastKnownGood = 51300b57cec5SDimitry Andric TPT.getRestorationPoint(); 51310b57cec5SDimitry Andric while (!worklist.empty()) { 5132349cc55cSDimitry Andric Value *V = worklist.pop_back_val(); 51330b57cec5SDimitry Andric 51340b57cec5SDimitry Andric // We allow traversing cyclic Phi nodes. 51350b57cec5SDimitry Andric // In case of success after this loop we ensure that traversing through 51360b57cec5SDimitry Andric // Phi nodes ends up with all cases to compute address of the form 51370b57cec5SDimitry Andric // BaseGV + Base + Scale * Index + Offset 51380b57cec5SDimitry Andric // where Scale and Offset are constans and BaseGV, Base and Index 51390b57cec5SDimitry Andric // are exactly the same Values in all cases. 51400b57cec5SDimitry Andric // It means that BaseGV, Scale and Offset dominate our memory instruction 51410b57cec5SDimitry Andric // and have the same value as they had in address computation represented 51420b57cec5SDimitry Andric // as Phi. So we can safely sink address computation to memory instruction. 51430b57cec5SDimitry Andric if (!Visited.insert(V).second) 51440b57cec5SDimitry Andric continue; 51450b57cec5SDimitry Andric 51460b57cec5SDimitry Andric // For a PHI node, push all of its incoming values. 51470b57cec5SDimitry Andric if (PHINode *P = dyn_cast<PHINode>(V)) { 5148e8d8bef9SDimitry Andric append_range(worklist, P->incoming_values()); 51490b57cec5SDimitry Andric PhiOrSelectSeen = true; 51500b57cec5SDimitry Andric continue; 51510b57cec5SDimitry Andric } 51520b57cec5SDimitry Andric // Similar for select. 
51530b57cec5SDimitry Andric if (SelectInst *SI = dyn_cast<SelectInst>(V)) { 51540b57cec5SDimitry Andric worklist.push_back(SI->getFalseValue()); 51550b57cec5SDimitry Andric worklist.push_back(SI->getTrueValue()); 51560b57cec5SDimitry Andric PhiOrSelectSeen = true; 51570b57cec5SDimitry Andric continue; 51580b57cec5SDimitry Andric } 51590b57cec5SDimitry Andric 51600b57cec5SDimitry Andric // For non-PHIs, determine the addressing mode being computed. Note that 51610b57cec5SDimitry Andric // the result may differ depending on what other uses our candidate 51620b57cec5SDimitry Andric // addressing instructions might have. 51630b57cec5SDimitry Andric AddrModeInsts.clear(); 51640b57cec5SDimitry Andric std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr, 51650b57cec5SDimitry Andric 0); 5166fe6060f1SDimitry Andric // Defer the query (and possible computation of) the dom tree to point of 5167fe6060f1SDimitry Andric // actual use. It's expected that most address matches don't actually need 5168fe6060f1SDimitry Andric // the domtree. 5169fe6060f1SDimitry Andric auto getDTFn = [MemoryInst, this]() -> const DominatorTree & { 5170fe6060f1SDimitry Andric Function *F = MemoryInst->getParent()->getParent(); 5171fe6060f1SDimitry Andric return this->getDT(*F); 5172fe6060f1SDimitry Andric }; 51730b57cec5SDimitry Andric ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( 5174fe6060f1SDimitry Andric V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn, 5175fe6060f1SDimitry Andric *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, 5176480093f4SDimitry Andric BFI.get()); 51770b57cec5SDimitry Andric 51780b57cec5SDimitry Andric GetElementPtrInst *GEP = LargeOffsetGEP.first; 51790b57cec5SDimitry Andric if (GEP && !NewGEPBases.count(GEP)) { 51800b57cec5SDimitry Andric // If splitting the underlying data structure can reduce the offset of a 51810b57cec5SDimitry Andric // GEP, collect the GEP. 
Skip the GEPs that are the new bases of 51820b57cec5SDimitry Andric // previously split data structures. 51830b57cec5SDimitry Andric LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP); 518481ad6265SDimitry Andric LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size())); 51850b57cec5SDimitry Andric } 51860b57cec5SDimitry Andric 51870b57cec5SDimitry Andric NewAddrMode.OriginalValue = V; 51880b57cec5SDimitry Andric if (!AddrModes.addNewAddrMode(NewAddrMode)) 51890b57cec5SDimitry Andric break; 51900b57cec5SDimitry Andric } 51910b57cec5SDimitry Andric 51920b57cec5SDimitry Andric // Try to combine the AddrModes we've collected. If we couldn't collect any, 51930b57cec5SDimitry Andric // or we have multiple but either couldn't combine them or combining them 51940b57cec5SDimitry Andric // wouldn't do anything useful, bail out now. 51950b57cec5SDimitry Andric if (!AddrModes.combineAddrModes()) { 51960b57cec5SDimitry Andric TPT.rollback(LastKnownGood); 51970b57cec5SDimitry Andric return false; 51980b57cec5SDimitry Andric } 51995ffd83dbSDimitry Andric bool Modified = TPT.commit(); 52000b57cec5SDimitry Andric 52010b57cec5SDimitry Andric // Get the combined AddrMode (or the only AddrMode, if we only had one). 52020b57cec5SDimitry Andric ExtAddrMode AddrMode = AddrModes.getAddrMode(); 52030b57cec5SDimitry Andric 52040b57cec5SDimitry Andric // If all the instructions matched are already in this BB, don't do anything. 52050b57cec5SDimitry Andric // If we saw a Phi node then it is not local definitely, and if we saw a select 52060b57cec5SDimitry Andric // then we want to push the address calculation past it even if it's already 52070b57cec5SDimitry Andric // in this BB. 
52080b57cec5SDimitry Andric if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) { 52090b57cec5SDimitry Andric return IsNonLocalValue(V, MemoryInst->getParent()); 52100b57cec5SDimitry Andric })) { 52110b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode 52120b57cec5SDimitry Andric << "\n"); 52135ffd83dbSDimitry Andric return Modified; 52140b57cec5SDimitry Andric } 52150b57cec5SDimitry Andric 52160b57cec5SDimitry Andric // Insert this computation right after this user. Since our caller is 52170b57cec5SDimitry Andric // scanning from the top of the BB to the bottom, reuse of the expr are 52180b57cec5SDimitry Andric // guaranteed to happen later. 52190b57cec5SDimitry Andric IRBuilder<> Builder(MemoryInst); 52200b57cec5SDimitry Andric 52210b57cec5SDimitry Andric // Now that we determined the addressing expression we want to use and know 52220b57cec5SDimitry Andric // that we have to sink it into this block. Check to see if we have already 52230b57cec5SDimitry Andric // done this for some other load/store instr in this block. If so, reuse 52240b57cec5SDimitry Andric // the computation. Before attempting reuse, check if the address is valid 52250b57cec5SDimitry Andric // as it may have been erased. 52260b57cec5SDimitry Andric 52270b57cec5SDimitry Andric WeakTrackingVH SunkAddrVH = SunkAddrs[Addr]; 52280b57cec5SDimitry Andric 52290b57cec5SDimitry Andric Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? 
SunkAddrVH : nullptr; 5230fcaf7f86SDimitry Andric Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); 52310b57cec5SDimitry Andric if (SunkAddr) { 52320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode 52330b57cec5SDimitry Andric << " for " << *MemoryInst << "\n"); 5234fcaf7f86SDimitry Andric if (SunkAddr->getType() != Addr->getType()) { 5235fcaf7f86SDimitry Andric if (SunkAddr->getType()->getPointerAddressSpace() != 5236fcaf7f86SDimitry Andric Addr->getType()->getPointerAddressSpace() && 5237fcaf7f86SDimitry Andric !DL->isNonIntegralPointerType(Addr->getType())) { 5238fcaf7f86SDimitry Andric // There are two reasons the address spaces might not match: a no-op 5239fcaf7f86SDimitry Andric // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a 5240fcaf7f86SDimitry Andric // ptrtoint/inttoptr pair to ensure we match the original semantics. 5241fcaf7f86SDimitry Andric // TODO: allow bitcast between different address space pointers with the 5242fcaf7f86SDimitry Andric // same size. 5243fcaf7f86SDimitry Andric SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); 5244fcaf7f86SDimitry Andric SunkAddr = 5245fcaf7f86SDimitry Andric Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); 5246fcaf7f86SDimitry Andric } else 52470b57cec5SDimitry Andric SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); 5248fcaf7f86SDimitry Andric } 52498bcb0991SDimitry Andric } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && 52505ffd83dbSDimitry Andric SubtargetInfo->addrSinkUsingGEPs())) { 52510b57cec5SDimitry Andric // By default, we use the GEP-based method when AA is used later. This 52520b57cec5SDimitry Andric // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. 
52530b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode 52540b57cec5SDimitry Andric << " for " << *MemoryInst << "\n"); 52550b57cec5SDimitry Andric Value *ResultPtr = nullptr, *ResultIndex = nullptr; 52560b57cec5SDimitry Andric 52570b57cec5SDimitry Andric // First, find the pointer. 52580b57cec5SDimitry Andric if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { 52590b57cec5SDimitry Andric ResultPtr = AddrMode.BaseReg; 52600b57cec5SDimitry Andric AddrMode.BaseReg = nullptr; 52610b57cec5SDimitry Andric } 52620b57cec5SDimitry Andric 52630b57cec5SDimitry Andric if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { 52640b57cec5SDimitry Andric // We can't add more than one pointer together, nor can we scale a 52650b57cec5SDimitry Andric // pointer (both of which seem meaningless). 52660b57cec5SDimitry Andric if (ResultPtr || AddrMode.Scale != 1) 52675ffd83dbSDimitry Andric return Modified; 52680b57cec5SDimitry Andric 52690b57cec5SDimitry Andric ResultPtr = AddrMode.ScaledReg; 52700b57cec5SDimitry Andric AddrMode.Scale = 0; 52710b57cec5SDimitry Andric } 52720b57cec5SDimitry Andric 52730b57cec5SDimitry Andric // It is only safe to sign extend the BaseReg if we know that the math 52740b57cec5SDimitry Andric // required to create it did not overflow before we extend it. Since 52750b57cec5SDimitry Andric // the original IR value was tossed in favor of a constant back when 52760b57cec5SDimitry Andric // the AddrMode was created we need to bail out gracefully if widths 52770b57cec5SDimitry Andric // do not match instead of extending it. 52780b57cec5SDimitry Andric // 52790b57cec5SDimitry Andric // (See below for code to add the scale.) 
52800b57cec5SDimitry Andric if (AddrMode.Scale) { 52810b57cec5SDimitry Andric Type *ScaledRegTy = AddrMode.ScaledReg->getType(); 52820b57cec5SDimitry Andric if (cast<IntegerType>(IntPtrTy)->getBitWidth() > 52830b57cec5SDimitry Andric cast<IntegerType>(ScaledRegTy)->getBitWidth()) 52845ffd83dbSDimitry Andric return Modified; 52850b57cec5SDimitry Andric } 52860b57cec5SDimitry Andric 52870b57cec5SDimitry Andric if (AddrMode.BaseGV) { 52880b57cec5SDimitry Andric if (ResultPtr) 52895ffd83dbSDimitry Andric return Modified; 52900b57cec5SDimitry Andric 52910b57cec5SDimitry Andric ResultPtr = AddrMode.BaseGV; 52920b57cec5SDimitry Andric } 52930b57cec5SDimitry Andric 52940b57cec5SDimitry Andric // If the real base value actually came from an inttoptr, then the matcher 52950b57cec5SDimitry Andric // will look through it and provide only the integer value. In that case, 52960b57cec5SDimitry Andric // use it here. 52970b57cec5SDimitry Andric if (!DL->isNonIntegralPointerType(Addr->getType())) { 52980b57cec5SDimitry Andric if (!ResultPtr && AddrMode.BaseReg) { 52990b57cec5SDimitry Andric ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), 53000b57cec5SDimitry Andric "sunkaddr"); 53010b57cec5SDimitry Andric AddrMode.BaseReg = nullptr; 53020b57cec5SDimitry Andric } else if (!ResultPtr && AddrMode.Scale == 1) { 53030b57cec5SDimitry Andric ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), 53040b57cec5SDimitry Andric "sunkaddr"); 53050b57cec5SDimitry Andric AddrMode.Scale = 0; 53060b57cec5SDimitry Andric } 53070b57cec5SDimitry Andric } 53080b57cec5SDimitry Andric 53090b57cec5SDimitry Andric if (!ResultPtr && 53100b57cec5SDimitry Andric !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { 53110b57cec5SDimitry Andric SunkAddr = Constant::getNullValue(Addr->getType()); 53120b57cec5SDimitry Andric } else if (!ResultPtr) { 53135ffd83dbSDimitry Andric return Modified; 53140b57cec5SDimitry Andric } else { 53150b57cec5SDimitry Andric Type 
*I8PtrTy = 53160b57cec5SDimitry Andric Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); 53170b57cec5SDimitry Andric Type *I8Ty = Builder.getInt8Ty(); 53180b57cec5SDimitry Andric 53190b57cec5SDimitry Andric // Start with the base register. Do this first so that subsequent address 53200b57cec5SDimitry Andric // matching finds it last, which will prevent it from trying to match it 53210b57cec5SDimitry Andric // as the scaled value in case it happens to be a mul. That would be 53220b57cec5SDimitry Andric // problematic if we've sunk a different mul for the scale, because then 53230b57cec5SDimitry Andric // we'd end up sinking both muls. 53240b57cec5SDimitry Andric if (AddrMode.BaseReg) { 53250b57cec5SDimitry Andric Value *V = AddrMode.BaseReg; 53260b57cec5SDimitry Andric if (V->getType() != IntPtrTy) 53270b57cec5SDimitry Andric V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 53280b57cec5SDimitry Andric 53290b57cec5SDimitry Andric ResultIndex = V; 53300b57cec5SDimitry Andric } 53310b57cec5SDimitry Andric 53320b57cec5SDimitry Andric // Add the scale value. 53330b57cec5SDimitry Andric if (AddrMode.Scale) { 53340b57cec5SDimitry Andric Value *V = AddrMode.ScaledReg; 53350b57cec5SDimitry Andric if (V->getType() == IntPtrTy) { 53360b57cec5SDimitry Andric // done. 
53370b57cec5SDimitry Andric } else { 53380b57cec5SDimitry Andric assert(cast<IntegerType>(IntPtrTy)->getBitWidth() < 53390b57cec5SDimitry Andric cast<IntegerType>(V->getType())->getBitWidth() && 53400b57cec5SDimitry Andric "We can't transform if ScaledReg is too narrow"); 53410b57cec5SDimitry Andric V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 53420b57cec5SDimitry Andric } 53430b57cec5SDimitry Andric 53440b57cec5SDimitry Andric if (AddrMode.Scale != 1) 53450b57cec5SDimitry Andric V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 53460b57cec5SDimitry Andric "sunkaddr"); 53470b57cec5SDimitry Andric if (ResultIndex) 53480b57cec5SDimitry Andric ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); 53490b57cec5SDimitry Andric else 53500b57cec5SDimitry Andric ResultIndex = V; 53510b57cec5SDimitry Andric } 53520b57cec5SDimitry Andric 53530b57cec5SDimitry Andric // Add in the Base Offset if present. 53540b57cec5SDimitry Andric if (AddrMode.BaseOffs) { 53550b57cec5SDimitry Andric Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 53560b57cec5SDimitry Andric if (ResultIndex) { 53570b57cec5SDimitry Andric // We need to add this separately from the scale above to help with 53580b57cec5SDimitry Andric // SDAG consecutive load/store merging. 
53590b57cec5SDimitry Andric if (ResultPtr->getType() != I8PtrTy) 53600b57cec5SDimitry Andric ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); 536181ad6265SDimitry Andric ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, 536281ad6265SDimitry Andric "sunkaddr", AddrMode.InBounds); 53630b57cec5SDimitry Andric } 53640b57cec5SDimitry Andric 53650b57cec5SDimitry Andric ResultIndex = V; 53660b57cec5SDimitry Andric } 53670b57cec5SDimitry Andric 53680b57cec5SDimitry Andric if (!ResultIndex) { 53690b57cec5SDimitry Andric SunkAddr = ResultPtr; 53700b57cec5SDimitry Andric } else { 53710b57cec5SDimitry Andric if (ResultPtr->getType() != I8PtrTy) 53720b57cec5SDimitry Andric ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); 537381ad6265SDimitry Andric SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr", 537481ad6265SDimitry Andric AddrMode.InBounds); 53750b57cec5SDimitry Andric } 53760b57cec5SDimitry Andric 5377fcaf7f86SDimitry Andric if (SunkAddr->getType() != Addr->getType()) { 5378fcaf7f86SDimitry Andric if (SunkAddr->getType()->getPointerAddressSpace() != 5379fcaf7f86SDimitry Andric Addr->getType()->getPointerAddressSpace() && 5380fcaf7f86SDimitry Andric !DL->isNonIntegralPointerType(Addr->getType())) { 5381fcaf7f86SDimitry Andric // There are two reasons the address spaces might not match: a no-op 5382fcaf7f86SDimitry Andric // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a 5383fcaf7f86SDimitry Andric // ptrtoint/inttoptr pair to ensure we match the original semantics. 5384fcaf7f86SDimitry Andric // TODO: allow bitcast between different address space pointers with 5385fcaf7f86SDimitry Andric // the same size. 
5386fcaf7f86SDimitry Andric SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); 5387fcaf7f86SDimitry Andric SunkAddr = 5388fcaf7f86SDimitry Andric Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); 5389fcaf7f86SDimitry Andric } else 53900b57cec5SDimitry Andric SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); 53910b57cec5SDimitry Andric } 5392fcaf7f86SDimitry Andric } 53930b57cec5SDimitry Andric } else { 53940b57cec5SDimitry Andric // We'd require a ptrtoint/inttoptr down the line, which we can't do for 53950b57cec5SDimitry Andric // non-integral pointers, so in that case bail out now. 53960b57cec5SDimitry Andric Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr; 53970b57cec5SDimitry Andric Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr; 53980b57cec5SDimitry Andric PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy); 53990b57cec5SDimitry Andric PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy); 54000b57cec5SDimitry Andric if (DL->isNonIntegralPointerType(Addr->getType()) || 54010b57cec5SDimitry Andric (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) || 54020b57cec5SDimitry Andric (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) || 54030b57cec5SDimitry Andric (AddrMode.BaseGV && 54040b57cec5SDimitry Andric DL->isNonIntegralPointerType(AddrMode.BaseGV->getType()))) 54055ffd83dbSDimitry Andric return Modified; 54060b57cec5SDimitry Andric 54070b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode 54080b57cec5SDimitry Andric << " for " << *MemoryInst << "\n"); 54090b57cec5SDimitry Andric Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); 54100b57cec5SDimitry Andric Value *Result = nullptr; 54110b57cec5SDimitry Andric 54120b57cec5SDimitry Andric // Start with the base register. 
Do this first so that subsequent address 54130b57cec5SDimitry Andric // matching finds it last, which will prevent it from trying to match it 54140b57cec5SDimitry Andric // as the scaled value in case it happens to be a mul. That would be 54150b57cec5SDimitry Andric // problematic if we've sunk a different mul for the scale, because then 54160b57cec5SDimitry Andric // we'd end up sinking both muls. 54170b57cec5SDimitry Andric if (AddrMode.BaseReg) { 54180b57cec5SDimitry Andric Value *V = AddrMode.BaseReg; 54190b57cec5SDimitry Andric if (V->getType()->isPointerTy()) 54200b57cec5SDimitry Andric V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 54210b57cec5SDimitry Andric if (V->getType() != IntPtrTy) 54220b57cec5SDimitry Andric V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 54230b57cec5SDimitry Andric Result = V; 54240b57cec5SDimitry Andric } 54250b57cec5SDimitry Andric 54260b57cec5SDimitry Andric // Add the scale value. 54270b57cec5SDimitry Andric if (AddrMode.Scale) { 54280b57cec5SDimitry Andric Value *V = AddrMode.ScaledReg; 54290b57cec5SDimitry Andric if (V->getType() == IntPtrTy) { 54300b57cec5SDimitry Andric // done. 54310b57cec5SDimitry Andric } else if (V->getType()->isPointerTy()) { 54320b57cec5SDimitry Andric V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 54330b57cec5SDimitry Andric } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 54340b57cec5SDimitry Andric cast<IntegerType>(V->getType())->getBitWidth()) { 54350b57cec5SDimitry Andric V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 54360b57cec5SDimitry Andric } else { 54370b57cec5SDimitry Andric // It is only safe to sign extend the BaseReg if we know that the math 54380b57cec5SDimitry Andric // required to create it did not overflow before we extend it. 
Since 54390b57cec5SDimitry Andric // the original IR value was tossed in favor of a constant back when 54400b57cec5SDimitry Andric // the AddrMode was created we need to bail out gracefully if widths 54410b57cec5SDimitry Andric // do not match instead of extending it. 54420b57cec5SDimitry Andric Instruction *I = dyn_cast_or_null<Instruction>(Result); 54430b57cec5SDimitry Andric if (I && (Result != AddrMode.BaseReg)) 54440b57cec5SDimitry Andric I->eraseFromParent(); 54455ffd83dbSDimitry Andric return Modified; 54460b57cec5SDimitry Andric } 54470b57cec5SDimitry Andric if (AddrMode.Scale != 1) 54480b57cec5SDimitry Andric V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 54490b57cec5SDimitry Andric "sunkaddr"); 54500b57cec5SDimitry Andric if (Result) 54510b57cec5SDimitry Andric Result = Builder.CreateAdd(Result, V, "sunkaddr"); 54520b57cec5SDimitry Andric else 54530b57cec5SDimitry Andric Result = V; 54540b57cec5SDimitry Andric } 54550b57cec5SDimitry Andric 54560b57cec5SDimitry Andric // Add in the BaseGV if present. 54570b57cec5SDimitry Andric if (AddrMode.BaseGV) { 54580b57cec5SDimitry Andric Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); 54590b57cec5SDimitry Andric if (Result) 54600b57cec5SDimitry Andric Result = Builder.CreateAdd(Result, V, "sunkaddr"); 54610b57cec5SDimitry Andric else 54620b57cec5SDimitry Andric Result = V; 54630b57cec5SDimitry Andric } 54640b57cec5SDimitry Andric 54650b57cec5SDimitry Andric // Add in the Base Offset if present. 
54660b57cec5SDimitry Andric if (AddrMode.BaseOffs) { 54670b57cec5SDimitry Andric Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 54680b57cec5SDimitry Andric if (Result) 54690b57cec5SDimitry Andric Result = Builder.CreateAdd(Result, V, "sunkaddr"); 54700b57cec5SDimitry Andric else 54710b57cec5SDimitry Andric Result = V; 54720b57cec5SDimitry Andric } 54730b57cec5SDimitry Andric 54740b57cec5SDimitry Andric if (!Result) 54750b57cec5SDimitry Andric SunkAddr = Constant::getNullValue(Addr->getType()); 54760b57cec5SDimitry Andric else 54770b57cec5SDimitry Andric SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); 54780b57cec5SDimitry Andric } 54790b57cec5SDimitry Andric 54800b57cec5SDimitry Andric MemoryInst->replaceUsesOfWith(Repl, SunkAddr); 54810b57cec5SDimitry Andric // Store the newly computed address into the cache. In the case we reused a 54820b57cec5SDimitry Andric // value, this should be idempotent. 54830b57cec5SDimitry Andric SunkAddrs[Addr] = WeakTrackingVH(SunkAddr); 54840b57cec5SDimitry Andric 54850b57cec5SDimitry Andric // If we have no uses, recursively delete the value and all dead instructions 54860b57cec5SDimitry Andric // using it. 54870b57cec5SDimitry Andric if (Repl->use_empty()) { 5488e8d8bef9SDimitry Andric resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() { 5489e8d8bef9SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions( 5490e8d8bef9SDimitry Andric Repl, TLInfo, nullptr, 5491e8d8bef9SDimitry Andric [&](Value *V) { removeAllAssertingVHReferences(V); }); 5492e8d8bef9SDimitry Andric }); 54930b57cec5SDimitry Andric } 54940b57cec5SDimitry Andric ++NumMemoryInsts; 54950b57cec5SDimitry Andric return true; 54960b57cec5SDimitry Andric } 54970b57cec5SDimitry Andric 54985ffd83dbSDimitry Andric /// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find 54995ffd83dbSDimitry Andric /// a uniform base to use for ISD::MGATHER/MSCATTER. 
SelectionDAGBuilder can 55005ffd83dbSDimitry Andric /// only handle a 2 operand GEP in the same basic block or a splat constant 55015ffd83dbSDimitry Andric /// vector. The 2 operands to the GEP must have a scalar pointer and a vector 55025ffd83dbSDimitry Andric /// index. 55035ffd83dbSDimitry Andric /// 55045ffd83dbSDimitry Andric /// If the existing GEP has a vector base pointer that is splat, we can look 55055ffd83dbSDimitry Andric /// through the splat to find the scalar pointer. If we can't find a scalar 55065ffd83dbSDimitry Andric /// pointer there's nothing we can do. 55075ffd83dbSDimitry Andric /// 55085ffd83dbSDimitry Andric /// If we have a GEP with more than 2 indices where the middle indices are all 55095ffd83dbSDimitry Andric /// zeroes, we can replace it with 2 GEPs where the second has 2 operands. 55105ffd83dbSDimitry Andric /// 55115ffd83dbSDimitry Andric /// If the final index isn't a vector or is a splat, we can emit a scalar GEP 55125ffd83dbSDimitry Andric /// followed by a GEP with an all zeroes vector index. This will enable 5513e8d8bef9SDimitry Andric /// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a 55145ffd83dbSDimitry Andric /// zero index. 55155ffd83dbSDimitry Andric bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, 55165ffd83dbSDimitry Andric Value *Ptr) { 5517e8d8bef9SDimitry Andric Value *NewAddr; 5518e8d8bef9SDimitry Andric 5519e8d8bef9SDimitry Andric if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { 5520e8d8bef9SDimitry Andric // Don't optimize GEPs that don't have indices. 5521e8d8bef9SDimitry Andric if (!GEP->hasIndices()) 55225ffd83dbSDimitry Andric return false; 55235ffd83dbSDimitry Andric 55245ffd83dbSDimitry Andric // If the GEP and the gather/scatter aren't in the same BB, don't optimize. 55255ffd83dbSDimitry Andric // FIXME: We should support this by sinking the GEP. 
55265ffd83dbSDimitry Andric if (MemoryInst->getParent() != GEP->getParent()) 55275ffd83dbSDimitry Andric return false; 55285ffd83dbSDimitry Andric 5529e8d8bef9SDimitry Andric SmallVector<Value *, 2> Ops(GEP->operands()); 55305ffd83dbSDimitry Andric 55315ffd83dbSDimitry Andric bool RewriteGEP = false; 55325ffd83dbSDimitry Andric 55335ffd83dbSDimitry Andric if (Ops[0]->getType()->isVectorTy()) { 5534e8d8bef9SDimitry Andric Ops[0] = getSplatValue(Ops[0]); 55355ffd83dbSDimitry Andric if (!Ops[0]) 55365ffd83dbSDimitry Andric return false; 55375ffd83dbSDimitry Andric RewriteGEP = true; 55385ffd83dbSDimitry Andric } 55395ffd83dbSDimitry Andric 55405ffd83dbSDimitry Andric unsigned FinalIndex = Ops.size() - 1; 55415ffd83dbSDimitry Andric 55425ffd83dbSDimitry Andric // Ensure all but the last index is 0. 55435ffd83dbSDimitry Andric // FIXME: This isn't strictly required. All that's required is that they are 55445ffd83dbSDimitry Andric // all scalars or splats. 55455ffd83dbSDimitry Andric for (unsigned i = 1; i < FinalIndex; ++i) { 55465ffd83dbSDimitry Andric auto *C = dyn_cast<Constant>(Ops[i]); 55475ffd83dbSDimitry Andric if (!C) 55485ffd83dbSDimitry Andric return false; 55495ffd83dbSDimitry Andric if (isa<VectorType>(C->getType())) 55505ffd83dbSDimitry Andric C = C->getSplatValue(); 55515ffd83dbSDimitry Andric auto *CI = dyn_cast_or_null<ConstantInt>(C); 55525ffd83dbSDimitry Andric if (!CI || !CI->isZero()) 55535ffd83dbSDimitry Andric return false; 55545ffd83dbSDimitry Andric // Scalarize the index if needed. 55555ffd83dbSDimitry Andric Ops[i] = CI; 55565ffd83dbSDimitry Andric } 55575ffd83dbSDimitry Andric 55585ffd83dbSDimitry Andric // Try to scalarize the final index. 55595ffd83dbSDimitry Andric if (Ops[FinalIndex]->getType()->isVectorTy()) { 5560e8d8bef9SDimitry Andric if (Value *V = getSplatValue(Ops[FinalIndex])) { 55615ffd83dbSDimitry Andric auto *C = dyn_cast<ConstantInt>(V); 55625ffd83dbSDimitry Andric // Don't scalarize all zeros vector. 
55635ffd83dbSDimitry Andric if (!C || !C->isZero()) { 55645ffd83dbSDimitry Andric Ops[FinalIndex] = V; 55655ffd83dbSDimitry Andric RewriteGEP = true; 55665ffd83dbSDimitry Andric } 55675ffd83dbSDimitry Andric } 55685ffd83dbSDimitry Andric } 55695ffd83dbSDimitry Andric 55705ffd83dbSDimitry Andric // If we made any changes or the we have extra operands, we need to generate 55715ffd83dbSDimitry Andric // new instructions. 55725ffd83dbSDimitry Andric if (!RewriteGEP && Ops.size() == 2) 55735ffd83dbSDimitry Andric return false; 55745ffd83dbSDimitry Andric 5575e8d8bef9SDimitry Andric auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount(); 55765ffd83dbSDimitry Andric 55775ffd83dbSDimitry Andric IRBuilder<> Builder(MemoryInst); 55785ffd83dbSDimitry Andric 5579fe6060f1SDimitry Andric Type *SourceTy = GEP->getSourceElementType(); 55805ffd83dbSDimitry Andric Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType()); 55815ffd83dbSDimitry Andric 55825ffd83dbSDimitry Andric // If the final index isn't a vector, emit a scalar GEP containing all ops 55835ffd83dbSDimitry Andric // and a vector GEP with all zeroes final index. 
55845ffd83dbSDimitry Andric if (!Ops[FinalIndex]->getType()->isVectorTy()) { 5585fe6060f1SDimitry Andric NewAddr = Builder.CreateGEP(SourceTy, Ops[0], 5586fe6060f1SDimitry Andric makeArrayRef(Ops).drop_front()); 5587e8d8bef9SDimitry Andric auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts); 5588fe6060f1SDimitry Andric auto *SecondTy = GetElementPtrInst::getIndexedType( 5589fe6060f1SDimitry Andric SourceTy, makeArrayRef(Ops).drop_front()); 5590fe6060f1SDimitry Andric NewAddr = 5591fe6060f1SDimitry Andric Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy)); 55925ffd83dbSDimitry Andric } else { 55935ffd83dbSDimitry Andric Value *Base = Ops[0]; 55945ffd83dbSDimitry Andric Value *Index = Ops[FinalIndex]; 55955ffd83dbSDimitry Andric 55965ffd83dbSDimitry Andric // Create a scalar GEP if there are more than 2 operands. 55975ffd83dbSDimitry Andric if (Ops.size() != 2) { 55985ffd83dbSDimitry Andric // Replace the last index with 0. 55995ffd83dbSDimitry Andric Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy); 5600fe6060f1SDimitry Andric Base = Builder.CreateGEP(SourceTy, Base, 5601fe6060f1SDimitry Andric makeArrayRef(Ops).drop_front()); 5602fe6060f1SDimitry Andric SourceTy = GetElementPtrInst::getIndexedType( 5603fe6060f1SDimitry Andric SourceTy, makeArrayRef(Ops).drop_front()); 56045ffd83dbSDimitry Andric } 56055ffd83dbSDimitry Andric 56065ffd83dbSDimitry Andric // Now create the GEP with scalar pointer and vector index. 5607fe6060f1SDimitry Andric NewAddr = Builder.CreateGEP(SourceTy, Base, Index); 56085ffd83dbSDimitry Andric } 5609e8d8bef9SDimitry Andric } else if (!isa<Constant>(Ptr)) { 5610e8d8bef9SDimitry Andric // Not a GEP, maybe its a splat and we can create a GEP to enable 5611e8d8bef9SDimitry Andric // SelectionDAGBuilder to use it as a uniform base. 
5612e8d8bef9SDimitry Andric Value *V = getSplatValue(Ptr); 5613e8d8bef9SDimitry Andric if (!V) 5614e8d8bef9SDimitry Andric return false; 5615e8d8bef9SDimitry Andric 5616e8d8bef9SDimitry Andric auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount(); 5617e8d8bef9SDimitry Andric 5618e8d8bef9SDimitry Andric IRBuilder<> Builder(MemoryInst); 5619e8d8bef9SDimitry Andric 5620e8d8bef9SDimitry Andric // Emit a vector GEP with a scalar pointer and all 0s vector index. 5621e8d8bef9SDimitry Andric Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType()); 5622e8d8bef9SDimitry Andric auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts); 5623fe6060f1SDimitry Andric Type *ScalarTy; 5624fe6060f1SDimitry Andric if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() == 5625fe6060f1SDimitry Andric Intrinsic::masked_gather) { 5626fe6060f1SDimitry Andric ScalarTy = MemoryInst->getType()->getScalarType(); 5627fe6060f1SDimitry Andric } else { 5628fe6060f1SDimitry Andric assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() == 5629fe6060f1SDimitry Andric Intrinsic::masked_scatter); 5630fe6060f1SDimitry Andric ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType(); 5631fe6060f1SDimitry Andric } 5632fe6060f1SDimitry Andric NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy)); 5633e8d8bef9SDimitry Andric } else { 5634e8d8bef9SDimitry Andric // Constant, SelectionDAGBuilder knows to check if its a splat. 5635e8d8bef9SDimitry Andric return false; 5636e8d8bef9SDimitry Andric } 56375ffd83dbSDimitry Andric 56385ffd83dbSDimitry Andric MemoryInst->replaceUsesOfWith(Ptr, NewAddr); 56395ffd83dbSDimitry Andric 56405ffd83dbSDimitry Andric // If we have no uses, recursively delete the value and all dead instructions 56415ffd83dbSDimitry Andric // using it. 
56425ffd83dbSDimitry Andric if (Ptr->use_empty()) 5643e8d8bef9SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions( 5644e8d8bef9SDimitry Andric Ptr, TLInfo, nullptr, 5645e8d8bef9SDimitry Andric [&](Value *V) { removeAllAssertingVHReferences(V); }); 56465ffd83dbSDimitry Andric 56475ffd83dbSDimitry Andric return true; 56485ffd83dbSDimitry Andric } 56495ffd83dbSDimitry Andric 56500b57cec5SDimitry Andric /// If there are any memory operands, use OptimizeMemoryInst to sink their 56510b57cec5SDimitry Andric /// address computing into the block when possible / profitable. 56520b57cec5SDimitry Andric bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { 56530b57cec5SDimitry Andric bool MadeChange = false; 56540b57cec5SDimitry Andric 56550b57cec5SDimitry Andric const TargetRegisterInfo *TRI = 56560b57cec5SDimitry Andric TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo(); 56570b57cec5SDimitry Andric TargetLowering::AsmOperandInfoVector TargetConstraints = 56585ffd83dbSDimitry Andric TLI->ParseConstraints(*DL, TRI, *CS); 56590b57cec5SDimitry Andric unsigned ArgNo = 0; 56600eae32dcSDimitry Andric for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) { 56610b57cec5SDimitry Andric // Compute the constraint code and ConstraintType to use. 56620b57cec5SDimitry Andric TLI->ComputeConstraintToUse(OpInfo, SDValue()); 56630b57cec5SDimitry Andric 566481ad6265SDimitry Andric // TODO: Also handle C_Address? 
56650b57cec5SDimitry Andric if (OpInfo.ConstraintType == TargetLowering::C_Memory && 56660b57cec5SDimitry Andric OpInfo.isIndirect) { 56670b57cec5SDimitry Andric Value *OpVal = CS->getArgOperand(ArgNo++); 56680b57cec5SDimitry Andric MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); 56690b57cec5SDimitry Andric } else if (OpInfo.Type == InlineAsm::isInput) 56700b57cec5SDimitry Andric ArgNo++; 56710b57cec5SDimitry Andric } 56720b57cec5SDimitry Andric 56730b57cec5SDimitry Andric return MadeChange; 56740b57cec5SDimitry Andric } 56750b57cec5SDimitry Andric 56760b57cec5SDimitry Andric /// Check if all the uses of \p Val are equivalent (or free) zero or 56770b57cec5SDimitry Andric /// sign extensions. 56780b57cec5SDimitry Andric static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) { 56790b57cec5SDimitry Andric assert(!Val->use_empty() && "Input must have at least one use"); 56800b57cec5SDimitry Andric const Instruction *FirstUser = cast<Instruction>(*Val->user_begin()); 56810b57cec5SDimitry Andric bool IsSExt = isa<SExtInst>(FirstUser); 56820b57cec5SDimitry Andric Type *ExtTy = FirstUser->getType(); 56830b57cec5SDimitry Andric for (const User *U : Val->users()) { 56840b57cec5SDimitry Andric const Instruction *UI = cast<Instruction>(U); 56850b57cec5SDimitry Andric if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI))) 56860b57cec5SDimitry Andric return false; 56870b57cec5SDimitry Andric Type *CurTy = UI->getType(); 56880b57cec5SDimitry Andric // Same input and output types: Same instruction after CSE. 
56890b57cec5SDimitry Andric if (CurTy == ExtTy) 56900b57cec5SDimitry Andric continue; 56910b57cec5SDimitry Andric 56920b57cec5SDimitry Andric // If IsSExt is true, we are in this situation: 56930b57cec5SDimitry Andric // a = Val 56940b57cec5SDimitry Andric // b = sext ty1 a to ty2 56950b57cec5SDimitry Andric // c = sext ty1 a to ty3 56960b57cec5SDimitry Andric // Assuming ty2 is shorter than ty3, this could be turned into: 56970b57cec5SDimitry Andric // a = Val 56980b57cec5SDimitry Andric // b = sext ty1 a to ty2 56990b57cec5SDimitry Andric // c = sext ty2 b to ty3 57000b57cec5SDimitry Andric // However, the last sext is not free. 57010b57cec5SDimitry Andric if (IsSExt) 57020b57cec5SDimitry Andric return false; 57030b57cec5SDimitry Andric 57040b57cec5SDimitry Andric // This is a ZExt, maybe this is free to extend from one type to another. 57050b57cec5SDimitry Andric // In that case, we would not account for a different use. 57060b57cec5SDimitry Andric Type *NarrowTy; 57070b57cec5SDimitry Andric Type *LargeTy; 57080b57cec5SDimitry Andric if (ExtTy->getScalarType()->getIntegerBitWidth() > 57090b57cec5SDimitry Andric CurTy->getScalarType()->getIntegerBitWidth()) { 57100b57cec5SDimitry Andric NarrowTy = CurTy; 57110b57cec5SDimitry Andric LargeTy = ExtTy; 57120b57cec5SDimitry Andric } else { 57130b57cec5SDimitry Andric NarrowTy = ExtTy; 57140b57cec5SDimitry Andric LargeTy = CurTy; 57150b57cec5SDimitry Andric } 57160b57cec5SDimitry Andric 57170b57cec5SDimitry Andric if (!TLI.isZExtFree(NarrowTy, LargeTy)) 57180b57cec5SDimitry Andric return false; 57190b57cec5SDimitry Andric } 57200b57cec5SDimitry Andric // All uses are the same or can be derived from one another for free. 
57210b57cec5SDimitry Andric return true; 57220b57cec5SDimitry Andric } 57230b57cec5SDimitry Andric 57240b57cec5SDimitry Andric /// Try to speculatively promote extensions in \p Exts and continue 57250b57cec5SDimitry Andric /// promoting through newly promoted operands recursively as far as doing so is 57260b57cec5SDimitry Andric /// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts. 57270b57cec5SDimitry Andric /// When some promotion happened, \p TPT contains the proper state to revert 57280b57cec5SDimitry Andric /// them. 57290b57cec5SDimitry Andric /// 57300b57cec5SDimitry Andric /// \return true if some promotion happened, false otherwise. 57310b57cec5SDimitry Andric bool CodeGenPrepare::tryToPromoteExts( 57320b57cec5SDimitry Andric TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts, 57330b57cec5SDimitry Andric SmallVectorImpl<Instruction *> &ProfitablyMovedExts, 57340b57cec5SDimitry Andric unsigned CreatedInstsCost) { 57350b57cec5SDimitry Andric bool Promoted = false; 57360b57cec5SDimitry Andric 57370b57cec5SDimitry Andric // Iterate over all the extensions to try to promote them. 57385ffd83dbSDimitry Andric for (auto *I : Exts) { 57390b57cec5SDimitry Andric // Early check if we directly have ext(load). 57400b57cec5SDimitry Andric if (isa<LoadInst>(I->getOperand(0))) { 57410b57cec5SDimitry Andric ProfitablyMovedExts.push_back(I); 57420b57cec5SDimitry Andric continue; 57430b57cec5SDimitry Andric } 57440b57cec5SDimitry Andric 57450b57cec5SDimitry Andric // Check whether or not we want to do any promotion. The reason we have 57460b57cec5SDimitry Andric // this check inside the for loop is to catch the case where an extension 57470b57cec5SDimitry Andric // is directly fed by a load because in such case the extension can be moved 57480b57cec5SDimitry Andric // up without any promotion on its operands. 
57495ffd83dbSDimitry Andric if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion) 57500b57cec5SDimitry Andric return false; 57510b57cec5SDimitry Andric 57520b57cec5SDimitry Andric // Get the action to perform the promotion. 57530b57cec5SDimitry Andric TypePromotionHelper::Action TPH = 57540b57cec5SDimitry Andric TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts); 57550b57cec5SDimitry Andric // Check if we can promote. 57560b57cec5SDimitry Andric if (!TPH) { 57570b57cec5SDimitry Andric // Save the current extension as we cannot move up through its operand. 57580b57cec5SDimitry Andric ProfitablyMovedExts.push_back(I); 57590b57cec5SDimitry Andric continue; 57600b57cec5SDimitry Andric } 57610b57cec5SDimitry Andric 57620b57cec5SDimitry Andric // Save the current state. 57630b57cec5SDimitry Andric TypePromotionTransaction::ConstRestorationPt LastKnownGood = 57640b57cec5SDimitry Andric TPT.getRestorationPoint(); 57650b57cec5SDimitry Andric SmallVector<Instruction *, 4> NewExts; 57660b57cec5SDimitry Andric unsigned NewCreatedInstsCost = 0; 57670b57cec5SDimitry Andric unsigned ExtCost = !TLI->isExtFree(I); 57680b57cec5SDimitry Andric // Promote. 57690b57cec5SDimitry Andric Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost, 57700b57cec5SDimitry Andric &NewExts, nullptr, *TLI); 57710b57cec5SDimitry Andric assert(PromotedVal && 57720b57cec5SDimitry Andric "TypePromotionHelper should have filtered out those cases"); 57730b57cec5SDimitry Andric 57740b57cec5SDimitry Andric // We would be able to merge only one extension in a load. 57750b57cec5SDimitry Andric // Therefore, if we have more than 1 new extension we heuristically 57760b57cec5SDimitry Andric // cut this search path, because it means we degrade the code quality. 57770b57cec5SDimitry Andric // With exactly 2, the transformation is neutral, because we will merge 57780b57cec5SDimitry Andric // one extension but leave one. 
However, we optimistically keep going, 57790b57cec5SDimitry Andric // because the new extension may be removed too. 57800b57cec5SDimitry Andric long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost; 57810b57cec5SDimitry Andric // FIXME: It would be possible to propagate a negative value instead of 57820b57cec5SDimitry Andric // conservatively ceiling it to 0. 57830b57cec5SDimitry Andric TotalCreatedInstsCost = 57840b57cec5SDimitry Andric std::max((long long)0, (TotalCreatedInstsCost - ExtCost)); 57850b57cec5SDimitry Andric if (!StressExtLdPromotion && 57860b57cec5SDimitry Andric (TotalCreatedInstsCost > 1 || 57870b57cec5SDimitry Andric !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) { 57880b57cec5SDimitry Andric // This promotion is not profitable, rollback to the previous state, and 57890b57cec5SDimitry Andric // save the current extension in ProfitablyMovedExts as the latest 57900b57cec5SDimitry Andric // speculative promotion turned out to be unprofitable. 57910b57cec5SDimitry Andric TPT.rollback(LastKnownGood); 57920b57cec5SDimitry Andric ProfitablyMovedExts.push_back(I); 57930b57cec5SDimitry Andric continue; 57940b57cec5SDimitry Andric } 57950b57cec5SDimitry Andric // Continue promoting NewExts as far as doing so is profitable. 57960b57cec5SDimitry Andric SmallVector<Instruction *, 2> NewlyMovedExts; 57970b57cec5SDimitry Andric (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost); 57980b57cec5SDimitry Andric bool NewPromoted = false; 57995ffd83dbSDimitry Andric for (auto *ExtInst : NewlyMovedExts) { 58000b57cec5SDimitry Andric Instruction *MovedExt = cast<Instruction>(ExtInst); 58010b57cec5SDimitry Andric Value *ExtOperand = MovedExt->getOperand(0); 58020b57cec5SDimitry Andric // If we have reached to a load, we need this extra profitability check 58030b57cec5SDimitry Andric // as it could potentially be merged into an ext(load). 
58040b57cec5SDimitry Andric if (isa<LoadInst>(ExtOperand) && 58050b57cec5SDimitry Andric !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || 58060b57cec5SDimitry Andric (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI)))) 58070b57cec5SDimitry Andric continue; 58080b57cec5SDimitry Andric 58090b57cec5SDimitry Andric ProfitablyMovedExts.push_back(MovedExt); 58100b57cec5SDimitry Andric NewPromoted = true; 58110b57cec5SDimitry Andric } 58120b57cec5SDimitry Andric 58130b57cec5SDimitry Andric // If none of speculative promotions for NewExts is profitable, rollback 58140b57cec5SDimitry Andric // and save the current extension (I) as the last profitable extension. 58150b57cec5SDimitry Andric if (!NewPromoted) { 58160b57cec5SDimitry Andric TPT.rollback(LastKnownGood); 58170b57cec5SDimitry Andric ProfitablyMovedExts.push_back(I); 58180b57cec5SDimitry Andric continue; 58190b57cec5SDimitry Andric } 58200b57cec5SDimitry Andric // The promotion is profitable. 58210b57cec5SDimitry Andric Promoted = true; 58220b57cec5SDimitry Andric } 58230b57cec5SDimitry Andric return Promoted; 58240b57cec5SDimitry Andric } 58250b57cec5SDimitry Andric 58260b57cec5SDimitry Andric /// Merging redundant sexts when one is dominating the other. 
58270b57cec5SDimitry Andric bool CodeGenPrepare::mergeSExts(Function &F) { 58280b57cec5SDimitry Andric bool Changed = false; 58290b57cec5SDimitry Andric for (auto &Entry : ValToSExtendedUses) { 58300b57cec5SDimitry Andric SExts &Insts = Entry.second; 58310b57cec5SDimitry Andric SExts CurPts; 58320b57cec5SDimitry Andric for (Instruction *Inst : Insts) { 58330b57cec5SDimitry Andric if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) || 58340b57cec5SDimitry Andric Inst->getOperand(0) != Entry.first) 58350b57cec5SDimitry Andric continue; 58360b57cec5SDimitry Andric bool inserted = false; 58370b57cec5SDimitry Andric for (auto &Pt : CurPts) { 58380b57cec5SDimitry Andric if (getDT(F).dominates(Inst, Pt)) { 58390b57cec5SDimitry Andric Pt->replaceAllUsesWith(Inst); 58400b57cec5SDimitry Andric RemovedInsts.insert(Pt); 58410b57cec5SDimitry Andric Pt->removeFromParent(); 58420b57cec5SDimitry Andric Pt = Inst; 58430b57cec5SDimitry Andric inserted = true; 58440b57cec5SDimitry Andric Changed = true; 58450b57cec5SDimitry Andric break; 58460b57cec5SDimitry Andric } 58470b57cec5SDimitry Andric if (!getDT(F).dominates(Pt, Inst)) 58480b57cec5SDimitry Andric // Give up if we need to merge in a common dominator as the 58490b57cec5SDimitry Andric // experiments show it is not profitable. 
58500b57cec5SDimitry Andric continue; 58510b57cec5SDimitry Andric Inst->replaceAllUsesWith(Pt); 58520b57cec5SDimitry Andric RemovedInsts.insert(Inst); 58530b57cec5SDimitry Andric Inst->removeFromParent(); 58540b57cec5SDimitry Andric inserted = true; 58550b57cec5SDimitry Andric Changed = true; 58560b57cec5SDimitry Andric break; 58570b57cec5SDimitry Andric } 58580b57cec5SDimitry Andric if (!inserted) 58590b57cec5SDimitry Andric CurPts.push_back(Inst); 58600b57cec5SDimitry Andric } 58610b57cec5SDimitry Andric } 58620b57cec5SDimitry Andric return Changed; 58630b57cec5SDimitry Andric } 58640b57cec5SDimitry Andric 58655ffd83dbSDimitry Andric // Splitting large data structures so that the GEPs accessing them can have 58660b57cec5SDimitry Andric // smaller offsets so that they can be sunk to the same blocks as their users. 58675ffd83dbSDimitry Andric // For example, a large struct starting from %base is split into two parts 58680b57cec5SDimitry Andric // where the second part starts from %new_base. 
//
// Before:
// BB0:
//   %base     =
//
// BB1:
//   %gep0     = gep %base, off0
//   %gep1     = gep %base, off1
//   %gep2     = gep %base, off2
//
// BB2:
//   %load1    = load %gep0
//   %load2    = load %gep1
//   %load3    = load %gep2
//
// After:
// BB0:
//   %base     =
//   %new_base = gep %base, off0
//
// BB1:
//   %new_gep0 = %new_base
//   %new_gep1 = gep %new_base, off1 - off0
//   %new_gep2 = gep %new_base, off2 - off0
//
// BB2:
//   %load1    = load i32, i32* %new_gep0
//   %load2    = load i32, i32* %new_gep1
//   %load3    = load i32, i32* %new_gep2
//
// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
// their offsets are small enough to fit into the addressing mode.
59010b57cec5SDimitry Andric bool CodeGenPrepare::splitLargeGEPOffsets() { 59020b57cec5SDimitry Andric bool Changed = false; 59030b57cec5SDimitry Andric for (auto &Entry : LargeOffsetGEPMap) { 59040b57cec5SDimitry Andric Value *OldBase = Entry.first; 59050b57cec5SDimitry Andric SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>> 59060b57cec5SDimitry Andric &LargeOffsetGEPs = Entry.second; 59070b57cec5SDimitry Andric auto compareGEPOffset = 59080b57cec5SDimitry Andric [&](const std::pair<GetElementPtrInst *, int64_t> &LHS, 59090b57cec5SDimitry Andric const std::pair<GetElementPtrInst *, int64_t> &RHS) { 59100b57cec5SDimitry Andric if (LHS.first == RHS.first) 59110b57cec5SDimitry Andric return false; 59120b57cec5SDimitry Andric if (LHS.second != RHS.second) 59130b57cec5SDimitry Andric return LHS.second < RHS.second; 59140b57cec5SDimitry Andric return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first]; 59150b57cec5SDimitry Andric }; 59160b57cec5SDimitry Andric // Sorting all the GEPs of the same data structures based on the offsets. 59170b57cec5SDimitry Andric llvm::sort(LargeOffsetGEPs, compareGEPOffset); 59180b57cec5SDimitry Andric LargeOffsetGEPs.erase( 59190b57cec5SDimitry Andric std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()), 59200b57cec5SDimitry Andric LargeOffsetGEPs.end()); 59210b57cec5SDimitry Andric // Skip if all the GEPs have the same offsets. 
59220b57cec5SDimitry Andric if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second) 59230b57cec5SDimitry Andric continue; 59240b57cec5SDimitry Andric GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first; 59250b57cec5SDimitry Andric int64_t BaseOffset = LargeOffsetGEPs.begin()->second; 59260b57cec5SDimitry Andric Value *NewBaseGEP = nullptr; 59270b57cec5SDimitry Andric 59285ffd83dbSDimitry Andric auto *LargeOffsetGEP = LargeOffsetGEPs.begin(); 59290b57cec5SDimitry Andric while (LargeOffsetGEP != LargeOffsetGEPs.end()) { 59300b57cec5SDimitry Andric GetElementPtrInst *GEP = LargeOffsetGEP->first; 59310b57cec5SDimitry Andric int64_t Offset = LargeOffsetGEP->second; 59320b57cec5SDimitry Andric if (Offset != BaseOffset) { 59330b57cec5SDimitry Andric TargetLowering::AddrMode AddrMode; 59340b57cec5SDimitry Andric AddrMode.BaseOffs = Offset - BaseOffset; 59350b57cec5SDimitry Andric // The result type of the GEP might not be the type of the memory 59360b57cec5SDimitry Andric // access. 59370b57cec5SDimitry Andric if (!TLI->isLegalAddressingMode(*DL, AddrMode, 59380b57cec5SDimitry Andric GEP->getResultElementType(), 59390b57cec5SDimitry Andric GEP->getAddressSpace())) { 59400b57cec5SDimitry Andric // We need to create a new base if the offset to the current base is 59410b57cec5SDimitry Andric // too large to fit into the addressing mode. So, a very large struct 59425ffd83dbSDimitry Andric // may be split into several parts. 59430b57cec5SDimitry Andric BaseGEP = GEP; 59440b57cec5SDimitry Andric BaseOffset = Offset; 59450b57cec5SDimitry Andric NewBaseGEP = nullptr; 59460b57cec5SDimitry Andric } 59470b57cec5SDimitry Andric } 59480b57cec5SDimitry Andric 59490b57cec5SDimitry Andric // Generate a new GEP to replace the current one. 
59500b57cec5SDimitry Andric LLVMContext &Ctx = GEP->getContext(); 59510b57cec5SDimitry Andric Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); 59520b57cec5SDimitry Andric Type *I8PtrTy = 59530b57cec5SDimitry Andric Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace()); 59540b57cec5SDimitry Andric Type *I8Ty = Type::getInt8Ty(Ctx); 59550b57cec5SDimitry Andric 59560b57cec5SDimitry Andric if (!NewBaseGEP) { 59570b57cec5SDimitry Andric // Create a new base if we don't have one yet. Find the insertion 59580b57cec5SDimitry Andric // pointer for the new base first. 59590b57cec5SDimitry Andric BasicBlock::iterator NewBaseInsertPt; 59600b57cec5SDimitry Andric BasicBlock *NewBaseInsertBB; 59610b57cec5SDimitry Andric if (auto *BaseI = dyn_cast<Instruction>(OldBase)) { 59620b57cec5SDimitry Andric // If the base of the struct is an instruction, the new base will be 59630b57cec5SDimitry Andric // inserted close to it. 59640b57cec5SDimitry Andric NewBaseInsertBB = BaseI->getParent(); 59650b57cec5SDimitry Andric if (isa<PHINode>(BaseI)) 59660b57cec5SDimitry Andric NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); 59670b57cec5SDimitry Andric else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) { 59680b57cec5SDimitry Andric NewBaseInsertBB = 59690b57cec5SDimitry Andric SplitEdge(NewBaseInsertBB, Invoke->getNormalDest()); 59700b57cec5SDimitry Andric NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); 59710b57cec5SDimitry Andric } else 59720b57cec5SDimitry Andric NewBaseInsertPt = std::next(BaseI->getIterator()); 59730b57cec5SDimitry Andric } else { 59740b57cec5SDimitry Andric // If the current base is an argument or global value, the new base 59750b57cec5SDimitry Andric // will be inserted to the entry block. 
59760b57cec5SDimitry Andric NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock(); 59770b57cec5SDimitry Andric NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); 59780b57cec5SDimitry Andric } 59790b57cec5SDimitry Andric IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt); 59800b57cec5SDimitry Andric // Create a new base. 59810b57cec5SDimitry Andric Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset); 59820b57cec5SDimitry Andric NewBaseGEP = OldBase; 59830b57cec5SDimitry Andric if (NewBaseGEP->getType() != I8PtrTy) 59840b57cec5SDimitry Andric NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy); 59850b57cec5SDimitry Andric NewBaseGEP = 59860b57cec5SDimitry Andric NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep"); 59870b57cec5SDimitry Andric NewGEPBases.insert(NewBaseGEP); 59880b57cec5SDimitry Andric } 59890b57cec5SDimitry Andric 59900b57cec5SDimitry Andric IRBuilder<> Builder(GEP); 59910b57cec5SDimitry Andric Value *NewGEP = NewBaseGEP; 59920b57cec5SDimitry Andric if (Offset == BaseOffset) { 59930b57cec5SDimitry Andric if (GEP->getType() != I8PtrTy) 59940b57cec5SDimitry Andric NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType()); 59950b57cec5SDimitry Andric } else { 59960b57cec5SDimitry Andric // Calculate the new offset for the new GEP. 
59970b57cec5SDimitry Andric Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset); 59980b57cec5SDimitry Andric NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index); 59990b57cec5SDimitry Andric 60000b57cec5SDimitry Andric if (GEP->getType() != I8PtrTy) 60010b57cec5SDimitry Andric NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType()); 60020b57cec5SDimitry Andric } 60030b57cec5SDimitry Andric GEP->replaceAllUsesWith(NewGEP); 60040b57cec5SDimitry Andric LargeOffsetGEPID.erase(GEP); 60050b57cec5SDimitry Andric LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP); 60060b57cec5SDimitry Andric GEP->eraseFromParent(); 60070b57cec5SDimitry Andric Changed = true; 60080b57cec5SDimitry Andric } 60090b57cec5SDimitry Andric } 60100b57cec5SDimitry Andric return Changed; 60110b57cec5SDimitry Andric } 60120b57cec5SDimitry Andric 60135ffd83dbSDimitry Andric bool CodeGenPrepare::optimizePhiType( 60145ffd83dbSDimitry Andric PHINode *I, SmallPtrSetImpl<PHINode *> &Visited, 60155ffd83dbSDimitry Andric SmallPtrSetImpl<Instruction *> &DeletedInstrs) { 60165ffd83dbSDimitry Andric // We are looking for a collection on interconnected phi nodes that together 60175ffd83dbSDimitry Andric // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts 60185ffd83dbSDimitry Andric // are of the same type. Convert the whole set of nodes to the type of the 60195ffd83dbSDimitry Andric // bitcast. 
60205ffd83dbSDimitry Andric Type *PhiTy = I->getType(); 60215ffd83dbSDimitry Andric Type *ConvertTy = nullptr; 60225ffd83dbSDimitry Andric if (Visited.count(I) || 60235ffd83dbSDimitry Andric (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy())) 60245ffd83dbSDimitry Andric return false; 60255ffd83dbSDimitry Andric 60265ffd83dbSDimitry Andric SmallVector<Instruction *, 4> Worklist; 60275ffd83dbSDimitry Andric Worklist.push_back(cast<Instruction>(I)); 60285ffd83dbSDimitry Andric SmallPtrSet<PHINode *, 4> PhiNodes; 60295ffd83dbSDimitry Andric PhiNodes.insert(I); 60305ffd83dbSDimitry Andric Visited.insert(I); 60315ffd83dbSDimitry Andric SmallPtrSet<Instruction *, 4> Defs; 60325ffd83dbSDimitry Andric SmallPtrSet<Instruction *, 4> Uses; 6033e8d8bef9SDimitry Andric // This works by adding extra bitcasts between load/stores and removing 6034e8d8bef9SDimitry Andric // existing bicasts. If we have a phi(bitcast(load)) or a store(bitcast(phi)) 6035e8d8bef9SDimitry Andric // we can get in the situation where we remove a bitcast in one iteration 6036e8d8bef9SDimitry Andric // just to add it again in the next. We need to ensure that at least one 6037e8d8bef9SDimitry Andric // bitcast we remove are anchored to something that will not change back. 
6038e8d8bef9SDimitry Andric bool AnyAnchored = false; 60395ffd83dbSDimitry Andric 60405ffd83dbSDimitry Andric while (!Worklist.empty()) { 60415ffd83dbSDimitry Andric Instruction *II = Worklist.pop_back_val(); 60425ffd83dbSDimitry Andric 60435ffd83dbSDimitry Andric if (auto *Phi = dyn_cast<PHINode>(II)) { 60445ffd83dbSDimitry Andric // Handle Defs, which might also be PHI's 60455ffd83dbSDimitry Andric for (Value *V : Phi->incoming_values()) { 60465ffd83dbSDimitry Andric if (auto *OpPhi = dyn_cast<PHINode>(V)) { 60475ffd83dbSDimitry Andric if (!PhiNodes.count(OpPhi)) { 604881ad6265SDimitry Andric if (!Visited.insert(OpPhi).second) 60495ffd83dbSDimitry Andric return false; 60505ffd83dbSDimitry Andric PhiNodes.insert(OpPhi); 60515ffd83dbSDimitry Andric Worklist.push_back(OpPhi); 60525ffd83dbSDimitry Andric } 60535ffd83dbSDimitry Andric } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) { 6054e8d8bef9SDimitry Andric if (!OpLoad->isSimple()) 6055e8d8bef9SDimitry Andric return false; 605681ad6265SDimitry Andric if (Defs.insert(OpLoad).second) 60575ffd83dbSDimitry Andric Worklist.push_back(OpLoad); 60585ffd83dbSDimitry Andric } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) { 605981ad6265SDimitry Andric if (Defs.insert(OpEx).second) 60605ffd83dbSDimitry Andric Worklist.push_back(OpEx); 60615ffd83dbSDimitry Andric } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) { 60625ffd83dbSDimitry Andric if (!ConvertTy) 60635ffd83dbSDimitry Andric ConvertTy = OpBC->getOperand(0)->getType(); 60645ffd83dbSDimitry Andric if (OpBC->getOperand(0)->getType() != ConvertTy) 60655ffd83dbSDimitry Andric return false; 606681ad6265SDimitry Andric if (Defs.insert(OpBC).second) { 60675ffd83dbSDimitry Andric Worklist.push_back(OpBC); 6068e8d8bef9SDimitry Andric AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) && 6069e8d8bef9SDimitry Andric !isa<ExtractElementInst>(OpBC->getOperand(0)); 60705ffd83dbSDimitry Andric } 6071e8d8bef9SDimitry Andric } else if (!isa<UndefValue>(V)) { 
60725ffd83dbSDimitry Andric return false; 60735ffd83dbSDimitry Andric } 60745ffd83dbSDimitry Andric } 6075e8d8bef9SDimitry Andric } 60765ffd83dbSDimitry Andric 60775ffd83dbSDimitry Andric // Handle uses which might also be phi's 60785ffd83dbSDimitry Andric for (User *V : II->users()) { 60795ffd83dbSDimitry Andric if (auto *OpPhi = dyn_cast<PHINode>(V)) { 60805ffd83dbSDimitry Andric if (!PhiNodes.count(OpPhi)) { 60815ffd83dbSDimitry Andric if (Visited.count(OpPhi)) 60825ffd83dbSDimitry Andric return false; 60835ffd83dbSDimitry Andric PhiNodes.insert(OpPhi); 60845ffd83dbSDimitry Andric Visited.insert(OpPhi); 60855ffd83dbSDimitry Andric Worklist.push_back(OpPhi); 60865ffd83dbSDimitry Andric } 60875ffd83dbSDimitry Andric } else if (auto *OpStore = dyn_cast<StoreInst>(V)) { 6088e8d8bef9SDimitry Andric if (!OpStore->isSimple() || OpStore->getOperand(0) != II) 60895ffd83dbSDimitry Andric return false; 60905ffd83dbSDimitry Andric Uses.insert(OpStore); 60915ffd83dbSDimitry Andric } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) { 60925ffd83dbSDimitry Andric if (!ConvertTy) 60935ffd83dbSDimitry Andric ConvertTy = OpBC->getType(); 60945ffd83dbSDimitry Andric if (OpBC->getType() != ConvertTy) 60955ffd83dbSDimitry Andric return false; 60965ffd83dbSDimitry Andric Uses.insert(OpBC); 6097e8d8bef9SDimitry Andric AnyAnchored |= 6098e8d8bef9SDimitry Andric any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); }); 6099e8d8bef9SDimitry Andric } else { 61005ffd83dbSDimitry Andric return false; 61015ffd83dbSDimitry Andric } 61025ffd83dbSDimitry Andric } 6103e8d8bef9SDimitry Andric } 61045ffd83dbSDimitry Andric 6105e8d8bef9SDimitry Andric if (!ConvertTy || !AnyAnchored || !TLI->shouldConvertPhiType(PhiTy, ConvertTy)) 61065ffd83dbSDimitry Andric return false; 61075ffd83dbSDimitry Andric 61085ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to " 61095ffd83dbSDimitry Andric << *ConvertTy << "\n"); 61105ffd83dbSDimitry Andric 
61115ffd83dbSDimitry Andric // Create all the new phi nodes of the new type, and bitcast any loads to the 61125ffd83dbSDimitry Andric // correct type. 61135ffd83dbSDimitry Andric ValueToValueMap ValMap; 61145ffd83dbSDimitry Andric ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy); 61155ffd83dbSDimitry Andric for (Instruction *D : Defs) { 6116e8d8bef9SDimitry Andric if (isa<BitCastInst>(D)) { 61175ffd83dbSDimitry Andric ValMap[D] = D->getOperand(0); 6118e8d8bef9SDimitry Andric DeletedInstrs.insert(D); 6119e8d8bef9SDimitry Andric } else { 61205ffd83dbSDimitry Andric ValMap[D] = 61215ffd83dbSDimitry Andric new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode()); 61225ffd83dbSDimitry Andric } 6123e8d8bef9SDimitry Andric } 61245ffd83dbSDimitry Andric for (PHINode *Phi : PhiNodes) 61255ffd83dbSDimitry Andric ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(), 61265ffd83dbSDimitry Andric Phi->getName() + ".tc", Phi); 61275ffd83dbSDimitry Andric // Pipe together all the PhiNodes. 
61285ffd83dbSDimitry Andric for (PHINode *Phi : PhiNodes) { 61295ffd83dbSDimitry Andric PHINode *NewPhi = cast<PHINode>(ValMap[Phi]); 61305ffd83dbSDimitry Andric for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++) 61315ffd83dbSDimitry Andric NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)], 61325ffd83dbSDimitry Andric Phi->getIncomingBlock(i)); 6133e8d8bef9SDimitry Andric Visited.insert(NewPhi); 61345ffd83dbSDimitry Andric } 61355ffd83dbSDimitry Andric // And finally pipe up the stores and bitcasts 61365ffd83dbSDimitry Andric for (Instruction *U : Uses) { 61375ffd83dbSDimitry Andric if (isa<BitCastInst>(U)) { 61385ffd83dbSDimitry Andric DeletedInstrs.insert(U); 61395ffd83dbSDimitry Andric U->replaceAllUsesWith(ValMap[U->getOperand(0)]); 6140e8d8bef9SDimitry Andric } else { 61415ffd83dbSDimitry Andric U->setOperand(0, 61425ffd83dbSDimitry Andric new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U)); 61435ffd83dbSDimitry Andric } 6144e8d8bef9SDimitry Andric } 61455ffd83dbSDimitry Andric 61465ffd83dbSDimitry Andric // Save the removed phis to be deleted later. 61475ffd83dbSDimitry Andric for (PHINode *Phi : PhiNodes) 61485ffd83dbSDimitry Andric DeletedInstrs.insert(Phi); 61495ffd83dbSDimitry Andric return true; 61505ffd83dbSDimitry Andric } 61515ffd83dbSDimitry Andric 61525ffd83dbSDimitry Andric bool CodeGenPrepare::optimizePhiTypes(Function &F) { 61535ffd83dbSDimitry Andric if (!OptimizePhiTypes) 61545ffd83dbSDimitry Andric return false; 61555ffd83dbSDimitry Andric 61565ffd83dbSDimitry Andric bool Changed = false; 61575ffd83dbSDimitry Andric SmallPtrSet<PHINode *, 4> Visited; 61585ffd83dbSDimitry Andric SmallPtrSet<Instruction *, 4> DeletedInstrs; 61595ffd83dbSDimitry Andric 61605ffd83dbSDimitry Andric // Attempt to optimize all the phis in the functions to the correct type. 
61615ffd83dbSDimitry Andric for (auto &BB : F) 61625ffd83dbSDimitry Andric for (auto &Phi : BB.phis()) 61635ffd83dbSDimitry Andric Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs); 61645ffd83dbSDimitry Andric 61655ffd83dbSDimitry Andric // Remove any old phi's that have been converted. 61665ffd83dbSDimitry Andric for (auto *I : DeletedInstrs) { 616781ad6265SDimitry Andric I->replaceAllUsesWith(PoisonValue::get(I->getType())); 61685ffd83dbSDimitry Andric I->eraseFromParent(); 61695ffd83dbSDimitry Andric } 61705ffd83dbSDimitry Andric 61715ffd83dbSDimitry Andric return Changed; 61725ffd83dbSDimitry Andric } 61735ffd83dbSDimitry Andric 61740b57cec5SDimitry Andric /// Return true, if an ext(load) can be formed from an extension in 61750b57cec5SDimitry Andric /// \p MovedExts. 61760b57cec5SDimitry Andric bool CodeGenPrepare::canFormExtLd( 61770b57cec5SDimitry Andric const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI, 61780b57cec5SDimitry Andric Instruction *&Inst, bool HasPromoted) { 61790b57cec5SDimitry Andric for (auto *MovedExtInst : MovedExts) { 61800b57cec5SDimitry Andric if (isa<LoadInst>(MovedExtInst->getOperand(0))) { 61810b57cec5SDimitry Andric LI = cast<LoadInst>(MovedExtInst->getOperand(0)); 61820b57cec5SDimitry Andric Inst = MovedExtInst; 61830b57cec5SDimitry Andric break; 61840b57cec5SDimitry Andric } 61850b57cec5SDimitry Andric } 61860b57cec5SDimitry Andric if (!LI) 61870b57cec5SDimitry Andric return false; 61880b57cec5SDimitry Andric 61890b57cec5SDimitry Andric // If they're already in the same block, there's nothing to do. 61900b57cec5SDimitry Andric // Make the cheap checks first if we did not promote. 61910b57cec5SDimitry Andric // If we promoted, we need to check if it is indeed profitable. 
61920b57cec5SDimitry Andric if (!HasPromoted && LI->getParent() == Inst->getParent()) 61930b57cec5SDimitry Andric return false; 61940b57cec5SDimitry Andric 61950b57cec5SDimitry Andric return TLI->isExtLoad(LI, Inst, *DL); 61960b57cec5SDimitry Andric } 61970b57cec5SDimitry Andric 61980b57cec5SDimitry Andric /// Move a zext or sext fed by a load into the same basic block as the load, 61990b57cec5SDimitry Andric /// unless conditions are unfavorable. This allows SelectionDAG to fold the 62000b57cec5SDimitry Andric /// extend into the load. 62010b57cec5SDimitry Andric /// 62020b57cec5SDimitry Andric /// E.g., 62030b57cec5SDimitry Andric /// \code 62040b57cec5SDimitry Andric /// %ld = load i32* %addr 62050b57cec5SDimitry Andric /// %add = add nuw i32 %ld, 4 62060b57cec5SDimitry Andric /// %zext = zext i32 %add to i64 62070b57cec5SDimitry Andric // \endcode 62080b57cec5SDimitry Andric /// => 62090b57cec5SDimitry Andric /// \code 62100b57cec5SDimitry Andric /// %ld = load i32* %addr 62110b57cec5SDimitry Andric /// %zext = zext i32 %ld to i64 62120b57cec5SDimitry Andric /// %add = add nuw i64 %zext, 4 62130b57cec5SDimitry Andric /// \encode 62140b57cec5SDimitry Andric /// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which 62150b57cec5SDimitry Andric /// allow us to match zext(load i32*) to i64. 62160b57cec5SDimitry Andric /// 62170b57cec5SDimitry Andric /// Also, try to promote the computations used to obtain a sign extended 62180b57cec5SDimitry Andric /// value used into memory accesses. 
62190b57cec5SDimitry Andric /// E.g., 62200b57cec5SDimitry Andric /// \code 62210b57cec5SDimitry Andric /// a = add nsw i32 b, 3 62220b57cec5SDimitry Andric /// d = sext i32 a to i64 62230b57cec5SDimitry Andric /// e = getelementptr ..., i64 d 62240b57cec5SDimitry Andric /// \endcode 62250b57cec5SDimitry Andric /// => 62260b57cec5SDimitry Andric /// \code 62270b57cec5SDimitry Andric /// f = sext i32 b to i64 62280b57cec5SDimitry Andric /// a = add nsw i64 f, 3 62290b57cec5SDimitry Andric /// e = getelementptr ..., i64 a 62300b57cec5SDimitry Andric /// \endcode 62310b57cec5SDimitry Andric /// 62320b57cec5SDimitry Andric /// \p Inst[in/out] the extension may be modified during the process if some 62330b57cec5SDimitry Andric /// promotions apply. 62340b57cec5SDimitry Andric bool CodeGenPrepare::optimizeExt(Instruction *&Inst) { 62350b57cec5SDimitry Andric bool AllowPromotionWithoutCommonHeader = false; 62360b57cec5SDimitry Andric /// See if it is an interesting sext operations for the address type 62370b57cec5SDimitry Andric /// promotion before trying to promote it, e.g., the ones with the right 62380b57cec5SDimitry Andric /// type and used in memory accesses. 62390b57cec5SDimitry Andric bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion( 62400b57cec5SDimitry Andric *Inst, AllowPromotionWithoutCommonHeader); 62410b57cec5SDimitry Andric TypePromotionTransaction TPT(RemovedInsts); 62420b57cec5SDimitry Andric TypePromotionTransaction::ConstRestorationPt LastKnownGood = 62430b57cec5SDimitry Andric TPT.getRestorationPoint(); 62440b57cec5SDimitry Andric SmallVector<Instruction *, 1> Exts; 62450b57cec5SDimitry Andric SmallVector<Instruction *, 2> SpeculativelyMovedExts; 62460b57cec5SDimitry Andric Exts.push_back(Inst); 62470b57cec5SDimitry Andric 62480b57cec5SDimitry Andric bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts); 62490b57cec5SDimitry Andric 62500b57cec5SDimitry Andric // Look for a load being extended. 
62510b57cec5SDimitry Andric LoadInst *LI = nullptr; 62520b57cec5SDimitry Andric Instruction *ExtFedByLoad; 62530b57cec5SDimitry Andric 62540b57cec5SDimitry Andric // Try to promote a chain of computation if it allows to form an extended 62550b57cec5SDimitry Andric // load. 62560b57cec5SDimitry Andric if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) { 62570b57cec5SDimitry Andric assert(LI && ExtFedByLoad && "Expect a valid load and extension"); 62580b57cec5SDimitry Andric TPT.commit(); 62595ffd83dbSDimitry Andric // Move the extend into the same block as the load. 62600b57cec5SDimitry Andric ExtFedByLoad->moveAfter(LI); 62610b57cec5SDimitry Andric ++NumExtsMoved; 62620b57cec5SDimitry Andric Inst = ExtFedByLoad; 62630b57cec5SDimitry Andric return true; 62640b57cec5SDimitry Andric } 62650b57cec5SDimitry Andric 62660b57cec5SDimitry Andric // Continue promoting SExts if known as considerable depending on targets. 62670b57cec5SDimitry Andric if (ATPConsiderable && 62680b57cec5SDimitry Andric performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader, 62690b57cec5SDimitry Andric HasPromoted, TPT, SpeculativelyMovedExts)) 62700b57cec5SDimitry Andric return true; 62710b57cec5SDimitry Andric 62720b57cec5SDimitry Andric TPT.rollback(LastKnownGood); 62730b57cec5SDimitry Andric return false; 62740b57cec5SDimitry Andric } 62750b57cec5SDimitry Andric 62760b57cec5SDimitry Andric // Perform address type promotion if doing so is profitable. 62770b57cec5SDimitry Andric // If AllowPromotionWithoutCommonHeader == false, we should find other sext 62780b57cec5SDimitry Andric // instructions that sign extended the same initial value. However, if 62790b57cec5SDimitry Andric // AllowPromotionWithoutCommonHeader == true, we expect promoting the 62800b57cec5SDimitry Andric // extension is just profitable. 
62810b57cec5SDimitry Andric bool CodeGenPrepare::performAddressTypePromotion( 62820b57cec5SDimitry Andric Instruction *&Inst, bool AllowPromotionWithoutCommonHeader, 62830b57cec5SDimitry Andric bool HasPromoted, TypePromotionTransaction &TPT, 62840b57cec5SDimitry Andric SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) { 62850b57cec5SDimitry Andric bool Promoted = false; 62860b57cec5SDimitry Andric SmallPtrSet<Instruction *, 1> UnhandledExts; 62870b57cec5SDimitry Andric bool AllSeenFirst = true; 62885ffd83dbSDimitry Andric for (auto *I : SpeculativelyMovedExts) { 62890b57cec5SDimitry Andric Value *HeadOfChain = I->getOperand(0); 62900b57cec5SDimitry Andric DenseMap<Value *, Instruction *>::iterator AlreadySeen = 62910b57cec5SDimitry Andric SeenChainsForSExt.find(HeadOfChain); 62920b57cec5SDimitry Andric // If there is an unhandled SExt which has the same header, try to promote 62930b57cec5SDimitry Andric // it as well. 62940b57cec5SDimitry Andric if (AlreadySeen != SeenChainsForSExt.end()) { 62950b57cec5SDimitry Andric if (AlreadySeen->second != nullptr) 62960b57cec5SDimitry Andric UnhandledExts.insert(AlreadySeen->second); 62970b57cec5SDimitry Andric AllSeenFirst = false; 62980b57cec5SDimitry Andric } 62990b57cec5SDimitry Andric } 63000b57cec5SDimitry Andric 63010b57cec5SDimitry Andric if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader && 63020b57cec5SDimitry Andric SpeculativelyMovedExts.size() == 1)) { 63030b57cec5SDimitry Andric TPT.commit(); 63040b57cec5SDimitry Andric if (HasPromoted) 63050b57cec5SDimitry Andric Promoted = true; 63065ffd83dbSDimitry Andric for (auto *I : SpeculativelyMovedExts) { 63070b57cec5SDimitry Andric Value *HeadOfChain = I->getOperand(0); 63080b57cec5SDimitry Andric SeenChainsForSExt[HeadOfChain] = nullptr; 63090b57cec5SDimitry Andric ValToSExtendedUses[HeadOfChain].push_back(I); 63100b57cec5SDimitry Andric } 63110b57cec5SDimitry Andric // Update Inst as promotion happen. 
63120b57cec5SDimitry Andric Inst = SpeculativelyMovedExts.pop_back_val(); 63130b57cec5SDimitry Andric } else { 63140b57cec5SDimitry Andric // This is the first chain visited from the header, keep the current chain 63150b57cec5SDimitry Andric // as unhandled. Defer to promote this until we encounter another SExt 63160b57cec5SDimitry Andric // chain derived from the same header. 63175ffd83dbSDimitry Andric for (auto *I : SpeculativelyMovedExts) { 63180b57cec5SDimitry Andric Value *HeadOfChain = I->getOperand(0); 63190b57cec5SDimitry Andric SeenChainsForSExt[HeadOfChain] = Inst; 63200b57cec5SDimitry Andric } 63210b57cec5SDimitry Andric return false; 63220b57cec5SDimitry Andric } 63230b57cec5SDimitry Andric 63240b57cec5SDimitry Andric if (!AllSeenFirst && !UnhandledExts.empty()) 63255ffd83dbSDimitry Andric for (auto *VisitedSExt : UnhandledExts) { 63260b57cec5SDimitry Andric if (RemovedInsts.count(VisitedSExt)) 63270b57cec5SDimitry Andric continue; 63280b57cec5SDimitry Andric TypePromotionTransaction TPT(RemovedInsts); 63290b57cec5SDimitry Andric SmallVector<Instruction *, 1> Exts; 63300b57cec5SDimitry Andric SmallVector<Instruction *, 2> Chains; 63310b57cec5SDimitry Andric Exts.push_back(VisitedSExt); 63320b57cec5SDimitry Andric bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains); 63330b57cec5SDimitry Andric TPT.commit(); 63340b57cec5SDimitry Andric if (HasPromoted) 63350b57cec5SDimitry Andric Promoted = true; 63365ffd83dbSDimitry Andric for (auto *I : Chains) { 63370b57cec5SDimitry Andric Value *HeadOfChain = I->getOperand(0); 63380b57cec5SDimitry Andric // Mark this as handled. 
63390b57cec5SDimitry Andric SeenChainsForSExt[HeadOfChain] = nullptr; 63400b57cec5SDimitry Andric ValToSExtendedUses[HeadOfChain].push_back(I); 63410b57cec5SDimitry Andric } 63420b57cec5SDimitry Andric } 63430b57cec5SDimitry Andric return Promoted; 63440b57cec5SDimitry Andric } 63450b57cec5SDimitry Andric 63460b57cec5SDimitry Andric bool CodeGenPrepare::optimizeExtUses(Instruction *I) { 63470b57cec5SDimitry Andric BasicBlock *DefBB = I->getParent(); 63480b57cec5SDimitry Andric 63490b57cec5SDimitry Andric // If the result of a {s|z}ext and its source are both live out, rewrite all 63500b57cec5SDimitry Andric // other uses of the source with result of extension. 63510b57cec5SDimitry Andric Value *Src = I->getOperand(0); 63520b57cec5SDimitry Andric if (Src->hasOneUse()) 63530b57cec5SDimitry Andric return false; 63540b57cec5SDimitry Andric 63550b57cec5SDimitry Andric // Only do this xform if truncating is free. 63565ffd83dbSDimitry Andric if (!TLI->isTruncateFree(I->getType(), Src->getType())) 63570b57cec5SDimitry Andric return false; 63580b57cec5SDimitry Andric 63590b57cec5SDimitry Andric // Only safe to perform the optimization if the source is also defined in 63600b57cec5SDimitry Andric // this block. 63610b57cec5SDimitry Andric if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent()) 63620b57cec5SDimitry Andric return false; 63630b57cec5SDimitry Andric 63640b57cec5SDimitry Andric bool DefIsLiveOut = false; 63650b57cec5SDimitry Andric for (User *U : I->users()) { 63660b57cec5SDimitry Andric Instruction *UI = cast<Instruction>(U); 63670b57cec5SDimitry Andric 63680b57cec5SDimitry Andric // Figure out which BB this ext is used in. 
63690b57cec5SDimitry Andric BasicBlock *UserBB = UI->getParent(); 63700b57cec5SDimitry Andric if (UserBB == DefBB) continue; 63710b57cec5SDimitry Andric DefIsLiveOut = true; 63720b57cec5SDimitry Andric break; 63730b57cec5SDimitry Andric } 63740b57cec5SDimitry Andric if (!DefIsLiveOut) 63750b57cec5SDimitry Andric return false; 63760b57cec5SDimitry Andric 63770b57cec5SDimitry Andric // Make sure none of the uses are PHI nodes. 63780b57cec5SDimitry Andric for (User *U : Src->users()) { 63790b57cec5SDimitry Andric Instruction *UI = cast<Instruction>(U); 63800b57cec5SDimitry Andric BasicBlock *UserBB = UI->getParent(); 63810b57cec5SDimitry Andric if (UserBB == DefBB) continue; 63820b57cec5SDimitry Andric // Be conservative. We don't want this xform to end up introducing 63830b57cec5SDimitry Andric // reloads just before load / store instructions. 63840b57cec5SDimitry Andric if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI)) 63850b57cec5SDimitry Andric return false; 63860b57cec5SDimitry Andric } 63870b57cec5SDimitry Andric 63880b57cec5SDimitry Andric // InsertedTruncs - Only insert one trunc in each block once. 63890b57cec5SDimitry Andric DenseMap<BasicBlock*, Instruction*> InsertedTruncs; 63900b57cec5SDimitry Andric 63910b57cec5SDimitry Andric bool MadeChange = false; 63920b57cec5SDimitry Andric for (Use &U : Src->uses()) { 63930b57cec5SDimitry Andric Instruction *User = cast<Instruction>(U.getUser()); 63940b57cec5SDimitry Andric 63950b57cec5SDimitry Andric // Figure out which BB this ext is used in. 63960b57cec5SDimitry Andric BasicBlock *UserBB = User->getParent(); 63970b57cec5SDimitry Andric if (UserBB == DefBB) continue; 63980b57cec5SDimitry Andric 63990b57cec5SDimitry Andric // Both src and def are live in this block. Rewrite the use. 
64000b57cec5SDimitry Andric Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; 64010b57cec5SDimitry Andric 64020b57cec5SDimitry Andric if (!InsertedTrunc) { 64030b57cec5SDimitry Andric BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 64040b57cec5SDimitry Andric assert(InsertPt != UserBB->end()); 64050b57cec5SDimitry Andric InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt); 64060b57cec5SDimitry Andric InsertedInsts.insert(InsertedTrunc); 64070b57cec5SDimitry Andric } 64080b57cec5SDimitry Andric 64090b57cec5SDimitry Andric // Replace a use of the {s|z}ext source with a use of the result. 64100b57cec5SDimitry Andric U = InsertedTrunc; 64110b57cec5SDimitry Andric ++NumExtUses; 64120b57cec5SDimitry Andric MadeChange = true; 64130b57cec5SDimitry Andric } 64140b57cec5SDimitry Andric 64150b57cec5SDimitry Andric return MadeChange; 64160b57cec5SDimitry Andric } 64170b57cec5SDimitry Andric 64180b57cec5SDimitry Andric // Find loads whose uses only use some of the loaded value's bits. Add an "and" 64190b57cec5SDimitry Andric // just after the load if the target can fold this into one extload instruction, 64200b57cec5SDimitry Andric // with the hope of eliminating some of the other later "and" instructions using 64210b57cec5SDimitry Andric // the loaded value. "and"s that are made trivially redundant by the insertion 64220b57cec5SDimitry Andric // of the new "and" are removed by this function, while others (e.g. those whose 64230b57cec5SDimitry Andric // path from the load goes through a phi) are left for isel to potentially 64240b57cec5SDimitry Andric // remove. 64250b57cec5SDimitry Andric // 64260b57cec5SDimitry Andric // For example: 64270b57cec5SDimitry Andric // 64280b57cec5SDimitry Andric // b0: 64290b57cec5SDimitry Andric // x = load i32 64300b57cec5SDimitry Andric // ... 
64310b57cec5SDimitry Andric // b1: 64320b57cec5SDimitry Andric // y = and x, 0xff 64330b57cec5SDimitry Andric // z = use y 64340b57cec5SDimitry Andric // 64350b57cec5SDimitry Andric // becomes: 64360b57cec5SDimitry Andric // 64370b57cec5SDimitry Andric // b0: 64380b57cec5SDimitry Andric // x = load i32 64390b57cec5SDimitry Andric // x' = and x, 0xff 64400b57cec5SDimitry Andric // ... 64410b57cec5SDimitry Andric // b1: 64420b57cec5SDimitry Andric // z = use x' 64430b57cec5SDimitry Andric // 64440b57cec5SDimitry Andric // whereas: 64450b57cec5SDimitry Andric // 64460b57cec5SDimitry Andric // b0: 64470b57cec5SDimitry Andric // x1 = load i32 64480b57cec5SDimitry Andric // ... 64490b57cec5SDimitry Andric // b1: 64500b57cec5SDimitry Andric // x2 = load i32 64510b57cec5SDimitry Andric // ... 64520b57cec5SDimitry Andric // b2: 64530b57cec5SDimitry Andric // x = phi x1, x2 64540b57cec5SDimitry Andric // y = and x, 0xff 64550b57cec5SDimitry Andric // 64560b57cec5SDimitry Andric // becomes (after a call to optimizeLoadExt for each load): 64570b57cec5SDimitry Andric // 64580b57cec5SDimitry Andric // b0: 64590b57cec5SDimitry Andric // x1 = load i32 64600b57cec5SDimitry Andric // x1' = and x1, 0xff 64610b57cec5SDimitry Andric // ... 64620b57cec5SDimitry Andric // b1: 64630b57cec5SDimitry Andric // x2 = load i32 64640b57cec5SDimitry Andric // x2' = and x2, 0xff 64650b57cec5SDimitry Andric // ... 64660b57cec5SDimitry Andric // b2: 64670b57cec5SDimitry Andric // x = phi x1', x2' 64680b57cec5SDimitry Andric // y = and x, 0xff 64690b57cec5SDimitry Andric bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { 64700b57cec5SDimitry Andric if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy()) 64710b57cec5SDimitry Andric return false; 64720b57cec5SDimitry Andric 64730b57cec5SDimitry Andric // Skip loads we've already transformed. 
64740b57cec5SDimitry Andric if (Load->hasOneUse() && 64750b57cec5SDimitry Andric InsertedInsts.count(cast<Instruction>(*Load->user_begin()))) 64760b57cec5SDimitry Andric return false; 64770b57cec5SDimitry Andric 64780b57cec5SDimitry Andric // Look at all uses of Load, looking through phis, to determine how many bits 64790b57cec5SDimitry Andric // of the loaded value are needed. 64800b57cec5SDimitry Andric SmallVector<Instruction *, 8> WorkList; 64810b57cec5SDimitry Andric SmallPtrSet<Instruction *, 16> Visited; 64820b57cec5SDimitry Andric SmallVector<Instruction *, 8> AndsToMaybeRemove; 64830b57cec5SDimitry Andric for (auto *U : Load->users()) 64840b57cec5SDimitry Andric WorkList.push_back(cast<Instruction>(U)); 64850b57cec5SDimitry Andric 64860b57cec5SDimitry Andric EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); 64870b57cec5SDimitry Andric unsigned BitWidth = LoadResultVT.getSizeInBits(); 6488fe6060f1SDimitry Andric // If the BitWidth is 0, do not try to optimize the type 6489fe6060f1SDimitry Andric if (BitWidth == 0) 6490fe6060f1SDimitry Andric return false; 6491fe6060f1SDimitry Andric 64920b57cec5SDimitry Andric APInt DemandBits(BitWidth, 0); 64930b57cec5SDimitry Andric APInt WidestAndBits(BitWidth, 0); 64940b57cec5SDimitry Andric 64950b57cec5SDimitry Andric while (!WorkList.empty()) { 6496349cc55cSDimitry Andric Instruction *I = WorkList.pop_back_val(); 64970b57cec5SDimitry Andric 64980b57cec5SDimitry Andric // Break use-def graph loops. 64990b57cec5SDimitry Andric if (!Visited.insert(I).second) 65000b57cec5SDimitry Andric continue; 65010b57cec5SDimitry Andric 65020b57cec5SDimitry Andric // For a PHI node, push all of its users. 
65030b57cec5SDimitry Andric if (auto *Phi = dyn_cast<PHINode>(I)) { 65040b57cec5SDimitry Andric for (auto *U : Phi->users()) 65050b57cec5SDimitry Andric WorkList.push_back(cast<Instruction>(U)); 65060b57cec5SDimitry Andric continue; 65070b57cec5SDimitry Andric } 65080b57cec5SDimitry Andric 65090b57cec5SDimitry Andric switch (I->getOpcode()) { 65100b57cec5SDimitry Andric case Instruction::And: { 65110b57cec5SDimitry Andric auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1)); 65120b57cec5SDimitry Andric if (!AndC) 65130b57cec5SDimitry Andric return false; 65140b57cec5SDimitry Andric APInt AndBits = AndC->getValue(); 65150b57cec5SDimitry Andric DemandBits |= AndBits; 65160b57cec5SDimitry Andric // Keep track of the widest and mask we see. 65170b57cec5SDimitry Andric if (AndBits.ugt(WidestAndBits)) 65180b57cec5SDimitry Andric WidestAndBits = AndBits; 65190b57cec5SDimitry Andric if (AndBits == WidestAndBits && I->getOperand(0) == Load) 65200b57cec5SDimitry Andric AndsToMaybeRemove.push_back(I); 65210b57cec5SDimitry Andric break; 65220b57cec5SDimitry Andric } 65230b57cec5SDimitry Andric 65240b57cec5SDimitry Andric case Instruction::Shl: { 65250b57cec5SDimitry Andric auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1)); 65260b57cec5SDimitry Andric if (!ShlC) 65270b57cec5SDimitry Andric return false; 65280b57cec5SDimitry Andric uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1); 65290b57cec5SDimitry Andric DemandBits.setLowBits(BitWidth - ShiftAmt); 65300b57cec5SDimitry Andric break; 65310b57cec5SDimitry Andric } 65320b57cec5SDimitry Andric 65330b57cec5SDimitry Andric case Instruction::Trunc: { 65340b57cec5SDimitry Andric EVT TruncVT = TLI->getValueType(*DL, I->getType()); 65350b57cec5SDimitry Andric unsigned TruncBitWidth = TruncVT.getSizeInBits(); 65360b57cec5SDimitry Andric DemandBits.setLowBits(TruncBitWidth); 65370b57cec5SDimitry Andric break; 65380b57cec5SDimitry Andric } 65390b57cec5SDimitry Andric 65400b57cec5SDimitry Andric default: 65410b57cec5SDimitry 
Andric return false; 65420b57cec5SDimitry Andric } 65430b57cec5SDimitry Andric } 65440b57cec5SDimitry Andric 65450b57cec5SDimitry Andric uint32_t ActiveBits = DemandBits.getActiveBits(); 65460b57cec5SDimitry Andric // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the 65470b57cec5SDimitry Andric // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example, 65480b57cec5SDimitry Andric // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but 65490b57cec5SDimitry Andric // (and (load x) 1) is not matched as a single instruction, rather as a LDR 65500b57cec5SDimitry Andric // followed by an AND. 65510b57cec5SDimitry Andric // TODO: Look into removing this restriction by fixing backends to either 65520b57cec5SDimitry Andric // return false for isLoadExtLegal for i1 or have them select this pattern to 65530b57cec5SDimitry Andric // a single instruction. 65540b57cec5SDimitry Andric // 65550b57cec5SDimitry Andric // Also avoid hoisting if we didn't see any ands with the exact DemandBits 65560b57cec5SDimitry Andric // mask, since these are the only ands that will be removed by isel. 65570b57cec5SDimitry Andric if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) || 65580b57cec5SDimitry Andric WidestAndBits != DemandBits) 65590b57cec5SDimitry Andric return false; 65600b57cec5SDimitry Andric 65610b57cec5SDimitry Andric LLVMContext &Ctx = Load->getType()->getContext(); 65620b57cec5SDimitry Andric Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits); 65630b57cec5SDimitry Andric EVT TruncVT = TLI->getValueType(*DL, TruncTy); 65640b57cec5SDimitry Andric 65650b57cec5SDimitry Andric // Reject cases that won't be matched as extloads. 
65660b57cec5SDimitry Andric if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() || 65670b57cec5SDimitry Andric !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT)) 65680b57cec5SDimitry Andric return false; 65690b57cec5SDimitry Andric 65700b57cec5SDimitry Andric IRBuilder<> Builder(Load->getNextNode()); 65718bcb0991SDimitry Andric auto *NewAnd = cast<Instruction>( 65720b57cec5SDimitry Andric Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); 65730b57cec5SDimitry Andric // Mark this instruction as "inserted by CGP", so that other 65740b57cec5SDimitry Andric // optimizations don't touch it. 65750b57cec5SDimitry Andric InsertedInsts.insert(NewAnd); 65760b57cec5SDimitry Andric 65770b57cec5SDimitry Andric // Replace all uses of load with new and (except for the use of load in the 65780b57cec5SDimitry Andric // new and itself). 65790b57cec5SDimitry Andric Load->replaceAllUsesWith(NewAnd); 65800b57cec5SDimitry Andric NewAnd->setOperand(0, Load); 65810b57cec5SDimitry Andric 65820b57cec5SDimitry Andric // Remove any and instructions that are now redundant. 65830b57cec5SDimitry Andric for (auto *And : AndsToMaybeRemove) 65840b57cec5SDimitry Andric // Check that the and mask is the same as the one we decided to put on the 65850b57cec5SDimitry Andric // new and. 
65860b57cec5SDimitry Andric if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) { 65870b57cec5SDimitry Andric And->replaceAllUsesWith(NewAnd); 65880b57cec5SDimitry Andric if (&*CurInstIterator == And) 65890b57cec5SDimitry Andric CurInstIterator = std::next(And->getIterator()); 65900b57cec5SDimitry Andric And->eraseFromParent(); 65910b57cec5SDimitry Andric ++NumAndUses; 65920b57cec5SDimitry Andric } 65930b57cec5SDimitry Andric 65940b57cec5SDimitry Andric ++NumAndsAdded; 65950b57cec5SDimitry Andric return true; 65960b57cec5SDimitry Andric } 65970b57cec5SDimitry Andric 65980b57cec5SDimitry Andric /// Check if V (an operand of a select instruction) is an expensive instruction 65990b57cec5SDimitry Andric /// that is only used once. 66000b57cec5SDimitry Andric static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { 66010b57cec5SDimitry Andric auto *I = dyn_cast<Instruction>(V); 66020b57cec5SDimitry Andric // If it's safe to speculatively execute, then it should not have side 66030b57cec5SDimitry Andric // effects; therefore, it's safe to sink and possibly *not* execute. 66040b57cec5SDimitry Andric return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && 66055ffd83dbSDimitry Andric TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >= 66065ffd83dbSDimitry Andric TargetTransformInfo::TCC_Expensive; 66070b57cec5SDimitry Andric } 66080b57cec5SDimitry Andric 66090b57cec5SDimitry Andric /// Returns true if a SelectInst should be turned into an explicit branch. 66100b57cec5SDimitry Andric static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, 66110b57cec5SDimitry Andric const TargetLowering *TLI, 66120b57cec5SDimitry Andric SelectInst *SI) { 66130b57cec5SDimitry Andric // If even a predictable select is cheap, then a branch can't be cheaper. 
66140b57cec5SDimitry Andric if (!TLI->isPredictableSelectExpensive()) 66150b57cec5SDimitry Andric return false; 66160b57cec5SDimitry Andric 66170b57cec5SDimitry Andric // FIXME: This should use the same heuristics as IfConversion to determine 66180b57cec5SDimitry Andric // whether a select is better represented as a branch. 66190b57cec5SDimitry Andric 66200b57cec5SDimitry Andric // If metadata tells us that the select condition is obviously predictable, 66210b57cec5SDimitry Andric // then we want to replace the select with a branch. 66220b57cec5SDimitry Andric uint64_t TrueWeight, FalseWeight; 66230b57cec5SDimitry Andric if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { 66240b57cec5SDimitry Andric uint64_t Max = std::max(TrueWeight, FalseWeight); 66250b57cec5SDimitry Andric uint64_t Sum = TrueWeight + FalseWeight; 66260b57cec5SDimitry Andric if (Sum != 0) { 66270b57cec5SDimitry Andric auto Probability = BranchProbability::getBranchProbability(Max, Sum); 6628fe6060f1SDimitry Andric if (Probability > TTI->getPredictableBranchThreshold()) 66290b57cec5SDimitry Andric return true; 66300b57cec5SDimitry Andric } 66310b57cec5SDimitry Andric } 66320b57cec5SDimitry Andric 66330b57cec5SDimitry Andric CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); 66340b57cec5SDimitry Andric 66350b57cec5SDimitry Andric // If a branch is predictable, an out-of-order CPU can avoid blocking on its 66360b57cec5SDimitry Andric // comparison condition. If the compare has more than one use, there's 66370b57cec5SDimitry Andric // probably another cmov or setcc around, so it's not worth emitting a branch. 66380b57cec5SDimitry Andric if (!Cmp || !Cmp->hasOneUse()) 66390b57cec5SDimitry Andric return false; 66400b57cec5SDimitry Andric 66410b57cec5SDimitry Andric // If either operand of the select is expensive and only needed on one side 66420b57cec5SDimitry Andric // of the select, we should form a branch. 
66430b57cec5SDimitry Andric if (sinkSelectOperand(TTI, SI->getTrueValue()) || 66440b57cec5SDimitry Andric sinkSelectOperand(TTI, SI->getFalseValue())) 66450b57cec5SDimitry Andric return true; 66460b57cec5SDimitry Andric 66470b57cec5SDimitry Andric return false; 66480b57cec5SDimitry Andric } 66490b57cec5SDimitry Andric 66500b57cec5SDimitry Andric /// If \p isTrue is true, return the true value of \p SI, otherwise return 66510b57cec5SDimitry Andric /// false value of \p SI. If the true/false value of \p SI is defined by any 66520b57cec5SDimitry Andric /// select instructions in \p Selects, look through the defining select 66530b57cec5SDimitry Andric /// instruction until the true/false value is not defined in \p Selects. 66540b57cec5SDimitry Andric static Value *getTrueOrFalseValue( 66550b57cec5SDimitry Andric SelectInst *SI, bool isTrue, 66560b57cec5SDimitry Andric const SmallPtrSet<const Instruction *, 2> &Selects) { 66570b57cec5SDimitry Andric Value *V = nullptr; 66580b57cec5SDimitry Andric 66590b57cec5SDimitry Andric for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); 66600b57cec5SDimitry Andric DefSI = dyn_cast<SelectInst>(V)) { 66610b57cec5SDimitry Andric assert(DefSI->getCondition() == SI->getCondition() && 66620b57cec5SDimitry Andric "The condition of DefSI does not match with SI"); 66630b57cec5SDimitry Andric V = (isTrue ? 
DefSI->getTrueValue() : DefSI->getFalseValue()); 66640b57cec5SDimitry Andric } 66650b57cec5SDimitry Andric 66660b57cec5SDimitry Andric assert(V && "Failed to get select true/false value"); 66670b57cec5SDimitry Andric return V; 66680b57cec5SDimitry Andric } 66690b57cec5SDimitry Andric 66700b57cec5SDimitry Andric bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) { 66710b57cec5SDimitry Andric assert(Shift->isShift() && "Expected a shift"); 66720b57cec5SDimitry Andric 66730b57cec5SDimitry Andric // If this is (1) a vector shift, (2) shifts by scalars are cheaper than 66740b57cec5SDimitry Andric // general vector shifts, and (3) the shift amount is a select-of-splatted 66750b57cec5SDimitry Andric // values, hoist the shifts before the select: 66760b57cec5SDimitry Andric // shift Op0, (select Cond, TVal, FVal) --> 66770b57cec5SDimitry Andric // select Cond, (shift Op0, TVal), (shift Op0, FVal) 66780b57cec5SDimitry Andric // 66790b57cec5SDimitry Andric // This is inverting a generic IR transform when we know that the cost of a 66800b57cec5SDimitry Andric // general vector shift is more than the cost of 2 shift-by-scalars. 66810b57cec5SDimitry Andric // We can't do this effectively in SDAG because we may not be able to 66820b57cec5SDimitry Andric // determine if the select operands are splats from within a basic block. 
66830b57cec5SDimitry Andric Type *Ty = Shift->getType(); 66840b57cec5SDimitry Andric if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty)) 66850b57cec5SDimitry Andric return false; 66860b57cec5SDimitry Andric Value *Cond, *TVal, *FVal; 66870b57cec5SDimitry Andric if (!match(Shift->getOperand(1), 66880b57cec5SDimitry Andric m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal))))) 66890b57cec5SDimitry Andric return false; 66900b57cec5SDimitry Andric if (!isSplatValue(TVal) || !isSplatValue(FVal)) 66910b57cec5SDimitry Andric return false; 66920b57cec5SDimitry Andric 66930b57cec5SDimitry Andric IRBuilder<> Builder(Shift); 66940b57cec5SDimitry Andric BinaryOperator::BinaryOps Opcode = Shift->getOpcode(); 66950b57cec5SDimitry Andric Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal); 66960b57cec5SDimitry Andric Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal); 66970b57cec5SDimitry Andric Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal); 66980b57cec5SDimitry Andric Shift->replaceAllUsesWith(NewSel); 66990b57cec5SDimitry Andric Shift->eraseFromParent(); 67000b57cec5SDimitry Andric return true; 67010b57cec5SDimitry Andric } 67020b57cec5SDimitry Andric 67035ffd83dbSDimitry Andric bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) { 67045ffd83dbSDimitry Andric Intrinsic::ID Opcode = Fsh->getIntrinsicID(); 67055ffd83dbSDimitry Andric assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) && 67065ffd83dbSDimitry Andric "Expected a funnel shift"); 67075ffd83dbSDimitry Andric 67085ffd83dbSDimitry Andric // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper 67095ffd83dbSDimitry Andric // than general vector shifts, and (3) the shift amount is select-of-splatted 67105ffd83dbSDimitry Andric // values, hoist the funnel shifts before the select: 67115ffd83dbSDimitry Andric // fsh Op0, Op1, (select Cond, TVal, FVal) --> 67125ffd83dbSDimitry Andric // select Cond, (fsh 
Op0, Op1, TVal), (fsh Op0, Op1, FVal) 67135ffd83dbSDimitry Andric // 67145ffd83dbSDimitry Andric // This is inverting a generic IR transform when we know that the cost of a 67155ffd83dbSDimitry Andric // general vector shift is more than the cost of 2 shift-by-scalars. 67165ffd83dbSDimitry Andric // We can't do this effectively in SDAG because we may not be able to 67175ffd83dbSDimitry Andric // determine if the select operands are splats from within a basic block. 67185ffd83dbSDimitry Andric Type *Ty = Fsh->getType(); 67195ffd83dbSDimitry Andric if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty)) 67205ffd83dbSDimitry Andric return false; 67215ffd83dbSDimitry Andric Value *Cond, *TVal, *FVal; 67225ffd83dbSDimitry Andric if (!match(Fsh->getOperand(2), 67235ffd83dbSDimitry Andric m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal))))) 67245ffd83dbSDimitry Andric return false; 67255ffd83dbSDimitry Andric if (!isSplatValue(TVal) || !isSplatValue(FVal)) 67265ffd83dbSDimitry Andric return false; 67275ffd83dbSDimitry Andric 67285ffd83dbSDimitry Andric IRBuilder<> Builder(Fsh); 67295ffd83dbSDimitry Andric Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1); 67305ffd83dbSDimitry Andric Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, TVal }); 67315ffd83dbSDimitry Andric Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, FVal }); 67325ffd83dbSDimitry Andric Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal); 67335ffd83dbSDimitry Andric Fsh->replaceAllUsesWith(NewSel); 67345ffd83dbSDimitry Andric Fsh->eraseFromParent(); 67355ffd83dbSDimitry Andric return true; 67365ffd83dbSDimitry Andric } 67375ffd83dbSDimitry Andric 67380b57cec5SDimitry Andric /// If we have a SelectInst that will likely profit from branch prediction, 67390b57cec5SDimitry Andric /// turn it into a branch. 
67400b57cec5SDimitry Andric bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { 6741e8d8bef9SDimitry Andric if (DisableSelectToBranch) 67420b57cec5SDimitry Andric return false; 67430b57cec5SDimitry Andric 67440b57cec5SDimitry Andric // Find all consecutive select instructions that share the same condition. 67450b57cec5SDimitry Andric SmallVector<SelectInst *, 2> ASI; 67460b57cec5SDimitry Andric ASI.push_back(SI); 67470b57cec5SDimitry Andric for (BasicBlock::iterator It = ++BasicBlock::iterator(SI); 67480b57cec5SDimitry Andric It != SI->getParent()->end(); ++It) { 67490b57cec5SDimitry Andric SelectInst *I = dyn_cast<SelectInst>(&*It); 67500b57cec5SDimitry Andric if (I && SI->getCondition() == I->getCondition()) { 67510b57cec5SDimitry Andric ASI.push_back(I); 67520b57cec5SDimitry Andric } else { 67530b57cec5SDimitry Andric break; 67540b57cec5SDimitry Andric } 67550b57cec5SDimitry Andric } 67560b57cec5SDimitry Andric 67570b57cec5SDimitry Andric SelectInst *LastSI = ASI.back(); 67580b57cec5SDimitry Andric // Increment the current iterator to skip all the rest of select instructions 67590b57cec5SDimitry Andric // because they will be either "not lowered" or "all lowered" to branch. 67600b57cec5SDimitry Andric CurInstIterator = std::next(LastSI->getIterator()); 67610b57cec5SDimitry Andric 67620b57cec5SDimitry Andric bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); 67630b57cec5SDimitry Andric 67640b57cec5SDimitry Andric // Can we convert the 'select' to CF ? 
67650b57cec5SDimitry Andric if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable)) 67660b57cec5SDimitry Andric return false; 67670b57cec5SDimitry Andric 67680b57cec5SDimitry Andric TargetLowering::SelectSupportKind SelectKind; 67690b57cec5SDimitry Andric if (VectorCond) 67700b57cec5SDimitry Andric SelectKind = TargetLowering::VectorMaskSelect; 67710b57cec5SDimitry Andric else if (SI->getType()->isVectorTy()) 67720b57cec5SDimitry Andric SelectKind = TargetLowering::ScalarCondVectorVal; 67730b57cec5SDimitry Andric else 67740b57cec5SDimitry Andric SelectKind = TargetLowering::ScalarValSelect; 67750b57cec5SDimitry Andric 67760b57cec5SDimitry Andric if (TLI->isSelectSupported(SelectKind) && 6777e8d8bef9SDimitry Andric (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize || 6778e8d8bef9SDimitry Andric llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()))) 67790b57cec5SDimitry Andric return false; 67800b57cec5SDimitry Andric 67810b57cec5SDimitry Andric // The DominatorTree needs to be rebuilt by any consumers after this 67820b57cec5SDimitry Andric // transformation. We simply reset here rather than setting the ModifiedDT 67830b57cec5SDimitry Andric // flag to avoid restarting the function walk in runOnFunction for each 67840b57cec5SDimitry Andric // select optimized. 
67850b57cec5SDimitry Andric DT.reset(); 67860b57cec5SDimitry Andric 67870b57cec5SDimitry Andric // Transform a sequence like this: 67880b57cec5SDimitry Andric // start: 67890b57cec5SDimitry Andric // %cmp = cmp uge i32 %a, %b 67900b57cec5SDimitry Andric // %sel = select i1 %cmp, i32 %c, i32 %d 67910b57cec5SDimitry Andric // 67920b57cec5SDimitry Andric // Into: 67930b57cec5SDimitry Andric // start: 67940b57cec5SDimitry Andric // %cmp = cmp uge i32 %a, %b 67955ffd83dbSDimitry Andric // %cmp.frozen = freeze %cmp 67965ffd83dbSDimitry Andric // br i1 %cmp.frozen, label %select.true, label %select.false 67970b57cec5SDimitry Andric // select.true: 67980b57cec5SDimitry Andric // br label %select.end 67990b57cec5SDimitry Andric // select.false: 68000b57cec5SDimitry Andric // br label %select.end 68010b57cec5SDimitry Andric // select.end: 68020b57cec5SDimitry Andric // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] 68030b57cec5SDimitry Andric // 68045ffd83dbSDimitry Andric // %cmp should be frozen, otherwise it may introduce undefined behavior. 68050b57cec5SDimitry Andric // In addition, we may sink instructions that produce %c or %d from 68060b57cec5SDimitry Andric // the entry block into the destination(s) of the new branch. 68070b57cec5SDimitry Andric // If the true or false blocks do not contain a sunken instruction, that 68080b57cec5SDimitry Andric // block and its branch may be optimized away. In that case, one side of the 68090b57cec5SDimitry Andric // first branch will point directly to select.end, and the corresponding PHI 68100b57cec5SDimitry Andric // predecessor block will be the start block. 68110b57cec5SDimitry Andric 68120b57cec5SDimitry Andric // First, we split the block containing the select into 2 blocks. 
68130b57cec5SDimitry Andric BasicBlock *StartBlock = SI->getParent(); 68140b57cec5SDimitry Andric BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); 68150b57cec5SDimitry Andric BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); 681655e4f9d5SDimitry Andric BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency()); 68170b57cec5SDimitry Andric 68180b57cec5SDimitry Andric // Delete the unconditional branch that was just created by the split. 68190b57cec5SDimitry Andric StartBlock->getTerminator()->eraseFromParent(); 68200b57cec5SDimitry Andric 68210b57cec5SDimitry Andric // These are the new basic blocks for the conditional branch. 68220b57cec5SDimitry Andric // At least one will become an actual new basic block. 68230b57cec5SDimitry Andric BasicBlock *TrueBlock = nullptr; 68240b57cec5SDimitry Andric BasicBlock *FalseBlock = nullptr; 68250b57cec5SDimitry Andric BranchInst *TrueBranch = nullptr; 68260b57cec5SDimitry Andric BranchInst *FalseBranch = nullptr; 68270b57cec5SDimitry Andric 68280b57cec5SDimitry Andric // Sink expensive instructions into the conditional blocks to avoid executing 68290b57cec5SDimitry Andric // them speculatively. 
68300b57cec5SDimitry Andric for (SelectInst *SI : ASI) { 68310b57cec5SDimitry Andric if (sinkSelectOperand(TTI, SI->getTrueValue())) { 68320b57cec5SDimitry Andric if (TrueBlock == nullptr) { 68330b57cec5SDimitry Andric TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", 68340b57cec5SDimitry Andric EndBlock->getParent(), EndBlock); 68350b57cec5SDimitry Andric TrueBranch = BranchInst::Create(EndBlock, TrueBlock); 68360b57cec5SDimitry Andric TrueBranch->setDebugLoc(SI->getDebugLoc()); 68370b57cec5SDimitry Andric } 68380b57cec5SDimitry Andric auto *TrueInst = cast<Instruction>(SI->getTrueValue()); 68390b57cec5SDimitry Andric TrueInst->moveBefore(TrueBranch); 68400b57cec5SDimitry Andric } 68410b57cec5SDimitry Andric if (sinkSelectOperand(TTI, SI->getFalseValue())) { 68420b57cec5SDimitry Andric if (FalseBlock == nullptr) { 68430b57cec5SDimitry Andric FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", 68440b57cec5SDimitry Andric EndBlock->getParent(), EndBlock); 68450b57cec5SDimitry Andric FalseBranch = BranchInst::Create(EndBlock, FalseBlock); 68460b57cec5SDimitry Andric FalseBranch->setDebugLoc(SI->getDebugLoc()); 68470b57cec5SDimitry Andric } 68480b57cec5SDimitry Andric auto *FalseInst = cast<Instruction>(SI->getFalseValue()); 68490b57cec5SDimitry Andric FalseInst->moveBefore(FalseBranch); 68500b57cec5SDimitry Andric } 68510b57cec5SDimitry Andric } 68520b57cec5SDimitry Andric 68530b57cec5SDimitry Andric // If there was nothing to sink, then arbitrarily choose the 'false' side 68540b57cec5SDimitry Andric // for a new input value to the PHI. 
68550b57cec5SDimitry Andric if (TrueBlock == FalseBlock) { 68560b57cec5SDimitry Andric assert(TrueBlock == nullptr && 68570b57cec5SDimitry Andric "Unexpected basic block transform while optimizing select"); 68580b57cec5SDimitry Andric 68590b57cec5SDimitry Andric FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", 68600b57cec5SDimitry Andric EndBlock->getParent(), EndBlock); 68610b57cec5SDimitry Andric auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); 68620b57cec5SDimitry Andric FalseBranch->setDebugLoc(SI->getDebugLoc()); 68630b57cec5SDimitry Andric } 68640b57cec5SDimitry Andric 68650b57cec5SDimitry Andric // Insert the real conditional branch based on the original condition. 68660b57cec5SDimitry Andric // If we did not create a new block for one of the 'true' or 'false' paths 68670b57cec5SDimitry Andric // of the condition, it means that side of the branch goes to the end block 68680b57cec5SDimitry Andric // directly and the path originates from the start block from the point of 68690b57cec5SDimitry Andric // view of the new PHI. 
68700b57cec5SDimitry Andric BasicBlock *TT, *FT; 68710b57cec5SDimitry Andric if (TrueBlock == nullptr) { 68720b57cec5SDimitry Andric TT = EndBlock; 68730b57cec5SDimitry Andric FT = FalseBlock; 68740b57cec5SDimitry Andric TrueBlock = StartBlock; 68750b57cec5SDimitry Andric } else if (FalseBlock == nullptr) { 68760b57cec5SDimitry Andric TT = TrueBlock; 68770b57cec5SDimitry Andric FT = EndBlock; 68780b57cec5SDimitry Andric FalseBlock = StartBlock; 68790b57cec5SDimitry Andric } else { 68800b57cec5SDimitry Andric TT = TrueBlock; 68810b57cec5SDimitry Andric FT = FalseBlock; 68820b57cec5SDimitry Andric } 68835ffd83dbSDimitry Andric IRBuilder<> IB(SI); 68845ffd83dbSDimitry Andric auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); 68855ffd83dbSDimitry Andric IB.CreateCondBr(CondFr, TT, FT, SI); 68860b57cec5SDimitry Andric 68870b57cec5SDimitry Andric SmallPtrSet<const Instruction *, 2> INS; 68880b57cec5SDimitry Andric INS.insert(ASI.begin(), ASI.end()); 68890b57cec5SDimitry Andric // Use reverse iterator because later select may use the value of the 68900b57cec5SDimitry Andric // earlier select, and we need to propagate value through earlier select 68910b57cec5SDimitry Andric // to get the PHI operand. 68920eae32dcSDimitry Andric for (SelectInst *SI : llvm::reverse(ASI)) { 68930b57cec5SDimitry Andric // The select itself is replaced with a PHI Node. 
68940b57cec5SDimitry Andric PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); 68950b57cec5SDimitry Andric PN->takeName(SI); 68960b57cec5SDimitry Andric PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); 68970b57cec5SDimitry Andric PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); 68980b57cec5SDimitry Andric PN->setDebugLoc(SI->getDebugLoc()); 68990b57cec5SDimitry Andric 69000b57cec5SDimitry Andric SI->replaceAllUsesWith(PN); 69010b57cec5SDimitry Andric SI->eraseFromParent(); 69020b57cec5SDimitry Andric INS.erase(SI); 69030b57cec5SDimitry Andric ++NumSelectsExpanded; 69040b57cec5SDimitry Andric } 69050b57cec5SDimitry Andric 69060b57cec5SDimitry Andric // Instruct OptimizeBlock to skip to the next block. 69070b57cec5SDimitry Andric CurInstIterator = StartBlock->end(); 69080b57cec5SDimitry Andric return true; 69090b57cec5SDimitry Andric } 69100b57cec5SDimitry Andric 69115ffd83dbSDimitry Andric /// Some targets only accept certain types for splat inputs. For example a VDUP 69125ffd83dbSDimitry Andric /// in MVE takes a GPR (integer) register, and the instruction that incorporate 69135ffd83dbSDimitry Andric /// a VDUP (such as a VADD qd, qm, rm) also require a gpr register. 
69145ffd83dbSDimitry Andric bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { 6915e8d8bef9SDimitry Andric // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only 69165ffd83dbSDimitry Andric if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), 69175ffd83dbSDimitry Andric m_Undef(), m_ZeroMask()))) 69180b57cec5SDimitry Andric return false; 69195ffd83dbSDimitry Andric Type *NewType = TLI->shouldConvertSplatType(SVI); 69205ffd83dbSDimitry Andric if (!NewType) 69215ffd83dbSDimitry Andric return false; 69225ffd83dbSDimitry Andric 69235ffd83dbSDimitry Andric auto *SVIVecType = cast<FixedVectorType>(SVI->getType()); 69245ffd83dbSDimitry Andric assert(!NewType->isVectorTy() && "Expected a scalar type!"); 69255ffd83dbSDimitry Andric assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() && 69265ffd83dbSDimitry Andric "Expected a type of the same size!"); 69275ffd83dbSDimitry Andric auto *NewVecType = 69285ffd83dbSDimitry Andric FixedVectorType::get(NewType, SVIVecType->getNumElements()); 69295ffd83dbSDimitry Andric 69305ffd83dbSDimitry Andric // Create a bitcast (shuffle (insert (bitcast(..)))) 69315ffd83dbSDimitry Andric IRBuilder<> Builder(SVI->getContext()); 69325ffd83dbSDimitry Andric Builder.SetInsertPoint(SVI); 69335ffd83dbSDimitry Andric Value *BC1 = Builder.CreateBitCast( 69345ffd83dbSDimitry Andric cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType); 6935e8d8bef9SDimitry Andric Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1); 69365ffd83dbSDimitry Andric Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType); 69375ffd83dbSDimitry Andric 69385ffd83dbSDimitry Andric SVI->replaceAllUsesWith(BC2); 6939e8d8bef9SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions( 6940e8d8bef9SDimitry Andric SVI, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); }); 69415ffd83dbSDimitry Andric 69425ffd83dbSDimitry Andric // Also hoist the 
bitcast up to its operand if it they are not in the same 69435ffd83dbSDimitry Andric // block. 69445ffd83dbSDimitry Andric if (auto *BCI = dyn_cast<Instruction>(BC1)) 69455ffd83dbSDimitry Andric if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0))) 69465ffd83dbSDimitry Andric if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) && 69475ffd83dbSDimitry Andric !Op->isTerminator() && !Op->isEHPad()) 69485ffd83dbSDimitry Andric BCI->moveAfter(Op); 69490b57cec5SDimitry Andric 69500b57cec5SDimitry Andric return true; 69510b57cec5SDimitry Andric } 69520b57cec5SDimitry Andric 69530b57cec5SDimitry Andric bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { 69540b57cec5SDimitry Andric // If the operands of I can be folded into a target instruction together with 69550b57cec5SDimitry Andric // I, duplicate and sink them. 69560b57cec5SDimitry Andric SmallVector<Use *, 4> OpsToSink; 69575ffd83dbSDimitry Andric if (!TLI->shouldSinkOperands(I, OpsToSink)) 69580b57cec5SDimitry Andric return false; 69590b57cec5SDimitry Andric 69600b57cec5SDimitry Andric // OpsToSink can contain multiple uses in a use chain (e.g. 69610b57cec5SDimitry Andric // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating 69628bcb0991SDimitry Andric // uses must come first, so we process the ops in reverse order so as to not 69638bcb0991SDimitry Andric // create invalid IR. 
69640b57cec5SDimitry Andric BasicBlock *TargetBB = I->getParent(); 69650b57cec5SDimitry Andric bool Changed = false; 69660b57cec5SDimitry Andric SmallVector<Use *, 4> ToReplace; 6967349cc55cSDimitry Andric Instruction *InsertPoint = I; 6968349cc55cSDimitry Andric DenseMap<const Instruction *, unsigned long> InstOrdering; 6969349cc55cSDimitry Andric unsigned long InstNumber = 0; 6970349cc55cSDimitry Andric for (const auto &I : *TargetBB) 6971349cc55cSDimitry Andric InstOrdering[&I] = InstNumber++; 6972349cc55cSDimitry Andric 69738bcb0991SDimitry Andric for (Use *U : reverse(OpsToSink)) { 69740b57cec5SDimitry Andric auto *UI = cast<Instruction>(U->get()); 6975349cc55cSDimitry Andric if (isa<PHINode>(UI)) 69760b57cec5SDimitry Andric continue; 6977349cc55cSDimitry Andric if (UI->getParent() == TargetBB) { 6978349cc55cSDimitry Andric if (InstOrdering[UI] < InstOrdering[InsertPoint]) 6979349cc55cSDimitry Andric InsertPoint = UI; 6980349cc55cSDimitry Andric continue; 6981349cc55cSDimitry Andric } 69820b57cec5SDimitry Andric ToReplace.push_back(U); 69830b57cec5SDimitry Andric } 69840b57cec5SDimitry Andric 69858bcb0991SDimitry Andric SetVector<Instruction *> MaybeDead; 69868bcb0991SDimitry Andric DenseMap<Instruction *, Instruction *> NewInstructions; 69870b57cec5SDimitry Andric for (Use *U : ToReplace) { 69880b57cec5SDimitry Andric auto *UI = cast<Instruction>(U->get()); 69890b57cec5SDimitry Andric Instruction *NI = UI->clone(); 69908bcb0991SDimitry Andric NewInstructions[UI] = NI; 69910b57cec5SDimitry Andric MaybeDead.insert(UI); 69920b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n"); 69938bcb0991SDimitry Andric NI->insertBefore(InsertPoint); 69948bcb0991SDimitry Andric InsertPoint = NI; 69950b57cec5SDimitry Andric InsertedInsts.insert(NI); 69968bcb0991SDimitry Andric 69978bcb0991SDimitry Andric // Update the use for the new instruction, making sure that we update the 69988bcb0991SDimitry Andric // sunk instruction uses, if it is 
part of a chain that has already been 69998bcb0991SDimitry Andric // sunk. 70008bcb0991SDimitry Andric Instruction *OldI = cast<Instruction>(U->getUser()); 70018bcb0991SDimitry Andric if (NewInstructions.count(OldI)) 70028bcb0991SDimitry Andric NewInstructions[OldI]->setOperand(U->getOperandNo(), NI); 70038bcb0991SDimitry Andric else 70040b57cec5SDimitry Andric U->set(NI); 70050b57cec5SDimitry Andric Changed = true; 70060b57cec5SDimitry Andric } 70070b57cec5SDimitry Andric 70080b57cec5SDimitry Andric // Remove instructions that are dead after sinking. 70098bcb0991SDimitry Andric for (auto *I : MaybeDead) { 70108bcb0991SDimitry Andric if (!I->hasNUsesOrMore(1)) { 70118bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n"); 70120b57cec5SDimitry Andric I->eraseFromParent(); 70138bcb0991SDimitry Andric } 70148bcb0991SDimitry Andric } 70150b57cec5SDimitry Andric 70160b57cec5SDimitry Andric return Changed; 70170b57cec5SDimitry Andric } 70180b57cec5SDimitry Andric 701981ad6265SDimitry Andric bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) { 70200b57cec5SDimitry Andric Value *Cond = SI->getCondition(); 70210b57cec5SDimitry Andric Type *OldType = Cond->getType(); 70220b57cec5SDimitry Andric LLVMContext &Context = Cond->getContext(); 7023fe6060f1SDimitry Andric EVT OldVT = TLI->getValueType(*DL, OldType); 702481ad6265SDimitry Andric MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT); 70250b57cec5SDimitry Andric unsigned RegWidth = RegType.getSizeInBits(); 70260b57cec5SDimitry Andric 70270b57cec5SDimitry Andric if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth()) 70280b57cec5SDimitry Andric return false; 70290b57cec5SDimitry Andric 70300b57cec5SDimitry Andric // If the register width is greater than the type width, expand the condition 70310b57cec5SDimitry Andric // of the switch instruction and each case constant to the width of the 70320b57cec5SDimitry Andric // register. 
By widening the type of the switch condition, subsequent 70330b57cec5SDimitry Andric // comparisons (for case comparisons) will not need to be extended to the 70340b57cec5SDimitry Andric // preferred register width, so we will potentially eliminate N-1 extends, 70350b57cec5SDimitry Andric // where N is the number of cases in the switch. 70360b57cec5SDimitry Andric auto *NewType = Type::getIntNTy(Context, RegWidth); 70370b57cec5SDimitry Andric 7038fe6060f1SDimitry Andric // Extend the switch condition and case constants using the target preferred 7039fe6060f1SDimitry Andric // extend unless the switch condition is a function argument with an extend 7040fe6060f1SDimitry Andric // attribute. In that case, we can avoid an unnecessary mask/extension by 7041fe6060f1SDimitry Andric // matching the argument extension instead. 70420b57cec5SDimitry Andric Instruction::CastOps ExtType = Instruction::ZExt; 7043fe6060f1SDimitry Andric // Some targets prefer SExt over ZExt. 7044fe6060f1SDimitry Andric if (TLI->isSExtCheaperThanZExt(OldVT, RegType)) 7045fe6060f1SDimitry Andric ExtType = Instruction::SExt; 7046fe6060f1SDimitry Andric 7047fe6060f1SDimitry Andric if (auto *Arg = dyn_cast<Argument>(Cond)) { 70480b57cec5SDimitry Andric if (Arg->hasSExtAttr()) 70490b57cec5SDimitry Andric ExtType = Instruction::SExt; 7050fe6060f1SDimitry Andric if (Arg->hasZExtAttr()) 7051fe6060f1SDimitry Andric ExtType = Instruction::ZExt; 7052fe6060f1SDimitry Andric } 70530b57cec5SDimitry Andric 70540b57cec5SDimitry Andric auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); 70550b57cec5SDimitry Andric ExtInst->insertBefore(SI); 70560b57cec5SDimitry Andric ExtInst->setDebugLoc(SI->getDebugLoc()); 70570b57cec5SDimitry Andric SI->setCondition(ExtInst); 70580b57cec5SDimitry Andric for (auto Case : SI->cases()) { 705981ad6265SDimitry Andric const APInt &NarrowConst = Case.getCaseValue()->getValue(); 70600b57cec5SDimitry Andric APInt WideConst = (ExtType == Instruction::ZExt) ? 
70610b57cec5SDimitry Andric NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth); 70620b57cec5SDimitry Andric Case.setValue(ConstantInt::get(Context, WideConst)); 70630b57cec5SDimitry Andric } 70640b57cec5SDimitry Andric 70650b57cec5SDimitry Andric return true; 70660b57cec5SDimitry Andric } 70670b57cec5SDimitry Andric 706881ad6265SDimitry Andric bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) { 706981ad6265SDimitry Andric // The SCCP optimization tends to produce code like this: 707081ad6265SDimitry Andric // switch(x) { case 42: phi(42, ...) } 707181ad6265SDimitry Andric // Materializing the constant for the phi-argument needs instructions; So we 707281ad6265SDimitry Andric // change the code to: 707381ad6265SDimitry Andric // switch(x) { case 42: phi(x, ...) } 707481ad6265SDimitry Andric 707581ad6265SDimitry Andric Value *Condition = SI->getCondition(); 707681ad6265SDimitry Andric // Avoid endless loop in degenerate case. 707781ad6265SDimitry Andric if (isa<ConstantInt>(*Condition)) 707881ad6265SDimitry Andric return false; 707981ad6265SDimitry Andric 708081ad6265SDimitry Andric bool Changed = false; 708181ad6265SDimitry Andric BasicBlock *SwitchBB = SI->getParent(); 708281ad6265SDimitry Andric Type *ConditionType = Condition->getType(); 708381ad6265SDimitry Andric 708481ad6265SDimitry Andric for (const SwitchInst::CaseHandle &Case : SI->cases()) { 708581ad6265SDimitry Andric ConstantInt *CaseValue = Case.getCaseValue(); 708681ad6265SDimitry Andric BasicBlock *CaseBB = Case.getCaseSuccessor(); 708781ad6265SDimitry Andric // Set to true if we previously checked that `CaseBB` is only reached by 708881ad6265SDimitry Andric // a single case from this switch. 
708981ad6265SDimitry Andric bool CheckedForSinglePred = false; 709081ad6265SDimitry Andric for (PHINode &PHI : CaseBB->phis()) { 709181ad6265SDimitry Andric Type *PHIType = PHI.getType(); 709281ad6265SDimitry Andric // If ZExt is free then we can also catch patterns like this: 709381ad6265SDimitry Andric // switch((i32)x) { case 42: phi((i64)42, ...); } 709481ad6265SDimitry Andric // and replace `(i64)42` with `zext i32 %x to i64`. 709581ad6265SDimitry Andric bool TryZExt = 709681ad6265SDimitry Andric PHIType->isIntegerTy() && 709781ad6265SDimitry Andric PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() && 709881ad6265SDimitry Andric TLI->isZExtFree(ConditionType, PHIType); 709981ad6265SDimitry Andric if (PHIType == ConditionType || TryZExt) { 710081ad6265SDimitry Andric // Set to true to skip this case because of multiple preds. 710181ad6265SDimitry Andric bool SkipCase = false; 710281ad6265SDimitry Andric Value *Replacement = nullptr; 710381ad6265SDimitry Andric for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) { 710481ad6265SDimitry Andric Value *PHIValue = PHI.getIncomingValue(I); 710581ad6265SDimitry Andric if (PHIValue != CaseValue) { 710681ad6265SDimitry Andric if (!TryZExt) 710781ad6265SDimitry Andric continue; 710881ad6265SDimitry Andric ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue); 710981ad6265SDimitry Andric if (!PHIValueInt || 711081ad6265SDimitry Andric PHIValueInt->getValue() != 711181ad6265SDimitry Andric CaseValue->getValue().zext(PHIType->getIntegerBitWidth())) 711281ad6265SDimitry Andric continue; 711381ad6265SDimitry Andric } 711481ad6265SDimitry Andric if (PHI.getIncomingBlock(I) != SwitchBB) 711581ad6265SDimitry Andric continue; 711681ad6265SDimitry Andric // We cannot optimize if there are multiple case labels jumping to 711781ad6265SDimitry Andric // this block. This check may get expensive when there are many 711881ad6265SDimitry Andric // case labels so we test for it last. 
711981ad6265SDimitry Andric if (!CheckedForSinglePred) { 712081ad6265SDimitry Andric CheckedForSinglePred = true; 712181ad6265SDimitry Andric if (SI->findCaseDest(CaseBB) == nullptr) { 712281ad6265SDimitry Andric SkipCase = true; 712381ad6265SDimitry Andric break; 712481ad6265SDimitry Andric } 712581ad6265SDimitry Andric } 712681ad6265SDimitry Andric 712781ad6265SDimitry Andric if (Replacement == nullptr) { 712881ad6265SDimitry Andric if (PHIValue == CaseValue) { 712981ad6265SDimitry Andric Replacement = Condition; 713081ad6265SDimitry Andric } else { 713181ad6265SDimitry Andric IRBuilder<> Builder(SI); 713281ad6265SDimitry Andric Replacement = Builder.CreateZExt(Condition, PHIType); 713381ad6265SDimitry Andric } 713481ad6265SDimitry Andric } 713581ad6265SDimitry Andric PHI.setIncomingValue(I, Replacement); 713681ad6265SDimitry Andric Changed = true; 713781ad6265SDimitry Andric } 713881ad6265SDimitry Andric if (SkipCase) 713981ad6265SDimitry Andric break; 714081ad6265SDimitry Andric } 714181ad6265SDimitry Andric } 714281ad6265SDimitry Andric } 714381ad6265SDimitry Andric return Changed; 714481ad6265SDimitry Andric } 714581ad6265SDimitry Andric 714681ad6265SDimitry Andric bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { 714781ad6265SDimitry Andric bool Changed = optimizeSwitchType(SI); 714881ad6265SDimitry Andric Changed |= optimizeSwitchPhiConstants(SI); 714981ad6265SDimitry Andric return Changed; 715081ad6265SDimitry Andric } 71510b57cec5SDimitry Andric 71520b57cec5SDimitry Andric namespace { 71530b57cec5SDimitry Andric 71540b57cec5SDimitry Andric /// Helper class to promote a scalar operation to a vector one. 71550b57cec5SDimitry Andric /// This class is used to move downward extractelement transition. 
71560b57cec5SDimitry Andric /// E.g., 71570b57cec5SDimitry Andric /// a = vector_op <2 x i32> 71580b57cec5SDimitry Andric /// b = extractelement <2 x i32> a, i32 0 71590b57cec5SDimitry Andric /// c = scalar_op b 71600b57cec5SDimitry Andric /// store c 71610b57cec5SDimitry Andric /// 71620b57cec5SDimitry Andric /// => 71630b57cec5SDimitry Andric /// a = vector_op <2 x i32> 71640b57cec5SDimitry Andric /// c = vector_op a (equivalent to scalar_op on the related lane) 71650b57cec5SDimitry Andric /// * d = extractelement <2 x i32> c, i32 0 71660b57cec5SDimitry Andric /// * store d 71670b57cec5SDimitry Andric /// Assuming both extractelement and store can be combine, we get rid of the 71680b57cec5SDimitry Andric /// transition. 71690b57cec5SDimitry Andric class VectorPromoteHelper { 71700b57cec5SDimitry Andric /// DataLayout associated with the current module. 71710b57cec5SDimitry Andric const DataLayout &DL; 71720b57cec5SDimitry Andric 71730b57cec5SDimitry Andric /// Used to perform some checks on the legality of vector operations. 71740b57cec5SDimitry Andric const TargetLowering &TLI; 71750b57cec5SDimitry Andric 71760b57cec5SDimitry Andric /// Used to estimated the cost of the promoted chain. 71770b57cec5SDimitry Andric const TargetTransformInfo &TTI; 71780b57cec5SDimitry Andric 71790b57cec5SDimitry Andric /// The transition being moved downwards. 71800b57cec5SDimitry Andric Instruction *Transition; 71810b57cec5SDimitry Andric 71820b57cec5SDimitry Andric /// The sequence of instructions to be promoted. 71830b57cec5SDimitry Andric SmallVector<Instruction *, 4> InstsToBePromoted; 71840b57cec5SDimitry Andric 71850b57cec5SDimitry Andric /// Cost of combining a store and an extract. 71860b57cec5SDimitry Andric unsigned StoreExtractCombineCost; 71870b57cec5SDimitry Andric 71880b57cec5SDimitry Andric /// Instruction that will be combined with the transition. 
71890b57cec5SDimitry Andric Instruction *CombineInst = nullptr; 71900b57cec5SDimitry Andric 71910b57cec5SDimitry Andric /// The instruction that represents the current end of the transition. 71920b57cec5SDimitry Andric /// Since we are faking the promotion until we reach the end of the chain 71930b57cec5SDimitry Andric /// of computation, we need a way to get the current end of the transition. 71940b57cec5SDimitry Andric Instruction *getEndOfTransition() const { 71950b57cec5SDimitry Andric if (InstsToBePromoted.empty()) 71960b57cec5SDimitry Andric return Transition; 71970b57cec5SDimitry Andric return InstsToBePromoted.back(); 71980b57cec5SDimitry Andric } 71990b57cec5SDimitry Andric 72000b57cec5SDimitry Andric /// Return the index of the original value in the transition. 72010b57cec5SDimitry Andric /// E.g., for "extractelement <2 x i32> c, i32 1" the original value, 72020b57cec5SDimitry Andric /// c, is at index 0. 72030b57cec5SDimitry Andric unsigned getTransitionOriginalValueIdx() const { 72040b57cec5SDimitry Andric assert(isa<ExtractElementInst>(Transition) && 72050b57cec5SDimitry Andric "Other kind of transitions are not supported yet"); 72060b57cec5SDimitry Andric return 0; 72070b57cec5SDimitry Andric } 72080b57cec5SDimitry Andric 72090b57cec5SDimitry Andric /// Return the index of the index in the transition. 72100b57cec5SDimitry Andric /// E.g., for "extractelement <2 x i32> c, i32 0" the index 72110b57cec5SDimitry Andric /// is at index 1. 72120b57cec5SDimitry Andric unsigned getTransitionIdx() const { 72130b57cec5SDimitry Andric assert(isa<ExtractElementInst>(Transition) && 72140b57cec5SDimitry Andric "Other kind of transitions are not supported yet"); 72150b57cec5SDimitry Andric return 1; 72160b57cec5SDimitry Andric } 72170b57cec5SDimitry Andric 72180b57cec5SDimitry Andric /// Get the type of the transition. 72190b57cec5SDimitry Andric /// This is the type of the original value. 
72200b57cec5SDimitry Andric /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the 72210b57cec5SDimitry Andric /// transition is <2 x i32>. 72220b57cec5SDimitry Andric Type *getTransitionType() const { 72230b57cec5SDimitry Andric return Transition->getOperand(getTransitionOriginalValueIdx())->getType(); 72240b57cec5SDimitry Andric } 72250b57cec5SDimitry Andric 72260b57cec5SDimitry Andric /// Promote \p ToBePromoted by moving \p Def downward through. 72270b57cec5SDimitry Andric /// I.e., we have the following sequence: 72280b57cec5SDimitry Andric /// Def = Transition <ty1> a to <ty2> 72290b57cec5SDimitry Andric /// b = ToBePromoted <ty2> Def, ... 72300b57cec5SDimitry Andric /// => 72310b57cec5SDimitry Andric /// b = ToBePromoted <ty1> a, ... 72320b57cec5SDimitry Andric /// Def = Transition <ty1> ToBePromoted to <ty2> 72330b57cec5SDimitry Andric void promoteImpl(Instruction *ToBePromoted); 72340b57cec5SDimitry Andric 72350b57cec5SDimitry Andric /// Check whether or not it is profitable to promote all the 72360b57cec5SDimitry Andric /// instructions enqueued to be promoted. 72370b57cec5SDimitry Andric bool isProfitableToPromote() { 72380b57cec5SDimitry Andric Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx()); 72390b57cec5SDimitry Andric unsigned Index = isa<ConstantInt>(ValIdx) 72400b57cec5SDimitry Andric ? cast<ConstantInt>(ValIdx)->getZExtValue() 72410b57cec5SDimitry Andric : -1; 72420b57cec5SDimitry Andric Type *PromotedType = getTransitionType(); 72430b57cec5SDimitry Andric 72440b57cec5SDimitry Andric StoreInst *ST = cast<StoreInst>(CombineInst); 72450b57cec5SDimitry Andric unsigned AS = ST->getPointerAddressSpace(); 72460b57cec5SDimitry Andric // Check if this store is supported. 
72470b57cec5SDimitry Andric if (!TLI.allowsMisalignedMemoryAccesses( 72480b57cec5SDimitry Andric TLI.getValueType(DL, ST->getValueOperand()->getType()), AS, 7249fe6060f1SDimitry Andric ST->getAlign())) { 72500b57cec5SDimitry Andric // If this is not supported, there is no way we can combine 72510b57cec5SDimitry Andric // the extract with the store. 72520b57cec5SDimitry Andric return false; 72530b57cec5SDimitry Andric } 72540b57cec5SDimitry Andric 72550b57cec5SDimitry Andric // The scalar chain of computation has to pay for the transition 72560b57cec5SDimitry Andric // scalar to vector. 72570b57cec5SDimitry Andric // The vector chain has to account for the combining cost. 7258fe6060f1SDimitry Andric InstructionCost ScalarCost = 72590b57cec5SDimitry Andric TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); 7260fe6060f1SDimitry Andric InstructionCost VectorCost = StoreExtractCombineCost; 72615ffd83dbSDimitry Andric enum TargetTransformInfo::TargetCostKind CostKind = 72625ffd83dbSDimitry Andric TargetTransformInfo::TCK_RecipThroughput; 72630b57cec5SDimitry Andric for (const auto &Inst : InstsToBePromoted) { 72640b57cec5SDimitry Andric // Compute the cost. 72650b57cec5SDimitry Andric // By construction, all instructions being promoted are arithmetic ones. 72660b57cec5SDimitry Andric // Moreover, one argument is a constant that can be viewed as a splat 72670b57cec5SDimitry Andric // constant. 72680b57cec5SDimitry Andric Value *Arg0 = Inst->getOperand(0); 72690b57cec5SDimitry Andric bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) || 72700b57cec5SDimitry Andric isa<ConstantFP>(Arg0); 72710b57cec5SDimitry Andric TargetTransformInfo::OperandValueKind Arg0OVK = 72720b57cec5SDimitry Andric IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue 72730b57cec5SDimitry Andric : TargetTransformInfo::OK_AnyValue; 72740b57cec5SDimitry Andric TargetTransformInfo::OperandValueKind Arg1OVK = 72750b57cec5SDimitry Andric !IsArg0Constant ? 
TargetTransformInfo::OK_UniformConstantValue 72760b57cec5SDimitry Andric : TargetTransformInfo::OK_AnyValue; 72770b57cec5SDimitry Andric ScalarCost += TTI.getArithmeticInstrCost( 72785ffd83dbSDimitry Andric Inst->getOpcode(), Inst->getType(), CostKind, Arg0OVK, Arg1OVK); 72790b57cec5SDimitry Andric VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, 72805ffd83dbSDimitry Andric CostKind, 72810b57cec5SDimitry Andric Arg0OVK, Arg1OVK); 72820b57cec5SDimitry Andric } 72830b57cec5SDimitry Andric LLVM_DEBUG( 72840b57cec5SDimitry Andric dbgs() << "Estimated cost of computation to be promoted:\nScalar: " 72850b57cec5SDimitry Andric << ScalarCost << "\nVector: " << VectorCost << '\n'); 72860b57cec5SDimitry Andric return ScalarCost > VectorCost; 72870b57cec5SDimitry Andric } 72880b57cec5SDimitry Andric 72890b57cec5SDimitry Andric /// Generate a constant vector with \p Val with the same 72900b57cec5SDimitry Andric /// number of elements as the transition. 72910b57cec5SDimitry Andric /// \p UseSplat defines whether or not \p Val should be replicated 72920b57cec5SDimitry Andric /// across the whole vector. 72930b57cec5SDimitry Andric /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>, 72940b57cec5SDimitry Andric /// otherwise we generate a vector with as many undef as possible: 72950b57cec5SDimitry Andric /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only 72960b57cec5SDimitry Andric /// used at the index of the extract. 72970b57cec5SDimitry Andric Value *getConstantVector(Constant *Val, bool UseSplat) const { 72980b57cec5SDimitry Andric unsigned ExtractIdx = std::numeric_limits<unsigned>::max(); 72990b57cec5SDimitry Andric if (!UseSplat) { 73000b57cec5SDimitry Andric // If we cannot determine where the constant must be, we have to 73010b57cec5SDimitry Andric // use a splat constant. 
73020b57cec5SDimitry Andric Value *ValExtractIdx = Transition->getOperand(getTransitionIdx()); 73030b57cec5SDimitry Andric if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx)) 73040b57cec5SDimitry Andric ExtractIdx = CstVal->getSExtValue(); 73050b57cec5SDimitry Andric else 73060b57cec5SDimitry Andric UseSplat = true; 73070b57cec5SDimitry Andric } 73080b57cec5SDimitry Andric 73095ffd83dbSDimitry Andric ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount(); 73100b57cec5SDimitry Andric if (UseSplat) 73115ffd83dbSDimitry Andric return ConstantVector::getSplat(EC, Val); 73120b57cec5SDimitry Andric 7313e8d8bef9SDimitry Andric if (!EC.isScalable()) { 73140b57cec5SDimitry Andric SmallVector<Constant *, 4> ConstVec; 73150b57cec5SDimitry Andric UndefValue *UndefVal = UndefValue::get(Val->getType()); 7316e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) { 73170b57cec5SDimitry Andric if (Idx == ExtractIdx) 73180b57cec5SDimitry Andric ConstVec.push_back(Val); 73190b57cec5SDimitry Andric else 73200b57cec5SDimitry Andric ConstVec.push_back(UndefVal); 73210b57cec5SDimitry Andric } 73220b57cec5SDimitry Andric return ConstantVector::get(ConstVec); 73235ffd83dbSDimitry Andric } else 73245ffd83dbSDimitry Andric llvm_unreachable( 73255ffd83dbSDimitry Andric "Generate scalable vector for non-splat is unimplemented"); 73260b57cec5SDimitry Andric } 73270b57cec5SDimitry Andric 73280b57cec5SDimitry Andric /// Check if promoting to a vector type an operand at \p OperandIdx 73290b57cec5SDimitry Andric /// in \p Use can trigger undefined behavior. 73300b57cec5SDimitry Andric static bool canCauseUndefinedBehavior(const Instruction *Use, 73310b57cec5SDimitry Andric unsigned OperandIdx) { 73320b57cec5SDimitry Andric // This is not safe to introduce undef when the operand is on 73330b57cec5SDimitry Andric // the right hand side of a division-like instruction. 
73340b57cec5SDimitry Andric if (OperandIdx != 1) 73350b57cec5SDimitry Andric return false; 73360b57cec5SDimitry Andric switch (Use->getOpcode()) { 73370b57cec5SDimitry Andric default: 73380b57cec5SDimitry Andric return false; 73390b57cec5SDimitry Andric case Instruction::SDiv: 73400b57cec5SDimitry Andric case Instruction::UDiv: 73410b57cec5SDimitry Andric case Instruction::SRem: 73420b57cec5SDimitry Andric case Instruction::URem: 73430b57cec5SDimitry Andric return true; 73440b57cec5SDimitry Andric case Instruction::FDiv: 73450b57cec5SDimitry Andric case Instruction::FRem: 73460b57cec5SDimitry Andric return !Use->hasNoNaNs(); 73470b57cec5SDimitry Andric } 73480b57cec5SDimitry Andric llvm_unreachable(nullptr); 73490b57cec5SDimitry Andric } 73500b57cec5SDimitry Andric 73510b57cec5SDimitry Andric public: 73520b57cec5SDimitry Andric VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI, 73530b57cec5SDimitry Andric const TargetTransformInfo &TTI, Instruction *Transition, 73540b57cec5SDimitry Andric unsigned CombineCost) 73550b57cec5SDimitry Andric : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition), 73560b57cec5SDimitry Andric StoreExtractCombineCost(CombineCost) { 73570b57cec5SDimitry Andric assert(Transition && "Do not know how to promote null"); 73580b57cec5SDimitry Andric } 73590b57cec5SDimitry Andric 73600b57cec5SDimitry Andric /// Check if we can promote \p ToBePromoted to \p Type. 73610b57cec5SDimitry Andric bool canPromote(const Instruction *ToBePromoted) const { 73620b57cec5SDimitry Andric // We could support CastInst too. 73630b57cec5SDimitry Andric return isa<BinaryOperator>(ToBePromoted); 73640b57cec5SDimitry Andric } 73650b57cec5SDimitry Andric 73660b57cec5SDimitry Andric /// Check if it is profitable to promote \p ToBePromoted 73670b57cec5SDimitry Andric /// by moving downward the transition through. 
73680b57cec5SDimitry Andric bool shouldPromote(const Instruction *ToBePromoted) const { 73690b57cec5SDimitry Andric // Promote only if all the operands can be statically expanded. 73700b57cec5SDimitry Andric // Indeed, we do not want to introduce any new kind of transitions. 73710b57cec5SDimitry Andric for (const Use &U : ToBePromoted->operands()) { 73720b57cec5SDimitry Andric const Value *Val = U.get(); 73730b57cec5SDimitry Andric if (Val == getEndOfTransition()) { 73740b57cec5SDimitry Andric // If the use is a division and the transition is on the rhs, 73750b57cec5SDimitry Andric // we cannot promote the operation, otherwise we may create a 73760b57cec5SDimitry Andric // division by zero. 73770b57cec5SDimitry Andric if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())) 73780b57cec5SDimitry Andric return false; 73790b57cec5SDimitry Andric continue; 73800b57cec5SDimitry Andric } 73810b57cec5SDimitry Andric if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) && 73820b57cec5SDimitry Andric !isa<ConstantFP>(Val)) 73830b57cec5SDimitry Andric return false; 73840b57cec5SDimitry Andric } 73850b57cec5SDimitry Andric // Check that the resulting operation is legal. 73860b57cec5SDimitry Andric int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode()); 73870b57cec5SDimitry Andric if (!ISDOpcode) 73880b57cec5SDimitry Andric return false; 73890b57cec5SDimitry Andric return StressStoreExtract || 73900b57cec5SDimitry Andric TLI.isOperationLegalOrCustom( 73910b57cec5SDimitry Andric ISDOpcode, TLI.getValueType(DL, getTransitionType(), true)); 73920b57cec5SDimitry Andric } 73930b57cec5SDimitry Andric 73940b57cec5SDimitry Andric /// Check whether or not \p Use can be combined 73950b57cec5SDimitry Andric /// with the transition. 73960b57cec5SDimitry Andric /// I.e., is it possible to do Use(Transition) => AnotherUse? 
73970b57cec5SDimitry Andric bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); } 73980b57cec5SDimitry Andric 73990b57cec5SDimitry Andric /// Record \p ToBePromoted as part of the chain to be promoted. 74000b57cec5SDimitry Andric void enqueueForPromotion(Instruction *ToBePromoted) { 74010b57cec5SDimitry Andric InstsToBePromoted.push_back(ToBePromoted); 74020b57cec5SDimitry Andric } 74030b57cec5SDimitry Andric 74040b57cec5SDimitry Andric /// Set the instruction that will be combined with the transition. 74050b57cec5SDimitry Andric void recordCombineInstruction(Instruction *ToBeCombined) { 74060b57cec5SDimitry Andric assert(canCombine(ToBeCombined) && "Unsupported instruction to combine"); 74070b57cec5SDimitry Andric CombineInst = ToBeCombined; 74080b57cec5SDimitry Andric } 74090b57cec5SDimitry Andric 74100b57cec5SDimitry Andric /// Promote all the instructions enqueued for promotion if it is 74110b57cec5SDimitry Andric /// is profitable. 74120b57cec5SDimitry Andric /// \return True if the promotion happened, false otherwise. 74130b57cec5SDimitry Andric bool promote() { 74140b57cec5SDimitry Andric // Check if there is something to promote. 74150b57cec5SDimitry Andric // Right now, if we do not have anything to combine with, 74160b57cec5SDimitry Andric // we assume the promotion is not profitable. 74170b57cec5SDimitry Andric if (InstsToBePromoted.empty() || !CombineInst) 74180b57cec5SDimitry Andric return false; 74190b57cec5SDimitry Andric 74200b57cec5SDimitry Andric // Check cost. 74210b57cec5SDimitry Andric if (!StressStoreExtract && !isProfitableToPromote()) 74220b57cec5SDimitry Andric return false; 74230b57cec5SDimitry Andric 74240b57cec5SDimitry Andric // Promote. 
74250b57cec5SDimitry Andric for (auto &ToBePromoted : InstsToBePromoted) 74260b57cec5SDimitry Andric promoteImpl(ToBePromoted); 74270b57cec5SDimitry Andric InstsToBePromoted.clear(); 74280b57cec5SDimitry Andric return true; 74290b57cec5SDimitry Andric } 74300b57cec5SDimitry Andric }; 74310b57cec5SDimitry Andric 74320b57cec5SDimitry Andric } // end anonymous namespace 74330b57cec5SDimitry Andric 74340b57cec5SDimitry Andric void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { 74350b57cec5SDimitry Andric // At this point, we know that all the operands of ToBePromoted but Def 74360b57cec5SDimitry Andric // can be statically promoted. 74370b57cec5SDimitry Andric // For Def, we need to use its parameter in ToBePromoted: 74380b57cec5SDimitry Andric // b = ToBePromoted ty1 a 74390b57cec5SDimitry Andric // Def = Transition ty1 b to ty2 74400b57cec5SDimitry Andric // Move the transition down. 74410b57cec5SDimitry Andric // 1. Replace all uses of the promoted operation by the transition. 74420b57cec5SDimitry Andric // = ... b => = ... Def. 74430b57cec5SDimitry Andric assert(ToBePromoted->getType() == Transition->getType() && 74440b57cec5SDimitry Andric "The type of the result of the transition does not match " 74450b57cec5SDimitry Andric "the final type"); 74460b57cec5SDimitry Andric ToBePromoted->replaceAllUsesWith(Transition); 74470b57cec5SDimitry Andric // 2. Update the type of the uses. 74480b57cec5SDimitry Andric // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def. 74490b57cec5SDimitry Andric Type *TransitionTy = getTransitionType(); 74500b57cec5SDimitry Andric ToBePromoted->mutateType(TransitionTy); 74510b57cec5SDimitry Andric // 3. Update all the operands of the promoted operation with promoted 74520b57cec5SDimitry Andric // operands. 74530b57cec5SDimitry Andric // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a. 
74540b57cec5SDimitry Andric for (Use &U : ToBePromoted->operands()) { 74550b57cec5SDimitry Andric Value *Val = U.get(); 74560b57cec5SDimitry Andric Value *NewVal = nullptr; 74570b57cec5SDimitry Andric if (Val == Transition) 74580b57cec5SDimitry Andric NewVal = Transition->getOperand(getTransitionOriginalValueIdx()); 74590b57cec5SDimitry Andric else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) || 74600b57cec5SDimitry Andric isa<ConstantFP>(Val)) { 74610b57cec5SDimitry Andric // Use a splat constant if it is not safe to use undef. 74620b57cec5SDimitry Andric NewVal = getConstantVector( 74630b57cec5SDimitry Andric cast<Constant>(Val), 74640b57cec5SDimitry Andric isa<UndefValue>(Val) || 74650b57cec5SDimitry Andric canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())); 74660b57cec5SDimitry Andric } else 74670b57cec5SDimitry Andric llvm_unreachable("Did you modified shouldPromote and forgot to update " 74680b57cec5SDimitry Andric "this?"); 74690b57cec5SDimitry Andric ToBePromoted->setOperand(U.getOperandNo(), NewVal); 74700b57cec5SDimitry Andric } 74710b57cec5SDimitry Andric Transition->moveAfter(ToBePromoted); 74720b57cec5SDimitry Andric Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); 74730b57cec5SDimitry Andric } 74740b57cec5SDimitry Andric 74750b57cec5SDimitry Andric /// Some targets can do store(extractelement) with one instruction. 74760b57cec5SDimitry Andric /// Try to push the extractelement towards the stores when the target 74770b57cec5SDimitry Andric /// has this feature and this is profitable. 
74780b57cec5SDimitry Andric bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { 74790b57cec5SDimitry Andric unsigned CombineCost = std::numeric_limits<unsigned>::max(); 74805ffd83dbSDimitry Andric if (DisableStoreExtract || 74810b57cec5SDimitry Andric (!StressStoreExtract && 74820b57cec5SDimitry Andric !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(), 74830b57cec5SDimitry Andric Inst->getOperand(1), CombineCost))) 74840b57cec5SDimitry Andric return false; 74850b57cec5SDimitry Andric 74860b57cec5SDimitry Andric // At this point we know that Inst is a vector to scalar transition. 74870b57cec5SDimitry Andric // Try to move it down the def-use chain, until: 74880b57cec5SDimitry Andric // - We can combine the transition with its single use 74890b57cec5SDimitry Andric // => we got rid of the transition. 74900b57cec5SDimitry Andric // - We escape the current basic block 74910b57cec5SDimitry Andric // => we would need to check that we are moving it at a cheaper place and 74920b57cec5SDimitry Andric // we do not do that for now. 74930b57cec5SDimitry Andric BasicBlock *Parent = Inst->getParent(); 74940b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); 74950b57cec5SDimitry Andric VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost); 74960b57cec5SDimitry Andric // If the transition has more than one use, assume this is not going to be 74970b57cec5SDimitry Andric // beneficial. 
74980b57cec5SDimitry Andric while (Inst->hasOneUse()) { 74990b57cec5SDimitry Andric Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin()); 75000b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n'); 75010b57cec5SDimitry Andric 75020b57cec5SDimitry Andric if (ToBePromoted->getParent() != Parent) { 75030b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block (" 75040b57cec5SDimitry Andric << ToBePromoted->getParent()->getName() 75050b57cec5SDimitry Andric << ") than the transition (" << Parent->getName() 75060b57cec5SDimitry Andric << ").\n"); 75070b57cec5SDimitry Andric return false; 75080b57cec5SDimitry Andric } 75090b57cec5SDimitry Andric 75100b57cec5SDimitry Andric if (VPH.canCombine(ToBePromoted)) { 75110b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n' 75120b57cec5SDimitry Andric << "will be combined with: " << *ToBePromoted << '\n'); 75130b57cec5SDimitry Andric VPH.recordCombineInstruction(ToBePromoted); 75140b57cec5SDimitry Andric bool Changed = VPH.promote(); 75150b57cec5SDimitry Andric NumStoreExtractExposed += Changed; 75160b57cec5SDimitry Andric return Changed; 75170b57cec5SDimitry Andric } 75180b57cec5SDimitry Andric 75190b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Try promoting.\n"); 75200b57cec5SDimitry Andric if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted)) 75210b57cec5SDimitry Andric return false; 75220b57cec5SDimitry Andric 75230b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Promoting is possible... 
Enqueue for promotion!\n"); 75240b57cec5SDimitry Andric 75250b57cec5SDimitry Andric VPH.enqueueForPromotion(ToBePromoted); 75260b57cec5SDimitry Andric Inst = ToBePromoted; 75270b57cec5SDimitry Andric } 75280b57cec5SDimitry Andric return false; 75290b57cec5SDimitry Andric } 75300b57cec5SDimitry Andric 75310b57cec5SDimitry Andric /// For the instruction sequence of store below, F and I values 75320b57cec5SDimitry Andric /// are bundled together as an i64 value before being stored into memory. 75330b57cec5SDimitry Andric /// Sometimes it is more efficient to generate separate stores for F and I, 75340b57cec5SDimitry Andric /// which can remove the bitwise instructions or sink them to colder places. 75350b57cec5SDimitry Andric /// 75360b57cec5SDimitry Andric /// (store (or (zext (bitcast F to i32) to i64), 75370b57cec5SDimitry Andric /// (shl (zext I to i64), 32)), addr) --> 75380b57cec5SDimitry Andric /// (store F, addr) and (store I, addr+4) 75390b57cec5SDimitry Andric /// 75400b57cec5SDimitry Andric /// Similarly, splitting for other merged store can also be beneficial, like: 75410b57cec5SDimitry Andric /// For pair of {i32, i32}, i64 store --> two i32 stores. 75420b57cec5SDimitry Andric /// For pair of {i32, i16}, i64 store --> two i32 stores. 75430b57cec5SDimitry Andric /// For pair of {i16, i16}, i32 store --> two i16 stores. 75440b57cec5SDimitry Andric /// For pair of {i16, i8}, i32 store --> two i16 stores. 75450b57cec5SDimitry Andric /// For pair of {i8, i8}, i16 store --> two i8 stores. 75460b57cec5SDimitry Andric /// 75470b57cec5SDimitry Andric /// We allow each target to determine specifically which kind of splitting is 75480b57cec5SDimitry Andric /// supported. 75490b57cec5SDimitry Andric /// 75500b57cec5SDimitry Andric /// The store patterns are commonly seen from the simple code snippet below 75510b57cec5SDimitry Andric /// if only std::make_pair(...) is sroa transformed before inlined into hoo. 
75520b57cec5SDimitry Andric /// void goo(const std::pair<int, float> &); 75530b57cec5SDimitry Andric /// hoo() { 75540b57cec5SDimitry Andric /// ... 75550b57cec5SDimitry Andric /// goo(std::make_pair(tmp, ftmp)); 75560b57cec5SDimitry Andric /// ... 75570b57cec5SDimitry Andric /// } 75580b57cec5SDimitry Andric /// 75590b57cec5SDimitry Andric /// Although we already have similar splitting in DAG Combine, we duplicate 75600b57cec5SDimitry Andric /// it in CodeGenPrepare to catch the case in which pattern is across 75610b57cec5SDimitry Andric /// multiple BBs. The logic in DAG Combine is kept to catch case generated 75620b57cec5SDimitry Andric /// during code expansion. 75630b57cec5SDimitry Andric static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, 75640b57cec5SDimitry Andric const TargetLowering &TLI) { 75650b57cec5SDimitry Andric // Handle simple but common cases only. 75660b57cec5SDimitry Andric Type *StoreType = SI.getValueOperand()->getType(); 75675ffd83dbSDimitry Andric 75685ffd83dbSDimitry Andric // The code below assumes shifting a value by <number of bits>, 75695ffd83dbSDimitry Andric // whereas scalable vectors would have to be shifted by 75705ffd83dbSDimitry Andric // <2log(vscale) + number of bits> in order to store the 75715ffd83dbSDimitry Andric // low/high parts. Bailing out for now. 
75725ffd83dbSDimitry Andric if (isa<ScalableVectorType>(StoreType)) 75735ffd83dbSDimitry Andric return false; 75745ffd83dbSDimitry Andric 75750b57cec5SDimitry Andric if (!DL.typeSizeEqualsStoreSize(StoreType) || 75760b57cec5SDimitry Andric DL.getTypeSizeInBits(StoreType) == 0) 75770b57cec5SDimitry Andric return false; 75780b57cec5SDimitry Andric 75790b57cec5SDimitry Andric unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2; 75800b57cec5SDimitry Andric Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize); 75810b57cec5SDimitry Andric if (!DL.typeSizeEqualsStoreSize(SplitStoreType)) 75820b57cec5SDimitry Andric return false; 75830b57cec5SDimitry Andric 75840b57cec5SDimitry Andric // Don't split the store if it is volatile. 75850b57cec5SDimitry Andric if (SI.isVolatile()) 75860b57cec5SDimitry Andric return false; 75870b57cec5SDimitry Andric 75880b57cec5SDimitry Andric // Match the following patterns: 75890b57cec5SDimitry Andric // (store (or (zext LValue to i64), 75900b57cec5SDimitry Andric // (shl (zext HValue to i64), 32)), HalfValBitSize) 75910b57cec5SDimitry Andric // or 75920b57cec5SDimitry Andric // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize) 75930b57cec5SDimitry Andric // (zext LValue to i64), 75940b57cec5SDimitry Andric // Expect both operands of OR and the first operand of SHL have only 75950b57cec5SDimitry Andric // one use. 75960b57cec5SDimitry Andric Value *LValue, *HValue; 75970b57cec5SDimitry Andric if (!match(SI.getValueOperand(), 75980b57cec5SDimitry Andric m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))), 75990b57cec5SDimitry Andric m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))), 76000b57cec5SDimitry Andric m_SpecificInt(HalfValBitSize)))))) 76010b57cec5SDimitry Andric return false; 76020b57cec5SDimitry Andric 76030b57cec5SDimitry Andric // Check LValue and HValue are int with size less or equal than 32. 
76040b57cec5SDimitry Andric if (!LValue->getType()->isIntegerTy() || 76050b57cec5SDimitry Andric DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize || 76060b57cec5SDimitry Andric !HValue->getType()->isIntegerTy() || 76070b57cec5SDimitry Andric DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize) 76080b57cec5SDimitry Andric return false; 76090b57cec5SDimitry Andric 76100b57cec5SDimitry Andric // If LValue/HValue is a bitcast instruction, use the EVT before bitcast 76110b57cec5SDimitry Andric // as the input of target query. 76120b57cec5SDimitry Andric auto *LBC = dyn_cast<BitCastInst>(LValue); 76130b57cec5SDimitry Andric auto *HBC = dyn_cast<BitCastInst>(HValue); 76140b57cec5SDimitry Andric EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType()) 76150b57cec5SDimitry Andric : EVT::getEVT(LValue->getType()); 76160b57cec5SDimitry Andric EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType()) 76170b57cec5SDimitry Andric : EVT::getEVT(HValue->getType()); 76180b57cec5SDimitry Andric if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) 76190b57cec5SDimitry Andric return false; 76200b57cec5SDimitry Andric 76210b57cec5SDimitry Andric // Start to split store. 76220b57cec5SDimitry Andric IRBuilder<> Builder(SI.getContext()); 76230b57cec5SDimitry Andric Builder.SetInsertPoint(&SI); 76240b57cec5SDimitry Andric 76250b57cec5SDimitry Andric // If LValue/HValue is a bitcast in another BB, create a new one in current 76260b57cec5SDimitry Andric // BB so it may be merged with the splitted stores by dag combiner. 
76270b57cec5SDimitry Andric if (LBC && LBC->getParent() != SI.getParent()) 76280b57cec5SDimitry Andric LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType()); 76290b57cec5SDimitry Andric if (HBC && HBC->getParent() != SI.getParent()) 76300b57cec5SDimitry Andric HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType()); 76310b57cec5SDimitry Andric 76320b57cec5SDimitry Andric bool IsLE = SI.getModule()->getDataLayout().isLittleEndian(); 76330b57cec5SDimitry Andric auto CreateSplitStore = [&](Value *V, bool Upper) { 76340b57cec5SDimitry Andric V = Builder.CreateZExtOrBitCast(V, SplitStoreType); 76350b57cec5SDimitry Andric Value *Addr = Builder.CreateBitCast( 76360b57cec5SDimitry Andric SI.getOperand(1), 76370b57cec5SDimitry Andric SplitStoreType->getPointerTo(SI.getPointerAddressSpace())); 76385ffd83dbSDimitry Andric Align Alignment = SI.getAlign(); 763913138422SDimitry Andric const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper); 76405ffd83dbSDimitry Andric if (IsOffsetStore) { 76410b57cec5SDimitry Andric Addr = Builder.CreateGEP( 76420b57cec5SDimitry Andric SplitStoreType, Addr, 76430b57cec5SDimitry Andric ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1)); 76445ffd83dbSDimitry Andric 764513138422SDimitry Andric // When splitting the store in half, naturally one half will retain the 764613138422SDimitry Andric // alignment of the original wider store, regardless of whether it was 764713138422SDimitry Andric // over-aligned or not, while the other will require adjustment. 764813138422SDimitry Andric Alignment = commonAlignment(Alignment, HalfValBitSize / 8); 764913138422SDimitry Andric } 76505ffd83dbSDimitry Andric Builder.CreateAlignedStore(V, Addr, Alignment); 76510b57cec5SDimitry Andric }; 76520b57cec5SDimitry Andric 76530b57cec5SDimitry Andric CreateSplitStore(LValue, false); 76540b57cec5SDimitry Andric CreateSplitStore(HValue, true); 76550b57cec5SDimitry Andric 76560b57cec5SDimitry Andric // Delete the old store. 
76570b57cec5SDimitry Andric SI.eraseFromParent(); 76580b57cec5SDimitry Andric return true; 76590b57cec5SDimitry Andric } 76600b57cec5SDimitry Andric 76610b57cec5SDimitry Andric // Return true if the GEP has two operands, the first operand is of a sequential 76620b57cec5SDimitry Andric // type, and the second operand is a constant. 76630b57cec5SDimitry Andric static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) { 76640b57cec5SDimitry Andric gep_type_iterator I = gep_type_begin(*GEP); 76650b57cec5SDimitry Andric return GEP->getNumOperands() == 2 && 76660b57cec5SDimitry Andric I.isSequential() && 76670b57cec5SDimitry Andric isa<ConstantInt>(GEP->getOperand(1)); 76680b57cec5SDimitry Andric } 76690b57cec5SDimitry Andric 76700b57cec5SDimitry Andric // Try unmerging GEPs to reduce liveness interference (register pressure) across 76710b57cec5SDimitry Andric // IndirectBr edges. Since IndirectBr edges tend to touch on many blocks, 76720b57cec5SDimitry Andric // reducing liveness interference across those edges benefits global register 76730b57cec5SDimitry Andric // allocation. Currently handles only certain cases. 76740b57cec5SDimitry Andric // 76750b57cec5SDimitry Andric // For example, unmerge %GEPI and %UGEPI as below. 76760b57cec5SDimitry Andric // 76770b57cec5SDimitry Andric // ---------- BEFORE ---------- 76780b57cec5SDimitry Andric // SrcBlock: 76790b57cec5SDimitry Andric // ... 76800b57cec5SDimitry Andric // %GEPIOp = ... 76810b57cec5SDimitry Andric // ... 76820b57cec5SDimitry Andric // %GEPI = gep %GEPIOp, Idx 76830b57cec5SDimitry Andric // ... 76840b57cec5SDimitry Andric // indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ] 76850b57cec5SDimitry Andric // (* %GEPI is alive on the indirectbr edges due to other uses ahead) 76860b57cec5SDimitry Andric // (* %GEPIOp is alive on the indirectbr edges only because of it's used by 76870b57cec5SDimitry Andric // %UGEPI) 76880b57cec5SDimitry Andric // 76890b57cec5SDimitry Andric // DstB0: ... 
//           (there may be a gep similar to %UGEPI to be unmerged)
// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
// ...
//
// DstBi:
//   ...
//   %UGEPI = gep %GEPIOp, UIdx
// ...
// ---------------------------
//
// ---------- AFTER ----------
// SrcBlock:
//   ... (same as above)
//    (* %GEPI is still alive on the indirectbr edges)
//    (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
//    unmerging)
// ...
//
// DstBi:
//   ...
//   %UGEPI = gep %GEPI, (UIdx-Idx)
//   ...
// ---------------------------
//
// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
// no longer alive on them.
//
// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
// not to disable further simplifications and optimizations as a result of GEP
// merging.
77200b57cec5SDimitry Andric // 77210b57cec5SDimitry Andric // Note this unmerging may increase the length of the data flow critical path 77220b57cec5SDimitry Andric // (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff 77230b57cec5SDimitry Andric // between the register pressure and the length of data-flow critical 77240b57cec5SDimitry Andric // path. Restricting this to the uncommon IndirectBr case would minimize the 77250b57cec5SDimitry Andric // impact of potentially longer critical path, if any, and the impact on compile 77260b57cec5SDimitry Andric // time. 77270b57cec5SDimitry Andric static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, 77280b57cec5SDimitry Andric const TargetTransformInfo *TTI) { 77290b57cec5SDimitry Andric BasicBlock *SrcBlock = GEPI->getParent(); 77300b57cec5SDimitry Andric // Check that SrcBlock ends with an IndirectBr. If not, give up. The common 77310b57cec5SDimitry Andric // (non-IndirectBr) cases exit early here. 77320b57cec5SDimitry Andric if (!isa<IndirectBrInst>(SrcBlock->getTerminator())) 77330b57cec5SDimitry Andric return false; 77340b57cec5SDimitry Andric // Check that GEPI is a simple gep with a single constant index. 77350b57cec5SDimitry Andric if (!GEPSequentialConstIndexed(GEPI)) 77360b57cec5SDimitry Andric return false; 77370b57cec5SDimitry Andric ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1)); 77380b57cec5SDimitry Andric // Check that GEPI is a cheap one. 77395ffd83dbSDimitry Andric if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(), 77405ffd83dbSDimitry Andric TargetTransformInfo::TCK_SizeAndLatency) 77410b57cec5SDimitry Andric > TargetTransformInfo::TCC_Basic) 77420b57cec5SDimitry Andric return false; 77430b57cec5SDimitry Andric Value *GEPIOp = GEPI->getOperand(0); 77440b57cec5SDimitry Andric // Check that GEPIOp is an instruction that's also defined in SrcBlock. 
77450b57cec5SDimitry Andric if (!isa<Instruction>(GEPIOp)) 77460b57cec5SDimitry Andric return false; 77470b57cec5SDimitry Andric auto *GEPIOpI = cast<Instruction>(GEPIOp); 77480b57cec5SDimitry Andric if (GEPIOpI->getParent() != SrcBlock) 77490b57cec5SDimitry Andric return false; 77500b57cec5SDimitry Andric // Check that GEP is used outside the block, meaning it's alive on the 77510b57cec5SDimitry Andric // IndirectBr edge(s). 77520b57cec5SDimitry Andric if (find_if(GEPI->users(), [&](User *Usr) { 77530b57cec5SDimitry Andric if (auto *I = dyn_cast<Instruction>(Usr)) { 77540b57cec5SDimitry Andric if (I->getParent() != SrcBlock) { 77550b57cec5SDimitry Andric return true; 77560b57cec5SDimitry Andric } 77570b57cec5SDimitry Andric } 77580b57cec5SDimitry Andric return false; 77590b57cec5SDimitry Andric }) == GEPI->users().end()) 77600b57cec5SDimitry Andric return false; 77610b57cec5SDimitry Andric // The second elements of the GEP chains to be unmerged. 77620b57cec5SDimitry Andric std::vector<GetElementPtrInst *> UGEPIs; 77630b57cec5SDimitry Andric // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive 77640b57cec5SDimitry Andric // on IndirectBr edges. 77650b57cec5SDimitry Andric for (User *Usr : GEPIOp->users()) { 77660b57cec5SDimitry Andric if (Usr == GEPI) continue; 77670b57cec5SDimitry Andric // Check if Usr is an Instruction. If not, give up. 77680b57cec5SDimitry Andric if (!isa<Instruction>(Usr)) 77690b57cec5SDimitry Andric return false; 77700b57cec5SDimitry Andric auto *UI = cast<Instruction>(Usr); 77710b57cec5SDimitry Andric // Check if Usr in the same block as GEPIOp, which is fine, skip. 77720b57cec5SDimitry Andric if (UI->getParent() == SrcBlock) 77730b57cec5SDimitry Andric continue; 77740b57cec5SDimitry Andric // Check if Usr is a GEP. If not, give up. 
77750b57cec5SDimitry Andric if (!isa<GetElementPtrInst>(Usr)) 77760b57cec5SDimitry Andric return false; 77770b57cec5SDimitry Andric auto *UGEPI = cast<GetElementPtrInst>(Usr); 77780b57cec5SDimitry Andric // Check if UGEPI is a simple gep with a single constant index and GEPIOp is 77790b57cec5SDimitry Andric // the pointer operand to it. If so, record it in the vector. If not, give 77800b57cec5SDimitry Andric // up. 77810b57cec5SDimitry Andric if (!GEPSequentialConstIndexed(UGEPI)) 77820b57cec5SDimitry Andric return false; 77830b57cec5SDimitry Andric if (UGEPI->getOperand(0) != GEPIOp) 77840b57cec5SDimitry Andric return false; 77850b57cec5SDimitry Andric if (GEPIIdx->getType() != 77860b57cec5SDimitry Andric cast<ConstantInt>(UGEPI->getOperand(1))->getType()) 77870b57cec5SDimitry Andric return false; 77880b57cec5SDimitry Andric ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); 77895ffd83dbSDimitry Andric if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(), 77905ffd83dbSDimitry Andric TargetTransformInfo::TCK_SizeAndLatency) 77910b57cec5SDimitry Andric > TargetTransformInfo::TCC_Basic) 77920b57cec5SDimitry Andric return false; 77930b57cec5SDimitry Andric UGEPIs.push_back(UGEPI); 77940b57cec5SDimitry Andric } 77950b57cec5SDimitry Andric if (UGEPIs.size() == 0) 77960b57cec5SDimitry Andric return false; 77970b57cec5SDimitry Andric // Check the materializing cost of (Uidx-Idx). 
77980b57cec5SDimitry Andric for (GetElementPtrInst *UGEPI : UGEPIs) { 77990b57cec5SDimitry Andric ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); 78000b57cec5SDimitry Andric APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue(); 7801fe6060f1SDimitry Andric InstructionCost ImmCost = TTI->getIntImmCost( 7802fe6060f1SDimitry Andric NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency); 78030b57cec5SDimitry Andric if (ImmCost > TargetTransformInfo::TCC_Basic) 78040b57cec5SDimitry Andric return false; 78050b57cec5SDimitry Andric } 78060b57cec5SDimitry Andric // Now unmerge between GEPI and UGEPIs. 78070b57cec5SDimitry Andric for (GetElementPtrInst *UGEPI : UGEPIs) { 78080b57cec5SDimitry Andric UGEPI->setOperand(0, GEPI); 78090b57cec5SDimitry Andric ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); 78100b57cec5SDimitry Andric Constant *NewUGEPIIdx = 78110b57cec5SDimitry Andric ConstantInt::get(GEPIIdx->getType(), 78120b57cec5SDimitry Andric UGEPIIdx->getValue() - GEPIIdx->getValue()); 78130b57cec5SDimitry Andric UGEPI->setOperand(1, NewUGEPIIdx); 78140b57cec5SDimitry Andric // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not 78150b57cec5SDimitry Andric // inbounds to avoid UB. 78160b57cec5SDimitry Andric if (!GEPI->isInBounds()) { 78170b57cec5SDimitry Andric UGEPI->setIsInBounds(false); 78180b57cec5SDimitry Andric } 78190b57cec5SDimitry Andric } 78200b57cec5SDimitry Andric // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not 78210b57cec5SDimitry Andric // alive on IndirectBr edges). 
assert(llvm::none_of(GEPIOp->users(),
                     [&](User *Usr) {
                       return cast<Instruction>(Usr)->getParent() != SrcBlock;
                     }) &&
       "GEPIOp is used outside SrcBlock");
return true;
}

/// Rewrite the integer compare that feeds a conditional \p Branch into a
/// compare against zero of an already existing shift/add/sub of the same
/// value. Only done when the target asks for it through
/// TLI.preferZeroCompareBranch().
///
/// \param Branch the branch whose condition is inspected.
/// \param TLI    target lowering info, queried for preferZeroCompareBranch().
/// \returns true if the compare was replaced by a compare against zero.
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) {
  // Try and convert
  //  %c = icmp ult %x, 8
  //  br %c, bla, blb
  //  %tc = lshr %x, 3
  // to
  //  %tc = lshr %x, 3
  //  %c = icmp eq %tc, 0
  //  br %c, bla, blb
  // Creating the cmp to zero can be better for the backend, especially if the
  // lshr produces flags that can be used automatically.
  if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
    return false;

  // The condition must be an integer compare against a constant whose only
  // use is this branch.
  ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
  if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
    return false;

  Value *X = Cmp->getOperand(0);
  APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();

  // Look through the other users of X for an instruction the new compare can
  // be based on.
  for (auto *U : X->users()) {
    Instruction *UI = dyn_cast<Instruction>(U);
    // A quick dominance check: skip users that are not instructions, that live
    // outside the branch's block and its two successors, or that live in
    // another block for which getSinglePredecessor() returns null.
    if (!UI ||
        (UI->getParent() != Branch->getParent() &&
         UI->getParent() != Branch->getSuccessor(0) &&
         UI->getParent() != Branch->getSuccessor(1)) ||
        (UI->getParent() != Branch->getParent() &&
         !UI->getParent()->getSinglePredecessor()))
      continue;

    // X u< 2^k  -->  (X >> k) == 0
    if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
        match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
      IRBuilder<> Builder(Branch);
      // The shift may sit in a successor; hoist it in front of the branch so
      // the new compare (inserted before the branch) can use it.
      if (UI->getParent() != Branch->getParent())
        UI->moveBefore(Branch);
      Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
                                        ConstantInt::get(UI->getType(), 0));
      LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
      LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
      Cmp->replaceAllUsesWith(NewCmp);
      return true;
    }
    // X ==/!= C  -->  (X + -C) ==/!= 0   or   (X - C) ==/!= 0
    if (Cmp->isEquality() &&
        (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
         match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
      IRBuilder<> Builder(Branch);
      // Same hoisting as above when the add/sub lives in a successor.
      if (UI->getParent() != Branch->getParent())
        UI->moveBefore(Branch);
      // Keep the original (eq/ne) predicate; only the operands change.
      Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
                                        ConstantInt::get(UI->getType(), 0));
      LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
      LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
      Cmp->replaceAllUsesWith(NewCmp);
      return true;
    }
  }
  return false;
}

/// Run the per-instruction CodeGenPrepare transforms on \p I, dispatching on
/// the kind of instruction. The checks are ordered; most of them return as
/// soon as they have handled their instruction kind.
///
/// \param I          the instruction to optimize. Several paths erase or
///                   replace it, so callers must not rely on it afterwards.
/// \param ModifiedDT forwarded to the helpers that take it (optimizeCmp,
///                   optimizeCallInst and the recursive call below).
/// \returns true if one of the transforms reported a change.
bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
  // Bail out if we inserted the instruction to prevent optimizations from
  // stepping on each other's toes.
  if (InsertedInsts.count(I))
    return false;

  // TODO: Move into the switch on opcode below here.
  if (PHINode *P = dyn_cast<PHINode>(I)) {
    // It is possible for very late stage optimizations (such as SimplifyCFG)
    // to introduce PHI nodes too late to be cleaned up.  If we detect such a
    // trivial PHI, go ahead and zap it here.
    if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
      // Drop the map entry keyed on the PHI before the PHI itself goes away.
      LargeOffsetGEPMap.erase(P);
      P->replaceAllUsesWith(V);
      P->eraseFromParent();
      ++NumPHIsElim;
      return true;
    }
    return false;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    // If the source of the cast is a constant, then this should have
    // already been constant folded.  The only reason NOT to constant fold
    // it is if something (e.g. LSR) was careful to place the constant
    // evaluation in a block other than the one that uses it (e.g. to hoist
    // the address of globals out of a loop).  If this is the case, we don't
    // want to forward-subst the cast.
    if (isa<Constant>(CI->getOperand(0)))
      return false;

    if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
      return true;

    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
      /// Sink a zext or sext into its user blocks if the target type doesn't
      /// fit in one register
      if (TLI->getTypeAction(CI->getContext(),
                             TLI->getValueType(*DL, CI->getType())) ==
          TargetLowering::TypeExpandInteger) {
        return SinkCast(CI);
      } else {
        bool MadeChange = optimizeExt(I);
        // Bitwise '|' (not '||'): optimizeExtUses must run even when
        // optimizeExt already reported a change.
        return MadeChange | optimizeExtUses(I);
      }
    }
    return false;
  }

  // A compare that optimizeCmp does not change falls through to the checks
  // below.
  if (auto *Cmp = dyn_cast<CmpInst>(I))
    if (optimizeCmp(Cmp, ModifiedDT))
      return true;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    // Clear the !invariant.group metadata on the load.
    LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
    bool Modified = optimizeLoadExt(LI);
    unsigned AS = LI->getPointerAddressSpace();
    Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
    return Modified;
  }

  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (splitMergedValStore(*SI, *DL, *TLI))
      return true;
    // Clear the !invariant.group metadata on the store.
    SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
    unsigned AS = SI->getPointerAddressSpace();
    // Operand 1 is passed as the address, operand 0 supplies the accessed type.
    return optimizeMemoryInst(I, SI->getOperand(1),
                              SI->getOperand(0)->getType(), AS);
  }

  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    unsigned AS = RMW->getPointerAddressSpace();
    return optimizeMemoryInst(I, RMW->getPointerOperand(),
                              RMW->getType(), AS);
  }

  if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
    unsigned AS = CmpX->getPointerAddressSpace();
    return optimizeMemoryInst(I, CmpX->getPointerOperand(),
                              CmpX->getCompareOperand()->getType(), AS);
  }

  // Null when I is not a binary operator; both checks below test for that.
  BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);

  if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
      sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
    return true;

  // TODO: Move this into the switch on opcode - it handles shifts already.
  if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
                BinOp->getOpcode() == Instruction::LShr)) {
    // Only right shifts by a constant amount are considered, and only when
    // the target reports an extract-bits instruction.
    ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
    if (CI && TLI->hasExtractBitsInsn())
      if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
        return true;
  }

  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
    if (GEPI->hasAllZeroIndices()) {
      /// The GEP operand must be a pointer, so must its result -> BitCast
      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                        GEPI->getName(), GEPI);
      NC->setDebugLoc(GEPI->getDebugLoc());
      GEPI->replaceAllUsesWith(NC);
      GEPI->eraseFromParent();
      ++NumGEPsElim;
      // Give the replacement cast a chance to be optimized right away; its
      // result is ignored because a change has already been made.
      optimizeInst(NC, ModifiedDT);
      return true;
    }
    if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
      return true;
    }
    return false;
  }

  if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
    // freeze(icmp a, const) -> icmp (freeze a), const
    // This helps generate efficient conditional jumps.
    Instruction *CmpI = nullptr;
    if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
      CmpI = II;
    else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
      // Floating-point compares are only accepted without fast-math flags.
      CmpI = F->getFastMathFlags().none() ? F : nullptr;

    // The compare must be used by this freeze only, since it is rewritten in
    // place.
    if (CmpI && CmpI->hasOneUse()) {
      auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
      bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
                    isa<ConstantPointerNull>(Op0);
      bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
                    isa<ConstantPointerNull>(Op1);
      if (Const0 || Const1) {
        // Exactly one constant operand: freeze the other operand instead. With
        // two constant operands no new freeze is created at all.
        if (!Const0 || !Const1) {
          auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI);
          F->takeName(FI);
          CmpI->setOperand(Const0 ? 1 : 0, F);
        }
        FI->replaceAllUsesWith(CmpI);
        FI->eraseFromParent();
        return true;
      }
    }
    return false;
  }

  if (tryToSinkFreeOperands(I))
    return true;

  // Remaining instruction kinds are dispatched purely on opcode.
  switch (I->getOpcode()) {
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return optimizeShiftInst(cast<BinaryOperator>(I));
  case Instruction::Call:
    return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
  case Instruction::Select:
    return optimizeSelectInst(cast<SelectInst>(I));
  case Instruction::ShuffleVector:
    return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
  case Instruction::Switch:
    return optimizeSwitchInst(cast<SwitchInst>(I));
  case Instruction::ExtractElement:
    return optimizeExtractElementInst(cast<ExtractElementInst>(I));
  case Instruction::Br:
    return optimizeBranch(cast<BranchInst>(I), *TLI);
  }

  return false;
}

/// Given an OR instruction, check to see if this is a bitreverse
/// idiom. If so, insert the new intrinsic and return true.
8062e8d8bef9SDimitry Andric bool CodeGenPrepare::makeBitReverse(Instruction &I) { 80630b57cec5SDimitry Andric if (!I.getType()->isIntegerTy() || 8064e8d8bef9SDimitry Andric !TLI->isOperationLegalOrCustom(ISD::BITREVERSE, 8065e8d8bef9SDimitry Andric TLI->getValueType(*DL, I.getType(), true))) 80660b57cec5SDimitry Andric return false; 80670b57cec5SDimitry Andric 80680b57cec5SDimitry Andric SmallVector<Instruction*, 4> Insts; 80690b57cec5SDimitry Andric if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts)) 80700b57cec5SDimitry Andric return false; 80710b57cec5SDimitry Andric Instruction *LastInst = Insts.back(); 80720b57cec5SDimitry Andric I.replaceAllUsesWith(LastInst); 8073e8d8bef9SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions( 8074e8d8bef9SDimitry Andric &I, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); }); 80750b57cec5SDimitry Andric return true; 80760b57cec5SDimitry Andric } 80770b57cec5SDimitry Andric 80780b57cec5SDimitry Andric // In this pass we look for GEP and cast instructions that are used 80790b57cec5SDimitry Andric // across basic blocks and rewrite them to improve basic-block-at-a-time 80800b57cec5SDimitry Andric // selection. 
80810b57cec5SDimitry Andric bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { 80820b57cec5SDimitry Andric SunkAddrs.clear(); 80830b57cec5SDimitry Andric bool MadeChange = false; 80840b57cec5SDimitry Andric 80850b57cec5SDimitry Andric CurInstIterator = BB.begin(); 80860b57cec5SDimitry Andric while (CurInstIterator != BB.end()) { 80870b57cec5SDimitry Andric MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT); 80880b57cec5SDimitry Andric if (ModifiedDT) 80890b57cec5SDimitry Andric return true; 80900b57cec5SDimitry Andric } 80910b57cec5SDimitry Andric 80920b57cec5SDimitry Andric bool MadeBitReverse = true; 80935ffd83dbSDimitry Andric while (MadeBitReverse) { 80940b57cec5SDimitry Andric MadeBitReverse = false; 80950b57cec5SDimitry Andric for (auto &I : reverse(BB)) { 8096e8d8bef9SDimitry Andric if (makeBitReverse(I)) { 80970b57cec5SDimitry Andric MadeBitReverse = MadeChange = true; 80980b57cec5SDimitry Andric break; 80990b57cec5SDimitry Andric } 81000b57cec5SDimitry Andric } 81010b57cec5SDimitry Andric } 81020b57cec5SDimitry Andric MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT); 81030b57cec5SDimitry Andric 81040b57cec5SDimitry Andric return MadeChange; 81050b57cec5SDimitry Andric } 81060b57cec5SDimitry Andric 8107480093f4SDimitry Andric // Some CGP optimizations may move or alter what's computed in a block. Check 8108480093f4SDimitry Andric // whether a dbg.value intrinsic could be pointed at a more appropriate operand. 8109480093f4SDimitry Andric bool CodeGenPrepare::fixupDbgValue(Instruction *I) { 8110480093f4SDimitry Andric assert(isa<DbgValueInst>(I)); 8111480093f4SDimitry Andric DbgValueInst &DVI = *cast<DbgValueInst>(I); 8112480093f4SDimitry Andric 8113480093f4SDimitry Andric // Does this dbg.value refer to a sunk address calculation? 
8114fe6060f1SDimitry Andric bool AnyChange = false; 8115fe6060f1SDimitry Andric SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(), 8116fe6060f1SDimitry Andric DVI.location_ops().end()); 8117fe6060f1SDimitry Andric for (Value *Location : LocationOps) { 8118480093f4SDimitry Andric WeakTrackingVH SunkAddrVH = SunkAddrs[Location]; 8119480093f4SDimitry Andric Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; 8120480093f4SDimitry Andric if (SunkAddr) { 8121480093f4SDimitry Andric // Point dbg.value at locally computed address, which should give the best 8122fe6060f1SDimitry Andric // opportunity to be accurately lowered. This update may change the type 8123fe6060f1SDimitry Andric // of pointer being referred to; however this makes no difference to 8124fe6060f1SDimitry Andric // debugging information, and we can't generate bitcasts that may affect 8125fe6060f1SDimitry Andric // codegen. 8126fe6060f1SDimitry Andric DVI.replaceVariableLocationOp(Location, SunkAddr); 8127fe6060f1SDimitry Andric AnyChange = true; 8128480093f4SDimitry Andric } 8129fe6060f1SDimitry Andric } 8130fe6060f1SDimitry Andric return AnyChange; 8131480093f4SDimitry Andric } 8132480093f4SDimitry Andric 8133480093f4SDimitry Andric // A llvm.dbg.value may be using a value before its definition, due to 8134480093f4SDimitry Andric // optimizations in this pass and others. Scan for such dbg.values, and rescue 8135480093f4SDimitry Andric // them by moving the dbg.value to immediately after the value definition. 8136480093f4SDimitry Andric // FIXME: Ideally this should never be necessary, and this has the potential 8137480093f4SDimitry Andric // to re-order dbg.value intrinsics. 
81380b57cec5SDimitry Andric bool CodeGenPrepare::placeDbgValues(Function &F) { 81390b57cec5SDimitry Andric bool MadeChange = false; 8140480093f4SDimitry Andric DominatorTree DT(F); 8141480093f4SDimitry Andric 81420b57cec5SDimitry Andric for (BasicBlock &BB : F) { 8143349cc55cSDimitry Andric for (Instruction &Insn : llvm::make_early_inc_range(BB)) { 8144349cc55cSDimitry Andric DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn); 8145480093f4SDimitry Andric if (!DVI) 81460b57cec5SDimitry Andric continue; 81470b57cec5SDimitry Andric 8148fe6060f1SDimitry Andric SmallVector<Instruction *, 4> VIs; 8149fe6060f1SDimitry Andric for (Value *V : DVI->getValues()) 8150fe6060f1SDimitry Andric if (Instruction *VI = dyn_cast_or_null<Instruction>(V)) 8151fe6060f1SDimitry Andric VIs.push_back(VI); 8152480093f4SDimitry Andric 8153fe6060f1SDimitry Andric // This DVI may depend on multiple instructions, complicating any 8154fe6060f1SDimitry Andric // potential sink. This block takes the defensive approach, opting to 8155fe6060f1SDimitry Andric // "undef" the DVI if it has more than one instruction and any of them do 8156fe6060f1SDimitry Andric // not dominate DVI. 8157fe6060f1SDimitry Andric for (Instruction *VI : VIs) { 8158fe6060f1SDimitry Andric if (VI->isTerminator()) 8159480093f4SDimitry Andric continue; 8160480093f4SDimitry Andric 81610b57cec5SDimitry Andric // If VI is a phi in a block with an EHPad terminator, we can't insert 81620b57cec5SDimitry Andric // after it. 81630b57cec5SDimitry Andric if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) 81640b57cec5SDimitry Andric continue; 8165480093f4SDimitry Andric 8166480093f4SDimitry Andric // If the defining instruction dominates the dbg.value, we do not need 8167480093f4SDimitry Andric // to move the dbg.value. 
8168480093f4SDimitry Andric if (DT.dominates(VI, DVI)) 8169480093f4SDimitry Andric continue; 8170480093f4SDimitry Andric 8171fe6060f1SDimitry Andric // If we depend on multiple instructions and any of them doesn't 8172fe6060f1SDimitry Andric // dominate this DVI, we probably can't salvage it: moving it to 8173fe6060f1SDimitry Andric // after any of the instructions could cause us to lose the others. 8174fe6060f1SDimitry Andric if (VIs.size() > 1) { 8175fe6060f1SDimitry Andric LLVM_DEBUG( 8176fe6060f1SDimitry Andric dbgs() 8177fe6060f1SDimitry Andric << "Unable to find valid location for Debug Value, undefing:\n" 8178fe6060f1SDimitry Andric << *DVI); 8179fe6060f1SDimitry Andric DVI->setUndef(); 8180fe6060f1SDimitry Andric break; 8181fe6060f1SDimitry Andric } 8182fe6060f1SDimitry Andric 81830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" 81840b57cec5SDimitry Andric << *DVI << ' ' << *VI); 81850b57cec5SDimitry Andric DVI->removeFromParent(); 81860b57cec5SDimitry Andric if (isa<PHINode>(VI)) 81870b57cec5SDimitry Andric DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); 81880b57cec5SDimitry Andric else 81890b57cec5SDimitry Andric DVI->insertAfter(VI); 81900b57cec5SDimitry Andric MadeChange = true; 81910b57cec5SDimitry Andric ++NumDbgValueMoved; 81920b57cec5SDimitry Andric } 81930b57cec5SDimitry Andric } 8194fe6060f1SDimitry Andric } 8195fe6060f1SDimitry Andric return MadeChange; 8196fe6060f1SDimitry Andric } 8197fe6060f1SDimitry Andric 8198fe6060f1SDimitry Andric // Group scattered pseudo probes in a block to favor SelectionDAG. Scattered 8199fe6060f1SDimitry Andric // probes can be chained dependencies of other regular DAG nodes and block DAG 8200fe6060f1SDimitry Andric // combine optimizations. 
8201fe6060f1SDimitry Andric bool CodeGenPrepare::placePseudoProbes(Function &F) { 8202fe6060f1SDimitry Andric bool MadeChange = false; 8203fe6060f1SDimitry Andric for (auto &Block : F) { 8204fe6060f1SDimitry Andric // Move the rest probes to the beginning of the block. 8205fe6060f1SDimitry Andric auto FirstInst = Block.getFirstInsertionPt(); 8206fe6060f1SDimitry Andric while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst()) 8207fe6060f1SDimitry Andric ++FirstInst; 8208fe6060f1SDimitry Andric BasicBlock::iterator I(FirstInst); 8209fe6060f1SDimitry Andric I++; 8210fe6060f1SDimitry Andric while (I != Block.end()) { 8211fe6060f1SDimitry Andric if (auto *II = dyn_cast<PseudoProbeInst>(I++)) { 8212fe6060f1SDimitry Andric II->moveBefore(&*FirstInst); 8213fe6060f1SDimitry Andric MadeChange = true; 8214fe6060f1SDimitry Andric } 8215fe6060f1SDimitry Andric } 8216fe6060f1SDimitry Andric } 82170b57cec5SDimitry Andric return MadeChange; 82180b57cec5SDimitry Andric } 82190b57cec5SDimitry Andric 82200b57cec5SDimitry Andric /// Scale down both weights to fit into uint32_t. 82210b57cec5SDimitry Andric static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { 82220b57cec5SDimitry Andric uint64_t NewMax = (NewTrue > NewFalse) ? 
NewTrue : NewFalse; 82230b57cec5SDimitry Andric uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1; 82240b57cec5SDimitry Andric NewTrue = NewTrue / Scale; 82250b57cec5SDimitry Andric NewFalse = NewFalse / Scale; 82260b57cec5SDimitry Andric } 82270b57cec5SDimitry Andric 82280b57cec5SDimitry Andric /// Some targets prefer to split a conditional branch like: 82290b57cec5SDimitry Andric /// \code 82300b57cec5SDimitry Andric /// %0 = icmp ne i32 %a, 0 82310b57cec5SDimitry Andric /// %1 = icmp ne i32 %b, 0 82320b57cec5SDimitry Andric /// %or.cond = or i1 %0, %1 82330b57cec5SDimitry Andric /// br i1 %or.cond, label %TrueBB, label %FalseBB 82340b57cec5SDimitry Andric /// \endcode 82350b57cec5SDimitry Andric /// into multiple branch instructions like: 82360b57cec5SDimitry Andric /// \code 82370b57cec5SDimitry Andric /// bb1: 82380b57cec5SDimitry Andric /// %0 = icmp ne i32 %a, 0 82390b57cec5SDimitry Andric /// br i1 %0, label %TrueBB, label %bb2 82400b57cec5SDimitry Andric /// bb2: 82410b57cec5SDimitry Andric /// %1 = icmp ne i32 %b, 0 82420b57cec5SDimitry Andric /// br i1 %1, label %TrueBB, label %FalseBB 82430b57cec5SDimitry Andric /// \endcode 82440b57cec5SDimitry Andric /// This usually allows instruction selection to do even further optimizations 82450b57cec5SDimitry Andric /// and combine the compare with the branch instruction. Currently this is 82460b57cec5SDimitry Andric /// applied for targets which have "cheap" jump instructions. 82470b57cec5SDimitry Andric /// 82480b57cec5SDimitry Andric /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. 
82490b57cec5SDimitry Andric /// 82500b57cec5SDimitry Andric bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { 82515ffd83dbSDimitry Andric if (!TM->Options.EnableFastISel || TLI->isJumpExpensive()) 82520b57cec5SDimitry Andric return false; 82530b57cec5SDimitry Andric 82540b57cec5SDimitry Andric bool MadeChange = false; 82550b57cec5SDimitry Andric for (auto &BB : F) { 82560b57cec5SDimitry Andric // Does this BB end with the following? 82570b57cec5SDimitry Andric // %cond1 = icmp|fcmp|binary instruction ... 82580b57cec5SDimitry Andric // %cond2 = icmp|fcmp|binary instruction ... 82590b57cec5SDimitry Andric // %cond.or = or|and i1 %cond1, cond2 82600b57cec5SDimitry Andric // br i1 %cond.or label %dest1, label %dest2" 8261e8d8bef9SDimitry Andric Instruction *LogicOp; 82620b57cec5SDimitry Andric BasicBlock *TBB, *FBB; 8263e8d8bef9SDimitry Andric if (!match(BB.getTerminator(), 8264e8d8bef9SDimitry Andric m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB))) 82650b57cec5SDimitry Andric continue; 82660b57cec5SDimitry Andric 82670b57cec5SDimitry Andric auto *Br1 = cast<BranchInst>(BB.getTerminator()); 82680b57cec5SDimitry Andric if (Br1->getMetadata(LLVMContext::MD_unpredictable)) 82690b57cec5SDimitry Andric continue; 82700b57cec5SDimitry Andric 8271480093f4SDimitry Andric // The merging of mostly empty BB can cause a degenerate branch. 
8272480093f4SDimitry Andric if (TBB == FBB) 8273480093f4SDimitry Andric continue; 8274480093f4SDimitry Andric 82750b57cec5SDimitry Andric unsigned Opc; 82760b57cec5SDimitry Andric Value *Cond1, *Cond2; 8277e8d8bef9SDimitry Andric if (match(LogicOp, 8278e8d8bef9SDimitry Andric m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2))))) 82790b57cec5SDimitry Andric Opc = Instruction::And; 8280e8d8bef9SDimitry Andric else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)), 82810b57cec5SDimitry Andric m_OneUse(m_Value(Cond2))))) 82820b57cec5SDimitry Andric Opc = Instruction::Or; 82830b57cec5SDimitry Andric else 82840b57cec5SDimitry Andric continue; 82850b57cec5SDimitry Andric 8286e8d8bef9SDimitry Andric auto IsGoodCond = [](Value *Cond) { 8287e8d8bef9SDimitry Andric return match( 8288e8d8bef9SDimitry Andric Cond, 8289e8d8bef9SDimitry Andric m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()), 8290e8d8bef9SDimitry Andric m_LogicalOr(m_Value(), m_Value())))); 8291e8d8bef9SDimitry Andric }; 8292e8d8bef9SDimitry Andric if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2)) 82930b57cec5SDimitry Andric continue; 82940b57cec5SDimitry Andric 82950b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); 82960b57cec5SDimitry Andric 82970b57cec5SDimitry Andric // Create a new BB. 82985ffd83dbSDimitry Andric auto *TmpBB = 82990b57cec5SDimitry Andric BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split", 83000b57cec5SDimitry Andric BB.getParent(), BB.getNextNode()); 83010b57cec5SDimitry Andric 83020b57cec5SDimitry Andric // Update original basic block by using the first condition directly by the 83030b57cec5SDimitry Andric // branch instruction and removing the no longer needed and/or instruction. 
83040b57cec5SDimitry Andric Br1->setCondition(Cond1); 83050b57cec5SDimitry Andric LogicOp->eraseFromParent(); 83060b57cec5SDimitry Andric 83070b57cec5SDimitry Andric // Depending on the condition we have to either replace the true or the 83080b57cec5SDimitry Andric // false successor of the original branch instruction. 83090b57cec5SDimitry Andric if (Opc == Instruction::And) 83100b57cec5SDimitry Andric Br1->setSuccessor(0, TmpBB); 83110b57cec5SDimitry Andric else 83120b57cec5SDimitry Andric Br1->setSuccessor(1, TmpBB); 83130b57cec5SDimitry Andric 83140b57cec5SDimitry Andric // Fill in the new basic block. 83150b57cec5SDimitry Andric auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB); 83160b57cec5SDimitry Andric if (auto *I = dyn_cast<Instruction>(Cond2)) { 83170b57cec5SDimitry Andric I->removeFromParent(); 83180b57cec5SDimitry Andric I->insertBefore(Br2); 83190b57cec5SDimitry Andric } 83200b57cec5SDimitry Andric 83210b57cec5SDimitry Andric // Update PHI nodes in both successors. The original BB needs to be 83220b57cec5SDimitry Andric // replaced in one successor's PHI nodes, because the branch comes now from 83230b57cec5SDimitry Andric // the newly generated BB (NewBB). In the other successor we need to add one 83240b57cec5SDimitry Andric // incoming edge to the PHI nodes, because both branch instructions target 83250b57cec5SDimitry Andric // now the same successor. Depending on the original branch condition 83260b57cec5SDimitry Andric // (and/or) we have to swap the successors (TrueDest, FalseDest), so that 83270b57cec5SDimitry Andric // we perform the correct update for the PHI nodes. 83280b57cec5SDimitry Andric // This doesn't change the successor order of the just created branch 83290b57cec5SDimitry Andric // instruction (or any other instruction). 83300b57cec5SDimitry Andric if (Opc == Instruction::Or) 83310b57cec5SDimitry Andric std::swap(TBB, FBB); 83320b57cec5SDimitry Andric 83330b57cec5SDimitry Andric // Replace the old BB with the new BB. 
83340b57cec5SDimitry Andric TBB->replacePhiUsesWith(&BB, TmpBB); 83350b57cec5SDimitry Andric 83360b57cec5SDimitry Andric // Add another incoming edge form the new BB. 83370b57cec5SDimitry Andric for (PHINode &PN : FBB->phis()) { 83380b57cec5SDimitry Andric auto *Val = PN.getIncomingValueForBlock(&BB); 83390b57cec5SDimitry Andric PN.addIncoming(Val, TmpBB); 83400b57cec5SDimitry Andric } 83410b57cec5SDimitry Andric 83420b57cec5SDimitry Andric // Update the branch weights (from SelectionDAGBuilder:: 83430b57cec5SDimitry Andric // FindMergedConditions). 83440b57cec5SDimitry Andric if (Opc == Instruction::Or) { 83450b57cec5SDimitry Andric // Codegen X | Y as: 83460b57cec5SDimitry Andric // BB1: 83470b57cec5SDimitry Andric // jmp_if_X TBB 83480b57cec5SDimitry Andric // jmp TmpBB 83490b57cec5SDimitry Andric // TmpBB: 83500b57cec5SDimitry Andric // jmp_if_Y TBB 83510b57cec5SDimitry Andric // jmp FBB 83520b57cec5SDimitry Andric // 83530b57cec5SDimitry Andric 83540b57cec5SDimitry Andric // We have flexibility in setting Prob for BB1 and Prob for NewBB. 83550b57cec5SDimitry Andric // The requirement is that 83560b57cec5SDimitry Andric // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) 83570b57cec5SDimitry Andric // = TrueProb for original BB. 83580b57cec5SDimitry Andric // Assuming the original weights are A and B, one choice is to set BB1's 83590b57cec5SDimitry Andric // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice 83600b57cec5SDimitry Andric // assumes that 83610b57cec5SDimitry Andric // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. 83620b57cec5SDimitry Andric // Another choice is to assume TrueProb for BB1 equals to TrueProb for 83630b57cec5SDimitry Andric // TmpBB, but the math is more complicated. 
83640b57cec5SDimitry Andric uint64_t TrueWeight, FalseWeight; 83650b57cec5SDimitry Andric if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) { 83660b57cec5SDimitry Andric uint64_t NewTrueWeight = TrueWeight; 83670b57cec5SDimitry Andric uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight; 83680b57cec5SDimitry Andric scaleWeights(NewTrueWeight, NewFalseWeight); 83690b57cec5SDimitry Andric Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext()) 83700b57cec5SDimitry Andric .createBranchWeights(TrueWeight, FalseWeight)); 83710b57cec5SDimitry Andric 83720b57cec5SDimitry Andric NewTrueWeight = TrueWeight; 83730b57cec5SDimitry Andric NewFalseWeight = 2 * FalseWeight; 83740b57cec5SDimitry Andric scaleWeights(NewTrueWeight, NewFalseWeight); 83750b57cec5SDimitry Andric Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext()) 83760b57cec5SDimitry Andric .createBranchWeights(TrueWeight, FalseWeight)); 83770b57cec5SDimitry Andric } 83780b57cec5SDimitry Andric } else { 83790b57cec5SDimitry Andric // Codegen X & Y as: 83800b57cec5SDimitry Andric // BB1: 83810b57cec5SDimitry Andric // jmp_if_X TmpBB 83820b57cec5SDimitry Andric // jmp FBB 83830b57cec5SDimitry Andric // TmpBB: 83840b57cec5SDimitry Andric // jmp_if_Y TBB 83850b57cec5SDimitry Andric // jmp FBB 83860b57cec5SDimitry Andric // 83870b57cec5SDimitry Andric // This requires creation of TmpBB after CurBB. 83880b57cec5SDimitry Andric 83890b57cec5SDimitry Andric // We have flexibility in setting Prob for BB1 and Prob for TmpBB. 83900b57cec5SDimitry Andric // The requirement is that 83910b57cec5SDimitry Andric // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) 83920b57cec5SDimitry Andric // = FalseProb for original BB. 83930b57cec5SDimitry Andric // Assuming the original weights are A and B, one choice is to set BB1's 83940b57cec5SDimitry Andric // weights to 2A+B and B, and set TmpBB's weights to 2A and B. 
This choice 83950b57cec5SDimitry Andric // assumes that 83960b57cec5SDimitry Andric // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. 83970b57cec5SDimitry Andric uint64_t TrueWeight, FalseWeight; 83980b57cec5SDimitry Andric if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) { 83990b57cec5SDimitry Andric uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight; 84000b57cec5SDimitry Andric uint64_t NewFalseWeight = FalseWeight; 84010b57cec5SDimitry Andric scaleWeights(NewTrueWeight, NewFalseWeight); 84020b57cec5SDimitry Andric Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext()) 84030b57cec5SDimitry Andric .createBranchWeights(TrueWeight, FalseWeight)); 84040b57cec5SDimitry Andric 84050b57cec5SDimitry Andric NewTrueWeight = 2 * TrueWeight; 84060b57cec5SDimitry Andric NewFalseWeight = FalseWeight; 84070b57cec5SDimitry Andric scaleWeights(NewTrueWeight, NewFalseWeight); 84080b57cec5SDimitry Andric Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext()) 84090b57cec5SDimitry Andric .createBranchWeights(TrueWeight, FalseWeight)); 84100b57cec5SDimitry Andric } 84110b57cec5SDimitry Andric } 84120b57cec5SDimitry Andric 84130b57cec5SDimitry Andric ModifiedDT = true; 84140b57cec5SDimitry Andric MadeChange = true; 84150b57cec5SDimitry Andric 84160b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); 84170b57cec5SDimitry Andric TmpBB->dump()); 84180b57cec5SDimitry Andric } 84190b57cec5SDimitry Andric return MadeChange; 84200b57cec5SDimitry Andric } 8421