10b57cec5SDimitry Andric //===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // For best-case performance on Cortex-A57, we should try to use a balanced 90b57cec5SDimitry Andric // mix of odd and even D-registers when performing a critical sequence of 100b57cec5SDimitry Andric // independent, non-quadword FP/ASIMD floating-point multiply or 110b57cec5SDimitry Andric // multiply-accumulate operations. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // This pass attempts to detect situations where the register allocation may 140b57cec5SDimitry Andric // adversely affect this load balancing and to change the registers used so as 150b57cec5SDimitry Andric // to better utilize the CPU. 160b57cec5SDimitry Andric // 170b57cec5SDimitry Andric // Ideally we'd just take each multiply or multiply-accumulate in turn and 180b57cec5SDimitry Andric // allocate it alternating even or odd registers. However, multiply-accumulates 190b57cec5SDimitry Andric // are most efficiently performed in the same functional unit as their 200b57cec5SDimitry Andric // accumulation operand. Therefore this pass tries to find maximal sequences 210b57cec5SDimitry Andric // ("Chains") of multiply-accumulates linked via their accumulation operand, 220b57cec5SDimitry Andric // and assign them all the same "color" (oddness/evenness). 230b57cec5SDimitry Andric // 240b57cec5SDimitry Andric // This optimization affects S-register and D-register floating point 250b57cec5SDimitry Andric // multiplies and FMADD/FMAs, as well as vector (floating point only) muls and 260b57cec5SDimitry Andric // FMADD/FMA. Q register instructions (and 128-bit vector instructions) are 270b57cec5SDimitry Andric // not affected. 280b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric #include "AArch64.h" 310b57cec5SDimitry Andric #include "AArch64InstrInfo.h" 320b57cec5SDimitry Andric #include "AArch64Subtarget.h" 330b57cec5SDimitry Andric #include "llvm/ADT/EquivalenceClasses.h" 340b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 350b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 360b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 370b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 380b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 390b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h" 400b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h" 410b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 420b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 430b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 440b57cec5SDimitry Andric using namespace llvm; 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-a57-fp-load-balancing" 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric // Enforce the algorithm to use the scavenged register even when the original 490b57cec5SDimitry Andric // destination register is the correct color. Used for testing. 500b57cec5SDimitry Andric static cl::opt<bool> 510b57cec5SDimitry Andric TransformAll("aarch64-a57-fp-load-balancing-force-all", 520b57cec5SDimitry Andric cl::desc("Always modify dest registers regardless of color"), 530b57cec5SDimitry Andric cl::init(false), cl::Hidden); 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric // Never use the balance information obtained from chains - return a specific 560b57cec5SDimitry Andric // color always. Used for testing. 570b57cec5SDimitry Andric static cl::opt<unsigned> 580b57cec5SDimitry Andric OverrideBalance("aarch64-a57-fp-load-balancing-override", 590b57cec5SDimitry Andric cl::desc("Ignore balance information, always return " 600b57cec5SDimitry Andric "(1: Even, 2: Odd)."), 610b57cec5SDimitry Andric cl::init(0), cl::Hidden); 620b57cec5SDimitry Andric 630b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 640b57cec5SDimitry Andric // Helper functions 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric // Is the instruction a type of multiply on 64-bit (or 32-bit) FPRs? 670b57cec5SDimitry Andric static bool isMul(MachineInstr *MI) { 680b57cec5SDimitry Andric switch (MI->getOpcode()) { 690b57cec5SDimitry Andric case AArch64::FMULSrr: 700b57cec5SDimitry Andric case AArch64::FNMULSrr: 710b57cec5SDimitry Andric case AArch64::FMULDrr: 720b57cec5SDimitry Andric case AArch64::FNMULDrr: 730b57cec5SDimitry Andric return true; 740b57cec5SDimitry Andric default: 750b57cec5SDimitry Andric return false; 760b57cec5SDimitry Andric } 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric // Is the instruction a type of FP multiply-accumulate on 64-bit (or 32-bit) FPRs? 800b57cec5SDimitry Andric static bool isMla(MachineInstr *MI) { 810b57cec5SDimitry Andric switch (MI->getOpcode()) { 820b57cec5SDimitry Andric case AArch64::FMSUBSrrr: 830b57cec5SDimitry Andric case AArch64::FMADDSrrr: 840b57cec5SDimitry Andric case AArch64::FNMSUBSrrr: 850b57cec5SDimitry Andric case AArch64::FNMADDSrrr: 860b57cec5SDimitry Andric case AArch64::FMSUBDrrr: 870b57cec5SDimitry Andric case AArch64::FMADDDrrr: 880b57cec5SDimitry Andric case AArch64::FNMSUBDrrr: 890b57cec5SDimitry Andric case AArch64::FNMADDDrrr: 900b57cec5SDimitry Andric return true; 910b57cec5SDimitry Andric default: 920b57cec5SDimitry Andric return false; 930b57cec5SDimitry Andric } 940b57cec5SDimitry Andric } 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 970b57cec5SDimitry Andric 980b57cec5SDimitry Andric namespace { 990b57cec5SDimitry Andric /// A "color", which is either even or odd. Yes, these aren't really colors 1000b57cec5SDimitry Andric /// but the algorithm is conceptually doing two-color graph coloring. 1010b57cec5SDimitry Andric enum class Color { Even, Odd }; 1020b57cec5SDimitry Andric #ifndef NDEBUG 1030b57cec5SDimitry Andric static const char *ColorNames[2] = { "Even", "Odd" }; 1040b57cec5SDimitry Andric #endif 1050b57cec5SDimitry Andric 1060b57cec5SDimitry Andric class Chain; 1070b57cec5SDimitry Andric 1080b57cec5SDimitry Andric class AArch64A57FPLoadBalancing : public MachineFunctionPass { 1090b57cec5SDimitry Andric MachineRegisterInfo *MRI; 1100b57cec5SDimitry Andric const TargetRegisterInfo *TRI; 1110b57cec5SDimitry Andric RegisterClassInfo RCI; 1120b57cec5SDimitry Andric 1130b57cec5SDimitry Andric public: 1140b57cec5SDimitry Andric static char ID; 1150b57cec5SDimitry Andric explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) { 1160b57cec5SDimitry Andric initializeAArch64A57FPLoadBalancingPass(*PassRegistry::getPassRegistry()); 1170b57cec5SDimitry Andric } 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &F) override; 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 1220b57cec5SDimitry Andric return MachineFunctionProperties().set( 1230b57cec5SDimitry Andric MachineFunctionProperties::Property::NoVRegs); 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric StringRef getPassName() const override { 1270b57cec5SDimitry Andric return "A57 FP Anti-dependency breaker"; 1280b57cec5SDimitry Andric } 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1310b57cec5SDimitry Andric AU.setPreservesCFG(); 1320b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 1330b57cec5SDimitry Andric } 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric private: 1360b57cec5SDimitry Andric bool runOnBasicBlock(MachineBasicBlock &MBB); 1370b57cec5SDimitry Andric bool colorChainSet(std::vector<Chain*> GV, MachineBasicBlock &MBB, 1380b57cec5SDimitry Andric int &Balance); 1390b57cec5SDimitry Andric bool colorChain(Chain *G, Color C, MachineBasicBlock &MBB); 1400b57cec5SDimitry Andric int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB); 1410b57cec5SDimitry Andric void scanInstruction(MachineInstr *MI, unsigned Idx, 1420b57cec5SDimitry Andric std::map<unsigned, Chain*> &Active, 1430b57cec5SDimitry Andric std::vector<std::unique_ptr<Chain>> &AllChains); 1440b57cec5SDimitry Andric void maybeKillChain(MachineOperand &MO, unsigned Idx, 1450b57cec5SDimitry Andric std::map<unsigned, Chain*> &RegChains); 1460b57cec5SDimitry Andric Color getColor(unsigned Register); 1470b57cec5SDimitry Andric Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L); 1480b57cec5SDimitry Andric }; 1490b57cec5SDimitry Andric } 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric char AArch64A57FPLoadBalancing::ID = 0; 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancing, DEBUG_TYPE, 1540b57cec5SDimitry Andric "AArch64 A57 FP Load-Balancing", false, false) 1550b57cec5SDimitry Andric INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE, 1560b57cec5SDimitry Andric "AArch64 A57 FP Load-Balancing", false, false) 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric namespace { 1590b57cec5SDimitry Andric /// A Chain is a sequence of instructions that are linked together by 1600b57cec5SDimitry Andric /// an accumulation operand. For example: 1610b57cec5SDimitry Andric /// 1620b57cec5SDimitry Andric /// fmul def d0, ? 1630b57cec5SDimitry Andric /// fmla def d1, ?, ?, killed d0 1640b57cec5SDimitry Andric /// fmla def d2, ?, ?, killed d1 1650b57cec5SDimitry Andric /// 1660b57cec5SDimitry Andric /// There may be other instructions interleaved in the sequence that 1670b57cec5SDimitry Andric /// do not belong to the chain. These other instructions must not use 1680b57cec5SDimitry Andric /// the "chain" register at any point. 1690b57cec5SDimitry Andric /// 1700b57cec5SDimitry Andric /// We currently only support chains where the "chain" operand is killed 1710b57cec5SDimitry Andric /// at each link in the chain for simplicity. 1720b57cec5SDimitry Andric /// A chain has three important instructions - Start, Last and Kill. 1730b57cec5SDimitry Andric /// * The start instruction is the first instruction in the chain. 1740b57cec5SDimitry Andric /// * Last is the final instruction in the chain. 1750b57cec5SDimitry Andric /// * Kill may or may not be defined. If defined, Kill is the instruction 1760b57cec5SDimitry Andric /// where the outgoing value of the Last instruction is killed. 1770b57cec5SDimitry Andric /// This information is important as if we know the outgoing value is 1780b57cec5SDimitry Andric /// killed with no intervening uses, we can safely change its register. 1790b57cec5SDimitry Andric /// 1800b57cec5SDimitry Andric /// Without a kill instruction, we must assume the outgoing value escapes 1810b57cec5SDimitry Andric /// beyond our model and either must not change its register or must 1820b57cec5SDimitry Andric /// create a fixup FMOV to keep the old register value consistent. 1830b57cec5SDimitry Andric /// 1840b57cec5SDimitry Andric class Chain { 1850b57cec5SDimitry Andric public: 1860b57cec5SDimitry Andric /// The important (marker) instructions. 1870b57cec5SDimitry Andric MachineInstr *StartInst, *LastInst, *KillInst; 1880b57cec5SDimitry Andric /// The index, from the start of the basic block, that each marker 1890b57cec5SDimitry Andric /// appears. These are stored so we can do quick interval tests. 1900b57cec5SDimitry Andric unsigned StartInstIdx, LastInstIdx, KillInstIdx; 1910b57cec5SDimitry Andric /// All instructions in the chain. 1920b57cec5SDimitry Andric std::set<MachineInstr*> Insts; 1930b57cec5SDimitry Andric /// True if KillInst cannot be modified. If this is true, 1940b57cec5SDimitry Andric /// we cannot change LastInst's outgoing register. 1950b57cec5SDimitry Andric /// This will be true for tied values and regmasks. 1960b57cec5SDimitry Andric bool KillIsImmutable; 1970b57cec5SDimitry Andric /// The "color" of LastInst. This will be the preferred chain color, 1980b57cec5SDimitry Andric /// as changing intermediate nodes is easy but changing the last 1990b57cec5SDimitry Andric /// instruction can be more tricky. 2000b57cec5SDimitry Andric Color LastColor; 2010b57cec5SDimitry Andric 2020b57cec5SDimitry Andric Chain(MachineInstr *MI, unsigned Idx, Color C) 2030b57cec5SDimitry Andric : StartInst(MI), LastInst(MI), KillInst(nullptr), 2040b57cec5SDimitry Andric StartInstIdx(Idx), LastInstIdx(Idx), KillInstIdx(0), 2050b57cec5SDimitry Andric LastColor(C) { 2060b57cec5SDimitry Andric Insts.insert(MI); 2070b57cec5SDimitry Andric } 2080b57cec5SDimitry Andric 2090b57cec5SDimitry Andric /// Add a new instruction into the chain. The instruction's dest operand 2100b57cec5SDimitry Andric /// has the given color. 2110b57cec5SDimitry Andric void add(MachineInstr *MI, unsigned Idx, Color C) { 2120b57cec5SDimitry Andric LastInst = MI; 2130b57cec5SDimitry Andric LastInstIdx = Idx; 2140b57cec5SDimitry Andric LastColor = C; 2150b57cec5SDimitry Andric assert((KillInstIdx == 0 || LastInstIdx < KillInstIdx) && 2160b57cec5SDimitry Andric "Chain: broken invariant. A Chain can only be killed after its last " 2170b57cec5SDimitry Andric "def"); 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric Insts.insert(MI); 2200b57cec5SDimitry Andric } 2210b57cec5SDimitry Andric 2220b57cec5SDimitry Andric /// Return true if MI is a member of the chain. 2230b57cec5SDimitry Andric bool contains(MachineInstr &MI) { return Insts.count(&MI) > 0; } 2240b57cec5SDimitry Andric 2250b57cec5SDimitry Andric /// Return the number of instructions in the chain. 2260b57cec5SDimitry Andric unsigned size() const { 2270b57cec5SDimitry Andric return Insts.size(); 2280b57cec5SDimitry Andric } 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric /// Inform the chain that its last active register (the dest register of 2310b57cec5SDimitry Andric /// LastInst) is killed by MI with no intervening uses or defs. 2320b57cec5SDimitry Andric void setKill(MachineInstr *MI, unsigned Idx, bool Immutable) { 2330b57cec5SDimitry Andric KillInst = MI; 2340b57cec5SDimitry Andric KillInstIdx = Idx; 2350b57cec5SDimitry Andric KillIsImmutable = Immutable; 2360b57cec5SDimitry Andric assert((KillInstIdx == 0 || LastInstIdx < KillInstIdx) && 2370b57cec5SDimitry Andric "Chain: broken invariant. A Chain can only be killed after its last " 2380b57cec5SDimitry Andric "def"); 2390b57cec5SDimitry Andric } 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric /// Return the first instruction in the chain. 2420b57cec5SDimitry Andric MachineInstr *getStart() const { return StartInst; } 2430b57cec5SDimitry Andric /// Return the last instruction in the chain. 2440b57cec5SDimitry Andric MachineInstr *getLast() const { return LastInst; } 2450b57cec5SDimitry Andric /// Return the "kill" instruction (as set with setKill()) or NULL. 2460b57cec5SDimitry Andric MachineInstr *getKill() const { return KillInst; } 2470b57cec5SDimitry Andric /// Return an instruction that can be used as an iterator for the end 2480b57cec5SDimitry Andric /// of the chain. This is the maximum of KillInst (if set) and LastInst. 2490b57cec5SDimitry Andric MachineBasicBlock::iterator end() const { 2500b57cec5SDimitry Andric return ++MachineBasicBlock::iterator(KillInst ? KillInst : LastInst); 2510b57cec5SDimitry Andric } 2520b57cec5SDimitry Andric MachineBasicBlock::iterator begin() const { return getStart(); } 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric /// Can the Kill instruction (assuming one exists) be modified? 2550b57cec5SDimitry Andric bool isKillImmutable() const { return KillIsImmutable; } 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric /// Return the preferred color of this chain. 2580b57cec5SDimitry Andric Color getPreferredColor() { 2590b57cec5SDimitry Andric if (OverrideBalance != 0) 2600b57cec5SDimitry Andric return OverrideBalance == 1 ? Color::Even : Color::Odd; 2610b57cec5SDimitry Andric return LastColor; 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric /// Return true if this chain (StartInst..KillInst) overlaps with Other. 2650b57cec5SDimitry Andric bool rangeOverlapsWith(const Chain &Other) const { 2660b57cec5SDimitry Andric unsigned End = KillInst ? KillInstIdx : LastInstIdx; 2670b57cec5SDimitry Andric unsigned OtherEnd = Other.KillInst ? 2680b57cec5SDimitry Andric Other.KillInstIdx : Other.LastInstIdx; 2690b57cec5SDimitry Andric 2700b57cec5SDimitry Andric return StartInstIdx <= OtherEnd && Other.StartInstIdx <= End; 2710b57cec5SDimitry Andric } 2720b57cec5SDimitry Andric 2730b57cec5SDimitry Andric /// Return true if this chain starts before Other. 2740b57cec5SDimitry Andric bool startsBefore(const Chain *Other) const { 2750b57cec5SDimitry Andric return StartInstIdx < Other->StartInstIdx; 2760b57cec5SDimitry Andric } 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric /// Return true if the group will require a fixup MOV at the end. 2790b57cec5SDimitry Andric bool requiresFixup() const { 2800b57cec5SDimitry Andric return (getKill() && isKillImmutable()) || !getKill(); 2810b57cec5SDimitry Andric } 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric /// Return a simple string representation of the chain. 2840b57cec5SDimitry Andric std::string str() const { 2850b57cec5SDimitry Andric std::string S; 2860b57cec5SDimitry Andric raw_string_ostream OS(S); 2870b57cec5SDimitry Andric 2880b57cec5SDimitry Andric OS << "{"; 2890b57cec5SDimitry Andric StartInst->print(OS, /* SkipOpers= */true); 2900b57cec5SDimitry Andric OS << " -> "; 2910b57cec5SDimitry Andric LastInst->print(OS, /* SkipOpers= */true); 2920b57cec5SDimitry Andric if (KillInst) { 2930b57cec5SDimitry Andric OS << " (kill @ "; 2940b57cec5SDimitry Andric KillInst->print(OS, /* SkipOpers= */true); 2950b57cec5SDimitry Andric OS << ")"; 2960b57cec5SDimitry Andric } 2970b57cec5SDimitry Andric OS << "}"; 2980b57cec5SDimitry Andric 2990b57cec5SDimitry Andric return OS.str(); 3000b57cec5SDimitry Andric } 3010b57cec5SDimitry Andric 3020b57cec5SDimitry Andric }; 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric } // end anonymous namespace 3050b57cec5SDimitry Andric 3060b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 3070b57cec5SDimitry Andric 3080b57cec5SDimitry Andric bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) { 3090b57cec5SDimitry Andric if (skipFunction(F.getFunction())) 3100b57cec5SDimitry Andric return false; 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps()) 3130b57cec5SDimitry Andric return false; 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric bool Changed = false; 3160b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n"); 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric MRI = &F.getRegInfo(); 3190b57cec5SDimitry Andric TRI = F.getRegInfo().getTargetRegisterInfo(); 3200b57cec5SDimitry Andric RCI.runOnMachineFunction(F); 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric for (auto &MBB : F) { 3230b57cec5SDimitry Andric Changed |= runOnBasicBlock(MBB); 3240b57cec5SDimitry Andric } 3250b57cec5SDimitry Andric 3260b57cec5SDimitry Andric return Changed; 3270b57cec5SDimitry Andric } 3280b57cec5SDimitry Andric 3290b57cec5SDimitry Andric bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) { 3300b57cec5SDimitry Andric bool Changed = false; 3310b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Running on MBB: " << MBB 3320b57cec5SDimitry Andric << " - scanning instructions...\n"); 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric // First, scan the basic block producing a set of chains. 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric // The currently "active" chains - chains that can be added to and haven't 3370b57cec5SDimitry Andric // been killed yet. This is keyed by register - all chains can only have one 3380b57cec5SDimitry Andric // "link" register between each inst in the chain. 3390b57cec5SDimitry Andric std::map<unsigned, Chain*> ActiveChains; 3400b57cec5SDimitry Andric std::vector<std::unique_ptr<Chain>> AllChains; 3410b57cec5SDimitry Andric unsigned Idx = 0; 3420b57cec5SDimitry Andric for (auto &MI : MBB) 3430b57cec5SDimitry Andric scanInstruction(&MI, Idx++, ActiveChains, AllChains); 3440b57cec5SDimitry Andric 3450b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Scan complete, " << AllChains.size() 3460b57cec5SDimitry Andric << " chains created.\n"); 3470b57cec5SDimitry Andric 3480b57cec5SDimitry Andric // Group the chains into disjoint sets based on their liveness range. This is 3490b57cec5SDimitry Andric // a poor-man's version of graph coloring. Ideally we'd create an interference 3500b57cec5SDimitry Andric // graph and perform full-on graph coloring on that, but; 3510b57cec5SDimitry Andric // (a) That's rather heavyweight for only two colors. 3520b57cec5SDimitry Andric // (b) We expect multiple disjoint interference regions - in practice the live 3530b57cec5SDimitry Andric // range of chains is quite small and they are clustered between loads 3540b57cec5SDimitry Andric // and stores. 3550b57cec5SDimitry Andric EquivalenceClasses<Chain*> EC; 3560b57cec5SDimitry Andric for (auto &I : AllChains) 3570b57cec5SDimitry Andric EC.insert(I.get()); 3580b57cec5SDimitry Andric 3590b57cec5SDimitry Andric for (auto &I : AllChains) 3600b57cec5SDimitry Andric for (auto &J : AllChains) 3610b57cec5SDimitry Andric if (I != J && I->rangeOverlapsWith(*J)) 3620b57cec5SDimitry Andric EC.unionSets(I.get(), J.get()); 3630b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Created " << EC.getNumClasses() << " disjoint sets.\n"); 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric // Now we assume that every member of an equivalence class interferes 3660b57cec5SDimitry Andric // with every other member of that class, and with no members of other classes. 3670b57cec5SDimitry Andric 3680b57cec5SDimitry Andric // Convert the EquivalenceClasses to a simpler set of sets. 3690b57cec5SDimitry Andric std::vector<std::vector<Chain*> > V; 3700b57cec5SDimitry Andric for (auto I = EC.begin(), E = EC.end(); I != E; ++I) { 3710b57cec5SDimitry Andric std::vector<Chain*> Cs(EC.member_begin(I), EC.member_end()); 3720b57cec5SDimitry Andric if (Cs.empty()) continue; 3730b57cec5SDimitry Andric V.push_back(std::move(Cs)); 3740b57cec5SDimitry Andric } 3750b57cec5SDimitry Andric 3760b57cec5SDimitry Andric // Now we have a set of sets, order them by start address so 3770b57cec5SDimitry Andric // we can iterate over them sequentially. 3780b57cec5SDimitry Andric llvm::sort(V, 3790b57cec5SDimitry Andric [](const std::vector<Chain *> &A, const std::vector<Chain *> &B) { 3800b57cec5SDimitry Andric return A.front()->startsBefore(B.front()); 3810b57cec5SDimitry Andric }); 3820b57cec5SDimitry Andric 3830b57cec5SDimitry Andric // As we only have two colors, we can track the global (BB-level) balance of 3840b57cec5SDimitry Andric // odds versus evens. We aim to keep this near zero to keep both execution 3850b57cec5SDimitry Andric // units fed. 3860b57cec5SDimitry Andric // Positive means we're even-heavy, negative we're odd-heavy. 3870b57cec5SDimitry Andric // 3880b57cec5SDimitry Andric // FIXME: If chains have interdependencies, for example: 3890b57cec5SDimitry Andric // mul r0, r1, r2 3900b57cec5SDimitry Andric // mul r3, r0, r1 3910b57cec5SDimitry Andric // We do not model this and may color each one differently, assuming we'll 3920b57cec5SDimitry Andric // get ILP when we obviously can't. This hasn't been seen to be a problem 3930b57cec5SDimitry Andric // in practice so far, so we simplify the algorithm by ignoring it. 3940b57cec5SDimitry Andric int Parity = 0; 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric for (auto &I : V) 3970b57cec5SDimitry Andric Changed |= colorChainSet(std::move(I), MBB, Parity); 3980b57cec5SDimitry Andric 3990b57cec5SDimitry Andric return Changed; 4000b57cec5SDimitry Andric } 4010b57cec5SDimitry Andric 4020b57cec5SDimitry Andric Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor, 4030b57cec5SDimitry Andric std::vector<Chain*> &L) { 4040b57cec5SDimitry Andric if (L.empty()) 4050b57cec5SDimitry Andric return nullptr; 4060b57cec5SDimitry Andric 4070b57cec5SDimitry Andric // We try and get the best candidate from L to color next, given that our 4080b57cec5SDimitry Andric // preferred color is "PreferredColor". L is ordered from larger to smaller 4090b57cec5SDimitry Andric // chains. It is beneficial to color the large chains before the small chains, 4100b57cec5SDimitry Andric // but if we can't find a chain of the maximum length with the preferred color, 4110b57cec5SDimitry Andric // we fuzz the size and look for slightly smaller chains before giving up and 4120b57cec5SDimitry Andric // returning a chain that must be recolored. 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric // FIXME: Does this need to be configurable? 4150b57cec5SDimitry Andric const unsigned SizeFuzz = 1; 4160b57cec5SDimitry Andric unsigned MinSize = L.front()->size() - SizeFuzz; 4170b57cec5SDimitry Andric for (auto I = L.begin(), E = L.end(); I != E; ++I) { 4180b57cec5SDimitry Andric if ((*I)->size() <= MinSize) { 4190b57cec5SDimitry Andric // We've gone past the size limit. Return the previous item. 4200b57cec5SDimitry Andric Chain *Ch = *--I; 4210b57cec5SDimitry Andric L.erase(I); 4220b57cec5SDimitry Andric return Ch; 4230b57cec5SDimitry Andric } 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric if ((*I)->getPreferredColor() == PreferredColor) { 4260b57cec5SDimitry Andric Chain *Ch = *I; 4270b57cec5SDimitry Andric L.erase(I); 4280b57cec5SDimitry Andric return Ch; 4290b57cec5SDimitry Andric } 4300b57cec5SDimitry Andric } 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric // Bailout case - just return the first item. 4330b57cec5SDimitry Andric Chain *Ch = L.front(); 4340b57cec5SDimitry Andric L.erase(L.begin()); 4350b57cec5SDimitry Andric return Ch; 4360b57cec5SDimitry Andric } 4370b57cec5SDimitry Andric 4380b57cec5SDimitry Andric bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV, 4390b57cec5SDimitry Andric MachineBasicBlock &MBB, 4400b57cec5SDimitry Andric int &Parity) { 4410b57cec5SDimitry Andric bool Changed = false; 4420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "colorChainSet(): #sets=" << GV.size() << "\n"); 4430b57cec5SDimitry Andric 4440b57cec5SDimitry Andric // Sort by descending size order so that we allocate the most important 4450b57cec5SDimitry Andric // sets first. 4460b57cec5SDimitry Andric // Tie-break equivalent sizes by sorting chains requiring fixups before 4470b57cec5SDimitry Andric // those without fixups. The logic here is that we should look at the 4480b57cec5SDimitry Andric // chains that we cannot change before we look at those we can, 4490b57cec5SDimitry Andric // so the parity counter is updated and we know what color we should 4500b57cec5SDimitry Andric // change them to! 4510b57cec5SDimitry Andric // Final tie-break with instruction order so pass output is stable (i.e. not 4520b57cec5SDimitry Andric // dependent on malloc'd pointer values). 4530b57cec5SDimitry Andric llvm::sort(GV, [](const Chain *G1, const Chain *G2) { 4540b57cec5SDimitry Andric if (G1->size() != G2->size()) 4550b57cec5SDimitry Andric return G1->size() > G2->size(); 4560b57cec5SDimitry Andric if (G1->requiresFixup() != G2->requiresFixup()) 4570b57cec5SDimitry Andric return G1->requiresFixup() > G2->requiresFixup(); 4580b57cec5SDimitry Andric // Make sure startsBefore() produces a stable final order. 4590b57cec5SDimitry Andric assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) && 4600b57cec5SDimitry Andric "Starts before not total order!"); 4610b57cec5SDimitry Andric return G1->startsBefore(G2); 4620b57cec5SDimitry Andric }); 4630b57cec5SDimitry Andric 4640b57cec5SDimitry Andric Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd; 4650b57cec5SDimitry Andric while (Chain *G = getAndEraseNext(PreferredColor, GV)) { 4660b57cec5SDimitry Andric // Start off by assuming we'll color to our own preferred color. 4670b57cec5SDimitry Andric Color C = PreferredColor; 4680b57cec5SDimitry Andric if (Parity == 0) 4690b57cec5SDimitry Andric // But if we really don't care, use the chain's preferred color. 4700b57cec5SDimitry Andric C = G->getPreferredColor(); 4710b57cec5SDimitry Andric 4720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - Parity=" << Parity 4730b57cec5SDimitry Andric << ", Color=" << ColorNames[(int)C] << "\n"); 4740b57cec5SDimitry Andric 4750b57cec5SDimitry Andric // If we'll need a fixup FMOV, don't bother. Testing has shown that this 4760b57cec5SDimitry Andric // happens infrequently and when it does it has at least a 50% chance of 4770b57cec5SDimitry Andric // slowing code down instead of speeding it up. 4780b57cec5SDimitry Andric if (G->requiresFixup() && C != G->getPreferredColor()) { 4790b57cec5SDimitry Andric C = G->getPreferredColor(); 4800b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - " << G->str() 4810b57cec5SDimitry Andric << " - not worthwhile changing; " 4820b57cec5SDimitry Andric "color remains " 4830b57cec5SDimitry Andric << ColorNames[(int)C] << "\n"); 4840b57cec5SDimitry Andric } 4850b57cec5SDimitry Andric 4860b57cec5SDimitry Andric Changed |= colorChain(G, C, MBB); 4870b57cec5SDimitry Andric 4880b57cec5SDimitry Andric Parity += (C == Color::Even) ? G->size() : -G->size(); 4890b57cec5SDimitry Andric PreferredColor = Parity < 0 ? Color::Even : Color::Odd; 4900b57cec5SDimitry Andric } 4910b57cec5SDimitry Andric 4920b57cec5SDimitry Andric return Changed; 4930b57cec5SDimitry Andric } 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C, 4960b57cec5SDimitry Andric MachineBasicBlock &MBB) { 4970b57cec5SDimitry Andric // Can we find an appropriate register that is available throughout the life 4980b57cec5SDimitry Andric // of the chain? Simulate liveness backwards until the end of the chain. 4990b57cec5SDimitry Andric LiveRegUnits Units(*TRI); 5000b57cec5SDimitry Andric Units.addLiveOuts(MBB); 5010b57cec5SDimitry Andric MachineBasicBlock::iterator I = MBB.end(); 5020b57cec5SDimitry Andric MachineBasicBlock::iterator ChainEnd = G->end(); 5030b57cec5SDimitry Andric while (I != ChainEnd) { 5040b57cec5SDimitry Andric --I; 5050b57cec5SDimitry Andric Units.stepBackward(*I); 5060b57cec5SDimitry Andric } 5070b57cec5SDimitry Andric 5080b57cec5SDimitry Andric // Check which register units are alive throughout the chain. 5090b57cec5SDimitry Andric MachineBasicBlock::iterator ChainBegin = G->begin(); 5100b57cec5SDimitry Andric assert(ChainBegin != ChainEnd && "Chain should contain instructions"); 5110b57cec5SDimitry Andric do { 5120b57cec5SDimitry Andric --I; 5130b57cec5SDimitry Andric Units.accumulate(*I); 5140b57cec5SDimitry Andric } while (I != ChainBegin); 5150b57cec5SDimitry Andric 5160b57cec5SDimitry Andric // Make sure we allocate in-order, to get the cheapest registers first. 517*bdd1243dSDimitry Andric unsigned RegClassID = ChainBegin->getDesc().operands()[0].RegClass; 5180b57cec5SDimitry Andric auto Ord = RCI.getOrder(TRI->getRegClass(RegClassID)); 5190b57cec5SDimitry Andric for (auto Reg : Ord) { 5200b57cec5SDimitry Andric if (!Units.available(Reg)) 5210b57cec5SDimitry Andric continue; 5220b57cec5SDimitry Andric if (C == getColor(Reg)) 5230b57cec5SDimitry Andric return Reg; 5240b57cec5SDimitry Andric } 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric return -1; 5270b57cec5SDimitry Andric } 5280b57cec5SDimitry Andric 5290b57cec5SDimitry Andric bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C, 5300b57cec5SDimitry Andric MachineBasicBlock &MBB) { 5310b57cec5SDimitry Andric bool Changed = false; 5320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - colorChain(" << G->str() << ", " 5330b57cec5SDimitry Andric << ColorNames[(int)C] << ")\n"); 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric // Try and obtain a free register of the right class. Without a register 5360b57cec5SDimitry Andric // to play with we cannot continue. 5370b57cec5SDimitry Andric int Reg = scavengeRegister(G, C, MBB); 5380b57cec5SDimitry Andric if (Reg == -1) { 5390b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Scavenging (thus coloring) failed!\n"); 5400b57cec5SDimitry Andric return false; 5410b57cec5SDimitry Andric } 5420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - Scavenged register: " << printReg(Reg, TRI) << "\n"); 5430b57cec5SDimitry Andric 5440b57cec5SDimitry Andric std::map<unsigned, unsigned> Substs; 5450b57cec5SDimitry Andric for (MachineInstr &I : *G) { 5460b57cec5SDimitry Andric if (!G->contains(I) && (&I != G->getKill() || G->isKillImmutable())) 5470b57cec5SDimitry Andric continue; 5480b57cec5SDimitry Andric 5490b57cec5SDimitry Andric // I is a member of G, or I is a mutable instruction that kills G. 5500b57cec5SDimitry Andric 5510b57cec5SDimitry Andric std::vector<unsigned> ToErase; 5520b57cec5SDimitry Andric for (auto &U : I.operands()) { 5530b57cec5SDimitry Andric if (U.isReg() && U.isUse() && Substs.find(U.getReg()) != Substs.end()) { 5548bcb0991SDimitry Andric Register OrigReg = U.getReg(); 5550b57cec5SDimitry Andric U.setReg(Substs[OrigReg]); 5560b57cec5SDimitry Andric if (U.isKill()) 5570b57cec5SDimitry Andric // Don't erase straight away, because there may be other operands 5580b57cec5SDimitry Andric // that also reference this substitution! 5590b57cec5SDimitry Andric ToErase.push_back(OrigReg); 5600b57cec5SDimitry Andric } else if (U.isRegMask()) { 5610b57cec5SDimitry Andric for (auto J : Substs) { 5620b57cec5SDimitry Andric if (U.clobbersPhysReg(J.first)) 5630b57cec5SDimitry Andric ToErase.push_back(J.first); 5640b57cec5SDimitry Andric } 5650b57cec5SDimitry Andric } 5660b57cec5SDimitry Andric } 5670b57cec5SDimitry Andric // Now it's safe to remove the substs identified earlier. 5680b57cec5SDimitry Andric for (auto J : ToErase) 5690b57cec5SDimitry Andric Substs.erase(J); 5700b57cec5SDimitry Andric 5710b57cec5SDimitry Andric // Only change the def if this isn't the last instruction. 5720b57cec5SDimitry Andric if (&I != G->getKill()) { 5730b57cec5SDimitry Andric MachineOperand &MO = I.getOperand(0); 5740b57cec5SDimitry Andric 5750b57cec5SDimitry Andric bool Change = TransformAll || getColor(MO.getReg()) != C; 5760b57cec5SDimitry Andric if (G->requiresFixup() && &I == G->getLast()) 5770b57cec5SDimitry Andric Change = false; 5780b57cec5SDimitry Andric 5790b57cec5SDimitry Andric if (Change) { 5800b57cec5SDimitry Andric Substs[MO.getReg()] = Reg; 5810b57cec5SDimitry Andric MO.setReg(Reg); 5820b57cec5SDimitry Andric 5830b57cec5SDimitry Andric Changed = true; 5840b57cec5SDimitry Andric } 5850b57cec5SDimitry Andric } 5860b57cec5SDimitry Andric } 5870b57cec5SDimitry Andric assert(Substs.size() == 0 && "No substitutions should be left active!"); 5880b57cec5SDimitry Andric 5890b57cec5SDimitry Andric if (G->getKill()) { 5900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - Kill instruction seen.\n"); 5910b57cec5SDimitry Andric } else { 5920b57cec5SDimitry Andric // We didn't have a kill instruction, but we didn't seem to need to change 5930b57cec5SDimitry Andric // the destination register anyway. 5940b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - Destination register not changed.\n"); 5950b57cec5SDimitry Andric } 5960b57cec5SDimitry Andric return Changed; 5970b57cec5SDimitry Andric } 5980b57cec5SDimitry Andric 5990b57cec5SDimitry Andric void AArch64A57FPLoadBalancing::scanInstruction( 6000b57cec5SDimitry Andric MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains, 6010b57cec5SDimitry Andric std::vector<std::unique_ptr<Chain>> &AllChains) { 6020b57cec5SDimitry Andric // Inspect "MI", updating ActiveChains and AllChains. 6030b57cec5SDimitry Andric 6040b57cec5SDimitry Andric if (isMul(MI)) { 6050b57cec5SDimitry Andric 6060b57cec5SDimitry Andric for (auto &I : MI->uses()) 6070b57cec5SDimitry Andric maybeKillChain(I, Idx, ActiveChains); 6080b57cec5SDimitry Andric for (auto &I : MI->defs()) 6090b57cec5SDimitry Andric maybeKillChain(I, Idx, ActiveChains); 6100b57cec5SDimitry Andric 6110b57cec5SDimitry Andric // Create a new chain. Multiplies don't require forwarding so can go on any 6120b57cec5SDimitry Andric // unit. 6138bcb0991SDimitry Andric Register DestReg = MI->getOperand(0).getReg(); 6140b57cec5SDimitry Andric 6150b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "New chain started for register " 6160b57cec5SDimitry Andric << printReg(DestReg, TRI) << " at " << *MI); 6170b57cec5SDimitry Andric 6188bcb0991SDimitry Andric auto G = std::make_unique<Chain>(MI, Idx, getColor(DestReg)); 6190b57cec5SDimitry Andric ActiveChains[DestReg] = G.get(); 6200b57cec5SDimitry Andric AllChains.push_back(std::move(G)); 6210b57cec5SDimitry Andric 6220b57cec5SDimitry Andric } else if (isMla(MI)) { 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric // It is beneficial to keep MLAs on the same functional unit as their 6250b57cec5SDimitry Andric // accumulator operand. 6268bcb0991SDimitry Andric Register DestReg = MI->getOperand(0).getReg(); 6278bcb0991SDimitry Andric Register AccumReg = MI->getOperand(3).getReg(); 6280b57cec5SDimitry Andric 6290b57cec5SDimitry Andric maybeKillChain(MI->getOperand(1), Idx, ActiveChains); 6300b57cec5SDimitry Andric maybeKillChain(MI->getOperand(2), Idx, ActiveChains); 6310b57cec5SDimitry Andric if (DestReg != AccumReg) 6320b57cec5SDimitry Andric maybeKillChain(MI->getOperand(0), Idx, ActiveChains); 6330b57cec5SDimitry Andric 6340b57cec5SDimitry Andric if (ActiveChains.find(AccumReg) != ActiveChains.end()) { 6350b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Chain found for accumulator register " 6360b57cec5SDimitry Andric << printReg(AccumReg, TRI) << " in MI " << *MI); 6370b57cec5SDimitry Andric 6380b57cec5SDimitry Andric // For simplicity we only chain together sequences of MULs/MLAs where the 6390b57cec5SDimitry Andric // accumulator register is killed on each instruction. This means we don't 6400b57cec5SDimitry Andric // need to track other uses of the registers we want to rewrite. 6410b57cec5SDimitry Andric // 6420b57cec5SDimitry Andric // FIXME: We could extend to handle the non-kill cases for more coverage. 6430b57cec5SDimitry Andric if (MI->getOperand(3).isKill()) { 6440b57cec5SDimitry Andric // Add to chain. 6450b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Instruction was successfully added to chain.\n"); 6460b57cec5SDimitry Andric ActiveChains[AccumReg]->add(MI, Idx, getColor(DestReg)); 6470b57cec5SDimitry Andric // Handle cases where the destination is not the same as the accumulator. 6480b57cec5SDimitry Andric if (DestReg != AccumReg) { 6490b57cec5SDimitry Andric ActiveChains[DestReg] = ActiveChains[AccumReg]; 6500b57cec5SDimitry Andric ActiveChains.erase(AccumReg); 6510b57cec5SDimitry Andric } 6520b57cec5SDimitry Andric return; 6530b57cec5SDimitry Andric } 6540b57cec5SDimitry Andric 6550b57cec5SDimitry Andric LLVM_DEBUG( 6560b57cec5SDimitry Andric dbgs() << "Cannot add to chain because accumulator operand wasn't " 6570b57cec5SDimitry Andric << "marked <kill>!\n"); 6580b57cec5SDimitry Andric maybeKillChain(MI->getOperand(3), Idx, ActiveChains); 6590b57cec5SDimitry Andric } 6600b57cec5SDimitry Andric 6610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Creating new chain for dest register " 6620b57cec5SDimitry Andric << printReg(DestReg, TRI) << "\n"); 6638bcb0991SDimitry Andric auto G = std::make_unique<Chain>(MI, Idx, getColor(DestReg)); 6640b57cec5SDimitry Andric ActiveChains[DestReg] = G.get(); 6650b57cec5SDimitry Andric AllChains.push_back(std::move(G)); 6660b57cec5SDimitry Andric 6670b57cec5SDimitry Andric } else { 6680b57cec5SDimitry Andric 6690b57cec5SDimitry Andric // Non-MUL or MLA instruction. Invalidate any chain in the uses or defs 6700b57cec5SDimitry Andric // lists. 6710b57cec5SDimitry Andric for (auto &I : MI->uses()) 6720b57cec5SDimitry Andric maybeKillChain(I, Idx, ActiveChains); 6730b57cec5SDimitry Andric for (auto &I : MI->defs()) 6740b57cec5SDimitry Andric maybeKillChain(I, Idx, ActiveChains); 6750b57cec5SDimitry Andric 6760b57cec5SDimitry Andric } 6770b57cec5SDimitry Andric } 6780b57cec5SDimitry Andric 6790b57cec5SDimitry Andric void AArch64A57FPLoadBalancing:: 6800b57cec5SDimitry Andric maybeKillChain(MachineOperand &MO, unsigned Idx, 6810b57cec5SDimitry Andric std::map<unsigned, Chain*> &ActiveChains) { 6820b57cec5SDimitry Andric // Given an operand and the set of active chains (keyed by register), 6830b57cec5SDimitry Andric // determine if a chain should be ended and remove from ActiveChains. 6840b57cec5SDimitry Andric MachineInstr *MI = MO.getParent(); 6850b57cec5SDimitry Andric 6860b57cec5SDimitry Andric if (MO.isReg()) { 6870b57cec5SDimitry Andric 6880b57cec5SDimitry Andric // If this is a KILL of a current chain, record it. 6890b57cec5SDimitry Andric if (MO.isKill() && ActiveChains.find(MO.getReg()) != ActiveChains.end()) { 6900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Kill seen for chain " << printReg(MO.getReg(), TRI) 6910b57cec5SDimitry Andric << "\n"); 6920b57cec5SDimitry Andric ActiveChains[MO.getReg()]->setKill(MI, Idx, /*Immutable=*/MO.isTied()); 6930b57cec5SDimitry Andric } 6940b57cec5SDimitry Andric ActiveChains.erase(MO.getReg()); 6950b57cec5SDimitry Andric 6960b57cec5SDimitry Andric } else if (MO.isRegMask()) { 6970b57cec5SDimitry Andric 6980b57cec5SDimitry Andric for (auto I = ActiveChains.begin(), E = ActiveChains.end(); 6990b57cec5SDimitry Andric I != E;) { 7000b57cec5SDimitry Andric if (MO.clobbersPhysReg(I->first)) { 7010b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Kill (regmask) seen for chain " 7020b57cec5SDimitry Andric << printReg(I->first, TRI) << "\n"); 7030b57cec5SDimitry Andric I->second->setKill(MI, Idx, /*Immutable=*/true); 7040b57cec5SDimitry Andric ActiveChains.erase(I++); 7050b57cec5SDimitry Andric } else 7060b57cec5SDimitry Andric ++I; 7070b57cec5SDimitry Andric } 7080b57cec5SDimitry Andric 7090b57cec5SDimitry Andric } 7100b57cec5SDimitry Andric } 7110b57cec5SDimitry Andric 7120b57cec5SDimitry Andric Color AArch64A57FPLoadBalancing::getColor(unsigned Reg) { 7130b57cec5SDimitry Andric if ((TRI->getEncodingValue(Reg) % 2) == 0) 7140b57cec5SDimitry Andric return Color::Even; 7150b57cec5SDimitry Andric else 7160b57cec5SDimitry Andric return Color::Odd; 7170b57cec5SDimitry Andric } 7180b57cec5SDimitry Andric 7190b57cec5SDimitry Andric // Factory function used by AArch64TargetMachine to add the pass to the passmanager. 7200b57cec5SDimitry Andric FunctionPass *llvm::createAArch64A57FPLoadBalancing() { 7210b57cec5SDimitry Andric return new AArch64A57FPLoadBalancing(); 7220b57cec5SDimitry Andric } 723