xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
10b57cec5SDimitry Andric //===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric // For best-case performance on Cortex-A57, we should try to use a balanced
90b57cec5SDimitry Andric // mix of odd and even D-registers when performing a critical sequence of
100b57cec5SDimitry Andric // independent, non-quadword FP/ASIMD floating-point multiply or
110b57cec5SDimitry Andric // multiply-accumulate operations.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric // This pass attempts to detect situations where the register allocation may
140b57cec5SDimitry Andric // adversely affect this load balancing and to change the registers used so as
150b57cec5SDimitry Andric // to better utilize the CPU.
160b57cec5SDimitry Andric //
170b57cec5SDimitry Andric // Ideally we'd just take each multiply or multiply-accumulate in turn and
180b57cec5SDimitry Andric // allocate it alternating even or odd registers. However, multiply-accumulates
190b57cec5SDimitry Andric // are most efficiently performed in the same functional unit as their
200b57cec5SDimitry Andric // accumulation operand. Therefore this pass tries to find maximal sequences
210b57cec5SDimitry Andric // ("Chains") of multiply-accumulates linked via their accumulation operand,
220b57cec5SDimitry Andric // and assign them all the same "color" (oddness/evenness).
230b57cec5SDimitry Andric //
240b57cec5SDimitry Andric // This optimization affects S-register and D-register floating point
250b57cec5SDimitry Andric // multiplies and FMADD/FMAs, as well as vector (floating point only) muls and
260b57cec5SDimitry Andric // FMADD/FMA. Q register instructions (and 128-bit vector instructions) are
270b57cec5SDimitry Andric // not affected.
280b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric #include "AArch64.h"
310b57cec5SDimitry Andric #include "AArch64InstrInfo.h"
320b57cec5SDimitry Andric #include "AArch64Subtarget.h"
330b57cec5SDimitry Andric #include "llvm/ADT/EquivalenceClasses.h"
340b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
350b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
360b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
370b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
380b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
390b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h"
400b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h"
410b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
420b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
430b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
440b57cec5SDimitry Andric using namespace llvm;
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-a57-fp-load-balancing"
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric // Enforce the algorithm to use the scavenged register even when the original
490b57cec5SDimitry Andric // destination register is the correct color. Used for testing.
500b57cec5SDimitry Andric static cl::opt<bool>
510b57cec5SDimitry Andric TransformAll("aarch64-a57-fp-load-balancing-force-all",
520b57cec5SDimitry Andric              cl::desc("Always modify dest registers regardless of color"),
530b57cec5SDimitry Andric              cl::init(false), cl::Hidden);
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric // Never use the balance information obtained from chains - return a specific
560b57cec5SDimitry Andric // color always. Used for testing.
570b57cec5SDimitry Andric static cl::opt<unsigned>
580b57cec5SDimitry Andric OverrideBalance("aarch64-a57-fp-load-balancing-override",
590b57cec5SDimitry Andric               cl::desc("Ignore balance information, always return "
600b57cec5SDimitry Andric                        "(1: Even, 2: Odd)."),
610b57cec5SDimitry Andric               cl::init(0), cl::Hidden);
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
640b57cec5SDimitry Andric // Helper functions
650b57cec5SDimitry Andric 
660b57cec5SDimitry Andric // Is the instruction a type of multiply on 64-bit (or 32-bit) FPRs?
670b57cec5SDimitry Andric static bool isMul(MachineInstr *MI) {
680b57cec5SDimitry Andric   switch (MI->getOpcode()) {
690b57cec5SDimitry Andric   case AArch64::FMULSrr:
700b57cec5SDimitry Andric   case AArch64::FNMULSrr:
710b57cec5SDimitry Andric   case AArch64::FMULDrr:
720b57cec5SDimitry Andric   case AArch64::FNMULDrr:
730b57cec5SDimitry Andric     return true;
740b57cec5SDimitry Andric   default:
750b57cec5SDimitry Andric     return false;
760b57cec5SDimitry Andric   }
770b57cec5SDimitry Andric }
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric // Is the instruction a type of FP multiply-accumulate on 64-bit (or 32-bit) FPRs?
800b57cec5SDimitry Andric static bool isMla(MachineInstr *MI) {
810b57cec5SDimitry Andric   switch (MI->getOpcode()) {
820b57cec5SDimitry Andric   case AArch64::FMSUBSrrr:
830b57cec5SDimitry Andric   case AArch64::FMADDSrrr:
840b57cec5SDimitry Andric   case AArch64::FNMSUBSrrr:
850b57cec5SDimitry Andric   case AArch64::FNMADDSrrr:
860b57cec5SDimitry Andric   case AArch64::FMSUBDrrr:
870b57cec5SDimitry Andric   case AArch64::FMADDDrrr:
880b57cec5SDimitry Andric   case AArch64::FNMSUBDrrr:
890b57cec5SDimitry Andric   case AArch64::FNMADDDrrr:
900b57cec5SDimitry Andric     return true;
910b57cec5SDimitry Andric   default:
920b57cec5SDimitry Andric     return false;
930b57cec5SDimitry Andric   }
940b57cec5SDimitry Andric }
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric namespace {
990b57cec5SDimitry Andric /// A "color", which is either even or odd. Yes, these aren't really colors
1000b57cec5SDimitry Andric /// but the algorithm is conceptually doing two-color graph coloring.
1010b57cec5SDimitry Andric enum class Color { Even, Odd };
1020b57cec5SDimitry Andric #ifndef NDEBUG
1030b57cec5SDimitry Andric static const char *ColorNames[2] = { "Even", "Odd" };
1040b57cec5SDimitry Andric #endif
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric class Chain;
1070b57cec5SDimitry Andric 
1080b57cec5SDimitry Andric class AArch64A57FPLoadBalancing : public MachineFunctionPass {
1090b57cec5SDimitry Andric   MachineRegisterInfo *MRI;
1100b57cec5SDimitry Andric   const TargetRegisterInfo *TRI;
1110b57cec5SDimitry Andric   RegisterClassInfo RCI;
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric public:
1140b57cec5SDimitry Andric   static char ID;
1150b57cec5SDimitry Andric   explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {
1160b57cec5SDimitry Andric     initializeAArch64A57FPLoadBalancingPass(*PassRegistry::getPassRegistry());
1170b57cec5SDimitry Andric   }
1180b57cec5SDimitry Andric 
1190b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &F) override;
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
1220b57cec5SDimitry Andric     return MachineFunctionProperties().set(
1230b57cec5SDimitry Andric         MachineFunctionProperties::Property::NoVRegs);
1240b57cec5SDimitry Andric   }
1250b57cec5SDimitry Andric 
1260b57cec5SDimitry Andric   StringRef getPassName() const override {
1270b57cec5SDimitry Andric     return "A57 FP Anti-dependency breaker";
1280b57cec5SDimitry Andric   }
1290b57cec5SDimitry Andric 
1300b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
1310b57cec5SDimitry Andric     AU.setPreservesCFG();
1320b57cec5SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
1330b57cec5SDimitry Andric   }
1340b57cec5SDimitry Andric 
1350b57cec5SDimitry Andric private:
1360b57cec5SDimitry Andric   bool runOnBasicBlock(MachineBasicBlock &MBB);
1370b57cec5SDimitry Andric   bool colorChainSet(std::vector<Chain*> GV, MachineBasicBlock &MBB,
1380b57cec5SDimitry Andric                      int &Balance);
1390b57cec5SDimitry Andric   bool colorChain(Chain *G, Color C, MachineBasicBlock &MBB);
1400b57cec5SDimitry Andric   int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
1410b57cec5SDimitry Andric   void scanInstruction(MachineInstr *MI, unsigned Idx,
1420b57cec5SDimitry Andric                        std::map<unsigned, Chain*> &Active,
1430b57cec5SDimitry Andric                        std::vector<std::unique_ptr<Chain>> &AllChains);
1440b57cec5SDimitry Andric   void maybeKillChain(MachineOperand &MO, unsigned Idx,
1450b57cec5SDimitry Andric                       std::map<unsigned, Chain*> &RegChains);
1460b57cec5SDimitry Andric   Color getColor(unsigned Register);
1470b57cec5SDimitry Andric   Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
1480b57cec5SDimitry Andric };
1490b57cec5SDimitry Andric }
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric char AArch64A57FPLoadBalancing::ID = 0;
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancing, DEBUG_TYPE,
1540b57cec5SDimitry Andric                       "AArch64 A57 FP Load-Balancing", false, false)
1550b57cec5SDimitry Andric INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
1560b57cec5SDimitry Andric                     "AArch64 A57 FP Load-Balancing", false, false)
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric namespace {
1590b57cec5SDimitry Andric /// A Chain is a sequence of instructions that are linked together by
1600b57cec5SDimitry Andric /// an accumulation operand. For example:
1610b57cec5SDimitry Andric ///
1620b57cec5SDimitry Andric ///   fmul def d0, ?
1630b57cec5SDimitry Andric ///   fmla def d1, ?, ?, killed d0
1640b57cec5SDimitry Andric ///   fmla def d2, ?, ?, killed d1
1650b57cec5SDimitry Andric ///
1660b57cec5SDimitry Andric /// There may be other instructions interleaved in the sequence that
1670b57cec5SDimitry Andric /// do not belong to the chain. These other instructions must not use
1680b57cec5SDimitry Andric /// the "chain" register at any point.
1690b57cec5SDimitry Andric ///
1700b57cec5SDimitry Andric /// We currently only support chains where the "chain" operand is killed
1710b57cec5SDimitry Andric /// at each link in the chain for simplicity.
1720b57cec5SDimitry Andric /// A chain has three important instructions - Start, Last and Kill.
1730b57cec5SDimitry Andric ///   * The start instruction is the first instruction in the chain.
1740b57cec5SDimitry Andric ///   * Last is the final instruction in the chain.
1750b57cec5SDimitry Andric ///   * Kill may or may not be defined. If defined, Kill is the instruction
1760b57cec5SDimitry Andric ///     where the outgoing value of the Last instruction is killed.
1770b57cec5SDimitry Andric ///     This information is important as if we know the outgoing value is
1780b57cec5SDimitry Andric ///     killed with no intervening uses, we can safely change its register.
1790b57cec5SDimitry Andric ///
1800b57cec5SDimitry Andric /// Without a kill instruction, we must assume the outgoing value escapes
1810b57cec5SDimitry Andric /// beyond our model and either must not change its register or must
1820b57cec5SDimitry Andric /// create a fixup FMOV to keep the old register value consistent.
1830b57cec5SDimitry Andric ///
1840b57cec5SDimitry Andric class Chain {
1850b57cec5SDimitry Andric public:
1860b57cec5SDimitry Andric   /// The important (marker) instructions.
1870b57cec5SDimitry Andric   MachineInstr *StartInst, *LastInst, *KillInst;
1880b57cec5SDimitry Andric   /// The index, from the start of the basic block, that each marker
1890b57cec5SDimitry Andric   /// appears. These are stored so we can do quick interval tests.
1900b57cec5SDimitry Andric   unsigned StartInstIdx, LastInstIdx, KillInstIdx;
1910b57cec5SDimitry Andric   /// All instructions in the chain.
1920b57cec5SDimitry Andric   std::set<MachineInstr*> Insts;
1930b57cec5SDimitry Andric   /// True if KillInst cannot be modified. If this is true,
1940b57cec5SDimitry Andric   /// we cannot change LastInst's outgoing register.
1950b57cec5SDimitry Andric   /// This will be true for tied values and regmasks.
1960b57cec5SDimitry Andric   bool KillIsImmutable;
1970b57cec5SDimitry Andric   /// The "color" of LastInst. This will be the preferred chain color,
1980b57cec5SDimitry Andric   /// as changing intermediate nodes is easy but changing the last
1990b57cec5SDimitry Andric   /// instruction can be more tricky.
2000b57cec5SDimitry Andric   Color LastColor;
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric   Chain(MachineInstr *MI, unsigned Idx, Color C)
2030b57cec5SDimitry Andric       : StartInst(MI), LastInst(MI), KillInst(nullptr),
2040b57cec5SDimitry Andric         StartInstIdx(Idx), LastInstIdx(Idx), KillInstIdx(0),
2050b57cec5SDimitry Andric         LastColor(C) {
2060b57cec5SDimitry Andric     Insts.insert(MI);
2070b57cec5SDimitry Andric   }
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric   /// Add a new instruction into the chain. The instruction's dest operand
2100b57cec5SDimitry Andric   /// has the given color.
2110b57cec5SDimitry Andric   void add(MachineInstr *MI, unsigned Idx, Color C) {
2120b57cec5SDimitry Andric     LastInst = MI;
2130b57cec5SDimitry Andric     LastInstIdx = Idx;
2140b57cec5SDimitry Andric     LastColor = C;
2150b57cec5SDimitry Andric     assert((KillInstIdx == 0 || LastInstIdx < KillInstIdx) &&
2160b57cec5SDimitry Andric            "Chain: broken invariant. A Chain can only be killed after its last "
2170b57cec5SDimitry Andric            "def");
2180b57cec5SDimitry Andric 
2190b57cec5SDimitry Andric     Insts.insert(MI);
2200b57cec5SDimitry Andric   }
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric   /// Return true if MI is a member of the chain.
2230b57cec5SDimitry Andric   bool contains(MachineInstr &MI) { return Insts.count(&MI) > 0; }
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric   /// Return the number of instructions in the chain.
2260b57cec5SDimitry Andric   unsigned size() const {
2270b57cec5SDimitry Andric     return Insts.size();
2280b57cec5SDimitry Andric   }
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   /// Inform the chain that its last active register (the dest register of
2310b57cec5SDimitry Andric   /// LastInst) is killed by MI with no intervening uses or defs.
2320b57cec5SDimitry Andric   void setKill(MachineInstr *MI, unsigned Idx, bool Immutable) {
2330b57cec5SDimitry Andric     KillInst = MI;
2340b57cec5SDimitry Andric     KillInstIdx = Idx;
2350b57cec5SDimitry Andric     KillIsImmutable = Immutable;
2360b57cec5SDimitry Andric     assert((KillInstIdx == 0 || LastInstIdx < KillInstIdx) &&
2370b57cec5SDimitry Andric            "Chain: broken invariant. A Chain can only be killed after its last "
2380b57cec5SDimitry Andric            "def");
2390b57cec5SDimitry Andric   }
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric   /// Return the first instruction in the chain.
2420b57cec5SDimitry Andric   MachineInstr *getStart() const { return StartInst; }
2430b57cec5SDimitry Andric   /// Return the last instruction in the chain.
2440b57cec5SDimitry Andric   MachineInstr *getLast() const { return LastInst; }
2450b57cec5SDimitry Andric   /// Return the "kill" instruction (as set with setKill()) or NULL.
2460b57cec5SDimitry Andric   MachineInstr *getKill() const { return KillInst; }
2470b57cec5SDimitry Andric   /// Return an instruction that can be used as an iterator for the end
2480b57cec5SDimitry Andric   /// of the chain. This is the maximum of KillInst (if set) and LastInst.
2490b57cec5SDimitry Andric   MachineBasicBlock::iterator end() const {
2500b57cec5SDimitry Andric     return ++MachineBasicBlock::iterator(KillInst ? KillInst : LastInst);
2510b57cec5SDimitry Andric   }
2520b57cec5SDimitry Andric   MachineBasicBlock::iterator begin() const { return getStart(); }
2530b57cec5SDimitry Andric 
2540b57cec5SDimitry Andric   /// Can the Kill instruction (assuming one exists) be modified?
2550b57cec5SDimitry Andric   bool isKillImmutable() const { return KillIsImmutable; }
2560b57cec5SDimitry Andric 
2570b57cec5SDimitry Andric   /// Return the preferred color of this chain.
2580b57cec5SDimitry Andric   Color getPreferredColor() {
2590b57cec5SDimitry Andric     if (OverrideBalance != 0)
2600b57cec5SDimitry Andric       return OverrideBalance == 1 ? Color::Even : Color::Odd;
2610b57cec5SDimitry Andric     return LastColor;
2620b57cec5SDimitry Andric   }
2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric   /// Return true if this chain (StartInst..KillInst) overlaps with Other.
2650b57cec5SDimitry Andric   bool rangeOverlapsWith(const Chain &Other) const {
2660b57cec5SDimitry Andric     unsigned End = KillInst ? KillInstIdx : LastInstIdx;
2670b57cec5SDimitry Andric     unsigned OtherEnd = Other.KillInst ?
2680b57cec5SDimitry Andric       Other.KillInstIdx : Other.LastInstIdx;
2690b57cec5SDimitry Andric 
2700b57cec5SDimitry Andric     return StartInstIdx <= OtherEnd && Other.StartInstIdx <= End;
2710b57cec5SDimitry Andric   }
2720b57cec5SDimitry Andric 
2730b57cec5SDimitry Andric   /// Return true if this chain starts before Other.
2740b57cec5SDimitry Andric   bool startsBefore(const Chain *Other) const {
2750b57cec5SDimitry Andric     return StartInstIdx < Other->StartInstIdx;
2760b57cec5SDimitry Andric   }
2770b57cec5SDimitry Andric 
2780b57cec5SDimitry Andric   /// Return true if the group will require a fixup MOV at the end.
2790b57cec5SDimitry Andric   bool requiresFixup() const {
2800b57cec5SDimitry Andric     return (getKill() && isKillImmutable()) || !getKill();
2810b57cec5SDimitry Andric   }
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric   /// Return a simple string representation of the chain.
2840b57cec5SDimitry Andric   std::string str() const {
2850b57cec5SDimitry Andric     std::string S;
2860b57cec5SDimitry Andric     raw_string_ostream OS(S);
2870b57cec5SDimitry Andric 
2880b57cec5SDimitry Andric     OS << "{";
2890b57cec5SDimitry Andric     StartInst->print(OS, /* SkipOpers= */true);
2900b57cec5SDimitry Andric     OS << " -> ";
2910b57cec5SDimitry Andric     LastInst->print(OS, /* SkipOpers= */true);
2920b57cec5SDimitry Andric     if (KillInst) {
2930b57cec5SDimitry Andric       OS << " (kill @ ";
2940b57cec5SDimitry Andric       KillInst->print(OS, /* SkipOpers= */true);
2950b57cec5SDimitry Andric       OS << ")";
2960b57cec5SDimitry Andric     }
2970b57cec5SDimitry Andric     OS << "}";
2980b57cec5SDimitry Andric 
2990b57cec5SDimitry Andric     return OS.str();
3000b57cec5SDimitry Andric   }
3010b57cec5SDimitry Andric 
3020b57cec5SDimitry Andric };
3030b57cec5SDimitry Andric 
3040b57cec5SDimitry Andric } // end anonymous namespace
3050b57cec5SDimitry Andric 
3060b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
3070b57cec5SDimitry Andric 
3080b57cec5SDimitry Andric bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
3090b57cec5SDimitry Andric   if (skipFunction(F.getFunction()))
3100b57cec5SDimitry Andric     return false;
3110b57cec5SDimitry Andric 
3120b57cec5SDimitry Andric   if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())
3130b57cec5SDimitry Andric     return false;
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric   bool Changed = false;
3160b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric   MRI = &F.getRegInfo();
3190b57cec5SDimitry Andric   TRI = F.getRegInfo().getTargetRegisterInfo();
3200b57cec5SDimitry Andric   RCI.runOnMachineFunction(F);
3210b57cec5SDimitry Andric 
3220b57cec5SDimitry Andric   for (auto &MBB : F) {
3230b57cec5SDimitry Andric     Changed |= runOnBasicBlock(MBB);
3240b57cec5SDimitry Andric   }
3250b57cec5SDimitry Andric 
3260b57cec5SDimitry Andric   return Changed;
3270b57cec5SDimitry Andric }
3280b57cec5SDimitry Andric 
3290b57cec5SDimitry Andric bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
3300b57cec5SDimitry Andric   bool Changed = false;
3310b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Running on MBB: " << MBB
3320b57cec5SDimitry Andric                     << " - scanning instructions...\n");
3330b57cec5SDimitry Andric 
3340b57cec5SDimitry Andric   // First, scan the basic block producing a set of chains.
3350b57cec5SDimitry Andric 
3360b57cec5SDimitry Andric   // The currently "active" chains - chains that can be added to and haven't
3370b57cec5SDimitry Andric   // been killed yet. This is keyed by register - all chains can only have one
3380b57cec5SDimitry Andric   // "link" register between each inst in the chain.
3390b57cec5SDimitry Andric   std::map<unsigned, Chain*> ActiveChains;
3400b57cec5SDimitry Andric   std::vector<std::unique_ptr<Chain>> AllChains;
3410b57cec5SDimitry Andric   unsigned Idx = 0;
3420b57cec5SDimitry Andric   for (auto &MI : MBB)
3430b57cec5SDimitry Andric     scanInstruction(&MI, Idx++, ActiveChains, AllChains);
3440b57cec5SDimitry Andric 
3450b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Scan complete, " << AllChains.size()
3460b57cec5SDimitry Andric                     << " chains created.\n");
3470b57cec5SDimitry Andric 
3480b57cec5SDimitry Andric   // Group the chains into disjoint sets based on their liveness range. This is
3490b57cec5SDimitry Andric   // a poor-man's version of graph coloring. Ideally we'd create an interference
3500b57cec5SDimitry Andric   // graph and perform full-on graph coloring on that, but;
3510b57cec5SDimitry Andric   //   (a) That's rather heavyweight for only two colors.
3520b57cec5SDimitry Andric   //   (b) We expect multiple disjoint interference regions - in practice the live
3530b57cec5SDimitry Andric   //       range of chains is quite small and they are clustered between loads
3540b57cec5SDimitry Andric   //       and stores.
3550b57cec5SDimitry Andric   EquivalenceClasses<Chain*> EC;
3560b57cec5SDimitry Andric   for (auto &I : AllChains)
3570b57cec5SDimitry Andric     EC.insert(I.get());
3580b57cec5SDimitry Andric 
3590b57cec5SDimitry Andric   for (auto &I : AllChains)
3600b57cec5SDimitry Andric     for (auto &J : AllChains)
3610b57cec5SDimitry Andric       if (I != J && I->rangeOverlapsWith(*J))
3620b57cec5SDimitry Andric         EC.unionSets(I.get(), J.get());
3630b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Created " << EC.getNumClasses() << " disjoint sets.\n");
3640b57cec5SDimitry Andric 
3650b57cec5SDimitry Andric   // Now we assume that every member of an equivalence class interferes
3660b57cec5SDimitry Andric   // with every other member of that class, and with no members of other classes.
3670b57cec5SDimitry Andric 
3680b57cec5SDimitry Andric   // Convert the EquivalenceClasses to a simpler set of sets.
3690b57cec5SDimitry Andric   std::vector<std::vector<Chain*> > V;
3700b57cec5SDimitry Andric   for (auto I = EC.begin(), E = EC.end(); I != E; ++I) {
3710b57cec5SDimitry Andric     std::vector<Chain*> Cs(EC.member_begin(I), EC.member_end());
3720b57cec5SDimitry Andric     if (Cs.empty()) continue;
3730b57cec5SDimitry Andric     V.push_back(std::move(Cs));
3740b57cec5SDimitry Andric   }
3750b57cec5SDimitry Andric 
3760b57cec5SDimitry Andric   // Now we have a set of sets, order them by start address so
3770b57cec5SDimitry Andric   // we can iterate over them sequentially.
3780b57cec5SDimitry Andric   llvm::sort(V,
3790b57cec5SDimitry Andric              [](const std::vector<Chain *> &A, const std::vector<Chain *> &B) {
3800b57cec5SDimitry Andric                return A.front()->startsBefore(B.front());
3810b57cec5SDimitry Andric              });
3820b57cec5SDimitry Andric 
3830b57cec5SDimitry Andric   // As we only have two colors, we can track the global (BB-level) balance of
3840b57cec5SDimitry Andric   // odds versus evens. We aim to keep this near zero to keep both execution
3850b57cec5SDimitry Andric   // units fed.
3860b57cec5SDimitry Andric   // Positive means we're even-heavy, negative we're odd-heavy.
3870b57cec5SDimitry Andric   //
3880b57cec5SDimitry Andric   // FIXME: If chains have interdependencies, for example:
3890b57cec5SDimitry Andric   //   mul r0, r1, r2
3900b57cec5SDimitry Andric   //   mul r3, r0, r1
3910b57cec5SDimitry Andric   // We do not model this and may color each one differently, assuming we'll
3920b57cec5SDimitry Andric   // get ILP when we obviously can't. This hasn't been seen to be a problem
3930b57cec5SDimitry Andric   // in practice so far, so we simplify the algorithm by ignoring it.
3940b57cec5SDimitry Andric   int Parity = 0;
3950b57cec5SDimitry Andric 
3960b57cec5SDimitry Andric   for (auto &I : V)
3970b57cec5SDimitry Andric     Changed |= colorChainSet(std::move(I), MBB, Parity);
3980b57cec5SDimitry Andric 
3990b57cec5SDimitry Andric   return Changed;
4000b57cec5SDimitry Andric }
4010b57cec5SDimitry Andric 
4020b57cec5SDimitry Andric Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor,
4030b57cec5SDimitry Andric                                                   std::vector<Chain*> &L) {
4040b57cec5SDimitry Andric   if (L.empty())
4050b57cec5SDimitry Andric     return nullptr;
4060b57cec5SDimitry Andric 
4070b57cec5SDimitry Andric   // We try and get the best candidate from L to color next, given that our
4080b57cec5SDimitry Andric   // preferred color is "PreferredColor". L is ordered from larger to smaller
4090b57cec5SDimitry Andric   // chains. It is beneficial to color the large chains before the small chains,
4100b57cec5SDimitry Andric   // but if we can't find a chain of the maximum length with the preferred color,
4110b57cec5SDimitry Andric   // we fuzz the size and look for slightly smaller chains before giving up and
4120b57cec5SDimitry Andric   // returning a chain that must be recolored.
4130b57cec5SDimitry Andric 
4140b57cec5SDimitry Andric   // FIXME: Does this need to be configurable?
4150b57cec5SDimitry Andric   const unsigned SizeFuzz = 1;
4160b57cec5SDimitry Andric   unsigned MinSize = L.front()->size() - SizeFuzz;
4170b57cec5SDimitry Andric   for (auto I = L.begin(), E = L.end(); I != E; ++I) {
4180b57cec5SDimitry Andric     if ((*I)->size() <= MinSize) {
4190b57cec5SDimitry Andric       // We've gone past the size limit. Return the previous item.
4200b57cec5SDimitry Andric       Chain *Ch = *--I;
4210b57cec5SDimitry Andric       L.erase(I);
4220b57cec5SDimitry Andric       return Ch;
4230b57cec5SDimitry Andric     }
4240b57cec5SDimitry Andric 
4250b57cec5SDimitry Andric     if ((*I)->getPreferredColor() == PreferredColor) {
4260b57cec5SDimitry Andric       Chain *Ch = *I;
4270b57cec5SDimitry Andric       L.erase(I);
4280b57cec5SDimitry Andric       return Ch;
4290b57cec5SDimitry Andric     }
4300b57cec5SDimitry Andric   }
4310b57cec5SDimitry Andric 
4320b57cec5SDimitry Andric   // Bailout case - just return the first item.
4330b57cec5SDimitry Andric   Chain *Ch = L.front();
4340b57cec5SDimitry Andric   L.erase(L.begin());
4350b57cec5SDimitry Andric   return Ch;
4360b57cec5SDimitry Andric }
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
4390b57cec5SDimitry Andric                                               MachineBasicBlock &MBB,
4400b57cec5SDimitry Andric                                               int &Parity) {
4410b57cec5SDimitry Andric   bool Changed = false;
4420b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "colorChainSet(): #sets=" << GV.size() << "\n");
4430b57cec5SDimitry Andric 
4440b57cec5SDimitry Andric   // Sort by descending size order so that we allocate the most important
4450b57cec5SDimitry Andric   // sets first.
4460b57cec5SDimitry Andric   // Tie-break equivalent sizes by sorting chains requiring fixups before
4470b57cec5SDimitry Andric   // those without fixups. The logic here is that we should look at the
4480b57cec5SDimitry Andric   // chains that we cannot change before we look at those we can,
4490b57cec5SDimitry Andric   // so the parity counter is updated and we know what color we should
4500b57cec5SDimitry Andric   // change them to!
4510b57cec5SDimitry Andric   // Final tie-break with instruction order so pass output is stable (i.e. not
4520b57cec5SDimitry Andric   // dependent on malloc'd pointer values).
4530b57cec5SDimitry Andric   llvm::sort(GV, [](const Chain *G1, const Chain *G2) {
4540b57cec5SDimitry Andric     if (G1->size() != G2->size())
4550b57cec5SDimitry Andric       return G1->size() > G2->size();
4560b57cec5SDimitry Andric     if (G1->requiresFixup() != G2->requiresFixup())
4570b57cec5SDimitry Andric       return G1->requiresFixup() > G2->requiresFixup();
4580b57cec5SDimitry Andric     // Make sure startsBefore() produces a stable final order.
4590b57cec5SDimitry Andric     assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
4600b57cec5SDimitry Andric            "Starts before not total order!");
4610b57cec5SDimitry Andric     return G1->startsBefore(G2);
4620b57cec5SDimitry Andric   });
4630b57cec5SDimitry Andric 
4640b57cec5SDimitry Andric   Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
4650b57cec5SDimitry Andric   while (Chain *G = getAndEraseNext(PreferredColor, GV)) {
4660b57cec5SDimitry Andric     // Start off by assuming we'll color to our own preferred color.
4670b57cec5SDimitry Andric     Color C = PreferredColor;
4680b57cec5SDimitry Andric     if (Parity == 0)
4690b57cec5SDimitry Andric       // But if we really don't care, use the chain's preferred color.
4700b57cec5SDimitry Andric       C = G->getPreferredColor();
4710b57cec5SDimitry Andric 
4720b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << " - Parity=" << Parity
4730b57cec5SDimitry Andric                       << ", Color=" << ColorNames[(int)C] << "\n");
4740b57cec5SDimitry Andric 
4750b57cec5SDimitry Andric     // If we'll need a fixup FMOV, don't bother. Testing has shown that this
4760b57cec5SDimitry Andric     // happens infrequently and when it does it has at least a 50% chance of
4770b57cec5SDimitry Andric     // slowing code down instead of speeding it up.
4780b57cec5SDimitry Andric     if (G->requiresFixup() && C != G->getPreferredColor()) {
4790b57cec5SDimitry Andric       C = G->getPreferredColor();
4800b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << " - " << G->str()
4810b57cec5SDimitry Andric                         << " - not worthwhile changing; "
4820b57cec5SDimitry Andric                            "color remains "
4830b57cec5SDimitry Andric                         << ColorNames[(int)C] << "\n");
4840b57cec5SDimitry Andric     }
4850b57cec5SDimitry Andric 
4860b57cec5SDimitry Andric     Changed |= colorChain(G, C, MBB);
4870b57cec5SDimitry Andric 
4880b57cec5SDimitry Andric     Parity += (C == Color::Even) ? G->size() : -G->size();
4890b57cec5SDimitry Andric     PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
4900b57cec5SDimitry Andric   }
4910b57cec5SDimitry Andric 
4920b57cec5SDimitry Andric   return Changed;
4930b57cec5SDimitry Andric }
4940b57cec5SDimitry Andric 
4950b57cec5SDimitry Andric int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
4960b57cec5SDimitry Andric                                                 MachineBasicBlock &MBB) {
4970b57cec5SDimitry Andric   // Can we find an appropriate register that is available throughout the life
4980b57cec5SDimitry Andric   // of the chain? Simulate liveness backwards until the end of the chain.
4990b57cec5SDimitry Andric   LiveRegUnits Units(*TRI);
5000b57cec5SDimitry Andric   Units.addLiveOuts(MBB);
5010b57cec5SDimitry Andric   MachineBasicBlock::iterator I = MBB.end();
5020b57cec5SDimitry Andric   MachineBasicBlock::iterator ChainEnd = G->end();
5030b57cec5SDimitry Andric   while (I != ChainEnd) {
5040b57cec5SDimitry Andric     --I;
5050b57cec5SDimitry Andric     Units.stepBackward(*I);
5060b57cec5SDimitry Andric   }
5070b57cec5SDimitry Andric 
5080b57cec5SDimitry Andric   // Check which register units are alive throughout the chain.
5090b57cec5SDimitry Andric   MachineBasicBlock::iterator ChainBegin = G->begin();
5100b57cec5SDimitry Andric   assert(ChainBegin != ChainEnd && "Chain should contain instructions");
5110b57cec5SDimitry Andric   do {
5120b57cec5SDimitry Andric     --I;
5130b57cec5SDimitry Andric     Units.accumulate(*I);
5140b57cec5SDimitry Andric   } while (I != ChainBegin);
5150b57cec5SDimitry Andric 
5160b57cec5SDimitry Andric   // Make sure we allocate in-order, to get the cheapest registers first.
517*bdd1243dSDimitry Andric   unsigned RegClassID = ChainBegin->getDesc().operands()[0].RegClass;
5180b57cec5SDimitry Andric   auto Ord = RCI.getOrder(TRI->getRegClass(RegClassID));
5190b57cec5SDimitry Andric   for (auto Reg : Ord) {
5200b57cec5SDimitry Andric     if (!Units.available(Reg))
5210b57cec5SDimitry Andric       continue;
5220b57cec5SDimitry Andric     if (C == getColor(Reg))
5230b57cec5SDimitry Andric       return Reg;
5240b57cec5SDimitry Andric   }
5250b57cec5SDimitry Andric 
5260b57cec5SDimitry Andric   return -1;
5270b57cec5SDimitry Andric }
5280b57cec5SDimitry Andric 
5290b57cec5SDimitry Andric bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
5300b57cec5SDimitry Andric                                            MachineBasicBlock &MBB) {
5310b57cec5SDimitry Andric   bool Changed = false;
5320b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << " - colorChain(" << G->str() << ", "
5330b57cec5SDimitry Andric                     << ColorNames[(int)C] << ")\n");
5340b57cec5SDimitry Andric 
5350b57cec5SDimitry Andric   // Try and obtain a free register of the right class. Without a register
5360b57cec5SDimitry Andric   // to play with we cannot continue.
5370b57cec5SDimitry Andric   int Reg = scavengeRegister(G, C, MBB);
5380b57cec5SDimitry Andric   if (Reg == -1) {
5390b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Scavenging (thus coloring) failed!\n");
5400b57cec5SDimitry Andric     return false;
5410b57cec5SDimitry Andric   }
5420b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << " - Scavenged register: " << printReg(Reg, TRI) << "\n");
5430b57cec5SDimitry Andric 
5440b57cec5SDimitry Andric   std::map<unsigned, unsigned> Substs;
5450b57cec5SDimitry Andric   for (MachineInstr &I : *G) {
5460b57cec5SDimitry Andric     if (!G->contains(I) && (&I != G->getKill() || G->isKillImmutable()))
5470b57cec5SDimitry Andric       continue;
5480b57cec5SDimitry Andric 
5490b57cec5SDimitry Andric     // I is a member of G, or I is a mutable instruction that kills G.
5500b57cec5SDimitry Andric 
5510b57cec5SDimitry Andric     std::vector<unsigned> ToErase;
5520b57cec5SDimitry Andric     for (auto &U : I.operands()) {
5530b57cec5SDimitry Andric       if (U.isReg() && U.isUse() && Substs.find(U.getReg()) != Substs.end()) {
5548bcb0991SDimitry Andric         Register OrigReg = U.getReg();
5550b57cec5SDimitry Andric         U.setReg(Substs[OrigReg]);
5560b57cec5SDimitry Andric         if (U.isKill())
5570b57cec5SDimitry Andric           // Don't erase straight away, because there may be other operands
5580b57cec5SDimitry Andric           // that also reference this substitution!
5590b57cec5SDimitry Andric           ToErase.push_back(OrigReg);
5600b57cec5SDimitry Andric       } else if (U.isRegMask()) {
5610b57cec5SDimitry Andric         for (auto J : Substs) {
5620b57cec5SDimitry Andric           if (U.clobbersPhysReg(J.first))
5630b57cec5SDimitry Andric             ToErase.push_back(J.first);
5640b57cec5SDimitry Andric         }
5650b57cec5SDimitry Andric       }
5660b57cec5SDimitry Andric     }
5670b57cec5SDimitry Andric     // Now it's safe to remove the substs identified earlier.
5680b57cec5SDimitry Andric     for (auto J : ToErase)
5690b57cec5SDimitry Andric       Substs.erase(J);
5700b57cec5SDimitry Andric 
5710b57cec5SDimitry Andric     // Only change the def if this isn't the last instruction.
5720b57cec5SDimitry Andric     if (&I != G->getKill()) {
5730b57cec5SDimitry Andric       MachineOperand &MO = I.getOperand(0);
5740b57cec5SDimitry Andric 
5750b57cec5SDimitry Andric       bool Change = TransformAll || getColor(MO.getReg()) != C;
5760b57cec5SDimitry Andric       if (G->requiresFixup() && &I == G->getLast())
5770b57cec5SDimitry Andric         Change = false;
5780b57cec5SDimitry Andric 
5790b57cec5SDimitry Andric       if (Change) {
5800b57cec5SDimitry Andric         Substs[MO.getReg()] = Reg;
5810b57cec5SDimitry Andric         MO.setReg(Reg);
5820b57cec5SDimitry Andric 
5830b57cec5SDimitry Andric         Changed = true;
5840b57cec5SDimitry Andric       }
5850b57cec5SDimitry Andric     }
5860b57cec5SDimitry Andric   }
5870b57cec5SDimitry Andric   assert(Substs.size() == 0 && "No substitutions should be left active!");
5880b57cec5SDimitry Andric 
5890b57cec5SDimitry Andric   if (G->getKill()) {
5900b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << " - Kill instruction seen.\n");
5910b57cec5SDimitry Andric   } else {
5920b57cec5SDimitry Andric     // We didn't have a kill instruction, but we didn't seem to need to change
5930b57cec5SDimitry Andric     // the destination register anyway.
5940b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << " - Destination register not changed.\n");
5950b57cec5SDimitry Andric   }
5960b57cec5SDimitry Andric   return Changed;
5970b57cec5SDimitry Andric }
5980b57cec5SDimitry Andric 
5990b57cec5SDimitry Andric void AArch64A57FPLoadBalancing::scanInstruction(
6000b57cec5SDimitry Andric     MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
6010b57cec5SDimitry Andric     std::vector<std::unique_ptr<Chain>> &AllChains) {
6020b57cec5SDimitry Andric   // Inspect "MI", updating ActiveChains and AllChains.
6030b57cec5SDimitry Andric 
6040b57cec5SDimitry Andric   if (isMul(MI)) {
6050b57cec5SDimitry Andric 
6060b57cec5SDimitry Andric     for (auto &I : MI->uses())
6070b57cec5SDimitry Andric       maybeKillChain(I, Idx, ActiveChains);
6080b57cec5SDimitry Andric     for (auto &I : MI->defs())
6090b57cec5SDimitry Andric       maybeKillChain(I, Idx, ActiveChains);
6100b57cec5SDimitry Andric 
6110b57cec5SDimitry Andric     // Create a new chain. Multiplies don't require forwarding so can go on any
6120b57cec5SDimitry Andric     // unit.
6138bcb0991SDimitry Andric     Register DestReg = MI->getOperand(0).getReg();
6140b57cec5SDimitry Andric 
6150b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "New chain started for register "
6160b57cec5SDimitry Andric                       << printReg(DestReg, TRI) << " at " << *MI);
6170b57cec5SDimitry Andric 
6188bcb0991SDimitry Andric     auto G = std::make_unique<Chain>(MI, Idx, getColor(DestReg));
6190b57cec5SDimitry Andric     ActiveChains[DestReg] = G.get();
6200b57cec5SDimitry Andric     AllChains.push_back(std::move(G));
6210b57cec5SDimitry Andric 
6220b57cec5SDimitry Andric   } else if (isMla(MI)) {
6230b57cec5SDimitry Andric 
6240b57cec5SDimitry Andric     // It is beneficial to keep MLAs on the same functional unit as their
6250b57cec5SDimitry Andric     // accumulator operand.
6268bcb0991SDimitry Andric     Register DestReg = MI->getOperand(0).getReg();
6278bcb0991SDimitry Andric     Register AccumReg = MI->getOperand(3).getReg();
6280b57cec5SDimitry Andric 
6290b57cec5SDimitry Andric     maybeKillChain(MI->getOperand(1), Idx, ActiveChains);
6300b57cec5SDimitry Andric     maybeKillChain(MI->getOperand(2), Idx, ActiveChains);
6310b57cec5SDimitry Andric     if (DestReg != AccumReg)
6320b57cec5SDimitry Andric       maybeKillChain(MI->getOperand(0), Idx, ActiveChains);
6330b57cec5SDimitry Andric 
6340b57cec5SDimitry Andric     if (ActiveChains.find(AccumReg) != ActiveChains.end()) {
6350b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Chain found for accumulator register "
6360b57cec5SDimitry Andric                         << printReg(AccumReg, TRI) << " in MI " << *MI);
6370b57cec5SDimitry Andric 
6380b57cec5SDimitry Andric       // For simplicity we only chain together sequences of MULs/MLAs where the
6390b57cec5SDimitry Andric       // accumulator register is killed on each instruction. This means we don't
6400b57cec5SDimitry Andric       // need to track other uses of the registers we want to rewrite.
6410b57cec5SDimitry Andric       //
6420b57cec5SDimitry Andric       // FIXME: We could extend to handle the non-kill cases for more coverage.
6430b57cec5SDimitry Andric       if (MI->getOperand(3).isKill()) {
6440b57cec5SDimitry Andric         // Add to chain.
6450b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "Instruction was successfully added to chain.\n");
6460b57cec5SDimitry Andric         ActiveChains[AccumReg]->add(MI, Idx, getColor(DestReg));
6470b57cec5SDimitry Andric         // Handle cases where the destination is not the same as the accumulator.
6480b57cec5SDimitry Andric         if (DestReg != AccumReg) {
6490b57cec5SDimitry Andric           ActiveChains[DestReg] = ActiveChains[AccumReg];
6500b57cec5SDimitry Andric           ActiveChains.erase(AccumReg);
6510b57cec5SDimitry Andric         }
6520b57cec5SDimitry Andric         return;
6530b57cec5SDimitry Andric       }
6540b57cec5SDimitry Andric 
6550b57cec5SDimitry Andric       LLVM_DEBUG(
6560b57cec5SDimitry Andric           dbgs() << "Cannot add to chain because accumulator operand wasn't "
6570b57cec5SDimitry Andric                  << "marked <kill>!\n");
6580b57cec5SDimitry Andric       maybeKillChain(MI->getOperand(3), Idx, ActiveChains);
6590b57cec5SDimitry Andric     }
6600b57cec5SDimitry Andric 
6610b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Creating new chain for dest register "
6620b57cec5SDimitry Andric                       << printReg(DestReg, TRI) << "\n");
6638bcb0991SDimitry Andric     auto G = std::make_unique<Chain>(MI, Idx, getColor(DestReg));
6640b57cec5SDimitry Andric     ActiveChains[DestReg] = G.get();
6650b57cec5SDimitry Andric     AllChains.push_back(std::move(G));
6660b57cec5SDimitry Andric 
6670b57cec5SDimitry Andric   } else {
6680b57cec5SDimitry Andric 
6690b57cec5SDimitry Andric     // Non-MUL or MLA instruction. Invalidate any chain in the uses or defs
6700b57cec5SDimitry Andric     // lists.
6710b57cec5SDimitry Andric     for (auto &I : MI->uses())
6720b57cec5SDimitry Andric       maybeKillChain(I, Idx, ActiveChains);
6730b57cec5SDimitry Andric     for (auto &I : MI->defs())
6740b57cec5SDimitry Andric       maybeKillChain(I, Idx, ActiveChains);
6750b57cec5SDimitry Andric 
6760b57cec5SDimitry Andric   }
6770b57cec5SDimitry Andric }
6780b57cec5SDimitry Andric 
6790b57cec5SDimitry Andric void AArch64A57FPLoadBalancing::
6800b57cec5SDimitry Andric maybeKillChain(MachineOperand &MO, unsigned Idx,
6810b57cec5SDimitry Andric                std::map<unsigned, Chain*> &ActiveChains) {
6820b57cec5SDimitry Andric   // Given an operand and the set of active chains (keyed by register),
6830b57cec5SDimitry Andric   // determine if a chain should be ended and remove from ActiveChains.
6840b57cec5SDimitry Andric   MachineInstr *MI = MO.getParent();
6850b57cec5SDimitry Andric 
6860b57cec5SDimitry Andric   if (MO.isReg()) {
6870b57cec5SDimitry Andric 
6880b57cec5SDimitry Andric     // If this is a KILL of a current chain, record it.
6890b57cec5SDimitry Andric     if (MO.isKill() && ActiveChains.find(MO.getReg()) != ActiveChains.end()) {
6900b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Kill seen for chain " << printReg(MO.getReg(), TRI)
6910b57cec5SDimitry Andric                         << "\n");
6920b57cec5SDimitry Andric       ActiveChains[MO.getReg()]->setKill(MI, Idx, /*Immutable=*/MO.isTied());
6930b57cec5SDimitry Andric     }
6940b57cec5SDimitry Andric     ActiveChains.erase(MO.getReg());
6950b57cec5SDimitry Andric 
6960b57cec5SDimitry Andric   } else if (MO.isRegMask()) {
6970b57cec5SDimitry Andric 
6980b57cec5SDimitry Andric     for (auto I = ActiveChains.begin(), E = ActiveChains.end();
6990b57cec5SDimitry Andric          I != E;) {
7000b57cec5SDimitry Andric       if (MO.clobbersPhysReg(I->first)) {
7010b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "Kill (regmask) seen for chain "
7020b57cec5SDimitry Andric                           << printReg(I->first, TRI) << "\n");
7030b57cec5SDimitry Andric         I->second->setKill(MI, Idx, /*Immutable=*/true);
7040b57cec5SDimitry Andric         ActiveChains.erase(I++);
7050b57cec5SDimitry Andric       } else
7060b57cec5SDimitry Andric         ++I;
7070b57cec5SDimitry Andric     }
7080b57cec5SDimitry Andric 
7090b57cec5SDimitry Andric   }
7100b57cec5SDimitry Andric }
7110b57cec5SDimitry Andric 
7120b57cec5SDimitry Andric Color AArch64A57FPLoadBalancing::getColor(unsigned Reg) {
7130b57cec5SDimitry Andric   if ((TRI->getEncodingValue(Reg) % 2) == 0)
7140b57cec5SDimitry Andric     return Color::Even;
7150b57cec5SDimitry Andric   else
7160b57cec5SDimitry Andric     return Color::Odd;
7170b57cec5SDimitry Andric }
7180b57cec5SDimitry Andric 
7190b57cec5SDimitry Andric // Factory function used by AArch64TargetMachine to add the pass to the passmanager.
7200b57cec5SDimitry Andric FunctionPass *llvm::createAArch64A57FPLoadBalancing() {
7210b57cec5SDimitry Andric   return new AArch64A57FPLoadBalancing();
7220b57cec5SDimitry Andric }
723