//===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that collects the Linker Optimization Hints (LOH).
// This pass should be run at the very end of the compilation flow, just before
// the assembly printer.
// To be useful for the linker, the LOH must be printed into the assembly file.
//
// A LOH describes a sequence of instructions that may be optimized by the
// linker.
// This same sequence cannot be optimized by the compiler because some of
// the information will only be known at link time.
// For instance, consider the following sequence:
//     L1: adrp xA, sym@PAGE
//     L2: add xB, xA, sym@PAGEOFF
//     L3: ldr xC, [xB, #imm]
// This sequence can be turned into:
// A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
//     L3: ldr xC, sym+#imm
// It may also be turned into either of the following more efficient
// code sequences:
// - If sym@PAGEOFF + #imm fits the encoding space of L3.
//     L1: adrp xA, sym@PAGE
//     L3: ldr xC, [xB, sym@PAGEOFF + #imm]
// - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
//     L1: adr xA, sym
//     L3: ldr xC, [xB, #imm]
//
// To be valid a LOH must meet all the requirements needed by all the related
// possible linker transformations.
// For instance, using the running example, the constraints to emit
// ".loh AdrpAddLdr" are:
// - L1, L2, and L3 instructions are of the expected type, i.e.,
//   respectively ADRP, ADD (immediate), and LD.
// - The result of L1 is used only by L2.
// - The register argument (xA) used in the ADD instruction is defined
//   only by L1.
// - The result of L2 is used only by L3.
// - The base address (xB) in L3 is defined only by L2.
// - The ADRP in L1 and the ADD in L2 must reference the same symbol using
//   @PAGE/@PAGEOFF with no additional constants
//
// Currently supported LOHs are:
// * So called non-ADRP-related:
//   - .loh AdrpAddLdr L1, L2, L3:
//     L1: adrp xA, sym@PAGE
//     L2: add xB, xA, sym@PAGEOFF
//     L3: ldr xC, [xB, #imm]
//   - .loh AdrpLdrGotLdr L1, L2, L3:
//     L1: adrp xA, sym@GOTPAGE
//     L2: ldr xB, [xA, sym@GOTPAGEOFF]
//     L3: ldr xC, [xB, #imm]
//   - .loh AdrpLdr L1, L3:
//     L1: adrp xA, sym@PAGE
//     L3: ldr xC, [xA, sym@PAGEOFF]
//   - .loh AdrpAddStr L1, L2, L3:
//     L1: adrp xA, sym@PAGE
//     L2: add xB, xA, sym@PAGEOFF
//     L3: str xC, [xB, #imm]
//   - .loh AdrpLdrGotStr L1, L2, L3:
//     L1: adrp xA, sym@GOTPAGE
//     L2: ldr xB, [xA, sym@GOTPAGEOFF]
//     L3: str xC, [xB, #imm]
//   - .loh AdrpAdd L1, L2:
//     L1: adrp xA, sym@PAGE
//     L2: add xB, xA, sym@PAGEOFF
//   For all these LOHs, L1, L2, L3 form a simple chain:
//   L1 result is used only by L2 and L2 result by L3.
//   L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
//   by L1.
// All these LOHs aim at using more efficient load/store patterns by folding
// some instructions used to compute the address directly into the load/store.
//
// * So called ADRP-related:
//  - .loh AdrpAdrp L2, L1:
//    L2: ADRP xA, sym1@PAGE
//    L1: ADRP xA, sym2@PAGE
//    L2 dominates L1 and xA is not redefined between L2 and L1
// This LOH aims at getting rid of redundant ADRP instructions.
//
// The overall design for emitting the LOHs is:
// 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
// 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
//     1. Associates them with a label.
//     2. Emits them in a MCStreamer (EmitLOHDirective).
//         - The MCMachOStreamer records them into the MCAssembler.
//         - The MCAsmStreamer prints them.
//         - Other MCStreamers ignore them.
//     3. Closes the MCStreamer:
//         - The MachObjectWriter gets them from the MCAssembler and writes
//           them in the object file.
//         - Other ObjectWriters ignore them.
980b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric #include "AArch64.h" 1010b57cec5SDimitry Andric #include "AArch64InstrInfo.h" 1020b57cec5SDimitry Andric #include "AArch64MachineFunctionInfo.h" 1038bcb0991SDimitry Andric #include "llvm/ADT/SmallSet.h" 1040b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 1050b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 1060b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 1070b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 1080b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 1090b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 1100b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 1110b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 1120b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h" 1130b57cec5SDimitry Andric using namespace llvm; 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-collect-loh" 1160b57cec5SDimitry Andric 1170b57cec5SDimitry Andric STATISTIC(NumADRPSimpleCandidate, 1180b57cec5SDimitry Andric "Number of simplifiable ADRP dominate by another"); 1190b57cec5SDimitry Andric STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD"); 1200b57cec5SDimitry Andric STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR"); 1210b57cec5SDimitry Andric STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD"); 1220b57cec5SDimitry Andric STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR"); 1230b57cec5SDimitry Andric STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP"); 1240b57cec5SDimitry Andric STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD"); 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)" 
1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric namespace { 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric struct AArch64CollectLOH : public MachineFunctionPass { 1310b57cec5SDimitry Andric static char ID; 1320b57cec5SDimitry Andric AArch64CollectLOH() : MachineFunctionPass(ID) {} 1330b57cec5SDimitry Andric 1340b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 1370b57cec5SDimitry Andric return MachineFunctionProperties().set( 1380b57cec5SDimitry Andric MachineFunctionProperties::Property::NoVRegs); 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; } 1420b57cec5SDimitry Andric 1430b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1440b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 1450b57cec5SDimitry Andric AU.setPreservesAll(); 1460b57cec5SDimitry Andric } 1470b57cec5SDimitry Andric }; 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric char AArch64CollectLOH::ID = 0; 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric } // end anonymous namespace. 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh", 1540b57cec5SDimitry Andric AARCH64_COLLECT_LOH_NAME, false, false) 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric static bool canAddBePartOfLOH(const MachineInstr &MI) { 1570b57cec5SDimitry Andric // Check immediate to see if the immediate is an address. 
1580b57cec5SDimitry Andric switch (MI.getOperand(2).getType()) { 1590b57cec5SDimitry Andric default: 1600b57cec5SDimitry Andric return false; 1610b57cec5SDimitry Andric case MachineOperand::MO_GlobalAddress: 1620b57cec5SDimitry Andric case MachineOperand::MO_JumpTableIndex: 1630b57cec5SDimitry Andric case MachineOperand::MO_ConstantPoolIndex: 1640b57cec5SDimitry Andric case MachineOperand::MO_BlockAddress: 1650b57cec5SDimitry Andric return true; 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric /// Answer the following question: Can Def be one of the definition 1700b57cec5SDimitry Andric /// involved in a part of a LOH? 1710b57cec5SDimitry Andric static bool canDefBePartOfLOH(const MachineInstr &MI) { 1720b57cec5SDimitry Andric // Accept ADRP, ADDLow and LOADGot. 1730b57cec5SDimitry Andric switch (MI.getOpcode()) { 1740b57cec5SDimitry Andric default: 1750b57cec5SDimitry Andric return false; 1760b57cec5SDimitry Andric case AArch64::ADRP: 1770b57cec5SDimitry Andric return true; 1780b57cec5SDimitry Andric case AArch64::ADDXri: 1790b57cec5SDimitry Andric return canAddBePartOfLOH(MI); 1800b57cec5SDimitry Andric case AArch64::LDRXui: 1818bcb0991SDimitry Andric case AArch64::LDRWui: 1820b57cec5SDimitry Andric // Check immediate to see if the immediate is an address. 1830b57cec5SDimitry Andric switch (MI.getOperand(2).getType()) { 1840b57cec5SDimitry Andric default: 1850b57cec5SDimitry Andric return false; 1860b57cec5SDimitry Andric case MachineOperand::MO_GlobalAddress: 1870b57cec5SDimitry Andric return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT; 1880b57cec5SDimitry Andric } 1890b57cec5SDimitry Andric } 1900b57cec5SDimitry Andric } 1910b57cec5SDimitry Andric 1920b57cec5SDimitry Andric /// Check whether the given instruction can the end of a LOH chain involving a 1930b57cec5SDimitry Andric /// store. 
1940b57cec5SDimitry Andric static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) { 1950b57cec5SDimitry Andric switch (MI.getOpcode()) { 1960b57cec5SDimitry Andric default: 1970b57cec5SDimitry Andric return false; 1980b57cec5SDimitry Andric case AArch64::STRBBui: 1990b57cec5SDimitry Andric case AArch64::STRHHui: 2000b57cec5SDimitry Andric case AArch64::STRBui: 2010b57cec5SDimitry Andric case AArch64::STRHui: 2020b57cec5SDimitry Andric case AArch64::STRWui: 2030b57cec5SDimitry Andric case AArch64::STRXui: 2040b57cec5SDimitry Andric case AArch64::STRSui: 2050b57cec5SDimitry Andric case AArch64::STRDui: 2060b57cec5SDimitry Andric case AArch64::STRQui: 2070b57cec5SDimitry Andric // We can only optimize the index operand. 2080b57cec5SDimitry Andric // In case we have str xA, [xA, #imm], this is two different uses 2090b57cec5SDimitry Andric // of xA and we cannot fold, otherwise the xA stored may be wrong, 2100b57cec5SDimitry Andric // even if #imm == 0. 211*06c3fb27SDimitry Andric return MO.getOperandNo() == 1 && 2120b57cec5SDimitry Andric MI.getOperand(0).getReg() != MI.getOperand(1).getReg(); 2130b57cec5SDimitry Andric } 2140b57cec5SDimitry Andric } 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric /// Check whether the given instruction can be the end of a LOH chain 2170b57cec5SDimitry Andric /// involving a load. 
2180b57cec5SDimitry Andric static bool isCandidateLoad(const MachineInstr &MI) { 2190b57cec5SDimitry Andric switch (MI.getOpcode()) { 2200b57cec5SDimitry Andric default: 2210b57cec5SDimitry Andric return false; 2220b57cec5SDimitry Andric case AArch64::LDRSBWui: 2230b57cec5SDimitry Andric case AArch64::LDRSBXui: 2240b57cec5SDimitry Andric case AArch64::LDRSHWui: 2250b57cec5SDimitry Andric case AArch64::LDRSHXui: 2260b57cec5SDimitry Andric case AArch64::LDRSWui: 2270b57cec5SDimitry Andric case AArch64::LDRBui: 2280b57cec5SDimitry Andric case AArch64::LDRHui: 2290b57cec5SDimitry Andric case AArch64::LDRWui: 2300b57cec5SDimitry Andric case AArch64::LDRXui: 2310b57cec5SDimitry Andric case AArch64::LDRSui: 2320b57cec5SDimitry Andric case AArch64::LDRDui: 2330b57cec5SDimitry Andric case AArch64::LDRQui: 2340b57cec5SDimitry Andric return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT); 2350b57cec5SDimitry Andric } 2360b57cec5SDimitry Andric } 2370b57cec5SDimitry Andric 2380b57cec5SDimitry Andric /// Check whether the given instruction can load a litteral. 2390b57cec5SDimitry Andric static bool supportLoadFromLiteral(const MachineInstr &MI) { 2400b57cec5SDimitry Andric switch (MI.getOpcode()) { 2410b57cec5SDimitry Andric default: 2420b57cec5SDimitry Andric return false; 2430b57cec5SDimitry Andric case AArch64::LDRSWui: 2440b57cec5SDimitry Andric case AArch64::LDRWui: 2450b57cec5SDimitry Andric case AArch64::LDRXui: 2460b57cec5SDimitry Andric case AArch64::LDRSui: 2470b57cec5SDimitry Andric case AArch64::LDRDui: 2480b57cec5SDimitry Andric case AArch64::LDRQui: 2490b57cec5SDimitry Andric return true; 2500b57cec5SDimitry Andric } 2510b57cec5SDimitry Andric } 2520b57cec5SDimitry Andric 2530b57cec5SDimitry Andric /// Number of GPR registers traked by mapRegToGPRIndex() 2540b57cec5SDimitry Andric static const unsigned N_GPR_REGS = 31; 2550b57cec5SDimitry Andric /// Map register number to index from 0-30. 
2560b57cec5SDimitry Andric static int mapRegToGPRIndex(MCPhysReg Reg) { 2570b57cec5SDimitry Andric static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs"); 2580b57cec5SDimitry Andric static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs"); 2590b57cec5SDimitry Andric if (AArch64::X0 <= Reg && Reg <= AArch64::X28) 2600b57cec5SDimitry Andric return Reg - AArch64::X0; 2610b57cec5SDimitry Andric if (AArch64::W0 <= Reg && Reg <= AArch64::W30) 2620b57cec5SDimitry Andric return Reg - AArch64::W0; 2630b57cec5SDimitry Andric // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to 2640b57cec5SDimitry Andric // handle them as special cases. 2650b57cec5SDimitry Andric if (Reg == AArch64::FP) 2660b57cec5SDimitry Andric return 29; 2670b57cec5SDimitry Andric if (Reg == AArch64::LR) 2680b57cec5SDimitry Andric return 30; 2690b57cec5SDimitry Andric return -1; 2700b57cec5SDimitry Andric } 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric /// State tracked per register. 2730b57cec5SDimitry Andric /// The main algorithm walks backwards over a basic block maintaining this 2740b57cec5SDimitry Andric /// datastructure for each tracked general purpose register. 2750b57cec5SDimitry Andric struct LOHInfo { 2760b57cec5SDimitry Andric MCLOHType Type : 8; ///< "Best" type of LOH possible. 2770b57cec5SDimitry Andric bool IsCandidate : 1; ///< Possible LOH candidate. 2780b57cec5SDimitry Andric bool OneUser : 1; ///< Found exactly one user (yet). 2790b57cec5SDimitry Andric bool MultiUsers : 1; ///< Found multiple users. 2800b57cec5SDimitry Andric const MachineInstr *MI0; ///< First instruction involved in the LOH. 2810b57cec5SDimitry Andric const MachineInstr *MI1; ///< Second instruction involved in the LOH 2820b57cec5SDimitry Andric /// (if any). 2830b57cec5SDimitry Andric const MachineInstr *LastADRP; ///< Last ADRP in same register. 
2840b57cec5SDimitry Andric }; 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric /// Update state \p Info given \p MI uses the tracked register. 2870b57cec5SDimitry Andric static void handleUse(const MachineInstr &MI, const MachineOperand &MO, 2880b57cec5SDimitry Andric LOHInfo &Info) { 2890b57cec5SDimitry Andric // We have multiple uses if we already found one before. 2900b57cec5SDimitry Andric if (Info.MultiUsers || Info.OneUser) { 2910b57cec5SDimitry Andric Info.IsCandidate = false; 2920b57cec5SDimitry Andric Info.MultiUsers = true; 2930b57cec5SDimitry Andric return; 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric Info.OneUser = true; 2960b57cec5SDimitry Andric 2970b57cec5SDimitry Andric // Start new LOHInfo if applicable. 2980b57cec5SDimitry Andric if (isCandidateLoad(MI)) { 2990b57cec5SDimitry Andric Info.Type = MCLOH_AdrpLdr; 3000b57cec5SDimitry Andric Info.IsCandidate = true; 3010b57cec5SDimitry Andric Info.MI0 = &MI; 3020b57cec5SDimitry Andric // Note that even this is AdrpLdr now, we can switch to a Ldr variant 3030b57cec5SDimitry Andric // later. 
3040b57cec5SDimitry Andric } else if (isCandidateStore(MI, MO)) { 3050b57cec5SDimitry Andric Info.Type = MCLOH_AdrpAddStr; 3060b57cec5SDimitry Andric Info.IsCandidate = true; 3070b57cec5SDimitry Andric Info.MI0 = &MI; 3080b57cec5SDimitry Andric Info.MI1 = nullptr; 3090b57cec5SDimitry Andric } else if (MI.getOpcode() == AArch64::ADDXri) { 3100b57cec5SDimitry Andric Info.Type = MCLOH_AdrpAdd; 3110b57cec5SDimitry Andric Info.IsCandidate = true; 3120b57cec5SDimitry Andric Info.MI0 = &MI; 3138bcb0991SDimitry Andric } else if ((MI.getOpcode() == AArch64::LDRXui || 3148bcb0991SDimitry Andric MI.getOpcode() == AArch64::LDRWui) && 3150b57cec5SDimitry Andric MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) { 3160b57cec5SDimitry Andric Info.Type = MCLOH_AdrpLdrGot; 3170b57cec5SDimitry Andric Info.IsCandidate = true; 3180b57cec5SDimitry Andric Info.MI0 = &MI; 3190b57cec5SDimitry Andric } 3200b57cec5SDimitry Andric } 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric /// Update state \p Info given the tracked register is clobbered. 3230b57cec5SDimitry Andric static void handleClobber(LOHInfo &Info) { 3240b57cec5SDimitry Andric Info.IsCandidate = false; 3250b57cec5SDimitry Andric Info.OneUser = false; 3260b57cec5SDimitry Andric Info.MultiUsers = false; 3270b57cec5SDimitry Andric Info.LastADRP = nullptr; 3280b57cec5SDimitry Andric } 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric /// Update state \p Info given that \p MI is possibly the middle instruction 3310b57cec5SDimitry Andric /// of an LOH involving 3 instructions. 3320b57cec5SDimitry Andric static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo, 3330b57cec5SDimitry Andric LOHInfo &OpInfo) { 3340b57cec5SDimitry Andric if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser)) 3350b57cec5SDimitry Andric return false; 3360b57cec5SDimitry Andric // Copy LOHInfo for dest register to LOHInfo for source register. 
3370b57cec5SDimitry Andric if (&DefInfo != &OpInfo) { 3380b57cec5SDimitry Andric OpInfo = DefInfo; 3390b57cec5SDimitry Andric // Invalidate \p DefInfo because we track it in \p OpInfo now. 3400b57cec5SDimitry Andric handleClobber(DefInfo); 3410b57cec5SDimitry Andric } else 3420b57cec5SDimitry Andric DefInfo.LastADRP = nullptr; 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric // Advance state machine. 3450b57cec5SDimitry Andric assert(OpInfo.IsCandidate && "Expect valid state"); 3460b57cec5SDimitry Andric if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) { 3470b57cec5SDimitry Andric if (OpInfo.Type == MCLOH_AdrpLdr) { 3480b57cec5SDimitry Andric OpInfo.Type = MCLOH_AdrpAddLdr; 3490b57cec5SDimitry Andric OpInfo.IsCandidate = true; 3500b57cec5SDimitry Andric OpInfo.MI1 = &MI; 3510b57cec5SDimitry Andric return true; 3520b57cec5SDimitry Andric } else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { 3530b57cec5SDimitry Andric OpInfo.Type = MCLOH_AdrpAddStr; 3540b57cec5SDimitry Andric OpInfo.IsCandidate = true; 3550b57cec5SDimitry Andric OpInfo.MI1 = &MI; 3560b57cec5SDimitry Andric return true; 3570b57cec5SDimitry Andric } 3580b57cec5SDimitry Andric } else { 3598bcb0991SDimitry Andric assert((MI.getOpcode() == AArch64::LDRXui || 3608bcb0991SDimitry Andric MI.getOpcode() == AArch64::LDRWui) && 3618bcb0991SDimitry Andric "Expect LDRXui or LDRWui"); 3620b57cec5SDimitry Andric assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) && 3630b57cec5SDimitry Andric "Expected GOT relocation"); 3640b57cec5SDimitry Andric if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { 3650b57cec5SDimitry Andric OpInfo.Type = MCLOH_AdrpLdrGotStr; 3660b57cec5SDimitry Andric OpInfo.IsCandidate = true; 3670b57cec5SDimitry Andric OpInfo.MI1 = &MI; 3680b57cec5SDimitry Andric return true; 3690b57cec5SDimitry Andric } else if (OpInfo.Type == MCLOH_AdrpLdr) { 3700b57cec5SDimitry Andric OpInfo.Type = MCLOH_AdrpLdrGotLdr; 3710b57cec5SDimitry Andric 
OpInfo.IsCandidate = true; 3720b57cec5SDimitry Andric OpInfo.MI1 = &MI; 3730b57cec5SDimitry Andric return true; 3740b57cec5SDimitry Andric } 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric return false; 3770b57cec5SDimitry Andric } 3780b57cec5SDimitry Andric 3790b57cec5SDimitry Andric /// Update state when seeing and ADRP instruction. 3800b57cec5SDimitry Andric static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI, 3815ffd83dbSDimitry Andric LOHInfo &Info, LOHInfo *LOHInfos) { 3820b57cec5SDimitry Andric if (Info.LastADRP != nullptr) { 3830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n" 3840b57cec5SDimitry Andric << '\t' << MI << '\t' << *Info.LastADRP); 3850b57cec5SDimitry Andric AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP}); 3860b57cec5SDimitry Andric ++NumADRPSimpleCandidate; 3870b57cec5SDimitry Andric } 3880b57cec5SDimitry Andric 3890b57cec5SDimitry Andric // Produce LOH directive if possible. 3900b57cec5SDimitry Andric if (Info.IsCandidate) { 3910b57cec5SDimitry Andric switch (Info.Type) { 3925ffd83dbSDimitry Andric case MCLOH_AdrpAdd: { 3935ffd83dbSDimitry Andric // ADRPs and ADDs for this candidate may be split apart if using 3945ffd83dbSDimitry Andric // GlobalISel instead of pseudo-expanded. If that happens, the 3955ffd83dbSDimitry Andric // def register of the ADD may have a use in between. Adding an LOH in 3965ffd83dbSDimitry Andric // this case can cause the linker to rewrite the ADRP to write to that 3975ffd83dbSDimitry Andric // register, clobbering the use. 
3985ffd83dbSDimitry Andric const MachineInstr *AddMI = Info.MI0; 3995ffd83dbSDimitry Andric int DefIdx = mapRegToGPRIndex(MI.getOperand(0).getReg()); 4005ffd83dbSDimitry Andric int OpIdx = mapRegToGPRIndex(AddMI->getOperand(0).getReg()); 4015ffd83dbSDimitry Andric LOHInfo DefInfo = LOHInfos[OpIdx]; 4025ffd83dbSDimitry Andric if (DefIdx != OpIdx && (DefInfo.OneUser || DefInfo.MultiUsers)) 4035ffd83dbSDimitry Andric break; 4040b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n" 4050b57cec5SDimitry Andric << '\t' << MI << '\t' << *Info.MI0); 4060b57cec5SDimitry Andric AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0}); 4070b57cec5SDimitry Andric ++NumADRSimpleCandidate; 4080b57cec5SDimitry Andric break; 4095ffd83dbSDimitry Andric } 4100b57cec5SDimitry Andric case MCLOH_AdrpLdr: 4110b57cec5SDimitry Andric if (supportLoadFromLiteral(*Info.MI0)) { 4120b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n" 4130b57cec5SDimitry Andric << '\t' << MI << '\t' << *Info.MI0); 4140b57cec5SDimitry Andric AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0}); 4150b57cec5SDimitry Andric ++NumADRPToLDR; 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric break; 418fe6060f1SDimitry Andric case MCLOH_AdrpAddLdr: { 419fe6060f1SDimitry Andric // There is a possibility that the linker may try to rewrite: 420fe6060f1SDimitry Andric // adrp x0, @sym@PAGE 421fe6060f1SDimitry Andric // add x1, x0, @sym@PAGEOFF 422fe6060f1SDimitry Andric // [x0 = some other def] 423fe6060f1SDimitry Andric // ldr x2, [x1] 424fe6060f1SDimitry Andric // ...into... 425fe6060f1SDimitry Andric // adrp x0, @sym 426fe6060f1SDimitry Andric // nop 427fe6060f1SDimitry Andric // [x0 = some other def] 428fe6060f1SDimitry Andric // ldr x2, [x0] 429fe6060f1SDimitry Andric // ...if the offset to the symbol won't fit within a literal load. 
430fe6060f1SDimitry Andric // This causes the load to use the result of the adrp, which in this 431fe6060f1SDimitry Andric // case has already been clobbered. 432fe6060f1SDimitry Andric // FIXME: Implement proper liveness tracking for all registers. For now, 433fe6060f1SDimitry Andric // don't emit the LOH if there are any instructions between the add and 434fe6060f1SDimitry Andric // the ldr. 435fe6060f1SDimitry Andric MachineInstr *AddMI = const_cast<MachineInstr *>(Info.MI1); 436fe6060f1SDimitry Andric const MachineInstr *LdrMI = Info.MI0; 437fe6060f1SDimitry Andric auto AddIt = MachineBasicBlock::iterator(AddMI); 438fe6060f1SDimitry Andric auto EndIt = AddMI->getParent()->end(); 439fe6060f1SDimitry Andric if (AddMI->getIterator() == EndIt || LdrMI != &*next_nodbg(AddIt, EndIt)) 440fe6060f1SDimitry Andric break; 441fe6060f1SDimitry Andric 4420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n" 4430b57cec5SDimitry Andric << '\t' << MI << '\t' << *Info.MI1 << '\t' 4440b57cec5SDimitry Andric << *Info.MI0); 4450b57cec5SDimitry Andric AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0}); 4460b57cec5SDimitry Andric ++NumADDToLDR; 4470b57cec5SDimitry Andric break; 448fe6060f1SDimitry Andric } 4490b57cec5SDimitry Andric case MCLOH_AdrpAddStr: 4500b57cec5SDimitry Andric if (Info.MI1 != nullptr) { 4510b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n" 4520b57cec5SDimitry Andric << '\t' << MI << '\t' << *Info.MI1 << '\t' 4530b57cec5SDimitry Andric << *Info.MI0); 4540b57cec5SDimitry Andric AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0}); 4550b57cec5SDimitry Andric ++NumADDToSTR; 4560b57cec5SDimitry Andric } 4570b57cec5SDimitry Andric break; 4580b57cec5SDimitry Andric case MCLOH_AdrpLdrGotLdr: 4590b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n" 4600b57cec5SDimitry Andric << '\t' << MI << '\t' << *Info.MI1 << '\t' 4610b57cec5SDimitry Andric << *Info.MI0); 4620b57cec5SDimitry 
Andric AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0}); 4630b57cec5SDimitry Andric ++NumLDRToLDR; 4640b57cec5SDimitry Andric break; 4650b57cec5SDimitry Andric case MCLOH_AdrpLdrGotStr: 4660b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n" 4670b57cec5SDimitry Andric << '\t' << MI << '\t' << *Info.MI1 << '\t' 4680b57cec5SDimitry Andric << *Info.MI0); 4690b57cec5SDimitry Andric AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0}); 4700b57cec5SDimitry Andric ++NumLDRToSTR; 4710b57cec5SDimitry Andric break; 4720b57cec5SDimitry Andric case MCLOH_AdrpLdrGot: 4730b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n" 4740b57cec5SDimitry Andric << '\t' << MI << '\t' << *Info.MI0); 4750b57cec5SDimitry Andric AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0}); 4760b57cec5SDimitry Andric break; 4770b57cec5SDimitry Andric case MCLOH_AdrpAdrp: 4780b57cec5SDimitry Andric llvm_unreachable("MCLOH_AdrpAdrp not used in state machine"); 4790b57cec5SDimitry Andric } 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric 4820b57cec5SDimitry Andric handleClobber(Info); 4830b57cec5SDimitry Andric Info.LastADRP = &MI; 4840b57cec5SDimitry Andric } 4850b57cec5SDimitry Andric 4860b57cec5SDimitry Andric static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg, 4870b57cec5SDimitry Andric LOHInfo *LOHInfos) { 4880b57cec5SDimitry Andric if (!MachineOperand::clobbersPhysReg(RegMask, Reg)) 4890b57cec5SDimitry Andric return; 4900b57cec5SDimitry Andric int Idx = mapRegToGPRIndex(Reg); 4910b57cec5SDimitry Andric if (Idx >= 0) 4920b57cec5SDimitry Andric handleClobber(LOHInfos[Idx]); 4930b57cec5SDimitry Andric } 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) { 4960b57cec5SDimitry Andric // Handle defs and regmasks. 
4970b57cec5SDimitry Andric for (const MachineOperand &MO : MI.operands()) { 4980b57cec5SDimitry Andric if (MO.isRegMask()) { 4990b57cec5SDimitry Andric const uint32_t *RegMask = MO.getRegMask(); 5000b57cec5SDimitry Andric for (MCPhysReg Reg : AArch64::GPR32RegClass) 5010b57cec5SDimitry Andric handleRegMaskClobber(RegMask, Reg, LOHInfos); 5020b57cec5SDimitry Andric for (MCPhysReg Reg : AArch64::GPR64RegClass) 5030b57cec5SDimitry Andric handleRegMaskClobber(RegMask, Reg, LOHInfos); 5040b57cec5SDimitry Andric continue; 5050b57cec5SDimitry Andric } 5060b57cec5SDimitry Andric if (!MO.isReg() || !MO.isDef()) 5070b57cec5SDimitry Andric continue; 5080b57cec5SDimitry Andric int Idx = mapRegToGPRIndex(MO.getReg()); 5090b57cec5SDimitry Andric if (Idx < 0) 5100b57cec5SDimitry Andric continue; 5110b57cec5SDimitry Andric handleClobber(LOHInfos[Idx]); 5120b57cec5SDimitry Andric } 5130b57cec5SDimitry Andric // Handle uses. 5148bcb0991SDimitry Andric 5158bcb0991SDimitry Andric SmallSet<int, 4> UsesSeen; 5160b57cec5SDimitry Andric for (const MachineOperand &MO : MI.uses()) { 5170b57cec5SDimitry Andric if (!MO.isReg() || !MO.readsReg()) 5180b57cec5SDimitry Andric continue; 5190b57cec5SDimitry Andric int Idx = mapRegToGPRIndex(MO.getReg()); 5200b57cec5SDimitry Andric if (Idx < 0) 5210b57cec5SDimitry Andric continue; 5228bcb0991SDimitry Andric 5238bcb0991SDimitry Andric // Multiple uses of the same register within a single instruction don't 5248bcb0991SDimitry Andric // count as MultiUser or block optimization. This is especially important on 5258bcb0991SDimitry Andric // arm64_32, where any memory operation is likely to be an explicit use of 5268bcb0991SDimitry Andric // xN and an implicit use of wN (the base address register). 
52781ad6265SDimitry Andric if (UsesSeen.insert(Idx).second) 5280b57cec5SDimitry Andric handleUse(MI, MO, LOHInfos[Idx]); 5290b57cec5SDimitry Andric } 5300b57cec5SDimitry Andric } 5310b57cec5SDimitry Andric 5320b57cec5SDimitry Andric bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { 5330b57cec5SDimitry Andric if (skipFunction(MF.getFunction())) 5340b57cec5SDimitry Andric return false; 5350b57cec5SDimitry Andric 5360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n" 5370b57cec5SDimitry Andric << "Looking in function " << MF.getName() << '\n'); 5380b57cec5SDimitry Andric 5390b57cec5SDimitry Andric LOHInfo LOHInfos[N_GPR_REGS]; 5400b57cec5SDimitry Andric AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); 5410b57cec5SDimitry Andric for (const MachineBasicBlock &MBB : MF) { 5420b57cec5SDimitry Andric // Reset register tracking state. 5430b57cec5SDimitry Andric memset(LOHInfos, 0, sizeof(LOHInfos)); 5440b57cec5SDimitry Andric // Live-out registers are used. 5450b57cec5SDimitry Andric for (const MachineBasicBlock *Succ : MBB.successors()) { 5460b57cec5SDimitry Andric for (const auto &LI : Succ->liveins()) { 5470b57cec5SDimitry Andric int RegIdx = mapRegToGPRIndex(LI.PhysReg); 5480b57cec5SDimitry Andric if (RegIdx >= 0) 5490b57cec5SDimitry Andric LOHInfos[RegIdx].OneUser = true; 5500b57cec5SDimitry Andric } 5510b57cec5SDimitry Andric } 5520b57cec5SDimitry Andric 5530b57cec5SDimitry Andric // Walk the basic block backwards and update the per register state machine 5540b57cec5SDimitry Andric // in the process. 
5555ffd83dbSDimitry Andric for (const MachineInstr &MI : 5563a9a9c0cSDimitry Andric instructionsWithoutDebug(MBB.instr_rbegin(), MBB.instr_rend())) { 5570b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode(); 5580b57cec5SDimitry Andric switch (Opcode) { 5590b57cec5SDimitry Andric case AArch64::ADDXri: 5600b57cec5SDimitry Andric case AArch64::LDRXui: 5618bcb0991SDimitry Andric case AArch64::LDRWui: 5620b57cec5SDimitry Andric if (canDefBePartOfLOH(MI)) { 5630b57cec5SDimitry Andric const MachineOperand &Def = MI.getOperand(0); 5640b57cec5SDimitry Andric const MachineOperand &Op = MI.getOperand(1); 5650b57cec5SDimitry Andric assert(Def.isReg() && Def.isDef() && "Expected reg def"); 5660b57cec5SDimitry Andric assert(Op.isReg() && Op.isUse() && "Expected reg use"); 5670b57cec5SDimitry Andric int DefIdx = mapRegToGPRIndex(Def.getReg()); 5680b57cec5SDimitry Andric int OpIdx = mapRegToGPRIndex(Op.getReg()); 5690b57cec5SDimitry Andric if (DefIdx >= 0 && OpIdx >= 0 && 5700b57cec5SDimitry Andric handleMiddleInst(MI, LOHInfos[DefIdx], LOHInfos[OpIdx])) 5710b57cec5SDimitry Andric continue; 5720b57cec5SDimitry Andric } 5730b57cec5SDimitry Andric break; 5740b57cec5SDimitry Andric case AArch64::ADRP: 5750b57cec5SDimitry Andric const MachineOperand &Op0 = MI.getOperand(0); 5760b57cec5SDimitry Andric int Idx = mapRegToGPRIndex(Op0.getReg()); 5770b57cec5SDimitry Andric if (Idx >= 0) { 5785ffd83dbSDimitry Andric handleADRP(MI, AFI, LOHInfos[Idx], LOHInfos); 5790b57cec5SDimitry Andric continue; 5800b57cec5SDimitry Andric } 5810b57cec5SDimitry Andric break; 5820b57cec5SDimitry Andric } 5830b57cec5SDimitry Andric handleNormalInst(MI, LOHInfos); 5840b57cec5SDimitry Andric } 5850b57cec5SDimitry Andric } 5860b57cec5SDimitry Andric 5870b57cec5SDimitry Andric // Return "no change": The pass only collects information. 
5880b57cec5SDimitry Andric return false; 5890b57cec5SDimitry Andric } 5900b57cec5SDimitry Andric 5910b57cec5SDimitry Andric FunctionPass *llvm::createAArch64CollectLOHPass() { 5920b57cec5SDimitry Andric return new AArch64CollectLOH(); 5930b57cec5SDimitry Andric } 594