10b57cec5SDimitry Andric //===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric /// \file
90b57cec5SDimitry Andric /// This file contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions
100b57cec5SDimitry Andric /// into a conditional branch (B.cond), when the NZCV flags can be set for
110b57cec5SDimitry Andric /// "free". This is preferred on targets that have more flexibility when
120b57cec5SDimitry Andric /// scheduling B.cond instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming
130b57cec5SDimitry Andric /// all other variables are equal). This can also reduce register pressure.
140b57cec5SDimitry Andric ///
150b57cec5SDimitry Andric /// A few examples:
160b57cec5SDimitry Andric ///
170b57cec5SDimitry Andric /// 1) add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS.
180b57cec5SDimitry Andric /// cbz w8, .LBB_2 -> b.eq .LBB0_2
190b57cec5SDimitry Andric ///
200b57cec5SDimitry Andric /// 2) add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses.
210b57cec5SDimitry Andric /// cbz w8, .LBB1_2 -> b.eq .LBB1_2
220b57cec5SDimitry Andric ///
230b57cec5SDimitry Andric /// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses.
240b57cec5SDimitry Andric /// tbz w8, #31, .LBB6_2 -> b.pl .LBB6_2
250b57cec5SDimitry Andric ///
260b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
270b57cec5SDimitry Andric
280b57cec5SDimitry Andric #include "AArch64.h"
290b57cec5SDimitry Andric #include "AArch64Subtarget.h"
300b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
310b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
320b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
330b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
340b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h"
350b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
360b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
370b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
380b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
390b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
400b57cec5SDimitry Andric
410b57cec5SDimitry Andric using namespace llvm;
420b57cec5SDimitry Andric
430b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-cond-br-tuning"
440b57cec5SDimitry Andric #define AARCH64_CONDBR_TUNING_NAME "AArch64 Conditional Branch Tuning"
450b57cec5SDimitry Andric
460b57cec5SDimitry Andric namespace {
470b57cec5SDimitry Andric class AArch64CondBrTuning : public MachineFunctionPass {
480b57cec5SDimitry Andric const AArch64InstrInfo *TII;
490b57cec5SDimitry Andric const TargetRegisterInfo *TRI;
500b57cec5SDimitry Andric
510b57cec5SDimitry Andric MachineRegisterInfo *MRI;
520b57cec5SDimitry Andric
530b57cec5SDimitry Andric public:
540b57cec5SDimitry Andric static char ID;
AArch64CondBrTuning()550b57cec5SDimitry Andric AArch64CondBrTuning() : MachineFunctionPass(ID) {
560b57cec5SDimitry Andric initializeAArch64CondBrTuningPass(*PassRegistry::getPassRegistry());
570b57cec5SDimitry Andric }
580b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override;
590b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
getPassName() const600b57cec5SDimitry Andric StringRef getPassName() const override { return AARCH64_CONDBR_TUNING_NAME; }
610b57cec5SDimitry Andric
620b57cec5SDimitry Andric private:
630b57cec5SDimitry Andric MachineInstr *getOperandDef(const MachineOperand &MO);
64*bdd1243dSDimitry Andric MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting,
65*bdd1243dSDimitry Andric bool Is64Bit);
660b57cec5SDimitry Andric MachineInstr *convertToCondBr(MachineInstr &MI);
670b57cec5SDimitry Andric bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI);
680b57cec5SDimitry Andric };
690b57cec5SDimitry Andric } // end anonymous namespace
700b57cec5SDimitry Andric
710b57cec5SDimitry Andric char AArch64CondBrTuning::ID = 0;
720b57cec5SDimitry Andric
730b57cec5SDimitry Andric INITIALIZE_PASS(AArch64CondBrTuning, "aarch64-cond-br-tuning",
740b57cec5SDimitry Andric AARCH64_CONDBR_TUNING_NAME, false, false)
750b57cec5SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const760b57cec5SDimitry Andric void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const {
770b57cec5SDimitry Andric AU.setPreservesCFG();
780b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
790b57cec5SDimitry Andric }
800b57cec5SDimitry Andric
getOperandDef(const MachineOperand & MO)810b57cec5SDimitry Andric MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) {
82*bdd1243dSDimitry Andric if (!MO.getReg().isVirtual())
830b57cec5SDimitry Andric return nullptr;
840b57cec5SDimitry Andric return MRI->getUniqueVRegDef(MO.getReg());
850b57cec5SDimitry Andric }
860b57cec5SDimitry Andric
convertToFlagSetting(MachineInstr & MI,bool IsFlagSetting,bool Is64Bit)870b57cec5SDimitry Andric MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
88*bdd1243dSDimitry Andric bool IsFlagSetting,
89*bdd1243dSDimitry Andric bool Is64Bit) {
900b57cec5SDimitry Andric // If this is already the flag setting version of the instruction (e.g., SUBS)
910b57cec5SDimitry Andric // just make sure the implicit-def of NZCV isn't marked dead.
920b57cec5SDimitry Andric if (IsFlagSetting) {
934824e7fdSDimitry Andric for (MachineOperand &MO : MI.implicit_operands())
940b57cec5SDimitry Andric if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV)
950b57cec5SDimitry Andric MO.setIsDead(false);
960b57cec5SDimitry Andric return &MI;
970b57cec5SDimitry Andric }
98*bdd1243dSDimitry Andric unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode());
998bcb0991SDimitry Andric Register NewDestReg = MI.getOperand(0).getReg();
1000b57cec5SDimitry Andric if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg()))
1010b57cec5SDimitry Andric NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1020b57cec5SDimitry Andric
1030b57cec5SDimitry Andric MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
1040b57cec5SDimitry Andric TII->get(NewOpc), NewDestReg);
1054824e7fdSDimitry Andric for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
1064824e7fdSDimitry Andric MIB.add(MO);
1070b57cec5SDimitry Andric
1080b57cec5SDimitry Andric return MIB;
1090b57cec5SDimitry Andric }
1100b57cec5SDimitry Andric
convertToCondBr(MachineInstr & MI)1110b57cec5SDimitry Andric MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) {
1120b57cec5SDimitry Andric AArch64CC::CondCode CC;
1130b57cec5SDimitry Andric MachineBasicBlock *TargetMBB = TII->getBranchDestBlock(MI);
1140b57cec5SDimitry Andric switch (MI.getOpcode()) {
1150b57cec5SDimitry Andric default:
1160b57cec5SDimitry Andric llvm_unreachable("Unexpected opcode!");
1170b57cec5SDimitry Andric
1180b57cec5SDimitry Andric case AArch64::CBZW:
1190b57cec5SDimitry Andric case AArch64::CBZX:
1200b57cec5SDimitry Andric CC = AArch64CC::EQ;
1210b57cec5SDimitry Andric break;
1220b57cec5SDimitry Andric case AArch64::CBNZW:
1230b57cec5SDimitry Andric case AArch64::CBNZX:
1240b57cec5SDimitry Andric CC = AArch64CC::NE;
1250b57cec5SDimitry Andric break;
1260b57cec5SDimitry Andric case AArch64::TBZW:
1270b57cec5SDimitry Andric case AArch64::TBZX:
1280b57cec5SDimitry Andric CC = AArch64CC::PL;
1290b57cec5SDimitry Andric break;
1300b57cec5SDimitry Andric case AArch64::TBNZW:
1310b57cec5SDimitry Andric case AArch64::TBNZX:
1320b57cec5SDimitry Andric CC = AArch64CC::MI;
1330b57cec5SDimitry Andric break;
1340b57cec5SDimitry Andric }
1350b57cec5SDimitry Andric return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc))
1360b57cec5SDimitry Andric .addImm(CC)
1370b57cec5SDimitry Andric .addMBB(TargetMBB);
1380b57cec5SDimitry Andric }
1390b57cec5SDimitry Andric
tryToTuneBranch(MachineInstr & MI,MachineInstr & DefMI)1400b57cec5SDimitry Andric bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
1410b57cec5SDimitry Andric MachineInstr &DefMI) {
1420b57cec5SDimitry Andric // We don't want NZCV bits live across blocks.
1430b57cec5SDimitry Andric if (MI.getParent() != DefMI.getParent())
1440b57cec5SDimitry Andric return false;
1450b57cec5SDimitry Andric
1460b57cec5SDimitry Andric bool IsFlagSetting = true;
1470b57cec5SDimitry Andric unsigned MIOpc = MI.getOpcode();
1480b57cec5SDimitry Andric MachineInstr *NewCmp = nullptr, *NewBr = nullptr;
1490b57cec5SDimitry Andric switch (DefMI.getOpcode()) {
1500b57cec5SDimitry Andric default:
1510b57cec5SDimitry Andric return false;
1520b57cec5SDimitry Andric case AArch64::ADDWri:
1530b57cec5SDimitry Andric case AArch64::ADDWrr:
1540b57cec5SDimitry Andric case AArch64::ADDWrs:
1550b57cec5SDimitry Andric case AArch64::ADDWrx:
1560b57cec5SDimitry Andric case AArch64::ANDWri:
1570b57cec5SDimitry Andric case AArch64::ANDWrr:
1580b57cec5SDimitry Andric case AArch64::ANDWrs:
1590b57cec5SDimitry Andric case AArch64::BICWrr:
1600b57cec5SDimitry Andric case AArch64::BICWrs:
1610b57cec5SDimitry Andric case AArch64::SUBWri:
1620b57cec5SDimitry Andric case AArch64::SUBWrr:
1630b57cec5SDimitry Andric case AArch64::SUBWrs:
1640b57cec5SDimitry Andric case AArch64::SUBWrx:
1650b57cec5SDimitry Andric IsFlagSetting = false;
166*bdd1243dSDimitry Andric [[fallthrough]];
1670b57cec5SDimitry Andric case AArch64::ADDSWri:
1680b57cec5SDimitry Andric case AArch64::ADDSWrr:
1690b57cec5SDimitry Andric case AArch64::ADDSWrs:
1700b57cec5SDimitry Andric case AArch64::ADDSWrx:
1710b57cec5SDimitry Andric case AArch64::ANDSWri:
1720b57cec5SDimitry Andric case AArch64::ANDSWrr:
1730b57cec5SDimitry Andric case AArch64::ANDSWrs:
1740b57cec5SDimitry Andric case AArch64::BICSWrr:
1750b57cec5SDimitry Andric case AArch64::BICSWrs:
1760b57cec5SDimitry Andric case AArch64::SUBSWri:
1770b57cec5SDimitry Andric case AArch64::SUBSWrr:
1780b57cec5SDimitry Andric case AArch64::SUBSWrs:
1790b57cec5SDimitry Andric case AArch64::SUBSWrx:
1800b57cec5SDimitry Andric switch (MIOpc) {
1810b57cec5SDimitry Andric default:
1820b57cec5SDimitry Andric llvm_unreachable("Unexpected opcode!");
1830b57cec5SDimitry Andric
1840b57cec5SDimitry Andric case AArch64::CBZW:
1850b57cec5SDimitry Andric case AArch64::CBNZW:
1860b57cec5SDimitry Andric case AArch64::TBZW:
1870b57cec5SDimitry Andric case AArch64::TBNZW:
1880b57cec5SDimitry Andric // Check to see if the TBZ/TBNZ is checking the sign bit.
1890b57cec5SDimitry Andric if ((MIOpc == AArch64::TBZW || MIOpc == AArch64::TBNZW) &&
1900b57cec5SDimitry Andric MI.getOperand(1).getImm() != 31)
1910b57cec5SDimitry Andric return false;
1920b57cec5SDimitry Andric
1930b57cec5SDimitry Andric // There must not be any instruction between DefMI and MI that clobbers or
1940b57cec5SDimitry Andric // reads NZCV.
1955ffd83dbSDimitry Andric if (isNZCVTouchedInInstructionRange(DefMI, MI, TRI))
1960b57cec5SDimitry Andric return false;
1970b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
1980b57cec5SDimitry Andric LLVM_DEBUG(DefMI.print(dbgs()));
1990b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " ");
2000b57cec5SDimitry Andric LLVM_DEBUG(MI.print(dbgs()));
2010b57cec5SDimitry Andric
202*bdd1243dSDimitry Andric NewCmp = convertToFlagSetting(DefMI, IsFlagSetting, /*Is64Bit=*/false);
2030b57cec5SDimitry Andric NewBr = convertToCondBr(MI);
2040b57cec5SDimitry Andric break;
2050b57cec5SDimitry Andric }
2060b57cec5SDimitry Andric break;
2070b57cec5SDimitry Andric
2080b57cec5SDimitry Andric case AArch64::ADDXri:
2090b57cec5SDimitry Andric case AArch64::ADDXrr:
2100b57cec5SDimitry Andric case AArch64::ADDXrs:
2110b57cec5SDimitry Andric case AArch64::ADDXrx:
2120b57cec5SDimitry Andric case AArch64::ANDXri:
2130b57cec5SDimitry Andric case AArch64::ANDXrr:
2140b57cec5SDimitry Andric case AArch64::ANDXrs:
2150b57cec5SDimitry Andric case AArch64::BICXrr:
2160b57cec5SDimitry Andric case AArch64::BICXrs:
2170b57cec5SDimitry Andric case AArch64::SUBXri:
2180b57cec5SDimitry Andric case AArch64::SUBXrr:
2190b57cec5SDimitry Andric case AArch64::SUBXrs:
2200b57cec5SDimitry Andric case AArch64::SUBXrx:
2210b57cec5SDimitry Andric IsFlagSetting = false;
222*bdd1243dSDimitry Andric [[fallthrough]];
2230b57cec5SDimitry Andric case AArch64::ADDSXri:
2240b57cec5SDimitry Andric case AArch64::ADDSXrr:
2250b57cec5SDimitry Andric case AArch64::ADDSXrs:
2260b57cec5SDimitry Andric case AArch64::ADDSXrx:
2270b57cec5SDimitry Andric case AArch64::ANDSXri:
2280b57cec5SDimitry Andric case AArch64::ANDSXrr:
2290b57cec5SDimitry Andric case AArch64::ANDSXrs:
2300b57cec5SDimitry Andric case AArch64::BICSXrr:
2310b57cec5SDimitry Andric case AArch64::BICSXrs:
2320b57cec5SDimitry Andric case AArch64::SUBSXri:
2330b57cec5SDimitry Andric case AArch64::SUBSXrr:
2340b57cec5SDimitry Andric case AArch64::SUBSXrs:
2350b57cec5SDimitry Andric case AArch64::SUBSXrx:
2360b57cec5SDimitry Andric switch (MIOpc) {
2370b57cec5SDimitry Andric default:
2380b57cec5SDimitry Andric llvm_unreachable("Unexpected opcode!");
2390b57cec5SDimitry Andric
2400b57cec5SDimitry Andric case AArch64::CBZX:
2410b57cec5SDimitry Andric case AArch64::CBNZX:
2420b57cec5SDimitry Andric case AArch64::TBZX:
2430b57cec5SDimitry Andric case AArch64::TBNZX: {
2440b57cec5SDimitry Andric // Check to see if the TBZ/TBNZ is checking the sign bit.
2450b57cec5SDimitry Andric if ((MIOpc == AArch64::TBZX || MIOpc == AArch64::TBNZX) &&
2460b57cec5SDimitry Andric MI.getOperand(1).getImm() != 63)
2470b57cec5SDimitry Andric return false;
2480b57cec5SDimitry Andric // There must not be any instruction between DefMI and MI that clobbers or
2490b57cec5SDimitry Andric // reads NZCV.
2505ffd83dbSDimitry Andric if (isNZCVTouchedInInstructionRange(DefMI, MI, TRI))
2510b57cec5SDimitry Andric return false;
2520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2530b57cec5SDimitry Andric LLVM_DEBUG(DefMI.print(dbgs()));
2540b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " ");
2550b57cec5SDimitry Andric LLVM_DEBUG(MI.print(dbgs()));
2560b57cec5SDimitry Andric
257*bdd1243dSDimitry Andric NewCmp = convertToFlagSetting(DefMI, IsFlagSetting, /*Is64Bit=*/true);
2580b57cec5SDimitry Andric NewBr = convertToCondBr(MI);
2590b57cec5SDimitry Andric break;
2600b57cec5SDimitry Andric }
2610b57cec5SDimitry Andric }
2620b57cec5SDimitry Andric break;
2630b57cec5SDimitry Andric }
2640b57cec5SDimitry Andric (void)NewCmp; (void)NewBr;
2650b57cec5SDimitry Andric assert(NewCmp && NewBr && "Expected new instructions.");
2660b57cec5SDimitry Andric
2670b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " with instruction:\n ");
2680b57cec5SDimitry Andric LLVM_DEBUG(NewCmp->print(dbgs()));
2690b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " ");
2700b57cec5SDimitry Andric LLVM_DEBUG(NewBr->print(dbgs()));
2710b57cec5SDimitry Andric
2720b57cec5SDimitry Andric // If this was a flag setting version of the instruction, we use the original
2730b57cec5SDimitry Andric // instruction by just clearing the dead marked on the implicit-def of NCZV.
2740b57cec5SDimitry Andric // Therefore, we should not erase this instruction.
2750b57cec5SDimitry Andric if (!IsFlagSetting)
2760b57cec5SDimitry Andric DefMI.eraseFromParent();
2770b57cec5SDimitry Andric MI.eraseFromParent();
2780b57cec5SDimitry Andric return true;
2790b57cec5SDimitry Andric }
2800b57cec5SDimitry Andric
runOnMachineFunction(MachineFunction & MF)2810b57cec5SDimitry Andric bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) {
2820b57cec5SDimitry Andric if (skipFunction(MF.getFunction()))
2830b57cec5SDimitry Andric return false;
2840b57cec5SDimitry Andric
2850b57cec5SDimitry Andric LLVM_DEBUG(
2860b57cec5SDimitry Andric dbgs() << "********** AArch64 Conditional Branch Tuning **********\n"
2870b57cec5SDimitry Andric << "********** Function: " << MF.getName() << '\n');
2880b57cec5SDimitry Andric
2890b57cec5SDimitry Andric TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
2900b57cec5SDimitry Andric TRI = MF.getSubtarget().getRegisterInfo();
2910b57cec5SDimitry Andric MRI = &MF.getRegInfo();
2920b57cec5SDimitry Andric
2930b57cec5SDimitry Andric bool Changed = false;
2940b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) {
2950b57cec5SDimitry Andric bool LocalChange = false;
296349cc55cSDimitry Andric for (MachineInstr &MI : MBB.terminators()) {
2970b57cec5SDimitry Andric switch (MI.getOpcode()) {
2980b57cec5SDimitry Andric default:
2990b57cec5SDimitry Andric break;
3000b57cec5SDimitry Andric case AArch64::CBZW:
3010b57cec5SDimitry Andric case AArch64::CBZX:
3020b57cec5SDimitry Andric case AArch64::CBNZW:
3030b57cec5SDimitry Andric case AArch64::CBNZX:
3040b57cec5SDimitry Andric case AArch64::TBZW:
3050b57cec5SDimitry Andric case AArch64::TBZX:
3060b57cec5SDimitry Andric case AArch64::TBNZW:
3070b57cec5SDimitry Andric case AArch64::TBNZX:
3080b57cec5SDimitry Andric MachineInstr *DefMI = getOperandDef(MI.getOperand(0));
3090b57cec5SDimitry Andric LocalChange = (DefMI && tryToTuneBranch(MI, *DefMI));
3100b57cec5SDimitry Andric break;
3110b57cec5SDimitry Andric }
3120b57cec5SDimitry Andric // If the optimization was successful, we can't optimize any other
3130b57cec5SDimitry Andric // branches because doing so would clobber the NZCV flags.
3140b57cec5SDimitry Andric if (LocalChange) {
3150b57cec5SDimitry Andric Changed = true;
3160b57cec5SDimitry Andric break;
3170b57cec5SDimitry Andric }
3180b57cec5SDimitry Andric }
3190b57cec5SDimitry Andric }
3200b57cec5SDimitry Andric return Changed;
3210b57cec5SDimitry Andric }
3220b57cec5SDimitry Andric
createAArch64CondBrTuning()3230b57cec5SDimitry Andric FunctionPass *llvm::createAArch64CondBrTuning() {
3240b57cec5SDimitry Andric return new AArch64CondBrTuning();
3250b57cec5SDimitry Andric }
326