//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64ExpandImm.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PointerAuth.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned>
    CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
                       cl::desc("Restrict range of CB instructions (DEBUG)"));

static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

static cl::opt<unsigned>
    BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
                      cl::desc("Restrict range of B instructions (DEBUG)"));
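
// Note: branches encode their displacement in 4-byte units, so e.g. Bcc's
// 19 bits give a range of roughly +/-1MiB and TB(N)Z's 14 bits roughly
// +/-32KiB. Shrinking these values via the debug-only options above
// presumably forces branch relaxation's out-of-range expansion paths to be
// exercised even on small test functions.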

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                          AArch64::CATCHRET),
      RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}

/// getInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const Function &F = MF->getFunction();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  {
    auto Op = MI.getOpcode();
    if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
      return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
  }

  // Meta-instructions emit no code.
  if (MI.isMetaInstruction())
    return 0;

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  // before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();

  if (!MI.isBundle() && isTailCallReturnInst(MI)) {
    NumBytes = Desc.getSize() ? Desc.getSize() : 4;

    const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
    if (!MFI->shouldSignReturnAddress(MF))
      return NumBytes;

    const auto &STI = MF->getSubtarget<AArch64Subtarget>();
    auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
    NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
    return NumBytes;
  }

  // Size should preferably be set in
  // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
  // Specific cases handle instructions of variable sizes.
  switch (Desc.getOpcode()) {
  default:
    if (Desc.getSize())
      return Desc.getSize();

    // Anything not explicitly designated otherwise (i.e. pseudo-instructions
    // with fixed constant size but not specified in .td file) is a normal
    // 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::STATEPOINT:
    NumBytes = StatepointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    // No patch bytes means a normal call inst is emitted.
    if (NumBytes == 0)
      NumBytes = 4;
    break;
  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
    // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
    // instructions are expanded to the specified number of NOPs. Otherwise,
    // they are expanded to 36-byte XRay sleds.
    NumBytes =
        F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
    break;
  case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
  case TargetOpcode::PATCHABLE_TAIL_CALL:
  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
    NumBytes = 36;
    break;
  case TargetOpcode::PATCHABLE_EVENT_CALL:
    // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
    NumBytes = 24;
    break;

  case AArch64::SPACE:
    NumBytes = MI.getOperand(1).getImm();
    break;
  case TargetOpcode::BUNDLE:
    NumBytes = getInstBundleLength(MI);
    break;
  }

  return NumBytes;
}

unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
    break;
  case AArch64::CBWPri:
  case AArch64::CBXPri:
  case AArch64::CBWPrr:
  case AArch64::CBXPrr:
    Target = LastInst->getOperand(3).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
    Cond.push_back(LastInst->getOperand(2));
    break;
  }
}
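
// The Cond vector built by parseCondBranch() is consumed by insertBranch()
// and reverseBranchCondition() below. A leading -1 distinguishes the folded
// forms from a plain Bcc:
//   {CC}                       - Bcc,    e.g. b.eq
//   {-1, Opcode, Reg}          - CB(N)Z, e.g. cbz x0
//   {-1, Opcode, Reg, BitNo}   - TB(N)Z, e.g. tbz x0, #63
//   {-1, Opcode, CC, Op0, Op1} - CB pseudos (compare-and-branch)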

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return BDisplacementBits;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  case AArch64::CBWPri:
  case AArch64::CBXPri:
  case AArch64::CBWPrr:
  case AArch64::CBXPrr:
    return CBDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  case AArch64::CBWPri:
  case AArch64::CBXPri:
  case AArch64::CBWPrr:
  case AArch64::CBXPrr:
    return MI.getOperand(3).getMBB();
  }
}
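
// insertIndirectBranch() below is used by branch relaxation when an
// unconditional branch cannot reach its destination. It tries, in order:
// reusing X16 and deferring to a linker range-extension thunk, materializing
// the target with ADRP+ADD+BR in a scavenged register (only worth the code
// size in cold sections), and finally spilling X16 around the branch.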

void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
                                            MachineBasicBlock &NewDestBB,
                                            MachineBasicBlock &RestoreBB,
                                            const DebugLoc &DL,
                                            int64_t BrOffset,
                                            RegScavenger *RS) const {
  assert(RS && "RegScavenger required for long branching");
  assert(MBB.empty() &&
         "new block should be inserted for expanding unconditional branch");
  assert(MBB.pred_size() == 1);
  assert(RestoreBB.empty() &&
         "restore block should be inserted for restoring clobbered registers");

  auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
    // Offsets outside of the signed 33-bit range are not supported for ADRP +
    // ADD.
    if (!isInt<33>(BrOffset))
      report_fatal_error(
          "Branch offsets outside of the signed 33-bit range not supported");

    BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
        .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
    BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
        .addReg(Reg)
        .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
        .addImm(0);
    BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
  };

  RS->enterBasicBlockEnd(MBB);
  // If X16 is unused, we can rely on the linker to insert a range extension
  // thunk if NewDestBB is out of range of a single B instruction.
  constexpr Register Reg = AArch64::X16;
  if (!RS->isRegUsed(Reg)) {
    insertUnconditionalBranch(MBB, &NewDestBB, DL);
    RS->setRegUsed(Reg);
    return;
  }

  // If there's a free register and it's worth inflating the code size,
  // manually insert the indirect branch.
  Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
  if (Scavenged != AArch64::NoRegister &&
      MBB.getSectionID() == MBBSectionID::ColdSectionID) {
    buildIndirectBranch(Scavenged, NewDestBB);
    RS->setRegUsed(Scavenged);
    return;
  }

  // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
  // with red zones.
  AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
  if (!AFI || AFI->hasRedZone().value_or(true))
    report_fatal_error(
        "Unable to insert indirect branch inside function that has red zone");

  // Otherwise, spill X16 and defer range extension to the linker.
  BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
      .addReg(AArch64::SP, RegState::Define)
      .addReg(Reg)
      .addReg(AArch64::SP)
      .addImm(-16);

  BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);

  BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
      .addReg(AArch64::SP, RegState::Define)
      .addReg(Reg, RegState::Define)
      .addReg(AArch64::SP)
      .addImm(16);
}
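
// Per the TargetInstrInfo contract, analyzeBranch() returns false when it
// understands the block's terminators (filling in TBB/FBB/Cond) and true
// when it cannot analyze them, e.g. for indirect branches.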

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  // Skip over SpeculationBarrierEndBB terminators
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      }
      SecondLastInst = &*I;
      SecondLastOpc = SecondLastInst->getOpcode();
    }
  }

  // If we're allowed to modify and the block ends in an unconditional branch
  // which could simply fall through, remove the branch. (Note: This case only
  // matters when we can't understand the whole sequence, otherwise it's also
  // handled by BranchFolding.cpp.)
  if (AllowModify && isUncondBranchOpcode(LastOpc) &&
      MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
    LastInst->eraseFromParent();
    LastInst = SecondLastInst;
    LastOpc = LastInst->getOpcode();
    if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
      assert(!isUncondBranchOpcode(LastOpc) &&
             "unreachable unconditional branches removed above");

      if (isCondBranchOpcode(LastOpc)) {
        // Block ends with fall-through condbranch.
        parseCondBranch(LastInst, TBB, Cond);
        return false;
      }
      return true; // Can't handle indirect branch.
    }
    SecondLastInst = &*I;
    SecondLastOpc = SecondLastInst->getOpcode();
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
                                              MachineBranchPredicate &MBP,
                                              bool AllowModify) const {
  // For the moment, handle only a block which ends with a cb(n)z followed by
  // a fallthrough. Why this? Because it is a common form.
  // TODO: Should we handle b.cc?

  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return true;

  // Skip over SpeculationBarrierEndBB terminators
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return true;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;
  unsigned LastOpc = LastInst->getOpcode();
  if (!isCondBranchOpcode(LastOpc))
    return true;

  switch (LastOpc) {
  default:
    return true;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    break;
  }

  MBP.TrueDest = LastInst->getOperand(1).getMBB();
  assert(MBP.TrueDest && "expected!");
  MBP.FalseDest = MBB.getNextNode();

  MBP.ConditionDef = nullptr;
  MBP.SingleUseCondition = false;

  MBP.LHS = LastInst->getOperand(0);
  MBP.RHS = MachineOperand::CreateImm(0);
  MBP.Predicate = (LastOpc == AArch64::CBNZW || LastOpc == AArch64::CBNZX)
                      ? MachineBranchPredicate::PRED_NE
                      : MachineBranchPredicate::PRED_EQ;
  return false;
}
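
// For example, 'cbnz w0, %bb.1' is reported as (w0 != 0) with TrueDest set
// to %bb.1 and FalseDest set to the fallthrough block.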

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;

    // Cond is { -1, Opcode, CC, Op0, Op1 }
    case AArch64::CBWPri:
    case AArch64::CBXPri:
    case AArch64::CBWPrr:
    case AArch64::CBXPrr: {
      // Pseudos using standard 4-bit Arm condition codes
      AArch64CC::CondCode CC =
          static_cast<AArch64CC::CondCode>(Cond[2].getImm());
      Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
    }
    }
  }

  return false;
}

unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.

    // cbz, cbnz
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);

    // tbz/tbnz
    if (Cond.size() > 3)
      MIB.add(Cond[3]);

    // cb
    if (Cond.size() > 4)
      MIB.add(Cond[4]);

    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (Register::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!Register::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
                                         true) == -1)
      return 0;
    // Fall through to ADDXri and ADDWri.
    [[fallthrough]];
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
                                         true) == -1)
      return 0;
    // Fall through to SUBXrr and SUBWrr.
    [[fallthrough]];
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}
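
// Example of the folding enabled by canFoldIntoCSel(): for a select between
// (x + 1) and y,
//   %t = ADDWri %x, 1, 0
//   %r = CSELWr %t, %y, cc
// insertSelect() below instead emits
//   %r = CSINCWr %y, %x, InvertedCC
// which computes cc ? x + 1 : y without the separate add.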

bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       Register DstReg, Register TrueReg,
                                       Register FalseReg, int &CondCycles,
                                       int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Also need to check the dest regclass, in case we're trying to optimize
  // something like:
  // %1(gpr) = PHI %2(fpr), bb1, %3(fpr), bb2
  if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, Register DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    Register TrueReg, Register FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    Register SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  case 5: { // cb
    // We must insert a cmp, that is a subs.
    //            0       1   2    3    4
    // Cond is { -1, Opcode, CC, Op0, Op1 }
    unsigned SUBSOpC, SUBSDestReg;
    bool IsImm = false;
    CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBWPri:
      SUBSOpC = AArch64::SUBSWri;
      SUBSDestReg = AArch64::WZR;
      IsImm = true;
      break;
    case AArch64::CBXPri:
      SUBSOpC = AArch64::SUBSXri;
      SUBSDestReg = AArch64::XZR;
      IsImm = true;
      break;
    case AArch64::CBWPrr:
      SUBSOpC = AArch64::SUBSWrr;
      SUBSDestReg = AArch64::WZR;
      IsImm = false;
      break;
    case AArch64::CBXPrr:
      SUBSOpC = AArch64::SUBSXrr;
      SUBSDestReg = AArch64::XZR;
      IsImm = false;
      break;
    }

    if (IsImm)
      BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
          .addReg(Cond[3].getReg())
          .addImm(Cond[4].getImm())
          .addImm(0);
    else
      BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
          .addReg(Cond[3].getReg())
          .addReg(Cond[4].getReg());
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

// Return true if Imm can be loaded into a register by a "cheap" sequence of
// instructions. For now, "cheap" means at most two instructions.
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
  if (BitSize == 32)
    return true;

  assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
  uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Is);

  return Is.size() <= 2;
}
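
// For example, 0x0000ffff0000ffff expands to a single ORRXri (it is a valid
// logical immediate) and is considered cheap, while an arbitrary constant
// such as 0x123456789abcdef0 needs a MOVZ plus three MOVKs and is not.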

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in the future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosCheapAsMove(MI))
      return true;
    return MI.isAsCheapAsAMove();
  }

  switch (MI.getOpcode()) {
  default:
    return MI.isAsCheapAsAMove();

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
  case AArch64::MOVi32imm:
    return isCheapImmediate(MI, 32);
  case AArch64::MOVi64imm:
    return isCheapImmediate(MI, 64);
  }
}

bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  case AArch64::SEH_StackAlloc:
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveFPLR_X:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveReg_X:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveRegP_X:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveFReg_X:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFRegP_X:
  case AArch64::SEH_SetFP:
  case AArch64::SEH_AddFP:
  case AArch64::SEH_Nop:
  case AArch64::SEH_PrologEnd:
  case AArch64::SEH_EpilogStart:
  case AArch64::SEH_EpilogEnd:
  case AArch64::SEH_PACSignLR:
  case AArch64::SEH_SaveAnyRegQP:
  case AArch64::SEH_SaveAnyRegQPX:
  case AArch64::SEH_AllocZ:
  case AArch64::SEH_SaveZReg:
  case AArch64::SEH_SavePReg:
    return true;
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             Register &SrcReg, Register &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  TypeSize WidthA(0, false), WidthB(0, false);
  bool OffsetAIsScalable = false, OffsetBIsScalable = false;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base, the offset from the base, and the width. Width is the
  // size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If the base
  // operands are identical, and the offset of the lower memory access plus its
  // width does not overlap the offset of the higher memory access, then the
  // memory accesses are disjoint.
  // If OffsetAIsScalable and OffsetBIsScalable are both true, they
  // are assumed to have the same scale (vscale).
  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
                                   WidthA, TRI) &&
      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
                                   WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(*BaseOpB) &&
        OffsetAIsScalable == OffsetBIsScalable) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowWidth.isScalable() == OffsetAIsScalable &&
          LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
        return true;
    }
  }
  return false;
}
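
// For example, 'ldr x0, [x2]' (offset 0, width 8) and 'str x1, [x2, #8]'
// (offset 8) are trivially disjoint: the bases are identical and
// 0 + 8 <= 8, so the two accesses cannot overlap.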

bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
    return true;

  // Do not move an instruction that can be recognized as a branch target.
  if (hasBTISemantics(MI))
    return true;

  switch (MI.getOpcode()) {
  case AArch64::HINT:
    // CSDB hints are scheduling barriers.
    if (MI.getOperand(0).getImm() == 0x14)
      return true;
    break;
  case AArch64::DSB:
  case AArch64::ISB:
    // DSB and ISB also are scheduling barriers.
    return true;
  case AArch64::MSRpstatesvcrImm1:
    // SMSTART and SMSTOP are also scheduling barriers.
    return true;
  default:
    break;
  }
  if (isSEHInstruction(MI))
    return true;
  auto Next = std::next(MI.getIterator());
  return Next != MBB->end() && Next->isCFIInstruction();
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                      Register &SrcReg2, int64_t &CmpMask,
                                      int64_t &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  // FIXME: Pass subregisters out of analyzeCompare
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::PTEST_PP:
  case AArch64::PTEST_PP_ANY:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = MI.getOperand(1).getReg();
    if (MI.getOperand(2).getSubReg())
      return false;

    // Not sure about the mask and value for now...
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();

    // FIXME: Pass subregisters out of analyzeCompare
    if (MI.getOperand(2).getSubReg())
      return false;

    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = MI.getOperand(2).getImm();
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = AArch64_AM::decodeLogicalImmediate(
        MI.getOperand(2).getImm(),
        MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    Register Reg = MO.getReg();
    if (Reg.isPhysical()) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible to do the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
      MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}
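
// For example, 'subs w0, w1, w2' becomes 'sub w0, w1, w2' once its flags are
// known to be dead. Forms that write WZR/XZR keep the flag-setting opcode:
// in some of the non-S encodings (e.g. ADD immediate), register 31 encodes
// SP rather than the zero register, so dropping the S would silently
// retarget the instruction.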

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::any_of(
      ++To.getReverse(), To->getParent()->rend(),
      [From](MachineInstr &MI) { return MI.getIterator() == From; }));

  // We iterate backward starting at \p To until we hit \p From.
  for (const MachineInstr &Instr :
       instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

std::optional<unsigned>
AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
                                      MachineInstr *Pred,
                                      const MachineRegisterInfo *MRI) const {
  unsigned MaskOpcode = Mask->getOpcode();
  unsigned PredOpcode = Pred->getOpcode();
  bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
  bool PredIsWhileLike = isWhileOpcode(PredOpcode);

  if (PredIsWhileLike) {
    // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
    // instruction and the condition is "any" since WHILEcc does an implicit
    // PTEST(ALL, PG) check and PG is always a subset of ALL.
    if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
      return PredOpcode;

    // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
    // redundant since WHILE performs an implicit PTEST with an all active
    // mask.
    if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
        getElementSizeForOpcode(MaskOpcode) ==
            getElementSizeForOpcode(PredOpcode))
      return PredOpcode;

    return {};
  }

  if (PredIsPTestLike) {
    // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
    // instruction that sets the flags as PTEST would and the condition is
    // "any" since PG is always a subset of the governing predicate of the
    // ptest-like instruction.
    if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
      return PredOpcode;

    auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());

    // If the PTEST like instruction's general predicate is not `Mask`, attempt
    // to look through a copy and try again. This is because some instructions
    // take a predicate whose register class is a subset of its result class.
    if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
        PTestLikeMask->getOperand(1).getReg().isVirtual())
      PTestLikeMask =
          MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());

    // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the element
    // size matches and either the PTEST_LIKE instruction uses the same all
    // active mask or the condition is "any".
    if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
        getElementSizeForOpcode(MaskOpcode) ==
            getElementSizeForOpcode(PredOpcode)) {
      if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
        return PredOpcode;
    }

    // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
    // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
    // on 8-bit predicates like the PTEST. Otherwise, for instructions like
    // compare that also support 16/32/64-bit predicates, the implicit PTEST
    // performed by the compare could consider fewer lanes for these element
    // sizes.
    //
    // For example, consider
    //
    //   ptrue p0.b                    ; P0=1111-1111-1111-1111
    //   index z0.s, #0, #1            ; Z0=<0,1,2,3>
    //   index z1.s, #1, #1            ; Z1=<1,2,3,4>
    //   cmphi p1.s, p0/z, z1.s, z0.s  ; P1=0001-0001-0001-0001
    //                                 ;       ^ last active
    //   ptest p0, p1.b                ; P1=0001-0001-0001-0001
    //                                 ;     ^ last active
    //
    // where the compare generates a canonical all active 32-bit predicate
    // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
    // active flag, whereas the PTEST instruction with the same mask doesn't.
    // For PTEST_ANY this doesn't apply as the flags in this case would be
    // identical regardless of element size.
    uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
    if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
                                  PTest->getOpcode() == AArch64::PTEST_PP_ANY))
      return PredOpcode;

    return {};
  }

  // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
  // opcode so the PTEST becomes redundant.
  switch (PredOpcode) {
  case AArch64::AND_PPzPP:
  case AArch64::BIC_PPzPP:
  case AArch64::EOR_PPzPP:
  case AArch64::NAND_PPzPP:
  case AArch64::NOR_PPzPP:
  case AArch64::ORN_PPzPP:
  case AArch64::ORR_PPzPP:
  case AArch64::BRKA_PPzP:
  case AArch64::BRKPA_PPzPP:
  case AArch64::BRKB_PPzP:
  case AArch64::BRKPB_PPzPP:
  case AArch64::RDFFR_PPz: {
    // Check to see if our mask is the same. If not the resulting flag bits
    // may be different and we can't remove the ptest.
    auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
    if (Mask != PredMask)
      return {};
    break;
  }
  case AArch64::BRKN_PPzP: {
    // BRKN uses an all active implicit mask to set flags unlike the other
    // flag-setting instructions.
    // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
    if ((MaskOpcode != AArch64::PTRUE_B) ||
        (Mask->getOperand(1).getImm() != 31))
      return {};
    break;
  }
  case AArch64::PTRUE_B:
    // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
    break;
  default:
    // Bail out if we don't recognize the input
    return {};
  }

  return convertToFlagSettingOpc(PredOpcode);
}
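
// Example of the rewrite driven by canRemovePTestInstr(): in
//   %1 = AND_PPzPP %pg, %a, %b
//   PTEST_PP %pg, %1
// the PTEST re-checks the same governing predicate, so the sequence can be
// collapsed to the flag-setting 'ANDS_PPzPP %pg, %a, %b' and the PTEST
// erased (see optimizePTestInstr below).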

/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
/// operation which could set the flags in an identical manner.
bool AArch64InstrInfo::optimizePTestInstr(
    MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
    const MachineRegisterInfo *MRI) const {
  auto *Mask = MRI->getUniqueVRegDef(MaskReg);
  auto *Pred = MRI->getUniqueVRegDef(PredReg);
  unsigned PredOpcode = Pred->getOpcode();
  auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
  if (!NewOp)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  // If another instruction between Pred and PTest accesses flags, don't remove
  // the ptest or update the earlier instruction to modify them.
  if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
    return false;

  // If we pass all the checks, it's safe to remove the PTEST and use the flags
  // as they are prior to PTEST. Sometimes this requires the tested PTEST
  // operand to be replaced with an equivalent instruction that also sets the
  // flags.
  PTest->eraseFromParent();
  if (*NewOp != PredOpcode) {
    Pred->setDesc(get(*NewOp));
    bool succeeded = UpdateOperandRegClass(*Pred);
    (void)succeeded;
    assert(succeeded && "Operands have incompatible register classes!");
    Pred->addRegisterDefined(AArch64::NZCV, TRI);
  }

  // Ensure that the flags def is live.
  if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
    unsigned i = 0, e = Pred->getNumOperands();
    for (; i != e; ++i) {
      MachineOperand &MO = Pred->getOperand(i);
      if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
        MO.setIsDead(false);
        break;
      }
    }
  }
  return true;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare
/// instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if there is an instruction above it that produces the
///    needed condition code, or an instruction that can be converted into
///    such an instruction.
/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
    int64_t CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx =
      CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
        CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.removeOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
      CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
    return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);

  if (SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if its destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
    return true;
  return (CmpValue == 0 || CmpValue == 1) &&
         removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}
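
// A typical case handled above: a SUBSWrr whose flags are dead is rewritten
// as SUBWrr, while for
//   %3 = ANDWri %0, imm
//   SUBSWri %3, 0, 0   ; cmp %3, #0
//   b.eq ...
// the compare can be dropped by switching the AND to its flag-setting ANDS
// form (see sForm() and substituteCmpToZero below).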
1706 static unsigned sForm(MachineInstr &Instr) { 1707 switch (Instr.getOpcode()) { 1708 default: 1709 return AArch64::INSTRUCTION_LIST_END; 1710 1711 case AArch64::ADDSWrr: 1712 case AArch64::ADDSWri: 1713 case AArch64::ADDSXrr: 1714 case AArch64::ADDSXri: 1715 case AArch64::SUBSWrr: 1716 case AArch64::SUBSWri: 1717 case AArch64::SUBSXrr: 1718 case AArch64::SUBSXri: 1719 return Instr.getOpcode(); 1720 1721 case AArch64::ADDWrr: 1722 return AArch64::ADDSWrr; 1723 case AArch64::ADDWri: 1724 return AArch64::ADDSWri; 1725 case AArch64::ADDXrr: 1726 return AArch64::ADDSXrr; 1727 case AArch64::ADDXri: 1728 return AArch64::ADDSXri; 1729 case AArch64::ADCWr: 1730 return AArch64::ADCSWr; 1731 case AArch64::ADCXr: 1732 return AArch64::ADCSXr; 1733 case AArch64::SUBWrr: 1734 return AArch64::SUBSWrr; 1735 case AArch64::SUBWri: 1736 return AArch64::SUBSWri; 1737 case AArch64::SUBXrr: 1738 return AArch64::SUBSXrr; 1739 case AArch64::SUBXri: 1740 return AArch64::SUBSXri; 1741 case AArch64::SBCWr: 1742 return AArch64::SBCSWr; 1743 case AArch64::SBCXr: 1744 return AArch64::SBCSXr; 1745 case AArch64::ANDWri: 1746 return AArch64::ANDSWri; 1747 case AArch64::ANDXri: 1748 return AArch64::ANDSXri; 1749 } 1750 } 1751 1752 /// Check if AArch64::NZCV should be alive in successors of MBB. 1753 static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) { 1754 for (auto *BB : MBB->successors()) 1755 if (BB->isLiveIn(AArch64::NZCV)) 1756 return true; 1757 return false; 1758 } 1759 1760 /// \returns The condition code operand index for \p Instr if it is a branch 1761 /// or select and -1 otherwise. 1762 static int 1763 findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) { 1764 switch (Instr.getOpcode()) { 1765 default: 1766 return -1; 1767 1768 case AArch64::Bcc: { 1769 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr); 1770 assert(Idx >= 2); 1771 return Idx - 2; 1772 } 1773 1774 case AArch64::CSINVWr: 1775 case AArch64::CSINVXr: 1776 case AArch64::CSINCWr: 1777 case AArch64::CSINCXr: 1778 case AArch64::CSELWr: 1779 case AArch64::CSELXr: 1780 case AArch64::CSNEGWr: 1781 case AArch64::CSNEGXr: 1782 case AArch64::FCSELSrrr: 1783 case AArch64::FCSELDrrr: { 1784 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr); 1785 assert(Idx >= 1); 1786 return Idx - 1; 1787 } 1788 } 1789 } 1790 1791 /// Find a condition code used by the instruction. 1792 /// Returns AArch64CC::Invalid if either the instruction does not use condition 1793 /// codes or we don't optimize CmpInstr in the presence of such instructions. 1794 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) { 1795 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr); 1796 return CCIdx >= 0 ? 
static_cast<AArch64CC::CondCode>(
1797                         Instr.getOperand(CCIdx).getImm())
1798                   : AArch64CC::Invalid;
1799 }
1800 
1801 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1802   assert(CC != AArch64CC::Invalid);
1803   UsedNZCV UsedFlags;
1804   switch (CC) {
1805   default:
1806     break;
1807 
1808   case AArch64CC::EQ: // Z set
1809   case AArch64CC::NE: // Z clear
1810     UsedFlags.Z = true;
1811     break;
1812 
1813   case AArch64CC::HI: // Z clear and C set
1814   case AArch64CC::LS: // Z set or C clear
1815     UsedFlags.Z = true;
1816     [[fallthrough]];
1817   case AArch64CC::HS: // C set
1818   case AArch64CC::LO: // C clear
1819     UsedFlags.C = true;
1820     break;
1821 
1822   case AArch64CC::MI: // N set
1823   case AArch64CC::PL: // N clear
1824     UsedFlags.N = true;
1825     break;
1826 
1827   case AArch64CC::VS: // V set
1828   case AArch64CC::VC: // V clear
1829     UsedFlags.V = true;
1830     break;
1831 
1832   case AArch64CC::GT: // Z clear, N and V the same
1833   case AArch64CC::LE: // Z set, N and V differ
1834     UsedFlags.Z = true;
1835     [[fallthrough]];
1836   case AArch64CC::GE: // N and V the same
1837   case AArch64CC::LT: // N and V differ
1838     UsedFlags.N = true;
1839     UsedFlags.V = true;
1840     break;
1841   }
1842   return UsedFlags;
1843 }
1844 
1845 /// \returns Condition flags used after \p CmpInstr in its MachineBB if the
1846 /// NZCV flags are not alive in the successors of the shared parent block of
1847 /// \p CmpInstr and \p MI; \returns std::nullopt otherwise.
1848 ///
1849 /// Collects the instructions using those flags in \p CCUseInstrs if provided.
1850 std::optional<UsedNZCV>
1851 llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1852                        const TargetRegisterInfo &TRI,
1853                        SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1854   MachineBasicBlock *CmpParent = CmpInstr.getParent();
1855   if (MI.getParent() != CmpParent)
1856     return std::nullopt;
1857 
1858   if (areCFlagsAliveInSuccessors(CmpParent))
1859     return std::nullopt;
1860 
1861   UsedNZCV NZCVUsedAfterCmp;
1862   for (MachineInstr &Instr : instructionsWithoutDebug(
1863            std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1864     if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1865       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1866       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1867         return std::nullopt;
1868       NZCVUsedAfterCmp |= getUsedNZCV(CC);
1869       if (CCUseInstrs)
1870         CCUseInstrs->push_back(&Instr);
1871     }
1872     if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1873       break;
1874   }
1875   return NZCVUsedAfterCmp;
1876 }
1877 
1878 static bool isADDSRegImm(unsigned Opcode) {
1879   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1880 }
1881 
1882 static bool isSUBSRegImm(unsigned Opcode) {
1883   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1884 }
1885 
1886 /// Check if CmpInstr can be substituted by MI.
1887 ///
1888 /// CmpInstr can be substituted:
1889 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1890 /// - and, MI and CmpInstr are from the same MachineBB
1891 /// - and, condition flags are not alive in successors of the CmpInstr parent
1892 /// - and, if MI opcode is the S form there must be no defs of flags between
1893 ///        MI and CmpInstr
1894 ///        or if MI opcode is not the S form there must be neither defs of
1895 ///        flags nor uses of flags between MI and CmpInstr.
1896 /// - and, the C flag is not used after CmpInstr
1897 /// - and, the V flag is not used after CmpInstr, unless MI produces a poison
1898 ///        value when signed overflow occurs.
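/// A sketch of the rewrite this predicate enables (assuming only N/Z are
/// used afterwards):
/// \code
///   %w = ADDWrr %a, %b
///   SUBSWri %w, 0          ; CmpInstr
///   b.eq <target>
/// \endcode
/// to
/// \code
///   %w = ADDSWrr %a, %b    ; S form sets N/Z exactly as the SUBS did
///   b.eq <target>
/// \endcode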
1899 static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1900                                        const TargetRegisterInfo &TRI) {
1901   // NOTE: This assertion guarantees that MI.getOpcode() is an add or a
1902   // subtraction that may or may not set flags.
1903   assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1904 
1905   const unsigned CmpOpcode = CmpInstr.getOpcode();
1906   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1907     return false;
1908 
1909   assert((CmpInstr.getOperand(2).isImm() &&
1910           CmpInstr.getOperand(2).getImm() == 0) &&
1911          "Caller guarantees that CmpInstr compares with constant 0");
1912 
1913   std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1914   if (!NZCVUsed || NZCVUsed->C)
1915     return false;
1916 
1917   // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1918   // '%vreg = add ...' or '%vreg = sub ...'.
1919   // Condition flag V is used to indicate signed overflow.
1920   // 1) MI and CmpInstr set N and V to the same value.
1921   // 2) If MI is an add/sub with no-signed-wrap, it produces a poison value
1922   //    when signed overflow occurs, so CmpInstr can still be simplified away.
1923   if (NZCVUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1924     return false;
1925 
1926   AccessKind AccessToCheck = AK_Write;
1927   if (sForm(MI) != MI.getOpcode())
1928     AccessToCheck = AK_All;
1929   return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1930 }
1931 
1932 /// Substitute an instruction comparing to zero with another instruction
1933 /// which produces the needed condition flags.
1934 ///
1935 /// Return true on success.
1936 bool AArch64InstrInfo::substituteCmpToZero(
1937     MachineInstr &CmpInstr, unsigned SrcReg,
1938     const MachineRegisterInfo &MRI) const {
1939   // Get the unique definition of SrcReg.
1940   MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1941   if (!MI)
1942     return false;
1943 
1944   const TargetRegisterInfo &TRI = getRegisterInfo();
1945 
1946   unsigned NewOpc = sForm(*MI);
1947   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1948     return false;
1949 
1950   if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1951     return false;
1952 
1953   // Update the instruction to set NZCV.
1954   MI->setDesc(get(NewOpc));
1955   CmpInstr.eraseFromParent();
1956   bool succeeded = UpdateOperandRegClass(*MI);
1957   (void)succeeded;
1958   assert(succeeded && "Some operands reg class are incompatible!");
1959   MI->addRegisterDefined(AArch64::NZCV, &TRI);
1960   return true;
1961 }
1962 
1963 /// \returns True if \p CmpInstr can be removed.
1964 ///
1965 /// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1966 /// codes used in \p CCUseInstrs must be inverted.
1967 static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, 1968 int CmpValue, const TargetRegisterInfo &TRI, 1969 SmallVectorImpl<MachineInstr *> &CCUseInstrs, 1970 bool &IsInvertCC) { 1971 assert((CmpValue == 0 || CmpValue == 1) && 1972 "Only comparisons to 0 or 1 considered for removal!"); 1973 1974 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>' 1975 unsigned MIOpc = MI.getOpcode(); 1976 if (MIOpc == AArch64::CSINCWr) { 1977 if (MI.getOperand(1).getReg() != AArch64::WZR || 1978 MI.getOperand(2).getReg() != AArch64::WZR) 1979 return false; 1980 } else if (MIOpc == AArch64::CSINCXr) { 1981 if (MI.getOperand(1).getReg() != AArch64::XZR || 1982 MI.getOperand(2).getReg() != AArch64::XZR) 1983 return false; 1984 } else { 1985 return false; 1986 } 1987 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI); 1988 if (MICC == AArch64CC::Invalid) 1989 return false; 1990 1991 // NZCV needs to be defined 1992 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1) 1993 return false; 1994 1995 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1' 1996 const unsigned CmpOpcode = CmpInstr.getOpcode(); 1997 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode); 1998 if (CmpValue && !IsSubsRegImm) 1999 return false; 2000 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode)) 2001 return false; 2002 2003 // MI conditions allowed: eq, ne, mi, pl 2004 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC); 2005 if (MIUsedNZCV.C || MIUsedNZCV.V) 2006 return false; 2007 2008 std::optional<UsedNZCV> NZCVUsedAfterCmp = 2009 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs); 2010 // Condition flags are not used in CmpInstr basic block successors and only 2011 // Z or N flags allowed to be used after CmpInstr within its basic block 2012 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V) 2013 return false; 2014 // Z or N flag used after CmpInstr must correspond to the flag used in MI 2015 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) || 2016 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z)) 2017 return false; 2018 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne 2019 if (MIUsedNZCV.N && !CmpValue) 2020 return false; 2021 2022 // There must be no defs of flags between MI and CmpInstr 2023 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write)) 2024 return false; 2025 2026 // Condition code is inverted in the following cases: 2027 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' 2028 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1' 2029 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) || 2030 (!CmpValue && MICC == AArch64CC::NE); 2031 return true; 2032 } 2033 2034 /// Remove comparison in csinc-cmp sequence 2035 /// 2036 /// Examples: 2037 /// 1. \code 2038 /// csinc w9, wzr, wzr, ne 2039 /// cmp w9, #0 2040 /// b.eq 2041 /// \endcode 2042 /// to 2043 /// \code 2044 /// csinc w9, wzr, wzr, ne 2045 /// b.ne 2046 /// \endcode 2047 /// 2048 /// 2. 
\code 2049 /// csinc x2, xzr, xzr, mi 2050 /// cmp x2, #1 2051 /// b.pl 2052 /// \endcode 2053 /// to 2054 /// \code 2055 /// csinc x2, xzr, xzr, mi 2056 /// b.pl 2057 /// \endcode 2058 /// 2059 /// \param CmpInstr comparison instruction 2060 /// \return True when comparison removed 2061 bool AArch64InstrInfo::removeCmpToZeroOrOne( 2062 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue, 2063 const MachineRegisterInfo &MRI) const { 2064 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg); 2065 if (!MI) 2066 return false; 2067 const TargetRegisterInfo &TRI = getRegisterInfo(); 2068 SmallVector<MachineInstr *, 4> CCUseInstrs; 2069 bool IsInvertCC = false; 2070 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs, 2071 IsInvertCC)) 2072 return false; 2073 // Make transformation 2074 CmpInstr.eraseFromParent(); 2075 if (IsInvertCC) { 2076 // Invert condition codes in CmpInstr CC users 2077 for (MachineInstr *CCUseInstr : CCUseInstrs) { 2078 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr); 2079 assert(Idx >= 0 && "Unexpected instruction using CC."); 2080 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx); 2081 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode( 2082 static_cast<AArch64CC::CondCode>(CCOperand.getImm())); 2083 CCOperand.setImm(CCUse); 2084 } 2085 } 2086 return true; 2087 } 2088 2089 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { 2090 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD && 2091 MI.getOpcode() != AArch64::CATCHRET) 2092 return false; 2093 2094 MachineBasicBlock &MBB = *MI.getParent(); 2095 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>(); 2096 auto TRI = Subtarget.getRegisterInfo(); 2097 DebugLoc DL = MI.getDebugLoc(); 2098 2099 if (MI.getOpcode() == AArch64::CATCHRET) { 2100 // Skip to the first instruction before the epilog. 
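    // The pseudo is replaced by code materialising the address of the target
    // block in x0; roughly (a sketch of the pair built below):
    //   adrp x0, <TargetMBB>
    //   add  x0, x0, :lo12:<TargetMBB>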
2101     const TargetInstrInfo *TII =
2102         MBB.getParent()->getSubtarget().getInstrInfo();
2103     MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2104     auto MBBI = MachineBasicBlock::iterator(MI);
2105     MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2106     while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2107            FirstEpilogSEH != MBB.begin())
2108       FirstEpilogSEH = std::prev(FirstEpilogSEH);
2109     if (FirstEpilogSEH != MBB.begin())
2110       FirstEpilogSEH = std::next(FirstEpilogSEH);
2111     BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2112         .addReg(AArch64::X0, RegState::Define)
2113         .addMBB(TargetMBB);
2114     BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2115         .addReg(AArch64::X0, RegState::Define)
2116         .addReg(AArch64::X0)
2117         .addMBB(TargetMBB)
2118         .addImm(0);
2119     TargetMBB->setMachineBlockAddressTaken();
2120     return true;
2121   }
2122 
2123   Register Reg = MI.getOperand(0).getReg();
2124   Module &M = *MBB.getParent()->getFunction().getParent();
2125   if (M.getStackProtectorGuard() == "sysreg") {
2126     const AArch64SysReg::SysReg *SrcReg =
2127         AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2128     if (!SrcReg)
2129       report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2130 
2131     // mrs xN, sysreg
2132     BuildMI(MBB, MI, DL, get(AArch64::MRS))
2133         .addDef(Reg, RegState::Renamable)
2134         .addImm(SrcReg->Encoding);
2135     int Offset = M.getStackProtectorGuardOffset();
2136     if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2137       // ldr xN, [xN, #offset]
2138       BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2139           .addDef(Reg)
2140           .addUse(Reg, RegState::Kill)
2141           .addImm(Offset / 8);
2142     } else if (Offset >= -256 && Offset <= 255) {
2143       // ldur xN, [xN, #offset]
2144       BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2145           .addDef(Reg)
2146           .addUse(Reg, RegState::Kill)
2147           .addImm(Offset);
2148     } else if (Offset >= -4095 && Offset <= 4095) {
2149       if (Offset > 0) {
2150         // add xN, xN, #offset
2151         BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2152             .addDef(Reg)
2153             .addUse(Reg, RegState::Kill)
2154             .addImm(Offset)
2155             .addImm(0);
2156       } else {
2157         // sub xN, xN, #offset
2158         BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2159             .addDef(Reg)
2160             .addUse(Reg, RegState::Kill)
2161             .addImm(-Offset)
2162             .addImm(0);
2163       }
2164       // ldr xN, [xN]
2165       BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2166           .addDef(Reg)
2167           .addUse(Reg, RegState::Kill)
2168           .addImm(0);
2169     } else {
2170       // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2171       // than 32760.
2172       // It might be nice to use AArch64::MOVi32imm here, which would get
2173       // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2174       // contains the MRS result. findScratchNonCalleeSaveRegister() in
2175       // AArch64FrameLowering might help us find such a scratch register
2176       // though. If we failed to find a scratch register, we could emit a
2177       // stream of add instructions to build up the immediate. Or, we could try
2178       // to insert an AArch64::MOVi32imm before register allocation so that we
2179       // didn't need to scavenge for a scratch register.
2180 report_fatal_error("Unable to encode Stack Protector Guard Offset"); 2181 } 2182 MBB.erase(MI); 2183 return true; 2184 } 2185 2186 const GlobalValue *GV = 2187 cast<GlobalValue>((*MI.memoperands_begin())->getValue()); 2188 const TargetMachine &TM = MBB.getParent()->getTarget(); 2189 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM); 2190 const unsigned char MO_NC = AArch64II::MO_NC; 2191 2192 if ((OpFlags & AArch64II::MO_GOT) != 0) { 2193 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg) 2194 .addGlobalAddress(GV, 0, OpFlags); 2195 if (Subtarget.isTargetILP32()) { 2196 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); 2197 BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) 2198 .addDef(Reg32, RegState::Dead) 2199 .addUse(Reg, RegState::Kill) 2200 .addImm(0) 2201 .addMemOperand(*MI.memoperands_begin()) 2202 .addDef(Reg, RegState::Implicit); 2203 } else { 2204 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 2205 .addReg(Reg, RegState::Kill) 2206 .addImm(0) 2207 .addMemOperand(*MI.memoperands_begin()); 2208 } 2209 } else if (TM.getCodeModel() == CodeModel::Large) { 2210 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?"); 2211 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg) 2212 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC) 2213 .addImm(0); 2214 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 2215 .addReg(Reg, RegState::Kill) 2216 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC) 2217 .addImm(16); 2218 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 2219 .addReg(Reg, RegState::Kill) 2220 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC) 2221 .addImm(32); 2222 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 2223 .addReg(Reg, RegState::Kill) 2224 .addGlobalAddress(GV, 0, AArch64II::MO_G3) 2225 .addImm(48); 2226 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 2227 .addReg(Reg, RegState::Kill) 2228 .addImm(0) 2229 .addMemOperand(*MI.memoperands_begin()); 2230 } else if (TM.getCodeModel() == CodeModel::Tiny) { 2231 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg) 2232 .addGlobalAddress(GV, 0, OpFlags); 2233 } else { 2234 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg) 2235 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE); 2236 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC; 2237 if (Subtarget.isTargetILP32()) { 2238 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); 2239 BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) 2240 .addDef(Reg32, RegState::Dead) 2241 .addUse(Reg, RegState::Kill) 2242 .addGlobalAddress(GV, 0, LoFlags) 2243 .addMemOperand(*MI.memoperands_begin()) 2244 .addDef(Reg, RegState::Implicit); 2245 } else { 2246 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 2247 .addReg(Reg, RegState::Kill) 2248 .addGlobalAddress(GV, 0, LoFlags) 2249 .addMemOperand(*MI.memoperands_begin()); 2250 } 2251 } 2252 2253 MBB.erase(MI); 2254 2255 return true; 2256 } 2257 2258 // Return true if this instruction simply sets its single destination register 2259 // to zero. This is equivalent to a register rename of the zero-register. 
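// For example (illustrative), 'movz w0, #0' and 'and w0, wzr, #0xff' both
// qualify, while 'movz w0, #1' does not.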
2260 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2261   switch (MI.getOpcode()) {
2262   default:
2263     break;
2264   case AArch64::MOVZWi:
2265   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2266     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2267       assert(MI.getDesc().getNumOperands() == 3 &&
2268              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2269       return true;
2270     }
2271     break;
2272   case AArch64::ANDWri: // and Rd, Rzr, #imm
2273     return MI.getOperand(1).getReg() == AArch64::WZR;
2274   case AArch64::ANDXri:
2275     return MI.getOperand(1).getReg() == AArch64::XZR;
2276   case TargetOpcode::COPY:
2277     return MI.getOperand(1).getReg() == AArch64::WZR;
2278   }
2279   return false;
2280 }
2281 
2282 // Return true if this instruction simply renames a general register without
2283 // modifying bits.
2284 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2285   switch (MI.getOpcode()) {
2286   default:
2287     break;
2288   case TargetOpcode::COPY: {
2289     // GPR32 copies will be lowered to ORRXrs
2290     Register DstReg = MI.getOperand(0).getReg();
2291     return (AArch64::GPR32RegClass.contains(DstReg) ||
2292             AArch64::GPR64RegClass.contains(DstReg));
2293   }
2294   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2295     if (MI.getOperand(1).getReg() == AArch64::XZR) {
2296       assert(MI.getDesc().getNumOperands() == 4 &&
2297              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2298       return true;
2299     }
2300     break;
2301   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2302     if (MI.getOperand(2).getImm() == 0) {
2303       assert(MI.getDesc().getNumOperands() == 4 &&
2304              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2305       return true;
2306     }
2307     break;
2308   }
2309   return false;
2310 }
2311 
2312 // Return true if this instruction simply renames a floating-point register
2313 // without modifying bits.
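// For example (illustrative), 'orr v0.16b, v1.16b, v1.16b' qualifies, as does
// a COPY whose destination is an FPR128 register.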
2314 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) { 2315 switch (MI.getOpcode()) { 2316 default: 2317 break; 2318 case TargetOpcode::COPY: { 2319 Register DstReg = MI.getOperand(0).getReg(); 2320 return AArch64::FPR128RegClass.contains(DstReg); 2321 } 2322 case AArch64::ORRv16i8: 2323 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { 2324 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() && 2325 "invalid ORRv16i8 operands"); 2326 return true; 2327 } 2328 break; 2329 } 2330 return false; 2331 } 2332 2333 Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 2334 int &FrameIndex) const { 2335 switch (MI.getOpcode()) { 2336 default: 2337 break; 2338 case AArch64::LDRWui: 2339 case AArch64::LDRXui: 2340 case AArch64::LDRBui: 2341 case AArch64::LDRHui: 2342 case AArch64::LDRSui: 2343 case AArch64::LDRDui: 2344 case AArch64::LDRQui: 2345 case AArch64::LDR_PXI: 2346 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && 2347 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { 2348 FrameIndex = MI.getOperand(1).getIndex(); 2349 return MI.getOperand(0).getReg(); 2350 } 2351 break; 2352 } 2353 2354 return 0; 2355 } 2356 2357 Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI, 2358 int &FrameIndex) const { 2359 switch (MI.getOpcode()) { 2360 default: 2361 break; 2362 case AArch64::STRWui: 2363 case AArch64::STRXui: 2364 case AArch64::STRBui: 2365 case AArch64::STRHui: 2366 case AArch64::STRSui: 2367 case AArch64::STRDui: 2368 case AArch64::STRQui: 2369 case AArch64::STR_PXI: 2370 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && 2371 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { 2372 FrameIndex = MI.getOperand(1).getIndex(); 2373 return MI.getOperand(0).getReg(); 2374 } 2375 break; 2376 } 2377 return 0; 2378 } 2379 2380 /// Check all MachineMemOperands for a hint to suppress pairing. 2381 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) { 2382 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 2383 return MMO->getFlags() & MOSuppressPair; 2384 }); 2385 } 2386 2387 /// Set a flag on the first MachineMemOperand to suppress pairing. 2388 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) { 2389 if (MI.memoperands_empty()) 2390 return; 2391 (*MI.memoperands_begin())->setFlags(MOSuppressPair); 2392 } 2393 2394 /// Check all MachineMemOperands for a hint that the load/store is strided. 
2395 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) { 2396 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 2397 return MMO->getFlags() & MOStridedAccess; 2398 }); 2399 } 2400 2401 bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) { 2402 switch (Opc) { 2403 default: 2404 return false; 2405 case AArch64::STURSi: 2406 case AArch64::STRSpre: 2407 case AArch64::STURDi: 2408 case AArch64::STRDpre: 2409 case AArch64::STURQi: 2410 case AArch64::STRQpre: 2411 case AArch64::STURBBi: 2412 case AArch64::STURHHi: 2413 case AArch64::STURWi: 2414 case AArch64::STRWpre: 2415 case AArch64::STURXi: 2416 case AArch64::STRXpre: 2417 case AArch64::LDURSi: 2418 case AArch64::LDRSpre: 2419 case AArch64::LDURDi: 2420 case AArch64::LDRDpre: 2421 case AArch64::LDURQi: 2422 case AArch64::LDRQpre: 2423 case AArch64::LDURWi: 2424 case AArch64::LDRWpre: 2425 case AArch64::LDURXi: 2426 case AArch64::LDRXpre: 2427 case AArch64::LDRSWpre: 2428 case AArch64::LDURSWi: 2429 case AArch64::LDURHHi: 2430 case AArch64::LDURBBi: 2431 case AArch64::LDURSBWi: 2432 case AArch64::LDURSHWi: 2433 return true; 2434 } 2435 } 2436 2437 std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) { 2438 switch (Opc) { 2439 default: return {}; 2440 case AArch64::PRFMui: return AArch64::PRFUMi; 2441 case AArch64::LDRXui: return AArch64::LDURXi; 2442 case AArch64::LDRWui: return AArch64::LDURWi; 2443 case AArch64::LDRBui: return AArch64::LDURBi; 2444 case AArch64::LDRHui: return AArch64::LDURHi; 2445 case AArch64::LDRSui: return AArch64::LDURSi; 2446 case AArch64::LDRDui: return AArch64::LDURDi; 2447 case AArch64::LDRQui: return AArch64::LDURQi; 2448 case AArch64::LDRBBui: return AArch64::LDURBBi; 2449 case AArch64::LDRHHui: return AArch64::LDURHHi; 2450 case AArch64::LDRSBXui: return AArch64::LDURSBXi; 2451 case AArch64::LDRSBWui: return AArch64::LDURSBWi; 2452 case AArch64::LDRSHXui: return AArch64::LDURSHXi; 2453 case AArch64::LDRSHWui: return AArch64::LDURSHWi; 2454 case AArch64::LDRSWui: return AArch64::LDURSWi; 2455 case AArch64::STRXui: return AArch64::STURXi; 2456 case AArch64::STRWui: return AArch64::STURWi; 2457 case AArch64::STRBui: return AArch64::STURBi; 2458 case AArch64::STRHui: return AArch64::STURHi; 2459 case AArch64::STRSui: return AArch64::STURSi; 2460 case AArch64::STRDui: return AArch64::STURDi; 2461 case AArch64::STRQui: return AArch64::STURQi; 2462 case AArch64::STRBBui: return AArch64::STURBBi; 2463 case AArch64::STRHHui: return AArch64::STURHHi; 2464 } 2465 } 2466 2467 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { 2468 switch (Opc) { 2469 default: 2470 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx"); 2471 case AArch64::ADDG: 2472 case AArch64::LDAPURBi: 2473 case AArch64::LDAPURHi: 2474 case AArch64::LDAPURi: 2475 case AArch64::LDAPURSBWi: 2476 case AArch64::LDAPURSBXi: 2477 case AArch64::LDAPURSHWi: 2478 case AArch64::LDAPURSHXi: 2479 case AArch64::LDAPURSWi: 2480 case AArch64::LDAPURXi: 2481 case AArch64::LDR_PPXI: 2482 case AArch64::LDR_PXI: 2483 case AArch64::LDR_ZXI: 2484 case AArch64::LDR_ZZXI: 2485 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS: 2486 case AArch64::LDR_ZZZXI: 2487 case AArch64::LDR_ZZZZXI: 2488 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS: 2489 case AArch64::LDRBBui: 2490 case AArch64::LDRBui: 2491 case AArch64::LDRDui: 2492 case AArch64::LDRHHui: 2493 case AArch64::LDRHui: 2494 case AArch64::LDRQui: 2495 case AArch64::LDRSBWui: 2496 case AArch64::LDRSBXui: 2497 case AArch64::LDRSHWui: 2498 case AArch64::LDRSHXui: 2499 
case AArch64::LDRSui: 2500 case AArch64::LDRSWui: 2501 case AArch64::LDRWui: 2502 case AArch64::LDRXui: 2503 case AArch64::LDURBBi: 2504 case AArch64::LDURBi: 2505 case AArch64::LDURDi: 2506 case AArch64::LDURHHi: 2507 case AArch64::LDURHi: 2508 case AArch64::LDURQi: 2509 case AArch64::LDURSBWi: 2510 case AArch64::LDURSBXi: 2511 case AArch64::LDURSHWi: 2512 case AArch64::LDURSHXi: 2513 case AArch64::LDURSi: 2514 case AArch64::LDURSWi: 2515 case AArch64::LDURWi: 2516 case AArch64::LDURXi: 2517 case AArch64::PRFMui: 2518 case AArch64::PRFUMi: 2519 case AArch64::ST2Gi: 2520 case AArch64::STGi: 2521 case AArch64::STLURBi: 2522 case AArch64::STLURHi: 2523 case AArch64::STLURWi: 2524 case AArch64::STLURXi: 2525 case AArch64::StoreSwiftAsyncContext: 2526 case AArch64::STR_PPXI: 2527 case AArch64::STR_PXI: 2528 case AArch64::STR_ZXI: 2529 case AArch64::STR_ZZXI: 2530 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS: 2531 case AArch64::STR_ZZZXI: 2532 case AArch64::STR_ZZZZXI: 2533 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS: 2534 case AArch64::STRBBui: 2535 case AArch64::STRBui: 2536 case AArch64::STRDui: 2537 case AArch64::STRHHui: 2538 case AArch64::STRHui: 2539 case AArch64::STRQui: 2540 case AArch64::STRSui: 2541 case AArch64::STRWui: 2542 case AArch64::STRXui: 2543 case AArch64::STURBBi: 2544 case AArch64::STURBi: 2545 case AArch64::STURDi: 2546 case AArch64::STURHHi: 2547 case AArch64::STURHi: 2548 case AArch64::STURQi: 2549 case AArch64::STURSi: 2550 case AArch64::STURWi: 2551 case AArch64::STURXi: 2552 case AArch64::STZ2Gi: 2553 case AArch64::STZGi: 2554 case AArch64::TAGPstack: 2555 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO: 2556 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO: 2557 return 2; 2558 case AArch64::LD1B_D_IMM: 2559 case AArch64::LD1B_H_IMM: 2560 case AArch64::LD1B_IMM: 2561 case AArch64::LD1B_S_IMM: 2562 case AArch64::LD1D_IMM: 2563 case AArch64::LD1H_D_IMM: 2564 case AArch64::LD1H_IMM: 2565 case AArch64::LD1H_S_IMM: 2566 case AArch64::LD1RB_D_IMM: 2567 case AArch64::LD1RB_H_IMM: 2568 case AArch64::LD1RB_IMM: 2569 case AArch64::LD1RB_S_IMM: 2570 case AArch64::LD1RD_IMM: 2571 case AArch64::LD1RH_D_IMM: 2572 case AArch64::LD1RH_IMM: 2573 case AArch64::LD1RH_S_IMM: 2574 case AArch64::LD1RSB_D_IMM: 2575 case AArch64::LD1RSB_H_IMM: 2576 case AArch64::LD1RSB_S_IMM: 2577 case AArch64::LD1RSH_D_IMM: 2578 case AArch64::LD1RSH_S_IMM: 2579 case AArch64::LD1RSW_IMM: 2580 case AArch64::LD1RW_D_IMM: 2581 case AArch64::LD1RW_IMM: 2582 case AArch64::LD1SB_D_IMM: 2583 case AArch64::LD1SB_H_IMM: 2584 case AArch64::LD1SB_S_IMM: 2585 case AArch64::LD1SH_D_IMM: 2586 case AArch64::LD1SH_S_IMM: 2587 case AArch64::LD1SW_D_IMM: 2588 case AArch64::LD1W_D_IMM: 2589 case AArch64::LD1W_IMM: 2590 case AArch64::LD2B_IMM: 2591 case AArch64::LD2D_IMM: 2592 case AArch64::LD2H_IMM: 2593 case AArch64::LD2W_IMM: 2594 case AArch64::LD3B_IMM: 2595 case AArch64::LD3D_IMM: 2596 case AArch64::LD3H_IMM: 2597 case AArch64::LD3W_IMM: 2598 case AArch64::LD4B_IMM: 2599 case AArch64::LD4D_IMM: 2600 case AArch64::LD4H_IMM: 2601 case AArch64::LD4W_IMM: 2602 case AArch64::LDG: 2603 case AArch64::LDNF1B_D_IMM: 2604 case AArch64::LDNF1B_H_IMM: 2605 case AArch64::LDNF1B_IMM: 2606 case AArch64::LDNF1B_S_IMM: 2607 case AArch64::LDNF1D_IMM: 2608 case AArch64::LDNF1H_D_IMM: 2609 case AArch64::LDNF1H_IMM: 2610 case AArch64::LDNF1H_S_IMM: 2611 case AArch64::LDNF1SB_D_IMM: 2612 case AArch64::LDNF1SB_H_IMM: 2613 case AArch64::LDNF1SB_S_IMM: 2614 case AArch64::LDNF1SH_D_IMM: 2615 case AArch64::LDNF1SH_S_IMM: 2616 case AArch64::LDNF1SW_D_IMM: 2617 
case AArch64::LDNF1W_D_IMM: 2618 case AArch64::LDNF1W_IMM: 2619 case AArch64::LDNPDi: 2620 case AArch64::LDNPQi: 2621 case AArch64::LDNPSi: 2622 case AArch64::LDNPWi: 2623 case AArch64::LDNPXi: 2624 case AArch64::LDNT1B_ZRI: 2625 case AArch64::LDNT1D_ZRI: 2626 case AArch64::LDNT1H_ZRI: 2627 case AArch64::LDNT1W_ZRI: 2628 case AArch64::LDPDi: 2629 case AArch64::LDPQi: 2630 case AArch64::LDPSi: 2631 case AArch64::LDPWi: 2632 case AArch64::LDPXi: 2633 case AArch64::LDRBBpost: 2634 case AArch64::LDRBBpre: 2635 case AArch64::LDRBpost: 2636 case AArch64::LDRBpre: 2637 case AArch64::LDRDpost: 2638 case AArch64::LDRDpre: 2639 case AArch64::LDRHHpost: 2640 case AArch64::LDRHHpre: 2641 case AArch64::LDRHpost: 2642 case AArch64::LDRHpre: 2643 case AArch64::LDRQpost: 2644 case AArch64::LDRQpre: 2645 case AArch64::LDRSpost: 2646 case AArch64::LDRSpre: 2647 case AArch64::LDRWpost: 2648 case AArch64::LDRWpre: 2649 case AArch64::LDRXpost: 2650 case AArch64::LDRXpre: 2651 case AArch64::ST1B_D_IMM: 2652 case AArch64::ST1B_H_IMM: 2653 case AArch64::ST1B_IMM: 2654 case AArch64::ST1B_S_IMM: 2655 case AArch64::ST1D_IMM: 2656 case AArch64::ST1H_D_IMM: 2657 case AArch64::ST1H_IMM: 2658 case AArch64::ST1H_S_IMM: 2659 case AArch64::ST1W_D_IMM: 2660 case AArch64::ST1W_IMM: 2661 case AArch64::ST2B_IMM: 2662 case AArch64::ST2D_IMM: 2663 case AArch64::ST2H_IMM: 2664 case AArch64::ST2W_IMM: 2665 case AArch64::ST3B_IMM: 2666 case AArch64::ST3D_IMM: 2667 case AArch64::ST3H_IMM: 2668 case AArch64::ST3W_IMM: 2669 case AArch64::ST4B_IMM: 2670 case AArch64::ST4D_IMM: 2671 case AArch64::ST4H_IMM: 2672 case AArch64::ST4W_IMM: 2673 case AArch64::STGPi: 2674 case AArch64::STGPreIndex: 2675 case AArch64::STZGPreIndex: 2676 case AArch64::ST2GPreIndex: 2677 case AArch64::STZ2GPreIndex: 2678 case AArch64::STGPostIndex: 2679 case AArch64::STZGPostIndex: 2680 case AArch64::ST2GPostIndex: 2681 case AArch64::STZ2GPostIndex: 2682 case AArch64::STNPDi: 2683 case AArch64::STNPQi: 2684 case AArch64::STNPSi: 2685 case AArch64::STNPWi: 2686 case AArch64::STNPXi: 2687 case AArch64::STNT1B_ZRI: 2688 case AArch64::STNT1D_ZRI: 2689 case AArch64::STNT1H_ZRI: 2690 case AArch64::STNT1W_ZRI: 2691 case AArch64::STPDi: 2692 case AArch64::STPQi: 2693 case AArch64::STPSi: 2694 case AArch64::STPWi: 2695 case AArch64::STPXi: 2696 case AArch64::STRBBpost: 2697 case AArch64::STRBBpre: 2698 case AArch64::STRBpost: 2699 case AArch64::STRBpre: 2700 case AArch64::STRDpost: 2701 case AArch64::STRDpre: 2702 case AArch64::STRHHpost: 2703 case AArch64::STRHHpre: 2704 case AArch64::STRHpost: 2705 case AArch64::STRHpre: 2706 case AArch64::STRQpost: 2707 case AArch64::STRQpre: 2708 case AArch64::STRSpost: 2709 case AArch64::STRSpre: 2710 case AArch64::STRWpost: 2711 case AArch64::STRWpre: 2712 case AArch64::STRXpost: 2713 case AArch64::STRXpre: 2714 return 3; 2715 case AArch64::LDPDpost: 2716 case AArch64::LDPDpre: 2717 case AArch64::LDPQpost: 2718 case AArch64::LDPQpre: 2719 case AArch64::LDPSpost: 2720 case AArch64::LDPSpre: 2721 case AArch64::LDPWpost: 2722 case AArch64::LDPWpre: 2723 case AArch64::LDPXpost: 2724 case AArch64::LDPXpre: 2725 case AArch64::STGPpre: 2726 case AArch64::STGPpost: 2727 case AArch64::STPDpost: 2728 case AArch64::STPDpre: 2729 case AArch64::STPQpost: 2730 case AArch64::STPQpre: 2731 case AArch64::STPSpost: 2732 case AArch64::STPSpre: 2733 case AArch64::STPWpost: 2734 case AArch64::STPWpre: 2735 case AArch64::STPXpost: 2736 case AArch64::STPXpre: 2737 return 4; 2738 } 2739 } 2740 2741 bool AArch64InstrInfo::isPairableLdStInst(const 
MachineInstr &MI) { 2742 switch (MI.getOpcode()) { 2743 default: 2744 return false; 2745 // Scaled instructions. 2746 case AArch64::STRSui: 2747 case AArch64::STRDui: 2748 case AArch64::STRQui: 2749 case AArch64::STRXui: 2750 case AArch64::STRWui: 2751 case AArch64::LDRSui: 2752 case AArch64::LDRDui: 2753 case AArch64::LDRQui: 2754 case AArch64::LDRXui: 2755 case AArch64::LDRWui: 2756 case AArch64::LDRSWui: 2757 // Unscaled instructions. 2758 case AArch64::STURSi: 2759 case AArch64::STRSpre: 2760 case AArch64::STURDi: 2761 case AArch64::STRDpre: 2762 case AArch64::STURQi: 2763 case AArch64::STRQpre: 2764 case AArch64::STURWi: 2765 case AArch64::STRWpre: 2766 case AArch64::STURXi: 2767 case AArch64::STRXpre: 2768 case AArch64::LDURSi: 2769 case AArch64::LDRSpre: 2770 case AArch64::LDURDi: 2771 case AArch64::LDRDpre: 2772 case AArch64::LDURQi: 2773 case AArch64::LDRQpre: 2774 case AArch64::LDURWi: 2775 case AArch64::LDRWpre: 2776 case AArch64::LDURXi: 2777 case AArch64::LDRXpre: 2778 case AArch64::LDURSWi: 2779 case AArch64::LDRSWpre: 2780 // SVE instructions. 2781 case AArch64::LDR_ZXI: 2782 case AArch64::STR_ZXI: 2783 return true; 2784 } 2785 } 2786 2787 bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) { 2788 switch (MI.getOpcode()) { 2789 default: 2790 assert((!MI.isCall() || !MI.isReturn()) && 2791 "Unexpected instruction - was a new tail call opcode introduced?"); 2792 return false; 2793 case AArch64::TCRETURNdi: 2794 case AArch64::TCRETURNri: 2795 case AArch64::TCRETURNrix16x17: 2796 case AArch64::TCRETURNrix17: 2797 case AArch64::TCRETURNrinotx16: 2798 case AArch64::TCRETURNriALL: 2799 case AArch64::AUTH_TCRETURN: 2800 case AArch64::AUTH_TCRETURN_BTI: 2801 return true; 2802 } 2803 } 2804 2805 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) { 2806 switch (Opc) { 2807 default: 2808 llvm_unreachable("Opcode has no flag setting equivalent!"); 2809 // 32-bit cases: 2810 case AArch64::ADDWri: 2811 return AArch64::ADDSWri; 2812 case AArch64::ADDWrr: 2813 return AArch64::ADDSWrr; 2814 case AArch64::ADDWrs: 2815 return AArch64::ADDSWrs; 2816 case AArch64::ADDWrx: 2817 return AArch64::ADDSWrx; 2818 case AArch64::ANDWri: 2819 return AArch64::ANDSWri; 2820 case AArch64::ANDWrr: 2821 return AArch64::ANDSWrr; 2822 case AArch64::ANDWrs: 2823 return AArch64::ANDSWrs; 2824 case AArch64::BICWrr: 2825 return AArch64::BICSWrr; 2826 case AArch64::BICWrs: 2827 return AArch64::BICSWrs; 2828 case AArch64::SUBWri: 2829 return AArch64::SUBSWri; 2830 case AArch64::SUBWrr: 2831 return AArch64::SUBSWrr; 2832 case AArch64::SUBWrs: 2833 return AArch64::SUBSWrs; 2834 case AArch64::SUBWrx: 2835 return AArch64::SUBSWrx; 2836 // 64-bit cases: 2837 case AArch64::ADDXri: 2838 return AArch64::ADDSXri; 2839 case AArch64::ADDXrr: 2840 return AArch64::ADDSXrr; 2841 case AArch64::ADDXrs: 2842 return AArch64::ADDSXrs; 2843 case AArch64::ADDXrx: 2844 return AArch64::ADDSXrx; 2845 case AArch64::ANDXri: 2846 return AArch64::ANDSXri; 2847 case AArch64::ANDXrr: 2848 return AArch64::ANDSXrr; 2849 case AArch64::ANDXrs: 2850 return AArch64::ANDSXrs; 2851 case AArch64::BICXrr: 2852 return AArch64::BICSXrr; 2853 case AArch64::BICXrs: 2854 return AArch64::BICSXrs; 2855 case AArch64::SUBXri: 2856 return AArch64::SUBSXri; 2857 case AArch64::SUBXrr: 2858 return AArch64::SUBSXrr; 2859 case AArch64::SUBXrs: 2860 return AArch64::SUBSXrs; 2861 case AArch64::SUBXrx: 2862 return AArch64::SUBSXrx; 2863 // SVE instructions: 2864 case AArch64::AND_PPzPP: 2865 return AArch64::ANDS_PPzPP; 2866 case 
AArch64::BIC_PPzPP: 2867 return AArch64::BICS_PPzPP; 2868 case AArch64::EOR_PPzPP: 2869 return AArch64::EORS_PPzPP; 2870 case AArch64::NAND_PPzPP: 2871 return AArch64::NANDS_PPzPP; 2872 case AArch64::NOR_PPzPP: 2873 return AArch64::NORS_PPzPP; 2874 case AArch64::ORN_PPzPP: 2875 return AArch64::ORNS_PPzPP; 2876 case AArch64::ORR_PPzPP: 2877 return AArch64::ORRS_PPzPP; 2878 case AArch64::BRKA_PPzP: 2879 return AArch64::BRKAS_PPzP; 2880 case AArch64::BRKPA_PPzPP: 2881 return AArch64::BRKPAS_PPzPP; 2882 case AArch64::BRKB_PPzP: 2883 return AArch64::BRKBS_PPzP; 2884 case AArch64::BRKPB_PPzPP: 2885 return AArch64::BRKPBS_PPzPP; 2886 case AArch64::BRKN_PPzP: 2887 return AArch64::BRKNS_PPzP; 2888 case AArch64::RDFFR_PPz: 2889 return AArch64::RDFFRS_PPz; 2890 case AArch64::PTRUE_B: 2891 return AArch64::PTRUES_B; 2892 } 2893 } 2894 2895 // Is this a candidate for ld/st merging or pairing? For example, we don't 2896 // touch volatiles or load/stores that have a hint to avoid pair formation. 2897 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { 2898 2899 bool IsPreLdSt = isPreLdSt(MI); 2900 2901 // If this is a volatile load/store, don't mess with it. 2902 if (MI.hasOrderedMemoryRef()) 2903 return false; 2904 2905 // Make sure this is a reg/fi+imm (as opposed to an address reloc). 2906 // For Pre-inc LD/ST, the operand is shifted by one. 2907 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() || 2908 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) && 2909 "Expected a reg or frame index operand."); 2910 2911 // For Pre-indexed addressing quadword instructions, the third operand is the 2912 // immediate value. 2913 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm(); 2914 2915 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt) 2916 return false; 2917 2918 // Can't merge/pair if the instruction modifies the base register. 2919 // e.g., ldr x0, [x0] 2920 // This case will never occur with an FI base. 2921 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or 2922 // STR<S,D,Q,W,X>pre, it can be merged. 2923 // For example: 2924 // ldr q0, [x11, #32]! 2925 // ldr q1, [x11, #16] 2926 // to 2927 // ldp q0, q1, [x11, #32]! 2928 if (MI.getOperand(1).isReg() && !IsPreLdSt) { 2929 Register BaseReg = MI.getOperand(1).getReg(); 2930 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2931 if (MI.modifiesRegister(BaseReg, TRI)) 2932 return false; 2933 } 2934 2935 // Pairing SVE fills/spills is only valid for little-endian targets that 2936 // implement VLS 128. 2937 switch (MI.getOpcode()) { 2938 default: 2939 break; 2940 case AArch64::LDR_ZXI: 2941 case AArch64::STR_ZXI: 2942 if (!Subtarget.isLittleEndian() || 2943 Subtarget.getSVEVectorSizeInBits() != 128) 2944 return false; 2945 } 2946 2947 // Check if this load/store has a hint to avoid pair formation. 2948 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. 2949 if (isLdStPairSuppressed(MI)) 2950 return false; 2951 2952 // Do not pair any callee-save store/reload instructions in the 2953 // prologue/epilogue if the CFI information encoded the operations as separate 2954 // instructions, as that will cause the size of the actual prologue to mismatch 2955 // with the prologue size recorded in the Windows CFI. 
2956 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo(); 2957 bool NeedsWinCFI = MAI->usesWindowsCFI() && 2958 MI.getMF()->getFunction().needsUnwindTableEntry(); 2959 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) || 2960 MI.getFlag(MachineInstr::FrameDestroy))) 2961 return false; 2962 2963 // On some CPUs quad load/store pairs are slower than two single load/stores. 2964 if (Subtarget.isPaired128Slow()) { 2965 switch (MI.getOpcode()) { 2966 default: 2967 break; 2968 case AArch64::LDURQi: 2969 case AArch64::STURQi: 2970 case AArch64::LDRQui: 2971 case AArch64::STRQui: 2972 return false; 2973 } 2974 } 2975 2976 return true; 2977 } 2978 2979 bool AArch64InstrInfo::getMemOperandsWithOffsetWidth( 2980 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, 2981 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, 2982 const TargetRegisterInfo *TRI) const { 2983 if (!LdSt.mayLoadOrStore()) 2984 return false; 2985 2986 const MachineOperand *BaseOp; 2987 TypeSize WidthN(0, false); 2988 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable, 2989 WidthN, TRI)) 2990 return false; 2991 // The maximum vscale is 16 under AArch64, return the maximal extent for the 2992 // vector. 2993 Width = LocationSize::precise(WidthN); 2994 BaseOps.push_back(BaseOp); 2995 return true; 2996 } 2997 2998 std::optional<ExtAddrMode> 2999 AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI, 3000 const TargetRegisterInfo *TRI) const { 3001 const MachineOperand *Base; // Filled with the base operand of MI. 3002 int64_t Offset; // Filled with the offset of MI. 3003 bool OffsetIsScalable; 3004 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI)) 3005 return std::nullopt; 3006 3007 if (!Base->isReg()) 3008 return std::nullopt; 3009 ExtAddrMode AM; 3010 AM.BaseReg = Base->getReg(); 3011 AM.Displacement = Offset; 3012 AM.ScaledReg = 0; 3013 AM.Scale = 0; 3014 return AM; 3015 } 3016 3017 bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, 3018 Register Reg, 3019 const MachineInstr &AddrI, 3020 ExtAddrMode &AM) const { 3021 // Filter out instructions into which we cannot fold. 
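  // The overall shape of the fold this function validates (a sketch):
  //   add x1, x0, #8
  //   ldr x2, [x1]
  // ->
  //   ldr x2, [x0, #8]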
3022   unsigned NumBytes;
3023   int64_t OffsetScale = 1;
3024   switch (MemI.getOpcode()) {
3025   default:
3026     return false;
3027 
3028   case AArch64::LDURQi:
3029   case AArch64::STURQi:
3030     NumBytes = 16;
3031     break;
3032 
3033   case AArch64::LDURDi:
3034   case AArch64::STURDi:
3035   case AArch64::LDURXi:
3036   case AArch64::STURXi:
3037     NumBytes = 8;
3038     break;
3039 
3040   case AArch64::LDURWi:
3041   case AArch64::LDURSWi:
3042   case AArch64::STURWi:
3043     NumBytes = 4;
3044     break;
3045 
3046   case AArch64::LDURHi:
3047   case AArch64::STURHi:
3048   case AArch64::LDURHHi:
3049   case AArch64::STURHHi:
3050   case AArch64::LDURSHXi:
3051   case AArch64::LDURSHWi:
3052     NumBytes = 2;
3053     break;
3054 
3055   case AArch64::LDRBroX:
3056   case AArch64::LDRBBroX:
3057   case AArch64::LDRSBXroX:
3058   case AArch64::LDRSBWroX:
3059   case AArch64::STRBroX:
3060   case AArch64::STRBBroX:
3061   case AArch64::LDURBi:
3062   case AArch64::LDURBBi:
3063   case AArch64::LDURSBXi:
3064   case AArch64::LDURSBWi:
3065   case AArch64::STURBi:
3066   case AArch64::STURBBi:
3067   case AArch64::LDRBui:
3068   case AArch64::LDRBBui:
3069   case AArch64::LDRSBXui:
3070   case AArch64::LDRSBWui:
3071   case AArch64::STRBui:
3072   case AArch64::STRBBui:
3073     NumBytes = 1;
3074     break;
3075 
3076   case AArch64::LDRQroX:
3077   case AArch64::STRQroX:
3078   case AArch64::LDRQui:
3079   case AArch64::STRQui:
3080     NumBytes = 16;
3081     OffsetScale = 16;
3082     break;
3083 
3084   case AArch64::LDRDroX:
3085   case AArch64::STRDroX:
3086   case AArch64::LDRXroX:
3087   case AArch64::STRXroX:
3088   case AArch64::LDRDui:
3089   case AArch64::STRDui:
3090   case AArch64::LDRXui:
3091   case AArch64::STRXui:
3092     NumBytes = 8;
3093     OffsetScale = 8;
3094     break;
3095 
3096   case AArch64::LDRWroX:
3097   case AArch64::LDRSWroX:
3098   case AArch64::STRWroX:
3099   case AArch64::LDRWui:
3100   case AArch64::LDRSWui:
3101   case AArch64::STRWui:
3102     NumBytes = 4;
3103     OffsetScale = 4;
3104     break;
3105 
3106   case AArch64::LDRHroX:
3107   case AArch64::STRHroX:
3108   case AArch64::LDRHHroX:
3109   case AArch64::STRHHroX:
3110   case AArch64::LDRSHXroX:
3111   case AArch64::LDRSHWroX:
3112   case AArch64::LDRHui:
3113   case AArch64::STRHui:
3114   case AArch64::LDRHHui:
3115   case AArch64::STRHHui:
3116   case AArch64::LDRSHXui:
3117   case AArch64::LDRSHWui:
3118     NumBytes = 2;
3119     OffsetScale = 2;
3120     break;
3121   }
3122 
3123   // Check the fold operand is not the loaded/stored value.
3124   const MachineOperand &BaseRegOp = MemI.getOperand(0);
3125   if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3126     return false;
3127 
3128   // Handle memory instructions with a [Reg, Reg] addressing mode.
3129   if (MemI.getOperand(2).isReg()) {
3130     // Bail if the addressing mode already includes extension of the offset
3131     // register.
3132     if (MemI.getOperand(3).getImm())
3133       return false;
3134 
3135     // Check if we actually have a scaled offset.
3136     if (MemI.getOperand(4).getImm() == 0)
3137       OffsetScale = 1;
3138 
3139     // If the address instruction is folded into the base register, then the
3140     // addressing mode must not have a scale. Only then can we swap the base
3141     // and the scaled registers.
3142 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1) 3143 return false; 3144 3145 switch (AddrI.getOpcode()) { 3146 default: 3147 return false; 3148 3149 case AArch64::SBFMXri: 3150 // sxtw Xa, Wm 3151 // ldr Xd, [Xn, Xa, lsl #N] 3152 // -> 3153 // ldr Xd, [Xn, Wm, sxtw #N] 3154 if (AddrI.getOperand(2).getImm() != 0 || 3155 AddrI.getOperand(3).getImm() != 31) 3156 return false; 3157 3158 AM.BaseReg = MemI.getOperand(1).getReg(); 3159 if (AM.BaseReg == Reg) 3160 AM.BaseReg = MemI.getOperand(2).getReg(); 3161 AM.ScaledReg = AddrI.getOperand(1).getReg(); 3162 AM.Scale = OffsetScale; 3163 AM.Displacement = 0; 3164 AM.Form = ExtAddrMode::Formula::SExtScaledReg; 3165 return true; 3166 3167 case TargetOpcode::SUBREG_TO_REG: { 3168 // mov Wa, Wm 3169 // ldr Xd, [Xn, Xa, lsl #N] 3170 // -> 3171 // ldr Xd, [Xn, Wm, uxtw #N] 3172 3173 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG. 3174 if (AddrI.getOperand(1).getImm() != 0 || 3175 AddrI.getOperand(3).getImm() != AArch64::sub_32) 3176 return false; 3177 3178 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo(); 3179 Register OffsetReg = AddrI.getOperand(2).getReg(); 3180 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg)) 3181 return false; 3182 3183 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg); 3184 if (DefMI.getOpcode() != AArch64::ORRWrs || 3185 DefMI.getOperand(1).getReg() != AArch64::WZR || 3186 DefMI.getOperand(3).getImm() != 0) 3187 return false; 3188 3189 AM.BaseReg = MemI.getOperand(1).getReg(); 3190 if (AM.BaseReg == Reg) 3191 AM.BaseReg = MemI.getOperand(2).getReg(); 3192 AM.ScaledReg = DefMI.getOperand(2).getReg(); 3193 AM.Scale = OffsetScale; 3194 AM.Displacement = 0; 3195 AM.Form = ExtAddrMode::Formula::ZExtScaledReg; 3196 return true; 3197 } 3198 } 3199 } 3200 3201 // Handle memory instructions with a [Reg, #Imm] addressing mode. 3202 3203 // Check we are not breaking a potential conversion to an LDP. 3204 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset, 3205 int64_t NewOffset) -> bool { 3206 int64_t MinOffset, MaxOffset; 3207 switch (NumBytes) { 3208 default: 3209 return true; 3210 case 4: 3211 MinOffset = -256; 3212 MaxOffset = 252; 3213 break; 3214 case 8: 3215 MinOffset = -512; 3216 MaxOffset = 504; 3217 break; 3218 case 16: 3219 MinOffset = -1024; 3220 MaxOffset = 1008; 3221 break; 3222 } 3223 return OldOffset < MinOffset || OldOffset > MaxOffset || 3224 (NewOffset >= MinOffset && NewOffset <= MaxOffset); 3225 }; 3226 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool { 3227 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale; 3228 int64_t NewOffset = OldOffset + Disp; 3229 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0)) 3230 return false; 3231 // If the old offset would fit into an LDP, but the new offset wouldn't, 3232 // bail out. 
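    // For example (illustrative), with NumBytes == 8 the LDP window is
    // [-512, 504]; a fold that moves an offset of 496 to 520 would block a
    // later LDP formation and is therefore rejected.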
3233 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset)) 3234 return false; 3235 AM.BaseReg = AddrI.getOperand(1).getReg(); 3236 AM.ScaledReg = 0; 3237 AM.Scale = 0; 3238 AM.Displacement = NewOffset; 3239 AM.Form = ExtAddrMode::Formula::Basic; 3240 return true; 3241 }; 3242 3243 auto canFoldAddRegIntoAddrMode = 3244 [&](int64_t Scale, 3245 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool { 3246 if (MemI.getOperand(2).getImm() != 0) 3247 return false; 3248 if ((unsigned)Scale != Scale) 3249 return false; 3250 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale)) 3251 return false; 3252 AM.BaseReg = AddrI.getOperand(1).getReg(); 3253 AM.ScaledReg = AddrI.getOperand(2).getReg(); 3254 AM.Scale = Scale; 3255 AM.Displacement = 0; 3256 AM.Form = Form; 3257 return true; 3258 }; 3259 3260 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) { 3261 unsigned Opcode = MemI.getOpcode(); 3262 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) && 3263 Subtarget.isSTRQroSlow(); 3264 }; 3265 3266 int64_t Disp = 0; 3267 const bool OptSize = MemI.getMF()->getFunction().hasOptSize(); 3268 switch (AddrI.getOpcode()) { 3269 default: 3270 return false; 3271 3272 case AArch64::ADDXri: 3273 // add Xa, Xn, #N 3274 // ldr Xd, [Xa, #M] 3275 // -> 3276 // ldr Xd, [Xn, #N'+M] 3277 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm(); 3278 return canFoldAddSubImmIntoAddrMode(Disp); 3279 3280 case AArch64::SUBXri: 3281 // sub Xa, Xn, #N 3282 // ldr Xd, [Xa, #M] 3283 // -> 3284 // ldr Xd, [Xn, #N'+M] 3285 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm(); 3286 return canFoldAddSubImmIntoAddrMode(-Disp); 3287 3288 case AArch64::ADDXrs: { 3289 // add Xa, Xn, Xm, lsl #N 3290 // ldr Xd, [Xa] 3291 // -> 3292 // ldr Xd, [Xn, Xm, lsl #N] 3293 3294 // Don't fold the add if the result would be slower, unless optimising for 3295 // size. 3296 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm()); 3297 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL) 3298 return false; 3299 Shift = AArch64_AM::getShiftValue(Shift); 3300 if (!OptSize) { 3301 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14()) 3302 return false; 3303 if (avoidSlowSTRQ(MemI)) 3304 return false; 3305 } 3306 return canFoldAddRegIntoAddrMode(1ULL << Shift); 3307 } 3308 3309 case AArch64::ADDXrr: 3310 // add Xa, Xn, Xm 3311 // ldr Xd, [Xa] 3312 // -> 3313 // ldr Xd, [Xn, Xm, lsl #0] 3314 3315 // Don't fold the add if the result would be slower, unless optimising for 3316 // size. 3317 if (!OptSize && avoidSlowSTRQ(MemI)) 3318 return false; 3319 return canFoldAddRegIntoAddrMode(1); 3320 3321 case AArch64::ADDXrx: 3322 // add Xa, Xn, Wm, {s,u}xtw #N 3323 // ldr Xd, [Xa] 3324 // -> 3325 // ldr Xd, [Xn, Wm, {s,u}xtw #N] 3326 3327 // Don't fold the add if the result would be slower, unless optimising for 3328 // size. 3329 if (!OptSize && avoidSlowSTRQ(MemI)) 3330 return false; 3331 3332 // Can fold only sign-/zero-extend of a word. 3333 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm()); 3334 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm); 3335 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW) 3336 return false; 3337 3338 return canFoldAddRegIntoAddrMode( 3339 1ULL << AArch64_AM::getArithShiftValue(Imm), 3340 (Extend == AArch64_AM::SXTW) ? 
ExtAddrMode::Formula::SExtScaledReg 3341 : ExtAddrMode::Formula::ZExtScaledReg); 3342 } 3343 } 3344 3345 // Given an opcode for an instruction with a [Reg, #Imm] addressing mode, 3346 // return the opcode of an instruction performing the same operation, but using 3347 // the [Reg, Reg] addressing mode. 3348 static unsigned regOffsetOpcode(unsigned Opcode) { 3349 switch (Opcode) { 3350 default: 3351 llvm_unreachable("Address folding not implemented for instruction"); 3352 3353 case AArch64::LDURQi: 3354 case AArch64::LDRQui: 3355 return AArch64::LDRQroX; 3356 case AArch64::STURQi: 3357 case AArch64::STRQui: 3358 return AArch64::STRQroX; 3359 case AArch64::LDURDi: 3360 case AArch64::LDRDui: 3361 return AArch64::LDRDroX; 3362 case AArch64::STURDi: 3363 case AArch64::STRDui: 3364 return AArch64::STRDroX; 3365 case AArch64::LDURXi: 3366 case AArch64::LDRXui: 3367 return AArch64::LDRXroX; 3368 case AArch64::STURXi: 3369 case AArch64::STRXui: 3370 return AArch64::STRXroX; 3371 case AArch64::LDURWi: 3372 case AArch64::LDRWui: 3373 return AArch64::LDRWroX; 3374 case AArch64::LDURSWi: 3375 case AArch64::LDRSWui: 3376 return AArch64::LDRSWroX; 3377 case AArch64::STURWi: 3378 case AArch64::STRWui: 3379 return AArch64::STRWroX; 3380 case AArch64::LDURHi: 3381 case AArch64::LDRHui: 3382 return AArch64::LDRHroX; 3383 case AArch64::STURHi: 3384 case AArch64::STRHui: 3385 return AArch64::STRHroX; 3386 case AArch64::LDURHHi: 3387 case AArch64::LDRHHui: 3388 return AArch64::LDRHHroX; 3389 case AArch64::STURHHi: 3390 case AArch64::STRHHui: 3391 return AArch64::STRHHroX; 3392 case AArch64::LDURSHXi: 3393 case AArch64::LDRSHXui: 3394 return AArch64::LDRSHXroX; 3395 case AArch64::LDURSHWi: 3396 case AArch64::LDRSHWui: 3397 return AArch64::LDRSHWroX; 3398 case AArch64::LDURBi: 3399 case AArch64::LDRBui: 3400 return AArch64::LDRBroX; 3401 case AArch64::LDURBBi: 3402 case AArch64::LDRBBui: 3403 return AArch64::LDRBBroX; 3404 case AArch64::LDURSBXi: 3405 case AArch64::LDRSBXui: 3406 return AArch64::LDRSBXroX; 3407 case AArch64::LDURSBWi: 3408 case AArch64::LDRSBWui: 3409 return AArch64::LDRSBWroX; 3410 case AArch64::STURBi: 3411 case AArch64::STRBui: 3412 return AArch64::STRBroX; 3413 case AArch64::STURBBi: 3414 case AArch64::STRBBui: 3415 return AArch64::STRBBroX; 3416 } 3417 } 3418 3419 // Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return 3420 // the opcode of an instruction performing the same operation, but using the 3421 // [Reg, #Imm] addressing mode with scaled offset. 
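// For example (illustrative), LDURXi maps to LDRXui with Scale = 8, so a
// byte offset of 16 is later re-encoded as the scaled immediate 2.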
3422 unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) { 3423 switch (Opcode) { 3424 default: 3425 llvm_unreachable("Address folding not implemented for instruction"); 3426 3427 case AArch64::LDURQi: 3428 Scale = 16; 3429 return AArch64::LDRQui; 3430 case AArch64::STURQi: 3431 Scale = 16; 3432 return AArch64::STRQui; 3433 case AArch64::LDURDi: 3434 Scale = 8; 3435 return AArch64::LDRDui; 3436 case AArch64::STURDi: 3437 Scale = 8; 3438 return AArch64::STRDui; 3439 case AArch64::LDURXi: 3440 Scale = 8; 3441 return AArch64::LDRXui; 3442 case AArch64::STURXi: 3443 Scale = 8; 3444 return AArch64::STRXui; 3445 case AArch64::LDURWi: 3446 Scale = 4; 3447 return AArch64::LDRWui; 3448 case AArch64::LDURSWi: 3449 Scale = 4; 3450 return AArch64::LDRSWui; 3451 case AArch64::STURWi: 3452 Scale = 4; 3453 return AArch64::STRWui; 3454 case AArch64::LDURHi: 3455 Scale = 2; 3456 return AArch64::LDRHui; 3457 case AArch64::STURHi: 3458 Scale = 2; 3459 return AArch64::STRHui; 3460 case AArch64::LDURHHi: 3461 Scale = 2; 3462 return AArch64::LDRHHui; 3463 case AArch64::STURHHi: 3464 Scale = 2; 3465 return AArch64::STRHHui; 3466 case AArch64::LDURSHXi: 3467 Scale = 2; 3468 return AArch64::LDRSHXui; 3469 case AArch64::LDURSHWi: 3470 Scale = 2; 3471 return AArch64::LDRSHWui; 3472 case AArch64::LDURBi: 3473 Scale = 1; 3474 return AArch64::LDRBui; 3475 case AArch64::LDURBBi: 3476 Scale = 1; 3477 return AArch64::LDRBBui; 3478 case AArch64::LDURSBXi: 3479 Scale = 1; 3480 return AArch64::LDRSBXui; 3481 case AArch64::LDURSBWi: 3482 Scale = 1; 3483 return AArch64::LDRSBWui; 3484 case AArch64::STURBi: 3485 Scale = 1; 3486 return AArch64::STRBui; 3487 case AArch64::STURBBi: 3488 Scale = 1; 3489 return AArch64::STRBBui; 3490 case AArch64::LDRQui: 3491 case AArch64::STRQui: 3492 Scale = 16; 3493 return Opcode; 3494 case AArch64::LDRDui: 3495 case AArch64::STRDui: 3496 case AArch64::LDRXui: 3497 case AArch64::STRXui: 3498 Scale = 8; 3499 return Opcode; 3500 case AArch64::LDRWui: 3501 case AArch64::LDRSWui: 3502 case AArch64::STRWui: 3503 Scale = 4; 3504 return Opcode; 3505 case AArch64::LDRHui: 3506 case AArch64::STRHui: 3507 case AArch64::LDRHHui: 3508 case AArch64::STRHHui: 3509 case AArch64::LDRSHXui: 3510 case AArch64::LDRSHWui: 3511 Scale = 2; 3512 return Opcode; 3513 case AArch64::LDRBui: 3514 case AArch64::LDRBBui: 3515 case AArch64::LDRSBXui: 3516 case AArch64::LDRSBWui: 3517 case AArch64::STRBui: 3518 case AArch64::STRBBui: 3519 Scale = 1; 3520 return Opcode; 3521 } 3522 } 3523 3524 // Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return 3525 // the opcode of an instruction performing the same operation, but using the 3526 // [Reg, #Imm] addressing mode with unscaled offset. 
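// For example (illustrative), LDRXui maps back to LDURXi, and opcodes that
// are already unscaled are returned unchanged.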
3527 static unsigned unscaledOffsetOpcode(unsigned Opcode) { 3528 switch (Opcode) { 3529 default: 3530 llvm_unreachable("Address folding not implemented for instruction"); 3531 3532 case AArch64::LDURQi: 3533 case AArch64::STURQi: 3534 case AArch64::LDURDi: 3535 case AArch64::STURDi: 3536 case AArch64::LDURXi: 3537 case AArch64::STURXi: 3538 case AArch64::LDURWi: 3539 case AArch64::LDURSWi: 3540 case AArch64::STURWi: 3541 case AArch64::LDURHi: 3542 case AArch64::STURHi: 3543 case AArch64::LDURHHi: 3544 case AArch64::STURHHi: 3545 case AArch64::LDURSHXi: 3546 case AArch64::LDURSHWi: 3547 case AArch64::LDURBi: 3548 case AArch64::STURBi: 3549 case AArch64::LDURBBi: 3550 case AArch64::STURBBi: 3551 case AArch64::LDURSBWi: 3552 case AArch64::LDURSBXi: 3553 return Opcode; 3554 case AArch64::LDRQui: 3555 return AArch64::LDURQi; 3556 case AArch64::STRQui: 3557 return AArch64::STURQi; 3558 case AArch64::LDRDui: 3559 return AArch64::LDURDi; 3560 case AArch64::STRDui: 3561 return AArch64::STURDi; 3562 case AArch64::LDRXui: 3563 return AArch64::LDURXi; 3564 case AArch64::STRXui: 3565 return AArch64::STURXi; 3566 case AArch64::LDRWui: 3567 return AArch64::LDURWi; 3568 case AArch64::LDRSWui: 3569 return AArch64::LDURSWi; 3570 case AArch64::STRWui: 3571 return AArch64::STURWi; 3572 case AArch64::LDRHui: 3573 return AArch64::LDURHi; 3574 case AArch64::STRHui: 3575 return AArch64::STURHi; 3576 case AArch64::LDRHHui: 3577 return AArch64::LDURHHi; 3578 case AArch64::STRHHui: 3579 return AArch64::STURHHi; 3580 case AArch64::LDRSHXui: 3581 return AArch64::LDURSHXi; 3582 case AArch64::LDRSHWui: 3583 return AArch64::LDURSHWi; 3584 case AArch64::LDRBBui: 3585 return AArch64::LDURBBi; 3586 case AArch64::LDRBui: 3587 return AArch64::LDURBi; 3588 case AArch64::STRBBui: 3589 return AArch64::STURBBi; 3590 case AArch64::STRBui: 3591 return AArch64::STURBi; 3592 case AArch64::LDRSBWui: 3593 return AArch64::LDURSBWi; 3594 case AArch64::LDRSBXui: 3595 return AArch64::LDURSBXi; 3596 } 3597 } 3598 3599 // Given the opcode of a memory load/store instruction, return the opcode of an 3600 // instruction performing the same operation, but using 3601 // the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the 3602 // offset register.
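// For example, LDRXroX (64-bit Xm offset register) becomes LDRXroW, which
// sign- or zero-extends a 32-bit Wm offset before adding it to the base; the
// caller supplies the sxtw/uxtw choice and the shift amount as immediate
// operands (see emitLdStWithAddr below).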
3603 static unsigned offsetExtendOpcode(unsigned Opcode) { 3604 switch (Opcode) { 3605 default: 3606 llvm_unreachable("Address folding not implemented for instruction"); 3607 3608 case AArch64::LDRQroX: 3609 case AArch64::LDURQi: 3610 case AArch64::LDRQui: 3611 return AArch64::LDRQroW; 3612 case AArch64::STRQroX: 3613 case AArch64::STURQi: 3614 case AArch64::STRQui: 3615 return AArch64::STRQroW; 3616 case AArch64::LDRDroX: 3617 case AArch64::LDURDi: 3618 case AArch64::LDRDui: 3619 return AArch64::LDRDroW; 3620 case AArch64::STRDroX: 3621 case AArch64::STURDi: 3622 case AArch64::STRDui: 3623 return AArch64::STRDroW; 3624 case AArch64::LDRXroX: 3625 case AArch64::LDURXi: 3626 case AArch64::LDRXui: 3627 return AArch64::LDRXroW; 3628 case AArch64::STRXroX: 3629 case AArch64::STURXi: 3630 case AArch64::STRXui: 3631 return AArch64::STRXroW; 3632 case AArch64::LDRWroX: 3633 case AArch64::LDURWi: 3634 case AArch64::LDRWui: 3635 return AArch64::LDRWroW; 3636 case AArch64::LDRSWroX: 3637 case AArch64::LDURSWi: 3638 case AArch64::LDRSWui: 3639 return AArch64::LDRSWroW; 3640 case AArch64::STRWroX: 3641 case AArch64::STURWi: 3642 case AArch64::STRWui: 3643 return AArch64::STRWroW; 3644 case AArch64::LDRHroX: 3645 case AArch64::LDURHi: 3646 case AArch64::LDRHui: 3647 return AArch64::LDRHroW; 3648 case AArch64::STRHroX: 3649 case AArch64::STURHi: 3650 case AArch64::STRHui: 3651 return AArch64::STRHroW; 3652 case AArch64::LDRHHroX: 3653 case AArch64::LDURHHi: 3654 case AArch64::LDRHHui: 3655 return AArch64::LDRHHroW; 3656 case AArch64::STRHHroX: 3657 case AArch64::STURHHi: 3658 case AArch64::STRHHui: 3659 return AArch64::STRHHroW; 3660 case AArch64::LDRSHXroX: 3661 case AArch64::LDURSHXi: 3662 case AArch64::LDRSHXui: 3663 return AArch64::LDRSHXroW; 3664 case AArch64::LDRSHWroX: 3665 case AArch64::LDURSHWi: 3666 case AArch64::LDRSHWui: 3667 return AArch64::LDRSHWroW; 3668 case AArch64::LDRBroX: 3669 case AArch64::LDURBi: 3670 case AArch64::LDRBui: 3671 return AArch64::LDRBroW; 3672 case AArch64::LDRBBroX: 3673 case AArch64::LDURBBi: 3674 case AArch64::LDRBBui: 3675 return AArch64::LDRBBroW; 3676 case AArch64::LDRSBXroX: 3677 case AArch64::LDURSBXi: 3678 case AArch64::LDRSBXui: 3679 return AArch64::LDRSBXroW; 3680 case AArch64::LDRSBWroX: 3681 case AArch64::LDURSBWi: 3682 case AArch64::LDRSBWui: 3683 return AArch64::LDRSBWroW; 3684 case AArch64::STRBroX: 3685 case AArch64::STURBi: 3686 case AArch64::STRBui: 3687 return AArch64::STRBroW; 3688 case AArch64::STRBBroX: 3689 case AArch64::STURBBi: 3690 case AArch64::STRBBui: 3691 return AArch64::STRBBroW; 3692 } 3693 } 3694 3695 MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI, 3696 const ExtAddrMode &AM) const { 3697 3698 const DebugLoc &DL = MemI.getDebugLoc(); 3699 MachineBasicBlock &MBB = *MemI.getParent(); 3700 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo(); 3701 3702 if (AM.Form == ExtAddrMode::Formula::Basic) { 3703 if (AM.ScaledReg) { 3704 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`. 3705 unsigned Opcode = regOffsetOpcode(MemI.getOpcode()); 3706 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass); 3707 auto B = BuildMI(MBB, MemI, DL, get(Opcode)) 3708 .addReg(MemI.getOperand(0).getReg(), 3709 MemI.mayLoad() ? 
RegState::Define : 0) 3710 .addReg(AM.BaseReg) 3711 .addReg(AM.ScaledReg) 3712 .addImm(0) 3713 .addImm(AM.Scale > 1) 3714 .setMemRefs(MemI.memoperands()) 3715 .setMIFlags(MemI.getFlags()); 3716 return B.getInstr(); 3717 } 3718 3719 assert(AM.ScaledReg == 0 && AM.Scale == 0 && 3720 "Addressing mode not supported for folding"); 3721 3722 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`. 3723 unsigned Scale = 1; 3724 unsigned Opcode = MemI.getOpcode(); 3725 if (isInt<9>(AM.Displacement)) 3726 Opcode = unscaledOffsetOpcode(Opcode); 3727 else 3728 Opcode = scaledOffsetOpcode(Opcode, Scale); 3729 3730 auto B = BuildMI(MBB, MemI, DL, get(Opcode)) 3731 .addReg(MemI.getOperand(0).getReg(), 3732 MemI.mayLoad() ? RegState::Define : 0) 3733 .addReg(AM.BaseReg) 3734 .addImm(AM.Displacement / Scale) 3735 .setMemRefs(MemI.memoperands()) 3736 .setMIFlags(MemI.getFlags()); 3737 return B.getInstr(); 3738 } 3739 3740 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg || 3741 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) { 3742 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`. 3743 assert(AM.ScaledReg && !AM.Displacement && 3744 "Address offset can be a register or an immediate, but not both"); 3745 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode()); 3746 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass); 3747 // Make sure the offset register is in the correct register class. 3748 Register OffsetReg = AM.ScaledReg; 3749 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg); 3750 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) { 3751 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 3752 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg) 3753 .addReg(AM.ScaledReg, 0, AArch64::sub_32); 3754 } 3755 auto B = BuildMI(MBB, MemI, DL, get(Opcode)) 3756 .addReg(MemI.getOperand(0).getReg(), 3757 MemI.mayLoad() ? RegState::Define : 0) 3758 .addReg(AM.BaseReg) 3759 .addReg(OffsetReg) 3760 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg) 3761 .addImm(AM.Scale != 1) 3762 .setMemRefs(MemI.memoperands()) 3763 .setMIFlags(MemI.getFlags()); 3764 3765 return B.getInstr(); 3766 } 3767 3768 llvm_unreachable( 3769 "Function must not be called with an addressing mode it can't handle"); 3770 } 3771 3772 /// Return true if the opcode is a post-index ld/st instruction, which really 3773 /// loads from base+0. 
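/// For example, "ldr x0, [x1], #8" reads from [x1] with an effective offset
/// of 0 and only then increments x1 by 8.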
3774 static bool isPostIndexLdStOpcode(unsigned Opcode) { 3775 switch (Opcode) { 3776 default: 3777 return false; 3778 case AArch64::LD1Fourv16b_POST: 3779 case AArch64::LD1Fourv1d_POST: 3780 case AArch64::LD1Fourv2d_POST: 3781 case AArch64::LD1Fourv2s_POST: 3782 case AArch64::LD1Fourv4h_POST: 3783 case AArch64::LD1Fourv4s_POST: 3784 case AArch64::LD1Fourv8b_POST: 3785 case AArch64::LD1Fourv8h_POST: 3786 case AArch64::LD1Onev16b_POST: 3787 case AArch64::LD1Onev1d_POST: 3788 case AArch64::LD1Onev2d_POST: 3789 case AArch64::LD1Onev2s_POST: 3790 case AArch64::LD1Onev4h_POST: 3791 case AArch64::LD1Onev4s_POST: 3792 case AArch64::LD1Onev8b_POST: 3793 case AArch64::LD1Onev8h_POST: 3794 case AArch64::LD1Rv16b_POST: 3795 case AArch64::LD1Rv1d_POST: 3796 case AArch64::LD1Rv2d_POST: 3797 case AArch64::LD1Rv2s_POST: 3798 case AArch64::LD1Rv4h_POST: 3799 case AArch64::LD1Rv4s_POST: 3800 case AArch64::LD1Rv8b_POST: 3801 case AArch64::LD1Rv8h_POST: 3802 case AArch64::LD1Threev16b_POST: 3803 case AArch64::LD1Threev1d_POST: 3804 case AArch64::LD1Threev2d_POST: 3805 case AArch64::LD1Threev2s_POST: 3806 case AArch64::LD1Threev4h_POST: 3807 case AArch64::LD1Threev4s_POST: 3808 case AArch64::LD1Threev8b_POST: 3809 case AArch64::LD1Threev8h_POST: 3810 case AArch64::LD1Twov16b_POST: 3811 case AArch64::LD1Twov1d_POST: 3812 case AArch64::LD1Twov2d_POST: 3813 case AArch64::LD1Twov2s_POST: 3814 case AArch64::LD1Twov4h_POST: 3815 case AArch64::LD1Twov4s_POST: 3816 case AArch64::LD1Twov8b_POST: 3817 case AArch64::LD1Twov8h_POST: 3818 case AArch64::LD1i16_POST: 3819 case AArch64::LD1i32_POST: 3820 case AArch64::LD1i64_POST: 3821 case AArch64::LD1i8_POST: 3822 case AArch64::LD2Rv16b_POST: 3823 case AArch64::LD2Rv1d_POST: 3824 case AArch64::LD2Rv2d_POST: 3825 case AArch64::LD2Rv2s_POST: 3826 case AArch64::LD2Rv4h_POST: 3827 case AArch64::LD2Rv4s_POST: 3828 case AArch64::LD2Rv8b_POST: 3829 case AArch64::LD2Rv8h_POST: 3830 case AArch64::LD2Twov16b_POST: 3831 case AArch64::LD2Twov2d_POST: 3832 case AArch64::LD2Twov2s_POST: 3833 case AArch64::LD2Twov4h_POST: 3834 case AArch64::LD2Twov4s_POST: 3835 case AArch64::LD2Twov8b_POST: 3836 case AArch64::LD2Twov8h_POST: 3837 case AArch64::LD2i16_POST: 3838 case AArch64::LD2i32_POST: 3839 case AArch64::LD2i64_POST: 3840 case AArch64::LD2i8_POST: 3841 case AArch64::LD3Rv16b_POST: 3842 case AArch64::LD3Rv1d_POST: 3843 case AArch64::LD3Rv2d_POST: 3844 case AArch64::LD3Rv2s_POST: 3845 case AArch64::LD3Rv4h_POST: 3846 case AArch64::LD3Rv4s_POST: 3847 case AArch64::LD3Rv8b_POST: 3848 case AArch64::LD3Rv8h_POST: 3849 case AArch64::LD3Threev16b_POST: 3850 case AArch64::LD3Threev2d_POST: 3851 case AArch64::LD3Threev2s_POST: 3852 case AArch64::LD3Threev4h_POST: 3853 case AArch64::LD3Threev4s_POST: 3854 case AArch64::LD3Threev8b_POST: 3855 case AArch64::LD3Threev8h_POST: 3856 case AArch64::LD3i16_POST: 3857 case AArch64::LD3i32_POST: 3858 case AArch64::LD3i64_POST: 3859 case AArch64::LD3i8_POST: 3860 case AArch64::LD4Fourv16b_POST: 3861 case AArch64::LD4Fourv2d_POST: 3862 case AArch64::LD4Fourv2s_POST: 3863 case AArch64::LD4Fourv4h_POST: 3864 case AArch64::LD4Fourv4s_POST: 3865 case AArch64::LD4Fourv8b_POST: 3866 case AArch64::LD4Fourv8h_POST: 3867 case AArch64::LD4Rv16b_POST: 3868 case AArch64::LD4Rv1d_POST: 3869 case AArch64::LD4Rv2d_POST: 3870 case AArch64::LD4Rv2s_POST: 3871 case AArch64::LD4Rv4h_POST: 3872 case AArch64::LD4Rv4s_POST: 3873 case AArch64::LD4Rv8b_POST: 3874 case AArch64::LD4Rv8h_POST: 3875 case AArch64::LD4i16_POST: 3876 case AArch64::LD4i32_POST: 3877 case 
AArch64::LD4i64_POST: 3878 case AArch64::LD4i8_POST: 3879 case AArch64::LDAPRWpost: 3880 case AArch64::LDAPRXpost: 3881 case AArch64::LDIAPPWpost: 3882 case AArch64::LDIAPPXpost: 3883 case AArch64::LDPDpost: 3884 case AArch64::LDPQpost: 3885 case AArch64::LDPSWpost: 3886 case AArch64::LDPSpost: 3887 case AArch64::LDPWpost: 3888 case AArch64::LDPXpost: 3889 case AArch64::LDRBBpost: 3890 case AArch64::LDRBpost: 3891 case AArch64::LDRDpost: 3892 case AArch64::LDRHHpost: 3893 case AArch64::LDRHpost: 3894 case AArch64::LDRQpost: 3895 case AArch64::LDRSBWpost: 3896 case AArch64::LDRSBXpost: 3897 case AArch64::LDRSHWpost: 3898 case AArch64::LDRSHXpost: 3899 case AArch64::LDRSWpost: 3900 case AArch64::LDRSpost: 3901 case AArch64::LDRWpost: 3902 case AArch64::LDRXpost: 3903 case AArch64::ST1Fourv16b_POST: 3904 case AArch64::ST1Fourv1d_POST: 3905 case AArch64::ST1Fourv2d_POST: 3906 case AArch64::ST1Fourv2s_POST: 3907 case AArch64::ST1Fourv4h_POST: 3908 case AArch64::ST1Fourv4s_POST: 3909 case AArch64::ST1Fourv8b_POST: 3910 case AArch64::ST1Fourv8h_POST: 3911 case AArch64::ST1Onev16b_POST: 3912 case AArch64::ST1Onev1d_POST: 3913 case AArch64::ST1Onev2d_POST: 3914 case AArch64::ST1Onev2s_POST: 3915 case AArch64::ST1Onev4h_POST: 3916 case AArch64::ST1Onev4s_POST: 3917 case AArch64::ST1Onev8b_POST: 3918 case AArch64::ST1Onev8h_POST: 3919 case AArch64::ST1Threev16b_POST: 3920 case AArch64::ST1Threev1d_POST: 3921 case AArch64::ST1Threev2d_POST: 3922 case AArch64::ST1Threev2s_POST: 3923 case AArch64::ST1Threev4h_POST: 3924 case AArch64::ST1Threev4s_POST: 3925 case AArch64::ST1Threev8b_POST: 3926 case AArch64::ST1Threev8h_POST: 3927 case AArch64::ST1Twov16b_POST: 3928 case AArch64::ST1Twov1d_POST: 3929 case AArch64::ST1Twov2d_POST: 3930 case AArch64::ST1Twov2s_POST: 3931 case AArch64::ST1Twov4h_POST: 3932 case AArch64::ST1Twov4s_POST: 3933 case AArch64::ST1Twov8b_POST: 3934 case AArch64::ST1Twov8h_POST: 3935 case AArch64::ST1i16_POST: 3936 case AArch64::ST1i32_POST: 3937 case AArch64::ST1i64_POST: 3938 case AArch64::ST1i8_POST: 3939 case AArch64::ST2GPostIndex: 3940 case AArch64::ST2Twov16b_POST: 3941 case AArch64::ST2Twov2d_POST: 3942 case AArch64::ST2Twov2s_POST: 3943 case AArch64::ST2Twov4h_POST: 3944 case AArch64::ST2Twov4s_POST: 3945 case AArch64::ST2Twov8b_POST: 3946 case AArch64::ST2Twov8h_POST: 3947 case AArch64::ST2i16_POST: 3948 case AArch64::ST2i32_POST: 3949 case AArch64::ST2i64_POST: 3950 case AArch64::ST2i8_POST: 3951 case AArch64::ST3Threev16b_POST: 3952 case AArch64::ST3Threev2d_POST: 3953 case AArch64::ST3Threev2s_POST: 3954 case AArch64::ST3Threev4h_POST: 3955 case AArch64::ST3Threev4s_POST: 3956 case AArch64::ST3Threev8b_POST: 3957 case AArch64::ST3Threev8h_POST: 3958 case AArch64::ST3i16_POST: 3959 case AArch64::ST3i32_POST: 3960 case AArch64::ST3i64_POST: 3961 case AArch64::ST3i8_POST: 3962 case AArch64::ST4Fourv16b_POST: 3963 case AArch64::ST4Fourv2d_POST: 3964 case AArch64::ST4Fourv2s_POST: 3965 case AArch64::ST4Fourv4h_POST: 3966 case AArch64::ST4Fourv4s_POST: 3967 case AArch64::ST4Fourv8b_POST: 3968 case AArch64::ST4Fourv8h_POST: 3969 case AArch64::ST4i16_POST: 3970 case AArch64::ST4i32_POST: 3971 case AArch64::ST4i64_POST: 3972 case AArch64::ST4i8_POST: 3973 case AArch64::STGPostIndex: 3974 case AArch64::STGPpost: 3975 case AArch64::STPDpost: 3976 case AArch64::STPQpost: 3977 case AArch64::STPSpost: 3978 case AArch64::STPWpost: 3979 case AArch64::STPXpost: 3980 case AArch64::STRBBpost: 3981 case AArch64::STRBpost: 3982 case AArch64::STRDpost: 3983 case AArch64::STRHHpost: 3984 case 
AArch64::STRHpost: 3985 case AArch64::STRQpost: 3986 case AArch64::STRSpost: 3987 case AArch64::STRWpost: 3988 case AArch64::STRXpost: 3989 case AArch64::STZ2GPostIndex: 3990 case AArch64::STZGPostIndex: 3991 return true; 3992 } 3993 } 3994 3995 bool AArch64InstrInfo::getMemOperandWithOffsetWidth( 3996 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, 3997 bool &OffsetIsScalable, TypeSize &Width, 3998 const TargetRegisterInfo *TRI) const { 3999 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 4000 // Handle only loads/stores with base register followed by immediate offset. 4001 if (LdSt.getNumExplicitOperands() == 3) { 4002 // Non-paired instruction (e.g., ldr x1, [x0, #8]). 4003 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || 4004 !LdSt.getOperand(2).isImm()) 4005 return false; 4006 } else if (LdSt.getNumExplicitOperands() == 4) { 4007 // Paired instruction (e.g., ldp x1, x2, [x0, #8]). 4008 if (!LdSt.getOperand(1).isReg() || 4009 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) || 4010 !LdSt.getOperand(3).isImm()) 4011 return false; 4012 } else 4013 return false; 4014 4015 // Get the scaling factor for the instruction and set the width for the 4016 // instruction. 4017 TypeSize Scale(0U, false); 4018 int64_t Dummy1, Dummy2; 4019 4020 // If this returns false, then it's an instruction we don't want to handle. 4021 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2)) 4022 return false; 4023 4024 // Compute the offset. Offset is calculated as the immediate operand 4025 // multiplied by the scaling factor. Unscaled instructions have scaling factor 4026 // set to 1. Post-indexed instructions are a special case and report an offset of 0. 4027 if (isPostIndexLdStOpcode(LdSt.getOpcode())) { 4028 BaseOp = &LdSt.getOperand(2); 4029 Offset = 0; 4030 } else if (LdSt.getNumExplicitOperands() == 3) { 4031 BaseOp = &LdSt.getOperand(1); 4032 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue(); 4033 } else { 4034 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); 4035 BaseOp = &LdSt.getOperand(2); 4036 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue(); 4037 } 4038 OffsetIsScalable = Scale.isScalable(); 4039 4040 return BaseOp->isReg() || BaseOp->isFI(); 4041 } 4042 4043 MachineOperand & 4044 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const { 4045 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 4046 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1); 4047 assert(OfsOp.isImm() && "Offset operand wasn't immediate."); 4048 return OfsOp; 4049 } 4050 4051 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, 4052 TypeSize &Width, int64_t &MinOffset, 4053 int64_t &MaxOffset) { 4054 switch (Opcode) { 4055 // Not a memory operation or something we want to handle.
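// (For a scaled form such as LDRXui below, Scale and Width are both 8 and the
// immediate ranges over [0, 4095], i.e. byte offsets #0..#32760 in steps of 8.)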
4056 default: 4057 Scale = TypeSize::getFixed(0); 4058 Width = TypeSize::getFixed(0); 4059 MinOffset = MaxOffset = 0; 4060 return false; 4061 // LDR / STR 4062 case AArch64::LDRQui: 4063 case AArch64::STRQui: 4064 Scale = TypeSize::getFixed(16); 4065 Width = TypeSize::getFixed(16); 4066 MinOffset = 0; 4067 MaxOffset = 4095; 4068 break; 4069 case AArch64::LDRXui: 4070 case AArch64::LDRDui: 4071 case AArch64::STRXui: 4072 case AArch64::STRDui: 4073 case AArch64::PRFMui: 4074 Scale = TypeSize::getFixed(8); 4075 Width = TypeSize::getFixed(8); 4076 MinOffset = 0; 4077 MaxOffset = 4095; 4078 break; 4079 case AArch64::LDRWui: 4080 case AArch64::LDRSui: 4081 case AArch64::LDRSWui: 4082 case AArch64::STRWui: 4083 case AArch64::STRSui: 4084 Scale = TypeSize::getFixed(4); 4085 Width = TypeSize::getFixed(4); 4086 MinOffset = 0; 4087 MaxOffset = 4095; 4088 break; 4089 case AArch64::LDRHui: 4090 case AArch64::LDRHHui: 4091 case AArch64::LDRSHWui: 4092 case AArch64::LDRSHXui: 4093 case AArch64::STRHui: 4094 case AArch64::STRHHui: 4095 Scale = TypeSize::getFixed(2); 4096 Width = TypeSize::getFixed(2); 4097 MinOffset = 0; 4098 MaxOffset = 4095; 4099 break; 4100 case AArch64::LDRBui: 4101 case AArch64::LDRBBui: 4102 case AArch64::LDRSBWui: 4103 case AArch64::LDRSBXui: 4104 case AArch64::STRBui: 4105 case AArch64::STRBBui: 4106 Scale = TypeSize::getFixed(1); 4107 Width = TypeSize::getFixed(1); 4108 MinOffset = 0; 4109 MaxOffset = 4095; 4110 break; 4111 // post/pre inc 4112 case AArch64::STRQpre: 4113 case AArch64::LDRQpost: 4114 Scale = TypeSize::getFixed(1); 4115 Width = TypeSize::getFixed(16); 4116 MinOffset = -256; 4117 MaxOffset = 255; 4118 break; 4119 case AArch64::LDRDpost: 4120 case AArch64::LDRDpre: 4121 case AArch64::LDRXpost: 4122 case AArch64::LDRXpre: 4123 case AArch64::STRDpost: 4124 case AArch64::STRDpre: 4125 case AArch64::STRXpost: 4126 case AArch64::STRXpre: 4127 Scale = TypeSize::getFixed(1); 4128 Width = TypeSize::getFixed(8); 4129 MinOffset = -256; 4130 MaxOffset = 255; 4131 break; 4132 case AArch64::STRWpost: 4133 case AArch64::STRWpre: 4134 case AArch64::LDRWpost: 4135 case AArch64::LDRWpre: 4136 case AArch64::STRSpost: 4137 case AArch64::STRSpre: 4138 case AArch64::LDRSpost: 4139 case AArch64::LDRSpre: 4140 Scale = TypeSize::getFixed(1); 4141 Width = TypeSize::getFixed(4); 4142 MinOffset = -256; 4143 MaxOffset = 255; 4144 break; 4145 case AArch64::LDRHpost: 4146 case AArch64::LDRHpre: 4147 case AArch64::STRHpost: 4148 case AArch64::STRHpre: 4149 case AArch64::LDRHHpost: 4150 case AArch64::LDRHHpre: 4151 case AArch64::STRHHpost: 4152 case AArch64::STRHHpre: 4153 Scale = TypeSize::getFixed(1); 4154 Width = TypeSize::getFixed(2); 4155 MinOffset = -256; 4156 MaxOffset = 255; 4157 break; 4158 case AArch64::LDRBpost: 4159 case AArch64::LDRBpre: 4160 case AArch64::STRBpost: 4161 case AArch64::STRBpre: 4162 case AArch64::LDRBBpost: 4163 case AArch64::LDRBBpre: 4164 case AArch64::STRBBpost: 4165 case AArch64::STRBBpre: 4166 Scale = TypeSize::getFixed(1); 4167 Width = TypeSize::getFixed(1); 4168 MinOffset = -256; 4169 MaxOffset = 255; 4170 break; 4171 // Unscaled 4172 case AArch64::LDURQi: 4173 case AArch64::STURQi: 4174 Scale = TypeSize::getFixed(1); 4175 Width = TypeSize::getFixed(16); 4176 MinOffset = -256; 4177 MaxOffset = 255; 4178 break; 4179 case AArch64::LDURXi: 4180 case AArch64::LDURDi: 4181 case AArch64::LDAPURXi: 4182 case AArch64::STURXi: 4183 case AArch64::STURDi: 4184 case AArch64::STLURXi: 4185 case AArch64::PRFUMi: 4186 Scale = TypeSize::getFixed(1); 4187 Width = 
TypeSize::getFixed(8); 4188 MinOffset = -256; 4189 MaxOffset = 255; 4190 break; 4191 case AArch64::LDURWi: 4192 case AArch64::LDURSi: 4193 case AArch64::LDURSWi: 4194 case AArch64::LDAPURi: 4195 case AArch64::LDAPURSWi: 4196 case AArch64::STURWi: 4197 case AArch64::STURSi: 4198 case AArch64::STLURWi: 4199 Scale = TypeSize::getFixed(1); 4200 Width = TypeSize::getFixed(4); 4201 MinOffset = -256; 4202 MaxOffset = 255; 4203 break; 4204 case AArch64::LDURHi: 4205 case AArch64::LDURHHi: 4206 case AArch64::LDURSHXi: 4207 case AArch64::LDURSHWi: 4208 case AArch64::LDAPURHi: 4209 case AArch64::LDAPURSHWi: 4210 case AArch64::LDAPURSHXi: 4211 case AArch64::STURHi: 4212 case AArch64::STURHHi: 4213 case AArch64::STLURHi: 4214 Scale = TypeSize::getFixed(1); 4215 Width = TypeSize::getFixed(2); 4216 MinOffset = -256; 4217 MaxOffset = 255; 4218 break; 4219 case AArch64::LDURBi: 4220 case AArch64::LDURBBi: 4221 case AArch64::LDURSBXi: 4222 case AArch64::LDURSBWi: 4223 case AArch64::LDAPURBi: 4224 case AArch64::LDAPURSBWi: 4225 case AArch64::LDAPURSBXi: 4226 case AArch64::STURBi: 4227 case AArch64::STURBBi: 4228 case AArch64::STLURBi: 4229 Scale = TypeSize::getFixed(1); 4230 Width = TypeSize::getFixed(1); 4231 MinOffset = -256; 4232 MaxOffset = 255; 4233 break; 4234 // LDP / STP (including pre/post inc) 4235 case AArch64::LDPQi: 4236 case AArch64::LDNPQi: 4237 case AArch64::STPQi: 4238 case AArch64::STNPQi: 4239 case AArch64::LDPQpost: 4240 case AArch64::LDPQpre: 4241 case AArch64::STPQpost: 4242 case AArch64::STPQpre: 4243 Scale = TypeSize::getFixed(16); 4244 Width = TypeSize::getFixed(16 * 2); 4245 MinOffset = -64; 4246 MaxOffset = 63; 4247 break; 4248 case AArch64::LDPXi: 4249 case AArch64::LDPDi: 4250 case AArch64::LDNPXi: 4251 case AArch64::LDNPDi: 4252 case AArch64::STPXi: 4253 case AArch64::STPDi: 4254 case AArch64::STNPXi: 4255 case AArch64::STNPDi: 4256 case AArch64::LDPDpost: 4257 case AArch64::LDPDpre: 4258 case AArch64::LDPXpost: 4259 case AArch64::LDPXpre: 4260 case AArch64::STPDpost: 4261 case AArch64::STPDpre: 4262 case AArch64::STPXpost: 4263 case AArch64::STPXpre: 4264 Scale = TypeSize::getFixed(8); 4265 Width = TypeSize::getFixed(8 * 2); 4266 MinOffset = -64; 4267 MaxOffset = 63; 4268 break; 4269 case AArch64::LDPWi: 4270 case AArch64::LDPSi: 4271 case AArch64::LDNPWi: 4272 case AArch64::LDNPSi: 4273 case AArch64::STPWi: 4274 case AArch64::STPSi: 4275 case AArch64::STNPWi: 4276 case AArch64::STNPSi: 4277 case AArch64::LDPSpost: 4278 case AArch64::LDPSpre: 4279 case AArch64::LDPWpost: 4280 case AArch64::LDPWpre: 4281 case AArch64::STPSpost: 4282 case AArch64::STPSpre: 4283 case AArch64::STPWpost: 4284 case AArch64::STPWpre: 4285 Scale = TypeSize::getFixed(4); 4286 Width = TypeSize::getFixed(4 * 2); 4287 MinOffset = -64; 4288 MaxOffset = 63; 4289 break; 4290 case AArch64::StoreSwiftAsyncContext: 4291 // Store is an STRXui, but there might be an ADDXri in the expansion too. 4292 Scale = TypeSize::getFixed(1); 4293 Width = TypeSize::getFixed(8); 4294 MinOffset = 0; 4295 MaxOffset = 4095; 4296 break; 4297 case AArch64::ADDG: 4298 Scale = TypeSize::getFixed(16); 4299 Width = TypeSize::getFixed(0); 4300 MinOffset = 0; 4301 MaxOffset = 63; 4302 break; 4303 case AArch64::TAGPstack: 4304 Scale = TypeSize::getFixed(16); 4305 Width = TypeSize::getFixed(0); 4306 // TAGP with a negative offset turns into SUBP, which has a maximum offset 4307 // of 63 (not 64!). 
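// Both immediates are counted in 16-byte MTE tag granules, so ADDG reaches
// byte offsets [0, 1008] and TAGPstack reaches [-1008, 1008].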
4308 MinOffset = -63; 4309 MaxOffset = 63; 4310 break; 4311 case AArch64::LDG: 4312 case AArch64::STGi: 4313 case AArch64::STGPreIndex: 4314 case AArch64::STGPostIndex: 4315 case AArch64::STZGi: 4316 case AArch64::STZGPreIndex: 4317 case AArch64::STZGPostIndex: 4318 Scale = TypeSize::getFixed(16); 4319 Width = TypeSize::getFixed(16); 4320 MinOffset = -256; 4321 MaxOffset = 255; 4322 break; 4323 // SVE 4324 case AArch64::STR_ZZZZXI: 4325 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS: 4326 case AArch64::LDR_ZZZZXI: 4327 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS: 4328 Scale = TypeSize::getScalable(16); 4329 Width = TypeSize::getScalable(16 * 4); 4330 MinOffset = -256; 4331 MaxOffset = 252; 4332 break; 4333 case AArch64::STR_ZZZXI: 4334 case AArch64::LDR_ZZZXI: 4335 Scale = TypeSize::getScalable(16); 4336 Width = TypeSize::getScalable(16 * 3); 4337 MinOffset = -256; 4338 MaxOffset = 253; 4339 break; 4340 case AArch64::STR_ZZXI: 4341 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS: 4342 case AArch64::LDR_ZZXI: 4343 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS: 4344 Scale = TypeSize::getScalable(16); 4345 Width = TypeSize::getScalable(16 * 2); 4346 MinOffset = -256; 4347 MaxOffset = 254; 4348 break; 4349 case AArch64::LDR_PXI: 4350 case AArch64::STR_PXI: 4351 Scale = TypeSize::getScalable(2); 4352 Width = TypeSize::getScalable(2); 4353 MinOffset = -256; 4354 MaxOffset = 255; 4355 break; 4356 case AArch64::LDR_PPXI: 4357 case AArch64::STR_PPXI: 4358 Scale = TypeSize::getScalable(2); 4359 Width = TypeSize::getScalable(2 * 2); 4360 MinOffset = -256; 4361 MaxOffset = 254; 4362 break; 4363 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO: 4364 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO: 4365 case AArch64::LDR_ZXI: 4366 case AArch64::STR_ZXI: 4367 Scale = TypeSize::getScalable(16); 4368 Width = TypeSize::getScalable(16); 4369 MinOffset = -256; 4370 MaxOffset = 255; 4371 break; 4372 case AArch64::LD1B_IMM: 4373 case AArch64::LD1H_IMM: 4374 case AArch64::LD1W_IMM: 4375 case AArch64::LD1D_IMM: 4376 case AArch64::LDNT1B_ZRI: 4377 case AArch64::LDNT1H_ZRI: 4378 case AArch64::LDNT1W_ZRI: 4379 case AArch64::LDNT1D_ZRI: 4380 case AArch64::ST1B_IMM: 4381 case AArch64::ST1H_IMM: 4382 case AArch64::ST1W_IMM: 4383 case AArch64::ST1D_IMM: 4384 case AArch64::STNT1B_ZRI: 4385 case AArch64::STNT1H_ZRI: 4386 case AArch64::STNT1W_ZRI: 4387 case AArch64::STNT1D_ZRI: 4388 case AArch64::LDNF1B_IMM: 4389 case AArch64::LDNF1H_IMM: 4390 case AArch64::LDNF1W_IMM: 4391 case AArch64::LDNF1D_IMM: 4392 // A full vector's worth of data 4393 // Width = mbytes * elements 4394 Scale = TypeSize::getScalable(16); 4395 Width = TypeSize::getScalable(16); 4396 MinOffset = -8; 4397 MaxOffset = 7; 4398 break; 4399 case AArch64::LD2B_IMM: 4400 case AArch64::LD2H_IMM: 4401 case AArch64::LD2W_IMM: 4402 case AArch64::LD2D_IMM: 4403 case AArch64::ST2B_IMM: 4404 case AArch64::ST2H_IMM: 4405 case AArch64::ST2W_IMM: 4406 case AArch64::ST2D_IMM: 4407 Scale = TypeSize::getScalable(32); 4408 Width = TypeSize::getScalable(16 * 2); 4409 MinOffset = -8; 4410 MaxOffset = 7; 4411 break; 4412 case AArch64::LD3B_IMM: 4413 case AArch64::LD3H_IMM: 4414 case AArch64::LD3W_IMM: 4415 case AArch64::LD3D_IMM: 4416 case AArch64::ST3B_IMM: 4417 case AArch64::ST3H_IMM: 4418 case AArch64::ST3W_IMM: 4419 case AArch64::ST3D_IMM: 4420 Scale = TypeSize::getScalable(48); 4421 Width = TypeSize::getScalable(16 * 3); 4422 MinOffset = -8; 4423 MaxOffset = 7; 4424 break; 4425 case AArch64::LD4B_IMM: 4426 case AArch64::LD4H_IMM: 4427 case AArch64::LD4W_IMM: 4428 case AArch64::LD4D_IMM: 4429
case AArch64::ST4B_IMM: 4430 case AArch64::ST4H_IMM: 4431 case AArch64::ST4W_IMM: 4432 case AArch64::ST4D_IMM: 4433 Scale = TypeSize::getScalable(64); 4434 Width = TypeSize::getScalable(16 * 4); 4435 MinOffset = -8; 4436 MaxOffset = 7; 4437 break; 4438 case AArch64::LD1B_H_IMM: 4439 case AArch64::LD1SB_H_IMM: 4440 case AArch64::LD1H_S_IMM: 4441 case AArch64::LD1SH_S_IMM: 4442 case AArch64::LD1W_D_IMM: 4443 case AArch64::LD1SW_D_IMM: 4444 case AArch64::ST1B_H_IMM: 4445 case AArch64::ST1H_S_IMM: 4446 case AArch64::ST1W_D_IMM: 4447 case AArch64::LDNF1B_H_IMM: 4448 case AArch64::LDNF1SB_H_IMM: 4449 case AArch64::LDNF1H_S_IMM: 4450 case AArch64::LDNF1SH_S_IMM: 4451 case AArch64::LDNF1W_D_IMM: 4452 case AArch64::LDNF1SW_D_IMM: 4453 // A half vector's worth of data 4454 // Width = mbytes * elements 4455 Scale = TypeSize::getScalable(8); 4456 Width = TypeSize::getScalable(8); 4457 MinOffset = -8; 4458 MaxOffset = 7; 4459 break; 4460 case AArch64::LD1B_S_IMM: 4461 case AArch64::LD1SB_S_IMM: 4462 case AArch64::LD1H_D_IMM: 4463 case AArch64::LD1SH_D_IMM: 4464 case AArch64::ST1B_S_IMM: 4465 case AArch64::ST1H_D_IMM: 4466 case AArch64::LDNF1B_S_IMM: 4467 case AArch64::LDNF1SB_S_IMM: 4468 case AArch64::LDNF1H_D_IMM: 4469 case AArch64::LDNF1SH_D_IMM: 4470 // A quarter vector's worth of data 4471 // Width = mbytes * elements 4472 Scale = TypeSize::getScalable(4); 4473 Width = TypeSize::getScalable(4); 4474 MinOffset = -8; 4475 MaxOffset = 7; 4476 break; 4477 case AArch64::LD1B_D_IMM: 4478 case AArch64::LD1SB_D_IMM: 4479 case AArch64::ST1B_D_IMM: 4480 case AArch64::LDNF1B_D_IMM: 4481 case AArch64::LDNF1SB_D_IMM: 4482 // An eighth vector's worth of data 4483 // Width = mbytes * elements 4484 Scale = TypeSize::getScalable(2); 4485 Width = TypeSize::getScalable(2); 4486 MinOffset = -8; 4487 MaxOffset = 7; 4488 break; 4489 case AArch64::ST2Gi: 4490 case AArch64::ST2GPreIndex: 4491 case AArch64::ST2GPostIndex: 4492 case AArch64::STZ2Gi: 4493 case AArch64::STZ2GPreIndex: 4494 case AArch64::STZ2GPostIndex: 4495 Scale = TypeSize::getFixed(16); 4496 Width = TypeSize::getFixed(32); 4497 MinOffset = -256; 4498 MaxOffset = 255; 4499 break; 4500 case AArch64::STGPi: 4501 case AArch64::STGPpost: 4502 case AArch64::STGPpre: 4503 Scale = TypeSize::getFixed(16); 4504 Width = TypeSize::getFixed(16); 4505 MinOffset = -64; 4506 MaxOffset = 63; 4507 break; 4508 case AArch64::LD1RB_IMM: 4509 case AArch64::LD1RB_H_IMM: 4510 case AArch64::LD1RB_S_IMM: 4511 case AArch64::LD1RB_D_IMM: 4512 case AArch64::LD1RSB_H_IMM: 4513 case AArch64::LD1RSB_S_IMM: 4514 case AArch64::LD1RSB_D_IMM: 4515 Scale = TypeSize::getFixed(1); 4516 Width = TypeSize::getFixed(1); 4517 MinOffset = 0; 4518 MaxOffset = 63; 4519 break; 4520 case AArch64::LD1RH_IMM: 4521 case AArch64::LD1RH_S_IMM: 4522 case AArch64::LD1RH_D_IMM: 4523 case AArch64::LD1RSH_S_IMM: 4524 case AArch64::LD1RSH_D_IMM: 4525 Scale = TypeSize::getFixed(2); 4526 Width = TypeSize::getFixed(2); 4527 MinOffset = 0; 4528 MaxOffset = 63; 4529 break; 4530 case AArch64::LD1RW_IMM: 4531 case AArch64::LD1RW_D_IMM: 4532 case AArch64::LD1RSW_IMM: 4533 Scale = TypeSize::getFixed(4); 4534 Width = TypeSize::getFixed(4); 4535 MinOffset = 0; 4536 MaxOffset = 63; 4537 break; 4538 case AArch64::LD1RD_IMM: 4539 Scale = TypeSize::getFixed(8); 4540 Width = TypeSize::getFixed(8); 4541 MinOffset = 0; 4542 MaxOffset = 63; 4543 break; 4544 } 4545 4546 return true; 4547 } 4548 4549 // Scaling factor for scaled and unscaled load or store instructions.
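// For example, getMemScale(AArch64::LDURWi) == 4 and
// getMemScale(AArch64::STPXi) == 8; callers use the result to convert between
// byte offsets and scaled immediate operands.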
4550 int AArch64InstrInfo::getMemScale(unsigned Opc) { 4551 switch (Opc) { 4552 default: 4553 llvm_unreachable("Opcode has unknown scale!"); 4554 case AArch64::LDRBBui: 4555 case AArch64::LDURBBi: 4556 case AArch64::LDRSBWui: 4557 case AArch64::LDURSBWi: 4558 case AArch64::STRBBui: 4559 case AArch64::STURBBi: 4560 return 1; 4561 case AArch64::LDRHHui: 4562 case AArch64::LDURHHi: 4563 case AArch64::LDRSHWui: 4564 case AArch64::LDURSHWi: 4565 case AArch64::STRHHui: 4566 case AArch64::STURHHi: 4567 return 2; 4568 case AArch64::LDRSui: 4569 case AArch64::LDURSi: 4570 case AArch64::LDRSpre: 4571 case AArch64::LDRSWui: 4572 case AArch64::LDURSWi: 4573 case AArch64::LDRSWpre: 4574 case AArch64::LDRWpre: 4575 case AArch64::LDRWui: 4576 case AArch64::LDURWi: 4577 case AArch64::STRSui: 4578 case AArch64::STURSi: 4579 case AArch64::STRSpre: 4580 case AArch64::STRWui: 4581 case AArch64::STURWi: 4582 case AArch64::STRWpre: 4583 case AArch64::LDPSi: 4584 case AArch64::LDPSWi: 4585 case AArch64::LDPWi: 4586 case AArch64::STPSi: 4587 case AArch64::STPWi: 4588 return 4; 4589 case AArch64::LDRDui: 4590 case AArch64::LDURDi: 4591 case AArch64::LDRDpre: 4592 case AArch64::LDRXui: 4593 case AArch64::LDURXi: 4594 case AArch64::LDRXpre: 4595 case AArch64::STRDui: 4596 case AArch64::STURDi: 4597 case AArch64::STRDpre: 4598 case AArch64::STRXui: 4599 case AArch64::STURXi: 4600 case AArch64::STRXpre: 4601 case AArch64::LDPDi: 4602 case AArch64::LDPXi: 4603 case AArch64::STPDi: 4604 case AArch64::STPXi: 4605 return 8; 4606 case AArch64::LDRQui: 4607 case AArch64::LDURQi: 4608 case AArch64::STRQui: 4609 case AArch64::STURQi: 4610 case AArch64::STRQpre: 4611 case AArch64::LDPQi: 4612 case AArch64::LDRQpre: 4613 case AArch64::STPQi: 4614 case AArch64::STGi: 4615 case AArch64::STZGi: 4616 case AArch64::ST2Gi: 4617 case AArch64::STZ2Gi: 4618 case AArch64::STGPi: 4619 return 16; 4620 } 4621 } 4622 4623 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) { 4624 switch (MI.getOpcode()) { 4625 default: 4626 return false; 4627 case AArch64::LDRWpre: 4628 case AArch64::LDRXpre: 4629 case AArch64::LDRSWpre: 4630 case AArch64::LDRSpre: 4631 case AArch64::LDRDpre: 4632 case AArch64::LDRQpre: 4633 return true; 4634 } 4635 } 4636 4637 bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) { 4638 switch (MI.getOpcode()) { 4639 default: 4640 return false; 4641 case AArch64::STRWpre: 4642 case AArch64::STRXpre: 4643 case AArch64::STRSpre: 4644 case AArch64::STRDpre: 4645 case AArch64::STRQpre: 4646 return true; 4647 } 4648 } 4649 4650 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) { 4651 return isPreLd(MI) || isPreSt(MI); 4652 } 4653 4654 bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) { 4655 switch (MI.getOpcode()) { 4656 default: 4657 return false; 4658 case AArch64::LDPSi: 4659 case AArch64::LDPSWi: 4660 case AArch64::LDPDi: 4661 case AArch64::LDPQi: 4662 case AArch64::LDPWi: 4663 case AArch64::LDPXi: 4664 case AArch64::STPSi: 4665 case AArch64::STPDi: 4666 case AArch64::STPQi: 4667 case AArch64::STPWi: 4668 case AArch64::STPXi: 4669 case AArch64::STGPi: 4670 return true; 4671 } 4672 } 4673 4674 const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) { 4675 assert(MI.mayLoadOrStore() && "Load or store instruction expected"); 4676 unsigned Idx = 4677 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 
2 4678 : 1; 4679 return MI.getOperand(Idx); 4680 } 4681 4682 const MachineOperand & 4683 AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) { 4684 assert(MI.mayLoadOrStore() && "Load or store instruction expected"); 4685 unsigned Idx = 4686 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3 4687 : 2; 4688 return MI.getOperand(Idx); 4689 } 4690 4691 const MachineOperand & 4692 AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) { 4693 switch (MI.getOpcode()) { 4694 default: 4695 llvm_unreachable("Unexpected opcode"); 4696 case AArch64::LDRBroX: 4697 case AArch64::LDRBBroX: 4698 case AArch64::LDRSBXroX: 4699 case AArch64::LDRSBWroX: 4700 case AArch64::LDRHroX: 4701 case AArch64::LDRHHroX: 4702 case AArch64::LDRSHXroX: 4703 case AArch64::LDRSHWroX: 4704 case AArch64::LDRWroX: 4705 case AArch64::LDRSroX: 4706 case AArch64::LDRSWroX: 4707 case AArch64::LDRDroX: 4708 case AArch64::LDRXroX: 4709 case AArch64::LDRQroX: 4710 return MI.getOperand(4); 4711 } 4712 } 4713 4714 static const TargetRegisterClass *getRegClass(const MachineInstr &MI, 4715 Register Reg) { 4716 if (MI.getParent() == nullptr) 4717 return nullptr; 4718 const MachineFunction *MF = MI.getParent()->getParent(); 4719 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr; 4720 } 4721 4722 bool AArch64InstrInfo::isHForm(const MachineInstr &MI) { 4723 auto IsHFPR = [&](const MachineOperand &Op) { 4724 if (!Op.isReg()) 4725 return false; 4726 auto Reg = Op.getReg(); 4727 if (Reg.isPhysical()) 4728 return AArch64::FPR16RegClass.contains(Reg); 4729 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg); 4730 return TRC == &AArch64::FPR16RegClass || 4731 TRC == &AArch64::FPR16_loRegClass; 4732 }; 4733 return llvm::any_of(MI.operands(), IsHFPR); 4734 } 4735 4736 bool AArch64InstrInfo::isQForm(const MachineInstr &MI) { 4737 auto IsQFPR = [&](const MachineOperand &Op) { 4738 if (!Op.isReg()) 4739 return false; 4740 auto Reg = Op.getReg(); 4741 if (Reg.isPhysical()) 4742 return AArch64::FPR128RegClass.contains(Reg); 4743 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg); 4744 return TRC == &AArch64::FPR128RegClass || 4745 TRC == &AArch64::FPR128_loRegClass; 4746 }; 4747 return llvm::any_of(MI.operands(), IsQFPR); 4748 } 4749 4750 bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) { 4751 switch (MI.getOpcode()) { 4752 case AArch64::BRK: 4753 case AArch64::HLT: 4754 case AArch64::PACIASP: 4755 case AArch64::PACIBSP: 4756 // Implicit BTI behavior. 4757 return true; 4758 case AArch64::PAUTH_PROLOGUE: 4759 // PAUTH_PROLOGUE expands to PACI(A|B)SP. 4760 return true; 4761 case AArch64::HINT: { 4762 unsigned Imm = MI.getOperand(0).getImm(); 4763 // Explicit BTI instruction. 4764 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38) 4765 return true; 4766 // PACI(A|B)SP instructions. 
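// (HINT #25 and #27 are the encodings of PACIASP and PACIBSP, which behave as
// implicit branch targets just like BTI.)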
4767 if (Imm == 25 || Imm == 27) 4768 return true; 4769 return false; 4770 } 4771 default: 4772 return false; 4773 } 4774 } 4775 4776 bool AArch64InstrInfo::isFpOrNEON(Register Reg) { 4777 if (Reg == 0) 4778 return false; 4779 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON"); 4780 return AArch64::FPR128RegClass.contains(Reg) || 4781 AArch64::FPR64RegClass.contains(Reg) || 4782 AArch64::FPR32RegClass.contains(Reg) || 4783 AArch64::FPR16RegClass.contains(Reg) || 4784 AArch64::FPR8RegClass.contains(Reg); 4785 } 4786 4787 bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) { 4788 auto IsFPR = [&](const MachineOperand &Op) { 4789 if (!Op.isReg()) 4790 return false; 4791 auto Reg = Op.getReg(); 4792 if (Reg.isPhysical()) 4793 return isFpOrNEON(Reg); 4794 4795 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg); 4796 return TRC == &AArch64::FPR128RegClass || 4797 TRC == &AArch64::FPR128_loRegClass || 4798 TRC == &AArch64::FPR64RegClass || 4799 TRC == &AArch64::FPR64_loRegClass || 4800 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass || 4801 TRC == &AArch64::FPR8RegClass; 4802 }; 4803 return llvm::any_of(MI.operands(), IsFPR); 4804 } 4805 4806 // Scale the unscaled offsets. Returns false if the unscaled offset can't be 4807 // scaled. 4808 static bool scaleOffset(unsigned Opc, int64_t &Offset) { 4809 int Scale = AArch64InstrInfo::getMemScale(Opc); 4810 4811 // If the byte-offset isn't a multiple of the stride, we can't scale this 4812 // offset. 4813 if (Offset % Scale != 0) 4814 return false; 4815 4816 // Convert the byte-offset used by unscaled into an "element" offset used 4817 // by the scaled pair load/store instructions. 4818 Offset /= Scale; 4819 return true; 4820 } 4821 4822 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { 4823 if (FirstOpc == SecondOpc) 4824 return true; 4825 // We can also pair sign-ext and zero-ext instructions. 4826 switch (FirstOpc) { 4827 default: 4828 return false; 4829 case AArch64::STRSui: 4830 case AArch64::STURSi: 4831 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi; 4832 case AArch64::STRDui: 4833 case AArch64::STURDi: 4834 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi; 4835 case AArch64::STRQui: 4836 case AArch64::STURQi: 4837 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi; 4838 case AArch64::STRWui: 4839 case AArch64::STURWi: 4840 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi; 4841 case AArch64::STRXui: 4842 case AArch64::STURXi: 4843 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi; 4844 case AArch64::LDRSui: 4845 case AArch64::LDURSi: 4846 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi; 4847 case AArch64::LDRDui: 4848 case AArch64::LDURDi: 4849 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi; 4850 case AArch64::LDRQui: 4851 case AArch64::LDURQi: 4852 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi; 4853 case AArch64::LDRWui: 4854 case AArch64::LDURWi: 4855 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; 4856 case AArch64::LDRSWui: 4857 case AArch64::LDURSWi: 4858 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi; 4859 case AArch64::LDRXui: 4860 case AArch64::LDURXi: 4861 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi; 4862 } 4863 // These instructions can't be paired based on their opcodes. 
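// For example, an STRXui can pair only with another STRXui or STURXi; the one
// asymmetric case above is W/SW loads, where zero- and sign-extending loads
// may be paired with each other.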
4864 return false; 4865 } 4866 4867 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, 4868 int64_t Offset1, unsigned Opcode1, int FI2, 4869 int64_t Offset2, unsigned Opcode2) { 4870 // Accesses through fixed stack object frame indices may access a different 4871 // fixed stack slot. Check that the object offsets + offsets match. 4872 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) { 4873 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1); 4874 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2); 4875 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered."); 4876 // Convert to scaled object offsets. 4877 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1); 4878 if (ObjectOffset1 % Scale1 != 0) 4879 return false; 4880 ObjectOffset1 /= Scale1; 4881 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2); 4882 if (ObjectOffset2 % Scale2 != 0) 4883 return false; 4884 ObjectOffset2 /= Scale2; 4885 ObjectOffset1 += Offset1; 4886 ObjectOffset2 += Offset2; 4887 return ObjectOffset1 + 1 == ObjectOffset2; 4888 } 4889 4890 return FI1 == FI2; 4891 } 4892 4893 /// Detect opportunities for ldp/stp formation. 4894 /// 4895 /// Only called for LdSt for which getMemOperandWithOffset returns true. 4896 bool AArch64InstrInfo::shouldClusterMemOps( 4897 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1, 4898 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2, 4899 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize, 4900 unsigned NumBytes) const { 4901 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); 4902 const MachineOperand &BaseOp1 = *BaseOps1.front(); 4903 const MachineOperand &BaseOp2 = *BaseOps2.front(); 4904 const MachineInstr &FirstLdSt = *BaseOp1.getParent(); 4905 const MachineInstr &SecondLdSt = *BaseOp2.getParent(); 4906 if (BaseOp1.getType() != BaseOp2.getType()) 4907 return false; 4908 4909 assert((BaseOp1.isReg() || BaseOp1.isFI()) && 4910 "Only base registers and frame indices are supported."); 4911 4912 // Check for both base regs and base FI. 4913 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) 4914 return false; 4915 4916 // Only cluster up to a single pair. 4917 if (ClusterSize > 2) 4918 return false; 4919 4920 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt)) 4921 return false; 4922 4923 // Can we pair these instructions based on their opcodes? 4924 unsigned FirstOpc = FirstLdSt.getOpcode(); 4925 unsigned SecondOpc = SecondLdSt.getOpcode(); 4926 if (!canPairLdStOpc(FirstOpc, SecondOpc)) 4927 return false; 4928 4929 // Can't merge volatiles or load/stores that have a hint to avoid pair 4930 // formation, for example. 4931 if (!isCandidateToMergeOrPair(FirstLdSt) || 4932 !isCandidateToMergeOrPair(SecondLdSt)) 4933 return false; 4934 4935 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate. 4936 int64_t Offset1 = FirstLdSt.getOperand(2).getImm(); 4937 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1)) 4938 return false; 4939 4940 int64_t Offset2 = SecondLdSt.getOperand(2).getImm(); 4941 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2)) 4942 return false; 4943 4944 // Pairwise instructions have a 7-bit signed offset field. 4945 if (Offset1 > 63 || Offset1 < -64) 4946 return false; 4947 4948 // The caller should already have ordered First/SecondLdSt by offset. 
// Note: except for non-equal frame index bases. 4950 if (BaseOp1.isFI()) { 4951 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) && 4952 "Caller should have ordered offsets."); 4953 4954 const MachineFrameInfo &MFI = 4955 FirstLdSt.getParent()->getParent()->getFrameInfo(); 4956 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc, 4957 BaseOp2.getIndex(), Offset2, SecondOpc); 4958 } 4959 4960 assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); 4961 4962 return Offset1 + 1 == Offset2; 4963 } 4964 4965 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, 4966 MCRegister Reg, unsigned SubIdx, 4967 unsigned State, 4968 const TargetRegisterInfo *TRI) { 4969 if (!SubIdx) 4970 return MIB.addReg(Reg, State); 4971 4972 if (Reg.isPhysical()) 4973 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 4974 return MIB.addReg(Reg, State, SubIdx); 4975 } 4976 4977 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, 4978 unsigned NumRegs) { 4979 // We really want the positive remainder mod 32 here; that happens to be 4980 // easily obtainable with a mask. 4981 return ((DestReg - SrcReg) & 0x1f) < NumRegs; 4982 } 4983 4984 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, 4985 MachineBasicBlock::iterator I, 4986 const DebugLoc &DL, MCRegister DestReg, 4987 MCRegister SrcReg, bool KillSrc, 4988 unsigned Opcode, 4989 ArrayRef<unsigned> Indices) const { 4990 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); 4991 const TargetRegisterInfo *TRI = &getRegisterInfo(); 4992 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 4993 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 4994 unsigned NumRegs = Indices.size(); 4995 4996 int SubReg = 0, End = NumRegs, Incr = 1; 4997 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { 4998 SubReg = NumRegs - 1; 4999 End = -1; 5000 Incr = -1; 5001 } 5002 5003 for (; SubReg != End; SubReg += Incr) { 5004 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 5005 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 5006 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); 5007 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 5008 } 5009 } 5010 5011 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB, 5012 MachineBasicBlock::iterator I, 5013 const DebugLoc &DL, MCRegister DestReg, 5014 MCRegister SrcReg, bool KillSrc, 5015 unsigned Opcode, unsigned ZeroReg, 5016 llvm::ArrayRef<unsigned> Indices) const { 5017 const TargetRegisterInfo *TRI = &getRegisterInfo(); 5018 unsigned NumRegs = Indices.size(); 5019 5020 #ifndef NDEBUG 5021 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 5022 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 5023 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 && 5024 "GPR reg sequences should not be able to overlap"); 5025 #endif 5026 5027 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) { 5028 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 5029 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 5030 MIB.addReg(ZeroReg); 5031 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 5032 MIB.addImm(0); 5033 } 5034 } 5035 5036 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 5037 MachineBasicBlock::iterator I, 5038 const DebugLoc &DL, Register DestReg, 5039 Register SrcReg, bool KillSrc, 5040 bool RenamableDest, 5041 bool RenamableSrc) const { 5042 if
(AArch64::GPR32spRegClass.contains(DestReg) && 5043 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { 5044 const TargetRegisterInfo *TRI = &getRegisterInfo(); 5045 5046 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { 5047 // If either operand is WSP, expand to ADD #0. 5048 if (Subtarget.hasZeroCycleRegMoveGPR64() && 5049 !Subtarget.hasZeroCycleRegMoveGPR32()) { 5050 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 5051 MCRegister DestRegX = TRI->getMatchingSuperReg( 5052 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 5053 MCRegister SrcRegX = TRI->getMatchingSuperReg( 5054 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 5055 // This instruction is reading and writing X registers. This may upset 5056 // the register scavenger and machine verifier, so we need to indicate 5057 // that we are reading an undefined value from SrcRegX, but a proper 5058 // value from SrcReg. 5059 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) 5060 .addReg(SrcRegX, RegState::Undef) 5061 .addImm(0) 5062 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 5063 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 5064 } else { 5065 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) 5066 .addReg(SrcReg, getKillRegState(KillSrc)) 5067 .addImm(0) 5068 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 5069 } 5070 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) { 5071 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) 5072 .addImm(0) 5073 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 5074 } else { 5075 if (Subtarget.hasZeroCycleRegMoveGPR64() && 5076 !Subtarget.hasZeroCycleRegMoveGPR32()) { 5077 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. 5078 MCRegister DestRegX = TRI->getMatchingSuperReg( 5079 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 5080 MCRegister SrcRegX = TRI->getMatchingSuperReg( 5081 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 5082 // This instruction is reading and writing X registers. This may upset 5083 // the register scavenger and machine verifier, so we need to indicate 5084 // that we are reading an undefined value from SrcRegX, but a proper 5085 // value from SrcReg. 5086 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) 5087 .addReg(AArch64::XZR) 5088 .addReg(SrcRegX, RegState::Undef) 5089 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 5090 } else { 5091 // Otherwise, expand to ORR WZR. 5092 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) 5093 .addReg(AArch64::WZR) 5094 .addReg(SrcReg, getKillRegState(KillSrc)); 5095 } 5096 } 5097 return; 5098 } 5099 5100 // Copy a Predicate register by ORRing with itself. 5101 if (AArch64::PPRRegClass.contains(DestReg) && 5102 AArch64::PPRRegClass.contains(SrcReg)) { 5103 assert(Subtarget.isSVEorStreamingSVEAvailable() && 5104 "Unexpected SVE register."); 5105 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg) 5106 .addReg(SrcReg) // Pg 5107 .addReg(SrcReg) 5108 .addReg(SrcReg, getKillRegState(KillSrc)); 5109 return; 5110 } 5111 5112 // Copy a predicate-as-counter register by ORRing with itself as if it 5113 // were a regular predicate (mask) register. 5114 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg); 5115 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg); 5116 if (DestIsPNR || SrcIsPNR) { 5117 auto ToPPR = [](MCRegister R) -> MCRegister { 5118 return (R - AArch64::PN0) + AArch64::P0; 5119 }; 5120 MCRegister PPRSrcReg = SrcIsPNR ? 
ToPPR(SrcReg) : SrcReg.asMCReg(); 5121 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg(); 5122 5123 if (PPRSrcReg != PPRDestReg) { 5124 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg) 5125 .addReg(PPRSrcReg) // Pg 5126 .addReg(PPRSrcReg) 5127 .addReg(PPRSrcReg, getKillRegState(KillSrc)); 5128 if (DestIsPNR) 5129 NewMI.addDef(DestReg, RegState::Implicit); 5130 } 5131 return; 5132 } 5133 5134 // Copy a Z register by ORRing with itself. 5135 if (AArch64::ZPRRegClass.contains(DestReg) && 5136 AArch64::ZPRRegClass.contains(SrcReg)) { 5137 assert(Subtarget.isSVEorStreamingSVEAvailable() && 5138 "Unexpected SVE register."); 5139 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg) 5140 .addReg(SrcReg) 5141 .addReg(SrcReg, getKillRegState(KillSrc)); 5142 return; 5143 } 5144 5145 // Copy a Z register pair by copying the individual sub-registers. 5146 if ((AArch64::ZPR2RegClass.contains(DestReg) || 5147 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) && 5148 (AArch64::ZPR2RegClass.contains(SrcReg) || 5149 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) { 5150 assert(Subtarget.isSVEorStreamingSVEAvailable() && 5151 "Unexpected SVE register."); 5152 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1}; 5153 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, 5154 Indices); 5155 return; 5156 } 5157 5158 // Copy a Z register triple by copying the individual sub-registers. 5159 if (AArch64::ZPR3RegClass.contains(DestReg) && 5160 AArch64::ZPR3RegClass.contains(SrcReg)) { 5161 assert(Subtarget.isSVEorStreamingSVEAvailable() && 5162 "Unexpected SVE register."); 5163 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, 5164 AArch64::zsub2}; 5165 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, 5166 Indices); 5167 return; 5168 } 5169 5170 // Copy a Z register quad by copying the individual sub-registers. 5171 if ((AArch64::ZPR4RegClass.contains(DestReg) || 5172 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) && 5173 (AArch64::ZPR4RegClass.contains(SrcReg) || 5174 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) { 5175 assert(Subtarget.isSVEorStreamingSVEAvailable() && 5176 "Unexpected SVE register."); 5177 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, 5178 AArch64::zsub2, AArch64::zsub3}; 5179 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, 5180 Indices); 5181 return; 5182 } 5183 5184 if (AArch64::GPR64spRegClass.contains(DestReg) && 5185 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { 5186 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { 5187 // If either operand is SP, expand to ADD #0. 5188 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) 5189 .addReg(SrcReg, getKillRegState(KillSrc)) 5190 .addImm(0) 5191 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 5192 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) { 5193 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg) 5194 .addImm(0) 5195 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 5196 } else { 5197 // Otherwise, expand to ORR XZR. 5198 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) 5199 .addReg(AArch64::XZR) 5200 .addReg(SrcReg, getKillRegState(KillSrc)); 5201 } 5202 return; 5203 } 5204 5205 // Copy a DDDD register quad by copying the individual sub-registers. 
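// copyPhysRegTuple emits one ORRv8i8 per dsub0..dsub3 sub-register and walks
// the tuple backwards when source and destination overlap, so that no source
// sub-register is clobbered before it is read (see
// forwardCopyWillClobberTuple).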
5206 if (AArch64::DDDDRegClass.contains(DestReg) && 5207 AArch64::DDDDRegClass.contains(SrcReg)) { 5208 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 5209 AArch64::dsub2, AArch64::dsub3}; 5210 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 5211 Indices); 5212 return; 5213 } 5214 5215 // Copy a DDD register triple by copying the individual sub-registers. 5216 if (AArch64::DDDRegClass.contains(DestReg) && 5217 AArch64::DDDRegClass.contains(SrcReg)) { 5218 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 5219 AArch64::dsub2}; 5220 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 5221 Indices); 5222 return; 5223 } 5224 5225 // Copy a DD register pair by copying the individual sub-registers. 5226 if (AArch64::DDRegClass.contains(DestReg) && 5227 AArch64::DDRegClass.contains(SrcReg)) { 5228 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1}; 5229 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 5230 Indices); 5231 return; 5232 } 5233 5234 // Copy a QQQQ register quad by copying the individual sub-registers. 5235 if (AArch64::QQQQRegClass.contains(DestReg) && 5236 AArch64::QQQQRegClass.contains(SrcReg)) { 5237 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 5238 AArch64::qsub2, AArch64::qsub3}; 5239 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 5240 Indices); 5241 return; 5242 } 5243 5244 // Copy a QQQ register triple by copying the individual sub-registers. 5245 if (AArch64::QQQRegClass.contains(DestReg) && 5246 AArch64::QQQRegClass.contains(SrcReg)) { 5247 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 5248 AArch64::qsub2}; 5249 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 5250 Indices); 5251 return; 5252 } 5253 5254 // Copy a QQ register pair by copying the individual sub-registers. 
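// As with the D-register tuples above, but using qsub0/qsub1 and 128-bit
// ORRv16i8 copies.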
  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
      AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
                    AArch64::XZR, Indices);
    return;
  }

  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
      AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
                    AArch64::WZR, Indices);
    return;
  }

  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.isSVEorStreamingSVEAvailable() &&
        !Subtarget.isNeonAvailable())
      BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
          .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
          .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
          .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
    else if (Subtarget.isNeonAvailable())
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    else {
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }

  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasZeroCycleRegMoveFPR64() &&
        !Subtarget.hasZeroCycleRegMoveFPR32()) {
      const TargetRegisterInfo *TRI = &getRegisterInfo();
      MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
                                                     &AArch64::FPR64RegClass);
      MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
                                                    &AArch64::FPR64RegClass);
      // This instruction is reading and writing D registers. This may upset
      // the register scavenger and machine verifier, so we need to indicate
      // that we are reading an undefined value from SrcRegD, but a proper
      // value from SrcReg.
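      // For example (illustrative), an FPR32 copy of s1 into s0 on such a
      // subtarget is emitted as:
      //   $d0 = FMOVDr undef $d1, implicit $s1
      // i.e. "fmov d0, d1", with the real dependency carried by the
      // implicit use of s1.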
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
          .addReg(SrcRegD, RegState::Undef)
          .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    if (Subtarget.hasZeroCycleRegMoveFPR64() &&
        !Subtarget.hasZeroCycleRegMoveFPR32()) {
      const TargetRegisterInfo *TRI = &getRegisterInfo();
      MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
                                                     &AArch64::FPR64RegClass);
      MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
                                                    &AArch64::FPR64RegClass);
      // This instruction is reading and writing D registers. This may upset
      // the register scavenger and machine verifier, so we need to indicate
      // that we are reading an undefined value from SrcRegD, but a proper
      // value from SrcReg.
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
          .addReg(SrcRegD, RegState::Undef)
          .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    if (Subtarget.hasZeroCycleRegMoveFPR64() &&
        !Subtarget.hasZeroCycleRegMoveFPR32()) {
      const TargetRegisterInfo *TRI = &getRegisterInfo();
      MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
                                                     &AArch64::FPR64RegClass);
      MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
                                                    &AArch64::FPR64RegClass);
      // This instruction is reading and writing D registers. This may upset
      // the register scavenger and machine verifier, so we need to indicate
      // that we are reading an undefined value from SrcRegD, but a proper
      // value from SrcReg.
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
          .addReg(SrcRegD, RegState::Undef)
          .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copies between GPR64 and FPR64.
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::GPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
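  // The cross-bank copies above and below lower to plain FMOVs, e.g.
  // (illustrative) "fmov d0, x1" for GPR64->FPR64 and "fmov w0, s1" for
  // FPR32->GPR32.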
  // Copies between GPR32 and FPR32.
  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::GPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

#ifndef NDEBUG
  const TargetRegisterInfo &TRI = getRegisterInfo();
  errs() << TRI.getRegAsmName(DestReg) << " = COPY "
         << TRI.getRegAsmName(SrcReg) << "\n";
#endif
  llvm_unreachable("unimplemented reg-to-reg copy");
}

static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
                                    MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsertBefore,
                                    const MCInstrDesc &MCID,
                                    Register SrcReg, bool IsKill,
                                    unsigned SubIdx0, unsigned SubIdx1, int FI,
                                    MachineMemOperand *MMO) {
  Register SrcReg0 = SrcReg;
  Register SrcReg1 = SrcReg;
  if (SrcReg.isPhysical()) {
    SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
    SubIdx0 = 0;
    SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
    SubIdx1 = 0;
  }
  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
      .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
      .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
}

void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MBBI,
                                           Register SrcReg, bool isKill, int FI,
                                           const TargetRegisterClass *RC,
                                           const TargetRegisterInfo *TRI,
                                           Register VReg,
                                           MachineInstr::MIFlag Flags) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                              MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
  unsigned Opc = 0;
  bool Offset = true;
  MCRegister PNRReg = MCRegister::NoRegister;
  unsigned StackID = TargetStackID::Default;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2: {
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
             AArch64::PPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register store without SVE store instructions");
      Opc = AArch64::STR_PXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  }
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (SrcReg.isVirtual())
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STR_PPXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (SrcReg.isVirtual())
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRDui;
    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
                              get(AArch64::STPWi), SrcReg, isKill,
                              AArch64::sube32, AArch64::subo32, FI, MMO);
      return;
    }
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
                              get(AArch64::STPXi), SrcReg, isKill,
                              AArch64::sube64, AArch64::subo64, FI, MMO);
      return;
    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register store without SVE store instructions");
      Opc = AArch64::STR_ZXI;
      StackID = TargetStackID::ScalableVector;
    } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected predicate store without SVE store instructions");
      Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register store without SVE store instructions");
      Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
      StackID = TargetStackID::ScalableVector;
    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register store without SVE store instructions");
      Opc = AArch64::STR_ZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register store without SVE store instructions");
      Opc = AArch64::STR_ZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register store without SVE store instructions");
      Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
      StackID = TargetStackID::ScalableVector;
    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register store without SVE store instructions");
      Opc = AArch64::STR_ZZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  }
  assert(Opc && "Unknown register class");
  MFI.setStackID(FI, StackID);

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  if (PNRReg.isValid())
    MI.addDef(PNRReg, RegState::Implicit);
  MI.addMemOperand(MMO);
}
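// For example (illustrative MIR), spilling a killed GPR64 vreg %0 to frame
// index 0 with the function above produces:
//   STRXui killed %0, %stack.0, 0 :: (store (s64) into %stack.0)
// while a ZPR pair spill selects STR_ZZXI and marks the slot as a
// scalable-vector stack object.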
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator InsertBefore,
                                     const MCInstrDesc &MCID,
                                     Register DestReg, unsigned SubIdx0,
                                     unsigned SubIdx1, int FI,
                                     MachineMemOperand *MMO) {
  Register DestReg0 = DestReg;
  Register DestReg1 = DestReg;
  bool IsUndef = true;
  if (DestReg.isPhysical()) {
    DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
    SubIdx0 = 0;
    DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
    SubIdx1 = 0;
    IsUndef = false;
  }
  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
      .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
      .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
}

void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
    Register VReg, MachineInstr::MIFlag Flags) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                              MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

  unsigned Opc = 0;
  bool Offset = true;
  unsigned StackID = TargetStackID::Default;
  Register PNRReg = MCRegister::NoRegister;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2: {
    bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
      if (IsPNR)
        PNRReg = DestReg;
      Opc = AArch64::LDR_PXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  }
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (DestReg.isVirtual())
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDR_PPXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (DestReg.isVirtual())
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRDui;
    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
                               get(AArch64::LDPWi), DestReg, AArch64::sube32,
                               AArch64::subo32, FI, MMO);
      return;
    }
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
                               get(AArch64::LDPXi), DestReg, AArch64::sube64,
                               AArch64::subo64, FI, MMO);
      return;
    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
      Opc = AArch64::LDR_ZXI;
      StackID = TargetStackID::ScalableVector;
    } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected predicate load without SVE load instructions");
      Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
      Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
      StackID = TargetStackID::ScalableVector;
    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
      Opc = AArch64::LDR_ZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
      Opc = AArch64::LDR_ZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
      Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
      StackID = TargetStackID::ScalableVector;
    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
             "Unexpected register load without SVE load instructions");
      Opc = AArch64::LDR_ZZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  }

  assert(Opc && "Unknown register class");
  MFI.setStackID(FI, StackID);

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  if (PNRReg.isValid() && !PNRReg.isVirtual())
    MI.addDef(PNRReg, RegState::Implicit);
  MI.addMemOperand(MMO);
}

bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
                                           const MachineInstr &UseMI,
                                           const TargetRegisterInfo *TRI) {
  return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
                                         UseMI.getIterator()),
                [TRI](const MachineInstr &I) {
                  return I.modifiesRegister(AArch64::NZCV, TRI) ||
                         I.readsRegister(AArch64::NZCV, TRI);
                });
}

void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
    const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
  // The smallest scalable elements supported by scaled SVE addressing
  // modes are predicates, which are 2 scalable bytes in size. So the scalable
  // byte offset must always be a multiple of 2.
  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");

  // VGSized offsets are divided by '2', because the VG register is the
  // number of 64bit granules as opposed to 128bit vector chunks, which is
  // how the 'n' in e.g. MVT::nxv1i8 is modelled.
  // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
  // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
  ByteSized = Offset.getFixed();
  VGSized = Offset.getScalable() / 2;
}
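// Worked example for the function above (illustrative):
// StackOffset{Fixed = 16, Scalable = 32} denotes 16 + 32 * n bytes, where
// n = vscale. With VG = 2 * n this equals 16 + 16 * VG, so the function
// returns ByteSized = 16 and VGSized = 16.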
/// Returns the offset in parts to which this frame offset can be
/// decomposed for the purpose of describing a frame offset.
/// For non-scalable offsets this is simply its byte size.
void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
    const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
    int64_t &NumDataVectors) {
  // The smallest scalable elements supported by scaled SVE addressing
  // modes are predicates, which are 2 scalable bytes in size. So the scalable
  // byte offset must always be a multiple of 2.
  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");

  NumBytes = Offset.getFixed();
  NumDataVectors = 0;
  NumPredicateVectors = Offset.getScalable() / 2;
  // This method is used to get the offsets to adjust the frame offset.
  // If the function requires ADDPL to be used and needs more than two ADDPL
  // instructions, part of the offset is folded into NumDataVectors so that it
  // uses ADDVL for part of it, reducing the number of ADDPL instructions.
  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
      NumPredicateVectors > 62) {
    NumDataVectors = NumPredicateVectors / 8;
    NumPredicateVectors -= NumDataVectors * 8;
  }
}
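// Worked example for the function above (illustrative): a scalable offset of
// 144 bytes gives NumPredicateVectors = 72; since 72 % 8 == 0 it is folded
// entirely into NumDataVectors = 9 (a single ADDVL), leaving
// NumPredicateVectors = 0. A scalable offset of 34 gives
// NumPredicateVectors = 17, which stays a single ADDPL because 17 lies in
// [-64, 62] and is not a multiple of 8.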
// Convenience function to create a DWARF expression for
//   Expr + NumBytes + NumVGScaledBytes * AArch64::VG
static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
                                     int NumVGScaledBytes, unsigned VG,
                                     llvm::raw_string_ostream &Comment) {
  uint8_t buffer[16];

  if (NumBytes) {
    Expr.push_back(dwarf::DW_OP_consts);
    Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
    Expr.push_back((uint8_t)dwarf::DW_OP_plus);
    Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
  }

  if (NumVGScaledBytes) {
    Expr.push_back((uint8_t)dwarf::DW_OP_consts);
    Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));

    Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
    Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
    Expr.push_back(0);

    Expr.push_back((uint8_t)dwarf::DW_OP_mul);
    Expr.push_back((uint8_t)dwarf::DW_OP_plus);

    Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
            << std::abs(NumVGScaledBytes) << " * VG";
  }
}

// Creates an MCCFIInstruction:
//   { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
                                               unsigned Reg,
                                               const StackOffset &Offset) {
  int64_t NumBytes, NumVGScaledBytes;
  AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
                                                        NumVGScaledBytes);
  std::string CommentBuffer;
  llvm::raw_string_ostream Comment(CommentBuffer);

  if (Reg == AArch64::SP)
    Comment << "sp";
  else if (Reg == AArch64::FP)
    Comment << "fp";
  else
    Comment << printReg(Reg, &TRI);

  // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
  SmallString<64> Expr;
  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
  Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
  Expr.push_back(0);
  appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);

  // Wrap this into DW_CFA_def_cfa.
  SmallString<64> DefCfaExpr;
  DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
  uint8_t buffer[16];
  DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
  DefCfaExpr.append(Expr.str());
  return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
                                        Comment.str());
}
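// For example (illustrative), a CFA of "sp + 16 + 8 * VG" is encoded by the
// function above as the escape bytes for:
//   DW_CFA_def_cfa_expression: DW_OP_breg31(+0), DW_OP_consts 16,
//   DW_OP_plus, DW_OP_consts 8, DW_OP_bregx VG(+0), DW_OP_mul, DW_OP_plus
// with "sp + 16 + 8 * VG" attached as the asm comment.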
MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
                                    unsigned FrameReg, unsigned Reg,
                                    const StackOffset &Offset,
                                    bool LastAdjustmentWasScalable) {
  if (Offset.getScalable())
    return createDefCFAExpression(TRI, Reg, Offset);

  if (FrameReg == Reg && !LastAdjustmentWasScalable)
    return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));

  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
  return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
}

MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
                                       unsigned Reg,
                                       const StackOffset &OffsetFromDefCFA) {
  int64_t NumBytes, NumVGScaledBytes;
  AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
      OffsetFromDefCFA, NumBytes, NumVGScaledBytes);

  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);

  // Non-scalable offsets can use DW_CFA_offset directly.
  if (!NumVGScaledBytes)
    return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);

  std::string CommentBuffer;
  llvm::raw_string_ostream Comment(CommentBuffer);
  Comment << printReg(Reg, &TRI) << " @ cfa";

  // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
  SmallString<64> OffsetExpr;
  appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);

  // Wrap this into DW_CFA_expression
  SmallString<64> CfaExpr;
  CfaExpr.push_back(dwarf::DW_CFA_expression);
  uint8_t buffer[16];
  CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
  CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
  CfaExpr.append(OffsetExpr.str());

  return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
                                        Comment.str());
}

// Helper function to emit a frame offset adjustment from a given
// pointer (SrcReg), stored into DestReg. This function is explicit
// in that it requires the opcode.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               const DebugLoc &DL, unsigned DestReg,
                               unsigned SrcReg, int64_t Offset, unsigned Opc,
                               const TargetInstrInfo *TII,
                               MachineInstr::MIFlag Flag, bool NeedsWinCFI,
                               bool *HasWinCFI, bool EmitCFAOffset,
                               StackOffset CFAOffset, unsigned FrameReg) {
  int Sign = 1;
  unsigned MaxEncoding, ShiftSize;
  switch (Opc) {
  case AArch64::ADDXri:
  case AArch64::ADDSXri:
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    MaxEncoding = 0xfff;
    ShiftSize = 12;
    break;
  case AArch64::ADDVL_XXI:
  case AArch64::ADDPL_XXI:
  case AArch64::ADDSVL_XXI:
  case AArch64::ADDSPL_XXI:
    MaxEncoding = 31;
    ShiftSize = 0;
    if (Offset < 0) {
      MaxEncoding = 32;
      Sign = -1;
      Offset = -Offset;
    }
    break;
  default:
    llvm_unreachable("Unsupported opcode");
  }

  // `Offset` can be in bytes or in "scalable bytes".
  int VScale = 1;
  if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
    VScale = 16;
  else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
    VScale = 2;

  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
  // register can be loaded with offset%8 and the add/sub can use an extending
  // instruction with LSL#3.
  // Currently the function handles any offsets but generates a poor sequence
  // of code.
  //   assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  Register TmpReg = DestReg;
  if (TmpReg == AArch64::XZR)
    TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
        &AArch64::GPR64RegClass);
  do {
    uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
    unsigned LocalShiftSize = 0;
    if (ThisVal > MaxEncoding) {
      ThisVal = ThisVal >> ShiftSize;
      LocalShiftSize = ShiftSize;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");

    Offset -= ThisVal << LocalShiftSize;
    if (Offset == 0)
      TmpReg = DestReg;
    auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
                   .addReg(SrcReg)
                   .addImm(Sign * (int)ThisVal);
    if (ShiftSize)
      MBI = MBI.addImm(
          AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
    MBI = MBI.setMIFlag(Flag);

    auto Change =
        VScale == 1
            ? StackOffset::getFixed(ThisVal << LocalShiftSize)
            : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
    if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
      CFAOffset += Change;
    else
      CFAOffset -= Change;
    if (EmitCFAOffset && DestReg == TmpReg) {
      MachineFunction &MF = *MBB.getParent();
      const TargetSubtargetInfo &STI = MF.getSubtarget();
      const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

      unsigned CFIIndex = MF.addFrameInst(
          createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(Flag);
    }

    if (NeedsWinCFI) {
      int Imm = (int)(ThisVal << LocalShiftSize);
      if (VScale != 1 && DestReg == AArch64::SP) {
        if (HasWinCFI)
          *HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
            .addImm(ThisVal)
            .setMIFlag(Flag);
      } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
                 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
        assert(VScale == 1 && "Expected non-scalable operation");
        if (HasWinCFI)
          *HasWinCFI = true;
        if (Imm == 0)
          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
        else
          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
              .addImm(Imm)
              .setMIFlag(Flag);
        assert(Offset == 0 && "Expected remaining offset to be zero to "
                              "emit a single SEH directive");
      } else if (DestReg == AArch64::SP) {
        assert(VScale == 1 && "Expected non-scalable operation");
        if (HasWinCFI)
          *HasWinCFI = true;
        assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
            .addImm(Imm)
            .setMIFlag(Flag);
      }
    }

    SrcReg = TmpReg;
  } while (Offset);
}

void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned DestReg, unsigned SrcReg,
                           StackOffset Offset, const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV,
                           bool NeedsWinCFI, bool *HasWinCFI,
                           bool EmitCFAOffset, StackOffset CFAOffset,
                           unsigned FrameReg) {
  // If a function is marked as arm_locally_streaming, then the runtime value
  // of vscale in the prologue/epilogue is different from the runtime value of
  // vscale in the function's body. To avoid having to consider multiple
  // vscales, we can use `addsvl` to allocate any scalable stack-slots, which
  // under most circumstances will be only locals, not callee-save slots.
  const Function &F = MBB.getParent()->getFunction();
  bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");

  int64_t Bytes, NumPredicateVectors, NumDataVectors;
  AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
      Offset, Bytes, NumPredicateVectors, NumDataVectors);

  // First emit non-scalable frame offsets, or a simple 'mov'.
  if (Bytes || (!Offset && SrcReg != DestReg)) {
    assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
           "SP increment/decrement not 8-byte aligned");
    unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
    if (Bytes < 0) {
      Bytes = -Bytes;
      Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
    }
    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
                       NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
                       FrameReg);
    CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
                     ? StackOffset::getFixed(-Bytes)
                     : StackOffset::getFixed(Bytes);
    SrcReg = DestReg;
    FrameReg = DestReg;
  }

  assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
         "SetNZCV not supported with SVE vectors");
  assert(!(NeedsWinCFI && NumPredicateVectors) &&
         "WinCFI can't allocate fractions of an SVE data vector");

  if (NumDataVectors) {
    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
                       UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
                       Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
                       FrameReg);
    CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
    SrcReg = DestReg;
  }

  if (NumPredicateVectors) {
    assert(DestReg != AArch64::SP && "Unaligned access to SP");
    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
                       UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
                       Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
                       FrameReg);
  }
}
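// Typical use of the function above (illustrative): allocating 32 bytes of
// stack in a prologue,
//   emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
//                   StackOffset::getFixed(-32), TII,
//                   MachineInstr::FrameSetup);
// emits "sub sp, sp, #32"; a scalable component additionally emits
// ADDVL/ADDPL adjustments as decomposed above.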
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex,
    LiveIntervals *LIS, VirtRegMap *VRM) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %0 = COPY %sp; GPR64all:%0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %0 register class here.
  if (MI.isFullCopy()) {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    // Nothing can be folded with copy from/to NZCV.
    if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
      return nullptr;
  }

  // Handle the case where a copy is being spilled or filled but the source
  // and destination register classes don't match. For example:
  //
  //   %0 = COPY %xzr; GPR64common:%0
  //
  // In this case we can still safely fold away the COPY and generate the
  // following spill code:
  //
  //   STRXui %xzr, %stack.0
  //
  // This also eliminates spilled cross register class COPYs (e.g. between x
  // and d regs) of the same size. For example:
  //
  //   %0 = COPY %1; GPR64:%0, FPR64:%1
  //
  // will be filled as
  //
  //   LDRDui %0, fi<#0>
  //
  // instead of
  //
  //   LDRXui %Temp, fi<#0>
  //   %0 = FMOV %Temp
  //
  if (MI.isCopy() && Ops.size() == 1 &&
      // Make sure we're only folding the explicit COPY defs/uses.
      (Ops[0] == 0 || Ops[0] == 1)) {
    bool IsSpill = Ops[0] == 0;
    bool IsFill = !IsSpill;
    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    MachineBasicBlock &MBB = *MI.getParent();
    const MachineOperand &DstMO = MI.getOperand(0);
    const MachineOperand &SrcMO = MI.getOperand(1);
    Register DstReg = DstMO.getReg();
    Register SrcReg = SrcMO.getReg();
    // This is slightly expensive to compute for physical regs since
    // getMinimalPhysRegClass is slow.
    auto getRegClass = [&](unsigned Reg) {
      return Register::isVirtualRegister(Reg)
                 ? MRI.getRegClass(Reg)
                 : TRI.getMinimalPhysRegClass(Reg);
    };

    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
      assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
             "Mismatched register size in non subreg COPY");
      if (IsSpill)
        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
                            getRegClass(SrcReg), &TRI, Register());
      else
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
                             getRegClass(DstReg), &TRI, Register());
      return &*--InsertPt;
    }

    // Handle cases like spilling def of:
    //
    //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
    //
    // where the physical register source can be widened and stored to the full
    // virtual reg destination stack slot, in this case producing:
    //
    //   STRXui %xzr, %stack.0
    //
    if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
        TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
      assert(SrcMO.getSubReg() == 0 &&
             "Unexpected subreg on physical register");
      storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
                          FrameIndex, &AArch64::GPR64RegClass, &TRI,
                          Register());
      return &*--InsertPt;
    }

    // Handle cases like filling use of:
    //
    //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
    //
    // where we can load the full virtual reg source stack slot into the
    // subreg destination, in this case producing:
    //
    //   LDRWui %0:sub_32<def,read-undef>, %stack.0
    //
    if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
      const TargetRegisterClass *FillRC;
      switch (DstMO.getSubReg()) {
      default:
        FillRC = nullptr;
        break;
      case AArch64::sub_32:
        FillRC = &AArch64::GPR32RegClass;
        break;
      case AArch64::ssub:
        FillRC = &AArch64::FPR32RegClass;
        break;
      case AArch64::dsub:
        FillRC = &AArch64::FPR64RegClass;
        break;
      }

      if (FillRC) {
        assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
                   TRI.getRegSizeInBits(*FillRC) &&
               "Mismatched regclass size on folded subreg COPY");
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
                             Register());
        MachineInstr &LoadMI = *--InsertPt;
        MachineOperand &LoadDst = LoadMI.getOperand(0);
        assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
        LoadDst.setSubReg(DstMO.getSubReg());
        LoadDst.setIsUndef();
        return &LoadMI;
      }
    }
  }

  // Cannot fold.
  return nullptr;
}
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
                                    StackOffset &SOffset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int64_t *EmittableOffset) {
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;

  // Exit early for structured vector spills/fills as they can't take an
  // immediate offset.
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LD1Rv1d:
  case AArch64::LD1Rv2s:
  case AArch64::LD1Rv2d:
  case AArch64::LD1Rv4h:
  case AArch64::LD1Rv4s:
  case AArch64::LD1Rv8b:
  case AArch64::LD1Rv8h:
  case AArch64::LD1Rv16b:
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
  case AArch64::ST1i8:
  case AArch64::ST1i16:
  case AArch64::ST1i32:
  case AArch64::ST1i64:
  case AArch64::IRG:
  case AArch64::IRGstack:
  case AArch64::STGloop:
  case AArch64::STZGloop:
    return AArch64FrameOffsetCannotUpdate;
  }

  // Get the min/max offset and the scale.
  TypeSize ScaleValue(0U, false), Width(0U, false);
  int64_t MinOff, MaxOff;
  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
                                      MaxOff))
    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");

  // Construct the complete offset.
  bool IsMulVL = ScaleValue.isScalable();
  unsigned Scale = ScaleValue.getKnownMinValue();
  int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();

  const MachineOperand &ImmOpnd =
      MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
  Offset += ImmOpnd.getImm() * Scale;

  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset and there is an unscaled op to use.
  std::optional<unsigned> UnscaledOp =
      AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
  bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
  if (useUnscaledOp &&
      !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
                                      MaxOff))
    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");

  Scale = ScaleValue.getKnownMinValue();
  assert(IsMulVL == ScaleValue.isScalable() &&
         "Unscaled opcode has different value for scalable");

  int64_t Remainder = Offset % Scale;
  assert(!(Remainder && useUnscaledOp) &&
         "Cannot have remainder when using unscaled op");

  assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
  int64_t NewOffset = Offset / Scale;
  if (MinOff <= NewOffset && NewOffset <= MaxOff)
    Offset = Remainder;
  else {
    NewOffset = NewOffset < 0 ? MinOff : MaxOff;
    Offset = Offset - (NewOffset * Scale);
  }

  if (EmittableOffset)
    *EmittableOffset = NewOffset;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp && UnscaledOp)
    *OutUnscaledOp = *UnscaledOp;

  if (IsMulVL)
    SOffset = StackOffset::get(SOffset.getFixed(), Offset);
  else
    SOffset = StackOffset::get(Offset, SOffset.getScalable());
  return AArch64FrameOffsetCanUpdate |
         (SOffset ? 0 : AArch64FrameOffsetIsLegal);
}

bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, StackOffset &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = StackOffset();
    return true;
  }

  int64_t NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return !Offset;
  }

  return false;
}
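// Worked example for the two functions above (illustrative): an LDRXui
// (scale 8, unsigned immediate range [0, 4095]) with a combined frame offset
// of 40 legalizes to NewOffset = 5 with no remainder, so the access becomes
// "ldr x0, [fp, #40]"; an offset of 7 instead selects the unscaled LDURXi
// form via getUnscaledLdSt.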
void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI) const {
  DebugLoc DL;
  BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
}

MCInst AArch64InstrInfo::getNop() const {
  return MCInstBuilder(AArch64::HINT).addImm(0);
}

// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }

// True when Opc sets flags.
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
  case AArch64::ADDv8i8:
  case AArch64::ADDv16i8:
  case AArch64::ADDv4i16:
  case AArch64::ADDv8i16:
  case AArch64::ADDv2i32:
  case AArch64::ADDv4i32:
  case AArch64::SUBv8i8:
  case AArch64::SUBv16i8:
  case AArch64::SUBv4i16:
  case AArch64::SUBv8i16:
  case AArch64::SUBv2i32:
  case AArch64::SUBv4i32:
    return true;
  default:
    break;
  }
  return false;
}

// FP Opcodes that can be combined with a FMUL.
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDHrr:
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv4f16:
  case AArch64::FADDv8f16:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBHrr:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv4f16:
  case AArch64::FSUBv8f16:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
    // the target options or if FADD/FSUB has the contract fast-math flag.
    return Options.UnsafeFPMath ||
           Options.AllowFPOpFusion == FPOpFusion::Fast ||
           Inst.getFlag(MachineInstr::FmContract);
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

//
// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && MO.getReg().isVirtual())
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // Must only be used by the user we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  if (isCombineInstrSettingFlag(CombineOpc) &&
      MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
    return false;

  return true;
}

//
// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}
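// Context for the zero-register check above: on AArch64, "MUL Wd, Wn, Wm" is
// an alias of "MADD Wd, Wn, Wm, WZR", so canCombineWithMUL recognizes a plain
// multiply by requiring the MADD's accumulator operand to be the zero
// register.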
//
// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
                                                   bool Invert) const {
  if (Invert)
    return false;
  switch (Inst.getOpcode()) {
  // == Floating-point types ==
  // -- Floating-point instructions --
  case AArch64::FADDHrr:
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FMULHrr:
  case AArch64::FMULSrr:
  case AArch64::FMULDrr:
  case AArch64::FMULX16:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  // -- Advanced SIMD instructions --
  case AArch64::FADDv4f16:
  case AArch64::FADDv8f16:
  case AArch64::FADDv2f32:
  case AArch64::FADDv4f32:
  case AArch64::FADDv2f64:
  case AArch64::FMULv4f16:
  case AArch64::FMULv8f16:
  case AArch64::FMULv2f32:
  case AArch64::FMULv4f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULXv4f16:
  case AArch64::FMULXv8f16:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv4f32:
  case AArch64::FMULXv2f64:
  // -- SVE instructions --
  // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
  // in the SVE instruction set (though there are predicated ones).
  case AArch64::FADD_ZZZ_H:
  case AArch64::FADD_ZZZ_S:
  case AArch64::FADD_ZZZ_D:
  case AArch64::FMUL_ZZZ_H:
  case AArch64::FMUL_ZZZ_S:
  case AArch64::FMUL_ZZZ_D:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
           (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
            Inst.getFlag(MachineInstr::MIFlag::FmNsz));

  // == Integer types ==
  // -- Base instructions --
  // Opcodes MULWrr and MULXrr don't exist because
  // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
  // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR`,
  // respectively.
  // The machine-combiner does not support three-source-operand machine
  // instructions, so we cannot reassociate MULs.
  case AArch64::ADDWrr:
  case AArch64::ADDXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  // -- Advanced SIMD instructions --
  // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
  // in the Advanced SIMD instruction set.
  case AArch64::ADDv8i8:
  case AArch64::ADDv16i8:
  case AArch64::ADDv4i16:
  case AArch64::ADDv8i16:
  case AArch64::ADDv2i32:
  case AArch64::ADDv4i32:
  case AArch64::ADDv1i64:
  case AArch64::ADDv2i64:
  case AArch64::MULv8i8:
  case AArch64::MULv16i8:
  case AArch64::MULv4i16:
  case AArch64::MULv8i16:
  case AArch64::MULv2i32:
  case AArch64::MULv4i32:
  case AArch64::ANDv8i8:
  case AArch64::ANDv16i8:
  case AArch64::ORRv8i8:
  case AArch64::ORRv16i8:
  case AArch64::EORv8i8:
  case AArch64::EORv16i8:
  // -- SVE instructions --
  case AArch64::ADD_ZZZ_B:
  case AArch64::ADD_ZZZ_H:
  case AArch64::ADD_ZZZ_S:
  case AArch64::ADD_ZZZ_D:
  case AArch64::MUL_ZZZ_B:
  case AArch64::MUL_ZZZ_H:
  case AArch64::MUL_ZZZ_S:
  case AArch64::MUL_ZZZ_D:
  case AArch64::AND_ZZZ:
  case AArch64::ORR_ZZZ:
  case AArch64::EOR_ZZZ:
    return true;

  default:
    return false;
  }
}
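// For example (illustrative), IR like "%t = fadd reassoc nsz float %a, %b"
// lowers to an FADDSrr carrying the FmReassoc and FmNsz flags, which lets the
// MachineCombiner reassociate a chain such as ((a + b) + c) + d into
// (a + b) + (c + d) to shorten the critical path.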
  case AArch64::ADDv8i8:
  case AArch64::ADDv16i8:
  case AArch64::ADDv4i16:
  case AArch64::ADDv8i16:
  case AArch64::ADDv2i32:
  case AArch64::ADDv4i32:
  case AArch64::ADDv1i64:
  case AArch64::ADDv2i64:
  case AArch64::MULv8i8:
  case AArch64::MULv16i8:
  case AArch64::MULv4i16:
  case AArch64::MULv8i16:
  case AArch64::MULv2i32:
  case AArch64::MULv4i32:
  case AArch64::ANDv8i8:
  case AArch64::ANDv16i8:
  case AArch64::ORRv8i8:
  case AArch64::ORRv16i8:
  case AArch64::EORv8i8:
  case AArch64::EORv16i8:
  // -- SVE instructions --
  case AArch64::ADD_ZZZ_B:
  case AArch64::ADD_ZZZ_H:
  case AArch64::ADD_ZZZ_S:
  case AArch64::ADD_ZZZ_D:
  case AArch64::MUL_ZZZ_B:
  case AArch64::MUL_ZZZ_H:
  case AArch64::MUL_ZZZ_S:
  case AArch64::MUL_ZZZ_D:
  case AArch64::AND_ZZZ:
  case AArch64::ORR_ZZZ:
  case AArch64::EOR_ZZZ:
    return true;

  default:
    return false;
  }
}

/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<unsigned> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV =
        Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
    // When NZCV is live bail out.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertToNonFlagSettingOpc(Root);
    // When opcode can't change bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
                      unsigned Pattern) {
    if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
      Patterns.push_back(Pattern);
      Found = true;
    }
  };

  auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
    if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
      Patterns.push_back(Pattern);
      Found = true;
    }
  };

  typedef AArch64MachineCombinerPattern MCP;

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
    setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
    break;
  case AArch64::ADDXrr:
    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
    setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
    break;
  case AArch64::SUBWrr:
    setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
    break;
  case AArch64::SUBXrr:
    setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
    break;
  case AArch64::ADDWri:
    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
    break;
  case AArch64::ADDXri:
    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
    break;
  case AArch64::SUBWri:
    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
    break;
  case AArch64::SUBXri:
    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
    break;
  case AArch64::ADDv8i8:
    setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
    setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
    break;
  case AArch64::ADDv16i8:
    setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
    setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
    break;
  case AArch64::ADDv4i16:
    setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
    setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
    setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
    setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
    break;
  case AArch64::ADDv8i16:
    setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
    setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
    setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
    setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
    break;
  case AArch64::ADDv2i32:
    setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
    setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
    setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
    setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
    break;
  case AArch64::ADDv4i32:
    setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
    setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
    setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
    setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
    break;
  case AArch64::SUBv8i8:
    setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
    setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
    break;
  case AArch64::SUBv16i8:
    setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
    setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
    break;
  case AArch64::SUBv4i16:
    setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
    setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
    setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
    setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
    break;
  case AArch64::SUBv8i16:
    setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
    setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
    setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
    setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
    break;
  case AArch64::SUBv2i32:
    setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
    setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
    setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
    setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
    break;
  case AArch64::SUBv4i32:
    setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
    setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
    setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
    setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
    break;
  }
  return Found;
}
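// Illustrative sketch of the transformation behind these patterns (register
// numbers are hypothetical, not taken from this file). For MULADDW_OP1 the
// combiner replaces
//   %2:gpr32 = MADDWrrr %0, %1, $wzr   ; a plain MUL (addend is WZR)
//   %4:gpr32 = ADDWrr %2, %3
// with the single instruction
//   %4:gpr32 = MADDWrrr %0, %1, %3
// provided the MUL result has no other use; see genFusedMultiply below for
// the code that emits the replacement.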
bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
  switch (Opcode) {
  default:
    break;
  case AArch64::UABALB_ZZZ_D:
  case AArch64::UABALB_ZZZ_H:
  case AArch64::UABALB_ZZZ_S:
  case AArch64::UABALT_ZZZ_D:
  case AArch64::UABALT_ZZZ_H:
  case AArch64::UABALT_ZZZ_S:
  case AArch64::SABALB_ZZZ_D:
  case AArch64::SABALB_ZZZ_S:
  case AArch64::SABALB_ZZZ_H:
  case AArch64::SABALT_ZZZ_D:
  case AArch64::SABALT_ZZZ_S:
  case AArch64::SABALT_ZZZ_H:
  case AArch64::UABALv16i8_v8i16:
  case AArch64::UABALv2i32_v2i64:
  case AArch64::UABALv4i16_v4i32:
  case AArch64::UABALv4i32_v2i64:
  case AArch64::UABALv8i16_v4i32:
  case AArch64::UABALv8i8_v8i16:
  case AArch64::UABAv16i8:
  case AArch64::UABAv2i32:
  case AArch64::UABAv4i16:
  case AArch64::UABAv4i32:
  case AArch64::UABAv8i16:
  case AArch64::UABAv8i8:
  case AArch64::SABALv16i8_v8i16:
  case AArch64::SABALv2i32_v2i64:
  case AArch64::SABALv4i16_v4i32:
  case AArch64::SABALv4i32_v2i64:
  case AArch64::SABALv8i16_v4i32:
  case AArch64::SABALv8i8_v8i16:
  case AArch64::SABAv16i8:
  case AArch64::SABAv2i32:
  case AArch64::SABAv4i16:
  case AArch64::SABAv4i32:
  case AArch64::SABAv8i16:
  case AArch64::SABAv8i8:
    return true;
  }

  return false;
}

unsigned AArch64InstrInfo::getAccumulationStartOpcode(
    unsigned AccumulationOpcode) const {
  switch (AccumulationOpcode) {
  default:
    llvm_unreachable("Unsupported accumulation Opcode!");
  case AArch64::UABALB_ZZZ_D:
    return AArch64::UABDLB_ZZZ_D;
  case AArch64::UABALB_ZZZ_H:
    return AArch64::UABDLB_ZZZ_H;
  case AArch64::UABALB_ZZZ_S:
    return AArch64::UABDLB_ZZZ_S;
  case AArch64::UABALT_ZZZ_D:
    return AArch64::UABDLT_ZZZ_D;
  case AArch64::UABALT_ZZZ_H:
    return AArch64::UABDLT_ZZZ_H;
  case AArch64::UABALT_ZZZ_S:
    return AArch64::UABDLT_ZZZ_S;
  case AArch64::UABALv16i8_v8i16:
    return AArch64::UABDLv16i8_v8i16;
  case AArch64::UABALv2i32_v2i64:
    return AArch64::UABDLv2i32_v2i64;
  case AArch64::UABALv4i16_v4i32:
    return AArch64::UABDLv4i16_v4i32;
  case AArch64::UABALv4i32_v2i64:
    return AArch64::UABDLv4i32_v2i64;
  case AArch64::UABALv8i16_v4i32:
    return AArch64::UABDLv8i16_v4i32;
  case AArch64::UABALv8i8_v8i16:
    return AArch64::UABDLv8i8_v8i16;
  case AArch64::UABAv16i8:
    return AArch64::UABDv16i8;
  case AArch64::UABAv2i32:
    return AArch64::UABDv2i32;
  case AArch64::UABAv4i16:
    return AArch64::UABDv4i16;
  case AArch64::UABAv4i32:
    return AArch64::UABDv4i32;
  case AArch64::UABAv8i16:
    return AArch64::UABDv8i16;
  case AArch64::UABAv8i8:
    return AArch64::UABDv8i8;
  case AArch64::SABALB_ZZZ_D:
    return AArch64::SABDLB_ZZZ_D;
  case AArch64::SABALB_ZZZ_S:
    return AArch64::SABDLB_ZZZ_S;
  case AArch64::SABALB_ZZZ_H:
    return AArch64::SABDLB_ZZZ_H;
  case AArch64::SABALT_ZZZ_D:
    return AArch64::SABDLT_ZZZ_D;
  case AArch64::SABALT_ZZZ_S:
    return AArch64::SABDLT_ZZZ_S;
  case AArch64::SABALT_ZZZ_H:
    return AArch64::SABDLT_ZZZ_H;
  case AArch64::SABALv16i8_v8i16:
    return AArch64::SABDLv16i8_v8i16;
  case AArch64::SABALv2i32_v2i64:
    return AArch64::SABDLv2i32_v2i64;
  case AArch64::SABALv4i16_v4i32:
    return AArch64::SABDLv4i16_v4i32;
  case AArch64::SABALv4i32_v2i64:
    return AArch64::SABDLv4i32_v2i64;
  case AArch64::SABALv8i16_v4i32:
    return AArch64::SABDLv8i16_v4i32;
  case AArch64::SABALv8i8_v8i16:
    return AArch64::SABDLv8i8_v8i16;
  case AArch64::SABAv16i8:
    return AArch64::SABDv16i8;
  case AArch64::SABAv2i32:
    return AArch64::SABDv2i32;
  case AArch64::SABAv4i16:
    return AArch64::SABDv4i16;
  case AArch64::SABAv4i32:
    return AArch64::SABDv4i32;
  case AArch64::SABAv8i16:
    return AArch64::SABDv8i16;
  case AArch64::SABAv8i8:
    return AArch64::SABDv8i8;
  }
}
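// A sketch of how the two accumulator hooks above fit together (hypothetical
// registers): in a serial accumulation chain such as
//   %acc1 = UABAv8i8 %acc0, %a0, %b0
//   %acc2 = UABAv8i8 %acc1, %a1, %b1
// a later link can be restarted as an independent chain using the
// non-accumulating start opcode returned above:
//   %tmp = UABDv8i8 %a1, %b1
// The parallel chains are summed at the end with the opcode returned by
// getReduceOpcodeForAccumulator, defined later in this file.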
/// Floating-Point Support

/// Find instructions that can be turned into fmadd.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<unsigned> &Patterns) {

  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
    if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
      Patterns.push_back(Pattern);
      return true;
    }
    return false;
  };

  typedef AArch64MachineCombinerPattern MCP;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDHrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDHrr does not have register operands");

    Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
    Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");

    Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
             Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);

    Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
             Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
    break;
  case AArch64::FADDDrr:
    Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
             Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);

    Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
             Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
    break;
  case AArch64::FADDv4f16:
    Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
             Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);

    Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
             Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
    break;
  case AArch64::FADDv8f16:
    Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
             Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);

    Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
             Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
    break;
  case AArch64::FADDv2f32:
    Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
             Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);

    Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
             Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
    break;
  case AArch64::FADDv2f64:
    Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
             Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);

    Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
             Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
    break;
  case AArch64::FADDv4f32:
    Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
             Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);

    Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
             Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
    break;
  case AArch64::FSUBHrr:
    Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
    Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
    Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
    break;
  case AArch64::FSUBSrr:
    Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);

    Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
             Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);

    Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
    break;
  case AArch64::FSUBDrr:
    Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);

    Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
             Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);

    Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
    break;
  case AArch64::FSUBv4f16:
    Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
             Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);

    Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
             Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
    break;
  case AArch64::FSUBv8f16:
    Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
             Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);

    Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
             Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
    break;
  case AArch64::FSUBv2f32:
    Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
             Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);

    Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
             Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
    break;
  case AArch64::FSUBv2f64:
    Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
             Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);

    Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
             Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
    break;
  case AArch64::FSUBv4f32:
    Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
             Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);

    Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
             Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
    break;
  }
  return Found;
}
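// Illustrative sketch (hypothetical MIR) for FMULADDS_OP1: given suitable
// fast-math flags (checked via isCombineInstrCandidateFP and
// canCombineWithFMUL),
//   %2:fpr32 = contract FMULSrr %0, %1
//   %4:fpr32 = contract FADDSrr %2, %3
// is fused into
//   %4:fpr32 = FMADDSrrr %0, %1, %3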
static bool getFMULPatterns(MachineInstr &Root,
                            SmallVectorImpl<unsigned> &Patterns) {
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
    MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    MachineOperand &MO = Root.getOperand(Operand);
    MachineInstr *MI = nullptr;
    if (MO.isReg() && MO.getReg().isVirtual())
      MI = MRI.getUniqueVRegDef(MO.getReg());
    // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
    if (MI && MI->getOpcode() == TargetOpcode::COPY &&
        MI->getOperand(1).getReg().isVirtual())
      MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
    if (MI && MI->getOpcode() == Opcode) {
      Patterns.push_back(Pattern);
      return true;
    }
    return false;
  };

  typedef AArch64MachineCombinerPattern MCP;

  switch (Root.getOpcode()) {
  default:
    return false;
  case AArch64::FMULv2f32:
    Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
    Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
    break;
  case AArch64::FMULv2f64:
    Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
    Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
    break;
  case AArch64::FMULv4f16:
    Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
    Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
    break;
  case AArch64::FMULv4f32:
    Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
    Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
    break;
  case AArch64::FMULv8f16:
    Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
    Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
    break;
  }

  return Found;
}

static bool getFNEGPatterns(MachineInstr &Root,
                            SmallVectorImpl<unsigned> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
    MachineOperand &MO = Root.getOperand(1);
    MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
    if (MI != nullptr && (MI->getOpcode() == Opcode) &&
        MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
        Root.getFlag(MachineInstr::MIFlag::FmContract) &&
        Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
        MI->getFlag(MachineInstr::MIFlag::FmContract) &&
        MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
      Patterns.push_back(Pattern);
      return true;
    }
    return false;
  };

  switch (Opc) {
  default:
    break;
  case AArch64::FNEGDr:
    return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
  case AArch64::FNEGSr:
    return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
  }

  return false;
}
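// Illustrative sketch (hypothetical MIR) for FMULv2i32_indexed_OP2: a
// multiply by a lane splat,
//   %2:fpr64 = DUPv2i32lane %1:fpr128, 0
//   %3:fpr64 = FMULv2f32 %0, %2
// becomes a by-element multiply,
//   %3:fpr64 = FMULv2i32_indexed %0, %1, 0
// (emitted by genIndexedMultiply further down).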
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
  switch (Pattern) {
  default:
    break;
  case AArch64MachineCombinerPattern::FMULADDH_OP1:
  case AArch64MachineCombinerPattern::FMULADDH_OP2:
  case AArch64MachineCombinerPattern::FMULSUBH_OP1:
  case AArch64MachineCombinerPattern::FMULSUBH_OP2:
  case AArch64MachineCombinerPattern::FMULADDS_OP1:
  case AArch64MachineCombinerPattern::FMULADDS_OP2:
  case AArch64MachineCombinerPattern::FMULSUBS_OP1:
  case AArch64MachineCombinerPattern::FMULSUBS_OP2:
  case AArch64MachineCombinerPattern::FMULADDD_OP1:
  case AArch64MachineCombinerPattern::FMULADDD_OP2:
  case AArch64MachineCombinerPattern::FMULSUBD_OP1:
  case AArch64MachineCombinerPattern::FMULSUBD_OP2:
  case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
  case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
  case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
  case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
  case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
  case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
  case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
  case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
  case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
  case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
  case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
  case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
  case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
  case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLSv4f16_OP1:
  case AArch64MachineCombinerPattern::FMLSv4f16_OP2:
  case AArch64MachineCombinerPattern::FMLSv8f16_OP1:
  case AArch64MachineCombinerPattern::FMLSv8f16_OP2:
  case AArch64MachineCombinerPattern::FMLSv2f32_OP2:
  case AArch64MachineCombinerPattern::FMLSv2f64_OP2:
  case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLSv4f32_OP2:
  case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2:
  case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2:
  case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2:
  case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2:
  case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2:
  case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
  case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
  case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
  case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
  case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
  case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
  case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
  case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
  case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
  case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
  case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
  case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
  case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
  case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
  case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
  case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
  case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
  case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
  case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
  case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
  case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
  case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
  case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
  case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
  case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
  case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
  case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
  case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
  case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
  case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
  case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
  case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
  case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
  case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
  case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
  case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
  case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
  case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
  case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
  case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}
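// Illustrative sketch (hypothetical MIR) of the reassociation that
// getMiscPatterns below looks for:
//   %3 = ADDWrr %1, %2
//   %4 = SUBWrr %0, %3      ; A - (B + C)
// can be rewritten by genSubAdd2SubSub as
//   %t = SUBWrr %0, %1      ; A - B
//   %4 = SUBWrr %t, %2      ; (A - B) - C
// which is why these patterns report CombinerObjective::MustReduceDepth.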
/// Find other MI combine patterns.
static bool getMiscPatterns(MachineInstr &Root,
                            SmallVectorImpl<unsigned> &Patterns) {
  // A - (B + C)  ==>   (A - B) - C  or  (A - C) - B
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();

  switch (Opc) {
  case AArch64::SUBWrr:
  case AArch64::SUBSWrr:
  case AArch64::SUBXrr:
  case AArch64::SUBSXrr:
    // Found candidate root.
    break;
  default:
    return false;
  }

  if (isCombineInstrSettingFlag(Opc) &&
      Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
          -1)
    return false;

  if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
      canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
      canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
      canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
    Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
    Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
    return true;
  }

  return false;
}

CombinerObjective
AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
  switch (Pattern) {
  case AArch64MachineCombinerPattern::SUBADD_OP1:
  case AArch64MachineCombinerPattern::SUBADD_OP2:
    return CombinerObjective::MustReduceDepth;
  default:
    return TargetInstrInfo::getCombinerObjective(Pattern);
  }
}

/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the
/// pattern evaluator stops checking as soon as it finds a faster sequence.

bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
    bool DoRegPressureReduce) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMULPatterns(Root, Patterns))
    return true;
  if (getFMAPatterns(Root, Patterns))
    return true;
  if (getFNEGPatterns(Root, Patterns))
    return true;

  // Other patterns
  if (getMiscPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
                                                     DoRegPressureReduce);
}

enum class FMAInstKind { Default, Indexed, Accumulator };
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B,0
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind of fma instruction (addressing mode) to be generated
/// \param ReplacedAddend is the result register from the instruction
/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default,
                 const Register *ReplacedAddend = nullptr) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  Register ResultReg = Root.getOperand(0).getReg();
  Register SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  Register SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  Register SrcReg2;
  bool Src2IsKill;
  if (ReplacedAddend) {
    // If we just generated a new addend, we must be its only use.
    SrcReg2 = *ReplacedAddend;
    Src2IsKill = true;
  } else {
    SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
    Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
  }

  if (ResultReg.isVirtual())
    MRI.constrainRegClass(ResultReg, RC);
  if (SrcReg0.isVirtual())
    MRI.constrainRegClass(SrcReg0, RC);
  if (SrcReg1.isVirtual())
    MRI.constrainRegClass(SrcReg1, RC);
  if (SrcReg2.isVirtual())
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind \n");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
  InsInstrs.push_back(MIB);
  return MUL;
}
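// Illustrative sketch (hypothetical MIR) of FMAInstKind::Accumulator with
// IdxMulOpd == 1; note the addend is emitted first, then the MUL operands:
//   %3:fpr64 = MULv2i32 %1, %2
//   %4:fpr64 = ADDv2i32 %3, %0
// becomes
//   %4:fpr64 = MLAv2i32 %0, %1, %2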
static MachineInstr *
genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
               const TargetInstrInfo *TII, MachineInstr &Root,
               SmallVectorImpl<MachineInstr *> &InsInstrs) {
  MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());

  unsigned Opc = 0;
  const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
  if (AArch64::FPR32RegClass.hasSubClassEq(RC))
    Opc = AArch64::FNMADDSrrr;
  else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
    Opc = AArch64::FNMADDDrrr;
  else
    return nullptr;

  Register ResultReg = Root.getOperand(0).getReg();
  Register SrcReg0 = MAD->getOperand(1).getReg();
  Register SrcReg1 = MAD->getOperand(2).getReg();
  Register SrcReg2 = MAD->getOperand(3).getReg();
  bool Src0IsKill = MAD->getOperand(1).isKill();
  bool Src1IsKill = MAD->getOperand(2).isKill();
  bool Src2IsKill = MAD->getOperand(3).isKill();
  if (ResultReg.isVirtual())
    MRI.constrainRegClass(ResultReg, RC);
  if (SrcReg0.isVirtual())
    MRI.constrainRegClass(SrcReg0, RC);
  if (SrcReg1.isVirtual())
    MRI.constrainRegClass(SrcReg1, RC);
  if (SrcReg2.isVirtual())
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB =
      BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
          .addReg(SrcReg0, getKillRegState(Src0IsKill))
          .addReg(SrcReg1, getKillRegState(Src1IsKill))
          .addReg(SrcReg2, getKillRegState(Src2IsKill));
  InsInstrs.push_back(MIB);

  return MAD;
}

/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static MachineInstr *
genIndexedMultiply(MachineInstr &Root,
                   SmallVectorImpl<MachineInstr *> &InsInstrs,
                   unsigned IdxDupOp, unsigned MulOpc,
                   const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
  assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
         "Invalid index of FMUL operand");

  MachineFunction &MF = *Root.getMF();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *Dup =
      MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());

  if (Dup->getOpcode() == TargetOpcode::COPY)
    Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());

  Register DupSrcReg = Dup->getOperand(1).getReg();
  MRI.clearKillFlags(DupSrcReg);
  MRI.constrainRegClass(DupSrcReg, RC);

  unsigned DupSrcLane = Dup->getOperand(2).getImm();

  unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
  MachineOperand &MulOp = Root.getOperand(IdxMulOp);

  Register ResultReg = Root.getOperand(0).getReg();

  MachineInstrBuilder MIB;
  MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
            .add(MulOp)
            .addReg(DupSrcReg)
            .addImm(DupSrcLane);

  InsInstrs.push_back(MIB);
  return &Root;
}

/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
/// instructions.
///
/// \see genFusedMultiply
static MachineInstr *genFusedMultiplyAcc(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Accumulator);
}

/// genNeg - Helper to generate an intermediate negation of the second operand
/// of Root
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
                       const TargetInstrInfo *TII, MachineInstr &Root,
                       SmallVectorImpl<MachineInstr *> &InsInstrs,
                       DenseMap<Register, unsigned> &InstrIdxForVirtReg,
                       unsigned MnegOpc, const TargetRegisterClass *RC) {
  Register NewVR = MRI.createVirtualRegister(RC);
  MachineInstrBuilder MIB =
      BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
          .add(Root.getOperand(2));
  InsInstrs.push_back(MIB);

  assert(InstrIdxForVirtReg.empty());
  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));

  return NewVR;
}

/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
/// instructions with an additional negation of the accumulator
static MachineInstr *genFusedMultiplyAccNeg(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
    unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1);

  Register NewVR =
      genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Accumulator, &NewVR);
}
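// Illustrative sketch (hypothetical MIR) of genFusedMultiplyAccNeg for
// MULSUBv8i8_OP1, where the subtracted-from operand must be negated first:
//   %3 = MULv8i8 %1, %2
//   %4 = SUBv8i8 %3, %0      ; mul - addend
// becomes
//   %n = NEGv8i8 %0
//   %4 = MLAv8i8 %n, %1, %2  ; -addend + mul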
/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
/// instructions.
///
/// \see genFusedMultiply
static MachineInstr *genFusedMultiplyIdx(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Indexed);
}

/// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
/// instructions with an additional negation of the accumulator
static MachineInstr *genFusedMultiplyIdxNeg(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
    unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1);

  Register NewVR =
      genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);

  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Indexed, &NewVR);
}

/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
/// \param RC Register class of operands
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
                              const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  Register ResultReg = Root.getOperand(0).getReg();
  Register SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  Register SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (ResultReg.isVirtual())
    MRI.constrainRegClass(ResultReg, RC);
  if (SrcReg0.isVirtual())
    MRI.constrainRegClass(SrcReg0, RC);
  if (SrcReg1.isVirtual())
    MRI.constrainRegClass(SrcReg1, RC);
  if (Register::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB =
      BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
          .addReg(SrcReg0, getKillRegState(Src0IsKill))
          .addReg(SrcReg1, getKillRegState(Src1IsKill))
          .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}
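// Illustrative sketch (hypothetical MIR) of a genMaddR client, the
// MULADDWI_OP1 pattern in genAlternativeCodeSequence below:
//   %2:gpr32 = MADDWrrr %0, %1, $wzr
//   %3:gpr32 = ADDWri %2, 42, 0
// becomes
//   %v:gpr32 = MOVi32imm 42
//   %3:gpr32 = MADDWrrr %0, %1, %v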
/// Do the following transformation
/// A - (B + C)  ==>  (A - B) - C
/// A - (B + C)  ==>  (A - C) - B
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
                             const TargetInstrInfo *TII, MachineInstr &Root,
                             SmallVectorImpl<MachineInstr *> &InsInstrs,
                             SmallVectorImpl<MachineInstr *> &DelInstrs,
                             unsigned IdxOpd1,
                             DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
  assert(IdxOpd1 == 1 || IdxOpd1 == 2);
  unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
  MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());

  Register ResultReg = Root.getOperand(0).getReg();
  Register RegA = Root.getOperand(1).getReg();
  bool RegAIsKill = Root.getOperand(1).isKill();
  Register RegB = AddMI->getOperand(IdxOpd1).getReg();
  bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
  Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
  bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
  Register NewVR =
      MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));

  unsigned Opcode = Root.getOpcode();
  if (Opcode == AArch64::SUBSWrr)
    Opcode = AArch64::SUBWrr;
  else if (Opcode == AArch64::SUBSXrr)
    Opcode = AArch64::SUBXrr;
  else
    assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
           "Unexpected instruction opcode.");

  uint32_t Flags = Root.mergeFlagsWith(*AddMI);
  Flags &= ~MachineInstr::NoSWrap;
  Flags &= ~MachineInstr::NoUWrap;

  MachineInstrBuilder MIB1 =
      BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
          .addReg(RegA, getKillRegState(RegAIsKill))
          .addReg(RegB, getKillRegState(RegBIsKill))
          .setMIFlags(Flags);
  MachineInstrBuilder MIB2 =
      BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
          .addReg(NewVR, getKillRegState(true))
          .addReg(RegC, getKillRegState(RegCIsKill))
          .setMIFlags(Flags);

  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
  InsInstrs.push_back(MIB1);
  InsInstrs.push_back(MIB2);
  DelInstrs.push_back(AddMI);
  DelInstrs.push_back(&Root);
}

unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
    unsigned int AccumulatorOpCode) const {
  switch (AccumulatorOpCode) {
  case AArch64::UABALB_ZZZ_D:
  case AArch64::SABALB_ZZZ_D:
  case AArch64::UABALT_ZZZ_D:
  case AArch64::SABALT_ZZZ_D:
    return AArch64::ADD_ZZZ_D;
  case AArch64::UABALB_ZZZ_H:
  case AArch64::SABALB_ZZZ_H:
  case AArch64::UABALT_ZZZ_H:
  case AArch64::SABALT_ZZZ_H:
    return AArch64::ADD_ZZZ_H;
  case AArch64::UABALB_ZZZ_S:
  case AArch64::SABALB_ZZZ_S:
  case AArch64::UABALT_ZZZ_S:
  case AArch64::SABALT_ZZZ_S:
    return AArch64::ADD_ZZZ_S;
  case AArch64::UABALv16i8_v8i16:
  case AArch64::SABALv8i8_v8i16:
  case AArch64::SABAv8i16:
  case AArch64::UABAv8i16:
    return AArch64::ADDv8i16;
  case AArch64::SABALv2i32_v2i64:
  case AArch64::UABALv2i32_v2i64:
  case AArch64::SABALv4i32_v2i64:
    return AArch64::ADDv2i64;
  case AArch64::UABALv4i16_v4i32:
  case AArch64::SABALv4i16_v4i32:
  case AArch64::SABALv8i16_v4i32:
  case AArch64::SABAv4i32:
  case AArch64::UABAv4i32:
    return AArch64::ADDv4i32;
  case AArch64::UABALv4i32_v2i64:
    return AArch64::ADDv2i64;
  case AArch64::UABALv8i16_v4i32:
    return AArch64::ADDv4i32;
  case AArch64::UABALv8i8_v8i16:
  case AArch64::SABALv16i8_v8i16:
    return AArch64::ADDv8i16;
  case AArch64::UABAv16i8:
  case AArch64::SABAv16i8:
    return AArch64::ADDv16i8;
  case AArch64::UABAv4i16:
  case AArch64::SABAv4i16:
    return AArch64::ADDv4i16;
  case AArch64::UABAv2i32:
  case AArch64::SABAv2i32:
    return AArch64::ADDv2i32;
  case AArch64::UABAv8i8:
  case AArch64::SABAv8i8:
    return AArch64::ADDv8i8;
  default:
    llvm_unreachable("Unknown accumulator opcode");
  }
}
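// Illustrative sketch (hypothetical registers) of how the reduce opcode can
// be used: a serial chain
//   %a1 = UABAv8i16 %a0, %x0, %y0
//   %a2 = UABAv8i16 %a1, %x1, %y1
// can be computed as two independent chains summed at the end:
//   %b  = UABDv8i16 %x1, %y1
//   %a1 = UABAv8i16 %a0, %x0, %y0
//   %a2 = ADDv8i16 %a1, %b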
/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence.
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, unsigned Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL = nullptr;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case AArch64MachineCombinerPattern::SUBADD_OP1:
    // A - (B + C)
    // ==> (A - B) - C
    genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
                     InstrIdxForVirtReg);
    return;
  case AArch64MachineCombinerPattern::SUBADD_OP2:
    // A - (B + C)
    // ==> (A - C) - B
    genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
                     InstrIdxForVirtReg);
    return;
  case AArch64MachineCombinerPattern::MULADDW_OP1:
  case AArch64MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDW_OP2:
  case AArch64MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDWI_OP1:
  case AArch64MachineCombinerPattern::MULADDXI_OP1:
  case AArch64MachineCombinerPattern::MULSUBWI_OP1:
  case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // ADD/SUB R,I,Imm
    // ==> MOV V, Imm/-Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    unsigned BitSize, MovImm;
    if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1 ||
        Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
      MovImm = AArch64::MOVi32imm;
      BitSize = 32;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      MovImm = AArch64::MOVi64imm;
      BitSize = 64;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    Register NewVR = MRI.createVirtualRegister(RC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
                 Pattern == AArch64MachineCombinerPattern::MULSUBXI_OP1;
    uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
    // Check that the immediate can be composed via a single instruction.
    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
    AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
    if (Insn.size() != 1)
      return;
    MachineInstrBuilder MIB1 =
        BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
            .addImm(IsSub ? -Imm : Imm);
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case AArch64MachineCombinerPattern::MULSUBW_OP1:
  case AArch64MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I,C
    // ==> SUB V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    Register NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case AArch64MachineCombinerPattern::MULSUBW_OP2:
  case AArch64MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
    Opc = AArch64::MLAv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
    Opc = AArch64::MLAv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
    Opc = AArch64::MLAv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
    Opc = AArch64::MLAv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
    Opc = AArch64::MLAv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
    Opc = AArch64::MLAv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
    Opc = AArch64::MLAv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
    Opc = AArch64::MLAv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
    Opc = AArch64::MLAv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
    Opc = AArch64::MLAv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
    Opc = AArch64::MLAv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
    Opc = AArch64::MLAv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
    Opc = AArch64::MLAv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
    Opc = AArch64::MLSv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
    Opc = AArch64::MLAv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
    Opc = AArch64::MLSv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
    Opc = AArch64::MLAv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
    Opc = AArch64::MLSv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
    Opc = AArch64::MLAv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
    Opc = AArch64::MLSv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
    Opc = AArch64::MLAv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
    Opc = AArch64::MLSv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
    Opc = AArch64::MLAv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
    Opc = AArch64::MLSv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
    Opc = AArch64::MLAv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
    Opc = AArch64::MLAv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
    Opc = AArch64::MLAv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
    Opc = AArch64::MLAv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
    Opc = AArch64::MLAv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
    Opc = AArch64::MLAv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
    Opc = AArch64::MLAv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
    Opc = AArch64::MLAv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
    Opc = AArch64::MLAv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
    Opc = AArch64::MLSv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
    Opc = AArch64::MLAv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
    Opc = AArch64::MLSv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
    Opc = AArch64::MLAv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
    Opc = AArch64::MLSv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
    Opc = AArch64::MLAv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
                                 RC);
    break;
  case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
    Opc = AArch64::MLSv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  // Floating Point Support
  case AArch64MachineCombinerPattern::FMULADDH_OP1:
    Opc = AArch64::FMADDHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULADDS_OP1:
    Opc = AArch64::FMADDSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULADDD_OP1:
    Opc = AArch64::FMADDDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::FMULADDH_OP2:
    Opc = AArch64::FMADDHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULADDS_OP2:
    Opc = AArch64::FMADDSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULADDD_OP2:
    Opc = AArch64::FMADDDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;

  case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;

  case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case AArch64MachineCombinerPattern::FMULSUBH_OP1:
    Opc = AArch64::FNMSUBHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULSUBS_OP1:
    Opc = AArch64::FNMSUBSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULSUBD_OP1:
    Opc = AArch64::FNMSUBDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
    Opc = AArch64::FNMADDHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
    Opc = AArch64::FNMADDSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
    Opc = AArch64::FNMADDDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::FMULSUBH_OP2:
    Opc = AArch64::FMSUBHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULSUBS_OP2:
    Opc = AArch64::FMSUBSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case AArch64MachineCombinerPattern::FMULSUBD_OP2:
    Opc = AArch64::FMSUBDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case
AArch64MachineCombinerPattern::FMLSv4f16_OP1: 8468 case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1: { 8469 RC = &AArch64::FPR64RegClass; 8470 Register NewVR = MRI.createVirtualRegister(RC); 8471 MachineInstrBuilder MIB1 = 8472 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR) 8473 .add(Root.getOperand(2)); 8474 InsInstrs.push_back(MIB1); 8475 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 8476 if (Pattern == AArch64MachineCombinerPattern::FMLSv4f16_OP1) { 8477 Opc = AArch64::FMLAv4f16; 8478 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 8479 FMAInstKind::Accumulator, &NewVR); 8480 } else { 8481 Opc = AArch64::FMLAv4i16_indexed; 8482 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 8483 FMAInstKind::Indexed, &NewVR); 8484 } 8485 break; 8486 } 8487 case AArch64MachineCombinerPattern::FMLSv4f16_OP2: 8488 RC = &AArch64::FPR64RegClass; 8489 Opc = AArch64::FMLSv4f16; 8490 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 8491 FMAInstKind::Accumulator); 8492 break; 8493 case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2: 8494 RC = &AArch64::FPR64RegClass; 8495 Opc = AArch64::FMLSv4i16_indexed; 8496 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 8497 FMAInstKind::Indexed); 8498 break; 8499 8500 case AArch64MachineCombinerPattern::FMLSv2f32_OP2: 8501 case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2: 8502 RC = &AArch64::FPR64RegClass; 8503 if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2) { 8504 Opc = AArch64::FMLSv2i32_indexed; 8505 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 8506 FMAInstKind::Indexed); 8507 } else { 8508 Opc = AArch64::FMLSv2f32; 8509 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 8510 FMAInstKind::Accumulator); 8511 } 8512 break; 8513 8514 case AArch64MachineCombinerPattern::FMLSv8f16_OP1: 8515 case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1: { 8516 RC = &AArch64::FPR128RegClass; 8517 Register NewVR = MRI.createVirtualRegister(RC); 8518 MachineInstrBuilder MIB1 = 8519 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR) 8520 .add(Root.getOperand(2)); 8521 InsInstrs.push_back(MIB1); 8522 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 8523 if (Pattern == AArch64MachineCombinerPattern::FMLSv8f16_OP1) { 8524 Opc = AArch64::FMLAv8f16; 8525 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 8526 FMAInstKind::Accumulator, &NewVR); 8527 } else { 8528 Opc = AArch64::FMLAv8i16_indexed; 8529 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 8530 FMAInstKind::Indexed, &NewVR); 8531 } 8532 break; 8533 } 8534 case AArch64MachineCombinerPattern::FMLSv8f16_OP2: 8535 RC = &AArch64::FPR128RegClass; 8536 Opc = AArch64::FMLSv8f16; 8537 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 8538 FMAInstKind::Accumulator); 8539 break; 8540 case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2: 8541 RC = &AArch64::FPR128RegClass; 8542 Opc = AArch64::FMLSv8i16_indexed; 8543 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 8544 FMAInstKind::Indexed); 8545 break; 8546 8547 case AArch64MachineCombinerPattern::FMLSv2f64_OP2: 8548 case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2: 8549 RC = &AArch64::FPR128RegClass; 8550 if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2) { 8551 Opc = AArch64::FMLSv2i64_indexed; 8552 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 8553 FMAInstKind::Indexed); 8554 } 
else { 8555 Opc = AArch64::FMLSv2f64; 8556 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 8557 FMAInstKind::Accumulator); 8558 } 8559 break; 8560 8561 case AArch64MachineCombinerPattern::FMLSv4f32_OP2: 8562 case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2: 8563 RC = &AArch64::FPR128RegClass; 8564 if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2) { 8565 Opc = AArch64::FMLSv4i32_indexed; 8566 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 8567 FMAInstKind::Indexed); 8568 } else { 8569 Opc = AArch64::FMLSv4f32; 8570 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 8571 FMAInstKind::Accumulator); 8572 } 8573 break; 8574 case AArch64MachineCombinerPattern::FMLSv2f32_OP1: 8575 case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1: { 8576 RC = &AArch64::FPR64RegClass; 8577 Register NewVR = MRI.createVirtualRegister(RC); 8578 MachineInstrBuilder MIB1 = 8579 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR) 8580 .add(Root.getOperand(2)); 8581 InsInstrs.push_back(MIB1); 8582 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 8583 if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1) { 8584 Opc = AArch64::FMLAv2i32_indexed; 8585 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 8586 FMAInstKind::Indexed, &NewVR); 8587 } else { 8588 Opc = AArch64::FMLAv2f32; 8589 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 8590 FMAInstKind::Accumulator, &NewVR); 8591 } 8592 break; 8593 } 8594 case AArch64MachineCombinerPattern::FMLSv4f32_OP1: 8595 case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1: { 8596 RC = &AArch64::FPR128RegClass; 8597 Register NewVR = MRI.createVirtualRegister(RC); 8598 MachineInstrBuilder MIB1 = 8599 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR) 8600 .add(Root.getOperand(2)); 8601 InsInstrs.push_back(MIB1); 8602 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 8603 if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1) { 8604 Opc = AArch64::FMLAv4i32_indexed; 8605 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 8606 FMAInstKind::Indexed, &NewVR); 8607 } else { 8608 Opc = AArch64::FMLAv4f32; 8609 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 8610 FMAInstKind::Accumulator, &NewVR); 8611 } 8612 break; 8613 } 8614 case AArch64MachineCombinerPattern::FMLSv2f64_OP1: 8615 case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1: { 8616 RC = &AArch64::FPR128RegClass; 8617 Register NewVR = MRI.createVirtualRegister(RC); 8618 MachineInstrBuilder MIB1 = 8619 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR) 8620 .add(Root.getOperand(2)); 8621 InsInstrs.push_back(MIB1); 8622 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 8623 if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1) { 8624 Opc = AArch64::FMLAv2i64_indexed; 8625 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 8626 FMAInstKind::Indexed, &NewVR); 8627 } else { 8628 Opc = AArch64::FMLAv2f64; 8629 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 8630 FMAInstKind::Accumulator, &NewVR); 8631 } 8632 break; 8633 } 8634 case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1: 8635 case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2: { 8636 unsigned IdxDupOp = 8637 (Pattern == AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 
                                                                            1
                                                                          : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
                       &AArch64::FPR128RegClass, MRI);
    break;
  }
  case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1
                                                                          : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
                       &AArch64::FPR128RegClass, MRI);
    break;
  }
  case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1
                                                                          : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
                       &AArch64::FPR128_loRegClass, MRI);
    break;
  }
  case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1
                                                                          : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
                       &AArch64::FPR128RegClass, MRI);
    break;
  }
  case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
  case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1
                                                                          : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
                       &AArch64::FPR128_loRegClass, MRI);
    break;
  }
  case AArch64MachineCombinerPattern::FNMADD: {
    MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
    break;
  }

  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion.
  if (MUL)
    DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);

  // Set the flags on the inserted instructions to be the merged flags of the
  // instructions that we have combined.
  uint32_t Flags = Root.getFlags();
  if (MUL)
    Flags = Root.mergeFlagsWith(*MUL);
  for (auto *MI : InsInstrs)
    MI->setFlags(Flags);
}

/// Replace csinc-branch sequence by simple conditional branch
///
/// Examples:
/// 1. \code
///   csinc w9, wzr, wzr, <condition code>
///   tbnz  w9, #0, 0x44
/// \endcode
/// to
/// \code
///   b.<inverted condition code>
/// \endcode
///
/// 2. \code
///   csinc w9, wzr, wzr, <condition code>
///   tbz   w9, #0, 0x44
/// \endcode
/// to
/// \code
///   b.<condition code>
/// \endcode
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is a power of 2.
8722 /// 8723 /// Examples: 8724 /// \code 8725 /// and w8, w8, #0x400 8726 /// cbnz w8, L1 8727 /// \endcode 8728 /// to 8729 /// \code 8730 /// tbnz w8, #10, L1 8731 /// \endcode 8732 /// 8733 /// \param MI Conditional Branch 8734 /// \return True when the simple conditional branch is generated 8735 /// 8736 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { 8737 bool IsNegativeBranch = false; 8738 bool IsTestAndBranch = false; 8739 unsigned TargetBBInMI = 0; 8740 switch (MI.getOpcode()) { 8741 default: 8742 llvm_unreachable("Unknown branch instruction?"); 8743 case AArch64::Bcc: 8744 case AArch64::CBWPri: 8745 case AArch64::CBXPri: 8746 case AArch64::CBWPrr: 8747 case AArch64::CBXPrr: 8748 return false; 8749 case AArch64::CBZW: 8750 case AArch64::CBZX: 8751 TargetBBInMI = 1; 8752 break; 8753 case AArch64::CBNZW: 8754 case AArch64::CBNZX: 8755 TargetBBInMI = 1; 8756 IsNegativeBranch = true; 8757 break; 8758 case AArch64::TBZW: 8759 case AArch64::TBZX: 8760 TargetBBInMI = 2; 8761 IsTestAndBranch = true; 8762 break; 8763 case AArch64::TBNZW: 8764 case AArch64::TBNZX: 8765 TargetBBInMI = 2; 8766 IsNegativeBranch = true; 8767 IsTestAndBranch = true; 8768 break; 8769 } 8770 // So we increment a zero register and test for bits other 8771 // than bit 0? Conservatively bail out in case the verifier 8772 // missed this case. 8773 if (IsTestAndBranch && MI.getOperand(1).getImm()) 8774 return false; 8775 8776 // Find Definition. 8777 assert(MI.getParent() && "Incomplete machine instruction\n"); 8778 MachineBasicBlock *MBB = MI.getParent(); 8779 MachineFunction *MF = MBB->getParent(); 8780 MachineRegisterInfo *MRI = &MF->getRegInfo(); 8781 Register VReg = MI.getOperand(0).getReg(); 8782 if (!VReg.isVirtual()) 8783 return false; 8784 8785 MachineInstr *DefMI = MRI->getVRegDef(VReg); 8786 8787 // Look through COPY instructions to find definition. 8788 while (DefMI->isCopy()) { 8789 Register CopyVReg = DefMI->getOperand(1).getReg(); 8790 if (!MRI->hasOneNonDBGUse(CopyVReg)) 8791 return false; 8792 if (!MRI->hasOneDef(CopyVReg)) 8793 return false; 8794 DefMI = MRI->getVRegDef(CopyVReg); 8795 } 8796 8797 switch (DefMI->getOpcode()) { 8798 default: 8799 return false; 8800 // Fold AND into a TBZ/TBNZ if constant operand is power of 2. 8801 case AArch64::ANDWri: 8802 case AArch64::ANDXri: { 8803 if (IsTestAndBranch) 8804 return false; 8805 if (DefMI->getParent() != MBB) 8806 return false; 8807 if (!MRI->hasOneNonDBGUse(VReg)) 8808 return false; 8809 8810 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri); 8811 uint64_t Mask = AArch64_AM::decodeLogicalImmediate( 8812 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64); 8813 if (!isPowerOf2_64(Mask)) 8814 return false; 8815 8816 MachineOperand &MO = DefMI->getOperand(1); 8817 Register NewReg = MO.getReg(); 8818 if (!NewReg.isVirtual()) 8819 return false; 8820 8821 assert(!MRI->def_empty(NewReg) && "Register must be defined."); 8822 8823 MachineBasicBlock &RefToMBB = *MBB; 8824 MachineBasicBlock *TBB = MI.getOperand(1).getMBB(); 8825 DebugLoc DL = MI.getDebugLoc(); 8826 unsigned Imm = Log2_64(Mask); 8827 unsigned Opc = (Imm < 32) 8828 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW) 8829 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX); 8830 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc)) 8831 .addReg(NewReg) 8832 .addImm(Imm) 8833 .addMBB(TBB); 8834 // Register lives on to the CBZ now. 8835 MO.setIsKill(false); 8836 8837 // For immediate smaller than 32, we need to use the 32-bit 8838 // variant (W) in all cases. 
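    // For example (illustrative sketch, not from the original source):
    //   and  x8, x8, #0x8
    //   cbnz x8, L1
    // folds to
    //   tbnz w8, #3, L1
    // using the W form even though the AND operated on the full 64 bits.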
    // The 64-bit variant cannot encode immediates smaller than 32.
    // Therefore, if the input register is 64-bit, we need to take the
    // 32-bit sub-part.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
  // Look for CSINC.
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
                                         true) != -1)
      return false;

    AArch64CC::CondCode CC =
        (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(),
                                       AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}

std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return ArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_COFFSTUB, "aarch64-coffstub"},
      {MO_GOT, "aarch64-got"},
      {MO_NC, "aarch64-nc"},
      {MO_S, "aarch64-s"},
      {MO_TLS, "aarch64-tls"},
      {MO_DLLIMPORT, "aarch64-dllimport"},
      {MO_PREL, "aarch64-prel"},
      {MO_TAGGED, "aarch64-tagged"},
      {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
  };
  return ArrayRef(TargetFlags);
}

ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
  static const std::pair<MachineMemOperand::Flags, const char *>
      TargetFlags[] = {{MOSuppressPair, "aarch64-suppress-pair"},
                       {MOStridedAccess, "aarch64-strided-access"}};
  return ArrayRef(TargetFlags);
}

/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind
/// of frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
8928 /// 8929 /// That is, 8930 /// 8931 /// I1 Save LR OUTLINED_FUNCTION: 8932 /// I2 --> BL OUTLINED_FUNCTION I1 8933 /// I3 Restore LR I2 8934 /// I3 8935 /// RET 8936 /// 8937 /// * Call construction overhead: 3 (save + BL + restore) 8938 /// * Frame construction overhead: 1 (ret) 8939 /// * Requires stack fixups? Yes 8940 /// 8941 /// \p MachineOutlinerTailCall implies that the function is being created from 8942 /// a sequence of instructions ending in a return. 8943 /// 8944 /// That is, 8945 /// 8946 /// I1 OUTLINED_FUNCTION: 8947 /// I2 --> B OUTLINED_FUNCTION I1 8948 /// RET I2 8949 /// RET 8950 /// 8951 /// * Call construction overhead: 1 (B) 8952 /// * Frame construction overhead: 0 (Return included in sequence) 8953 /// * Requires stack fixups? No 8954 /// 8955 /// \p MachineOutlinerNoLRSave implies that the function should be called using 8956 /// a BL instruction, but doesn't require LR to be saved and restored. This 8957 /// happens when LR is known to be dead. 8958 /// 8959 /// That is, 8960 /// 8961 /// I1 OUTLINED_FUNCTION: 8962 /// I2 --> BL OUTLINED_FUNCTION I1 8963 /// I3 I2 8964 /// I3 8965 /// RET 8966 /// 8967 /// * Call construction overhead: 1 (BL) 8968 /// * Frame construction overhead: 1 (RET) 8969 /// * Requires stack fixups? No 8970 /// 8971 /// \p MachineOutlinerThunk implies that the function is being created from 8972 /// a sequence of instructions ending in a call. The outlined function is 8973 /// called with a BL instruction, and the outlined function tail-calls the 8974 /// original call destination. 8975 /// 8976 /// That is, 8977 /// 8978 /// I1 OUTLINED_FUNCTION: 8979 /// I2 --> BL OUTLINED_FUNCTION I1 8980 /// BL f I2 8981 /// B f 8982 /// * Call construction overhead: 1 (BL) 8983 /// * Frame construction overhead: 0 8984 /// * Requires stack fixups? No 8985 /// 8986 /// \p MachineOutlinerRegSave implies that the function should be called with a 8987 /// save and restore of LR to an available register. This allows us to avoid 8988 /// stack fixups. Note that this outlining variant is compatible with the 8989 /// NoLRSave case. 8990 /// 8991 /// That is, 8992 /// 8993 /// I1 Save LR OUTLINED_FUNCTION: 8994 /// I2 --> BL OUTLINED_FUNCTION I1 8995 /// I3 Restore LR I2 8996 /// I3 8997 /// RET 8998 /// 8999 /// * Call construction overhead: 3 (save + BL + restore) 9000 /// * Frame construction overhead: 1 (ret) 9001 /// * Requires stack fixups? No 9002 enum MachineOutlinerClass { 9003 MachineOutlinerDefault, /// Emit a save, restore, call, and return. 9004 MachineOutlinerTailCall, /// Only emit a branch. 9005 MachineOutlinerNoLRSave, /// Emit a call and return. 9006 MachineOutlinerThunk, /// Emit a call and tail-call. 9007 MachineOutlinerRegSave /// Same as default, but save to a register. 9008 }; 9009 9010 enum MachineOutlinerMBBFlags { 9011 LRUnavailableSomewhere = 0x2, 9012 HasCalls = 0x4, 9013 UnsafeRegsDead = 0x8 9014 }; 9015 9016 Register 9017 AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const { 9018 MachineFunction *MF = C.getMF(); 9019 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); 9020 const AArch64RegisterInfo *ARI = 9021 static_cast<const AArch64RegisterInfo *>(&TRI); 9022 // Check if there is an available register across the sequence that we can 9023 // use. 9024 for (unsigned Reg : AArch64::GPR64RegClass) { 9025 if (!ARI->isReservedReg(*MF, Reg) && 9026 Reg != AArch64::LR && // LR is not reserved, but don't use it. 9027 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved. 
9028 Reg != AArch64::X17 && // Ditto for X17. 9029 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) && 9030 C.isAvailableInsideSeq(Reg, TRI)) 9031 return Reg; 9032 } 9033 return Register(); 9034 } 9035 9036 static bool 9037 outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, 9038 const outliner::Candidate &b) { 9039 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>(); 9040 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>(); 9041 9042 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) && 9043 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true); 9044 } 9045 9046 static bool 9047 outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, 9048 const outliner::Candidate &b) { 9049 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>(); 9050 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>(); 9051 9052 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey(); 9053 } 9054 9055 static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, 9056 const outliner::Candidate &b) { 9057 const AArch64Subtarget &SubtargetA = 9058 a.getMF()->getSubtarget<AArch64Subtarget>(); 9059 const AArch64Subtarget &SubtargetB = 9060 b.getMF()->getSubtarget<AArch64Subtarget>(); 9061 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps(); 9062 } 9063 9064 std::optional<std::unique_ptr<outliner::OutlinedFunction>> 9065 AArch64InstrInfo::getOutliningCandidateInfo( 9066 const MachineModuleInfo &MMI, 9067 std::vector<outliner::Candidate> &RepeatedSequenceLocs, 9068 unsigned MinRepeats) const { 9069 unsigned SequenceSize = 0; 9070 for (auto &MI : RepeatedSequenceLocs[0]) 9071 SequenceSize += getInstSizeInBytes(MI); 9072 9073 unsigned NumBytesToCreateFrame = 0; 9074 9075 // We only allow outlining for functions having exactly matching return 9076 // address signing attributes, i.e., all share the same value for the 9077 // attribute "sign-return-address" and all share the same type of key they 9078 // are signed with. 9079 // Additionally we require all functions to simultaneously either support 9080 // v8.3a features or not. Otherwise an outlined function could get signed 9081 // using dedicated v8.3 instructions and a call from a function that doesn't 9082 // support v8.3 instructions would therefore be invalid. 9083 if (std::adjacent_find( 9084 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), 9085 [](const outliner::Candidate &a, const outliner::Candidate &b) { 9086 // Return true if a and b are non-equal w.r.t. return address 9087 // signing or support of v8.3a features 9088 if (outliningCandidatesSigningScopeConsensus(a, b) && 9089 outliningCandidatesSigningKeyConsensus(a, b) && 9090 outliningCandidatesV8_3OpsConsensus(a, b)) { 9091 return false; 9092 } 9093 return true; 9094 }) != RepeatedSequenceLocs.end()) { 9095 return std::nullopt; 9096 } 9097 9098 // Since at this point all candidates agree on their return address signing 9099 // picking just one is fine. If the candidate functions potentially sign their 9100 // return addresses, the outlined function should do the same. Note that in 9101 // the case of "sign-return-address"="non-leaf" this is an assumption: It is 9102 // not certainly true that the outlined function will have to sign its return 9103 // address but this decision is made later, when the decision to outline 9104 // has already been made. 
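  // (Illustrative example, not from the original source: if one candidate's
  // function was built with "sign-return-address"="all" and another with
  // "none", the adjacent_find consensus check above finds the mismatch and
  // we return std::nullopt rather than outline across it.)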
  // The same holds for the number of additional instructions we need: On
  // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
  // necessary. However, at this point we don't know if the outlined function
  // will have a RET instruction so we assume the worst.
  const TargetRegisterInfo &TRI = getRegisterInfo();
  // Performing a tail call may require extra checks when PAuth is enabled.
  // If PAuth is disabled, set it to zero for uniformity.
  unsigned NumBytesToCheckLRInTCEpilogue = 0;
  if (RepeatedSequenceLocs[0]
          .getMF()
          ->getInfo<AArch64FunctionInfo>()
          ->shouldSignReturnAddress(true)) {
    // One PAC and one AUT instruction.
    NumBytesToCreateFrame += 8;

    // PAuth is enabled - set extra tail call cost, if any.
    auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
        *RepeatedSequenceLocs[0].getMF());
    NumBytesToCheckLRInTCEpilogue =
        AArch64PAuth::getCheckerSizeInBytes(LRCheckMethod);
    // Checking the authenticated LR value may significantly impact
    // SequenceSize, so account for it for more precise results.
    if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
      SequenceSize += NumBytesToCheckLRInTCEpilogue;

    // We have to check if sp-modifying instructions would get outlined.
    // If so, we only allow outlining if sp is unchanged overall, so matching
    // sub and add instructions are okay to outline; all other sp
    // modifications are not.
    auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
      int SPValue = 0;
      for (auto &MI : C) {
        if (MI.modifiesRegister(AArch64::SP, &TRI)) {
          switch (MI.getOpcode()) {
          case AArch64::ADDXri:
          case AArch64::ADDWri:
            assert(MI.getNumOperands() == 4 && "Wrong number of operands");
            assert(MI.getOperand(2).isImm() &&
                   "Expected operand to be immediate");
            assert(MI.getOperand(1).isReg() &&
                   "Expected operand to be a register");
            // Check if the add just increments sp. If so, we search for
            // matching sub instructions that decrement sp. If not, the
            // modification is illegal.
            if (MI.getOperand(1).getReg() == AArch64::SP)
              SPValue += MI.getOperand(2).getImm();
            else
              return true;
            break;
          case AArch64::SUBXri:
          case AArch64::SUBWri:
            assert(MI.getNumOperands() == 4 && "Wrong number of operands");
            assert(MI.getOperand(2).isImm() &&
                   "Expected operand to be immediate");
            assert(MI.getOperand(1).isReg() &&
                   "Expected operand to be a register");
            // Check if the sub just decrements sp. If so, we search for
            // matching add instructions that increment sp. If not, the
            // modification is illegal.
            if (MI.getOperand(1).getReg() == AArch64::SP)
              SPValue -= MI.getOperand(2).getImm();
            else
              return true;
            break;
          default:
            return true;
          }
        }
      }
      if (SPValue)
        return true;
      return false;
    };
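    // For example (illustrative, not from the original source): a candidate
    // containing the balanced pair
    //   sub sp, sp, #16
    //   ...
    //   add sp, sp, #16
    // nets SPValue == 0 and survives the check below, while an unmatched
    //   sub sp, sp, #16
    // (or any other kind of write to sp) marks the candidate illegal.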
    // Remove candidates with illegal stack-modifying instructions.
    llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);

    // If the sequence doesn't have enough candidates left, then we're done.
    if (RepeatedSequenceLocs.size() < MinRepeats)
      return std::nullopt;
  }

  // Properties about candidate MBBs that hold for all of them.
  unsigned FlagsSetInAll = 0xF;

  // Compute liveness information for each candidate, and set FlagsSetInAll.
  for (outliner::Candidate &C : RepeatedSequenceLocs)
    FlagsSetInAll &= C.Flags;

  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();

  // Helper lambda which sets call information for every candidate.
  auto SetCandidateCallInfo =
      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
        for (outliner::Candidate &C : RepeatedSequenceLocs)
          C.setCallInfo(CallID, NumBytesForCall);
      };

  unsigned FrameID = MachineOutlinerDefault;
  NumBytesToCreateFrame += 4;

  bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
    return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
  });

  // We check to see if CFI instructions are present, and if they are
  // we find the number of CFI instructions in the candidates.
  unsigned CFICount = 0;
  for (auto &I : RepeatedSequenceLocs[0]) {
    if (I.isCFIInstruction())
      CFICount++;
  }

  // We compare the number of found CFI instructions to the number of CFI
  // instructions in the parent function for each candidate. We must check
  // this since if we outline one of the CFI instructions in a function, we
  // have to outline them all for correctness. If we do not, the address
  // offsets will be incorrect between the two sections of the program.
  for (outliner::Candidate &C : RepeatedSequenceLocs) {
    std::vector<MCCFIInstruction> CFIInstructions =
        C.getMF()->getFrameInstructions();

    if (CFICount > 0 && CFICount != CFIInstructions.size())
      return std::nullopt;
  }

  // Returns true if an instruction is safe to fix up, false otherwise.
  auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
    if (MI.isCall())
      return true;

    if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
        !MI.readsRegister(AArch64::SP, &TRI))
      return true;

    // Any modification of SP will break our code to save/restore LR.
    // FIXME: We could handle some instructions which add a constant
    // offset to SP, with a bit more work.
    if (MI.modifiesRegister(AArch64::SP, &TRI))
      return false;

    // At this point, we have a stack instruction that we might need to
    // fix up. We'll handle it if it's a load or store.
    if (MI.mayLoadOrStore()) {
      const MachineOperand *Base; // Filled with the base operand of MI.
      int64_t Offset;             // Filled with the offset of MI.
      bool OffsetIsScalable;

      // Does it allow us to offset the base operand and is the base the
      // register SP?
      if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
          !Base->isReg() || Base->getReg() != AArch64::SP)
        return false;

      // Fix-up code below assumes byte offsets.
      if (OffsetIsScalable)
        return false;

      // Find the minimum/maximum offset for this instruction and check
      // if fixing it up would be in range.
      int64_t MinOffset,
          MaxOffset; // Unscaled offsets for the instruction.
      // The scale to multiply the offsets by.
      TypeSize Scale(0U, false), DummyWidth(0U, false);
      getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);

      Offset += 16; // Update the offset to what it would be if we outlined.
      if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
          Offset > MaxOffset * (int64_t)Scale.getFixedValue())
        return false;

      // It's in range, so we can outline it.
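      // For example (illustrative, not from the original source): after the
      // 16-byte LR spill, an in-sequence
      //   ldr x0, [sp, #8]
      // would have to become
      //   ldr x0, [sp, #24]
      // so it is only safe to outline if #24 is still encodable here.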
9276 return true; 9277 } 9278 9279 // FIXME: Add handling for instructions like "add x0, sp, #8". 9280 9281 // We can't fix it up, so don't outline it. 9282 return false; 9283 }; 9284 9285 // True if it's possible to fix up each stack instruction in this sequence. 9286 // Important for frames/call variants that modify the stack. 9287 bool AllStackInstrsSafe = 9288 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup); 9289 9290 // If the last instruction in any candidate is a terminator, then we should 9291 // tail call all of the candidates. 9292 if (RepeatedSequenceLocs[0].back().isTerminator()) { 9293 FrameID = MachineOutlinerTailCall; 9294 NumBytesToCreateFrame = 0; 9295 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue; 9296 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall); 9297 } 9298 9299 else if (LastInstrOpcode == AArch64::BL || 9300 ((LastInstrOpcode == AArch64::BLR || 9301 LastInstrOpcode == AArch64::BLRNoIP) && 9302 !HasBTI)) { 9303 // FIXME: Do we need to check if the code after this uses the value of LR? 9304 FrameID = MachineOutlinerThunk; 9305 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue; 9306 SetCandidateCallInfo(MachineOutlinerThunk, 4); 9307 } 9308 9309 else { 9310 // We need to decide how to emit calls + frames. We can always emit the same 9311 // frame if we don't need to save to the stack. If we have to save to the 9312 // stack, then we need a different frame. 9313 unsigned NumBytesNoStackCalls = 0; 9314 std::vector<outliner::Candidate> CandidatesWithoutStackFixups; 9315 9316 // Check if we have to save LR. 9317 for (outliner::Candidate &C : RepeatedSequenceLocs) { 9318 bool LRAvailable = 9319 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere) 9320 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) 9321 : true; 9322 // If we have a noreturn caller, then we're going to be conservative and 9323 // say that we have to save LR. If we don't have a ret at the end of the 9324 // block, then we can't reason about liveness accurately. 9325 // 9326 // FIXME: We can probably do better than always disabling this in 9327 // noreturn functions by fixing up the liveness info. 9328 bool IsNoReturn = 9329 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn); 9330 9331 // Is LR available? If so, we don't need a save. 9332 if (LRAvailable && !IsNoReturn) { 9333 NumBytesNoStackCalls += 4; 9334 C.setCallInfo(MachineOutlinerNoLRSave, 4); 9335 CandidatesWithoutStackFixups.push_back(C); 9336 } 9337 9338 // Is an unused register available? If so, we won't modify the stack, so 9339 // we can outline with the same frame type as those that don't save LR. 9340 else if (findRegisterToSaveLRTo(C)) { 9341 NumBytesNoStackCalls += 12; 9342 C.setCallInfo(MachineOutlinerRegSave, 12); 9343 CandidatesWithoutStackFixups.push_back(C); 9344 } 9345 9346 // Is SP used in the sequence at all? If not, we don't have to modify 9347 // the stack, so we are guaranteed to get the same frame. 9348 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) { 9349 NumBytesNoStackCalls += 12; 9350 C.setCallInfo(MachineOutlinerDefault, 12); 9351 CandidatesWithoutStackFixups.push_back(C); 9352 } 9353 9354 // If we outline this, we need to modify the stack. Pretend we don't 9355 // outline this by saving all of its bytes. 9356 else { 9357 NumBytesNoStackCalls += SequenceSize; 9358 } 9359 } 9360 9361 // If there are no places where we have to save LR, then note that we 9362 // don't have to update the stack. 
    // Otherwise, give every candidate the default call type, as long as it's
    // safe to do so.
    if (!AllStackInstrsSafe ||
        NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
      FrameID = MachineOutlinerNoLRSave;
      if (RepeatedSequenceLocs.size() < MinRepeats)
        return std::nullopt;
    } else {
      SetCandidateCallInfo(MachineOutlinerDefault, 12);

      // Bugzilla ID: 46767
      // TODO: Check if fixing up the stack more than once is safe so we can
      // outline these.
      //
      // An outline resulting in a caller that requires stack fixups at the
      // callsite to a callee that also requires stack fixups can happen when
      // there are no available registers at the candidate callsite for a
      // candidate that itself also has calls.
      //
      // In other words, if function_containing_sequence in the following
      // pseudo assembly requires that we save LR at the point of the call,
      // but there are no available registers: in this case we save using SP
      // and as a result the SP offsets require stack fixups by multiples
      // of 16.
      //
      // function_containing_sequence:
      //   ...
      //   save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
      //   call OUTLINED_FUNCTION_N
      //   restore LR from SP
      //   ...
      //
      // OUTLINED_FUNCTION_N:
      //   save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
      //   ...
      //   bl foo
      //   restore LR from SP
      //   ret
      //
      // Because the code to handle more than one stack fixup does not
      // currently have the proper checks for legality, these cases will
      // assert in the AArch64 MachineOutliner. This is because the code to
      // do this needs more hardening, testing, better checks that generated
      // code is legal, etc., and because it is only verified to handle a
      // single pass of stack fixup.
      //
      // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
      // these cases until they are known to be handled. Bugzilla 46767 is
      // referenced in comments at the assert site.
      //
      // To avoid asserting (or generating non-legal code on noassert builds)
      // we remove all candidates which would need more than one stack fixup
      // by pruning the cases where the candidate has calls while also having
      // no available LR and having no available general purpose registers to
      // copy LR to (i.e. one extra stack save/restore).
      //
      if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
        erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
          auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
          return (llvm::any_of(C, IsCall)) &&
                 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
                  !findRegisterToSaveLRTo(C));
        });
      }
    }

    // If we dropped all of the candidates, bail out here.
    if (RepeatedSequenceLocs.size() < MinRepeats)
      return std::nullopt;
  }

  // Does every candidate's MBB contain a call? If so, then we might have a
  // call in the range.
  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
    // Check if the range contains a call. These require a save + restore of
    // the link register.
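    // (Illustrative example, not from the original source: a candidate whose
    // body contains a `bl memcpy` forces a save + restore of LR, which is
    // the 8-byte STR/LDR cost added below.)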
9438 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; 9439 bool ModStackToSaveLR = false; 9440 if (any_of(drop_end(FirstCand), 9441 [](const MachineInstr &MI) { return MI.isCall(); })) 9442 ModStackToSaveLR = true; 9443 9444 // Handle the last instruction separately. If this is a tail call, then the 9445 // last instruction is a call. We don't want to save + restore in this case. 9446 // However, it could be possible that the last instruction is a call without 9447 // it being valid to tail call this sequence. We should consider this as 9448 // well. 9449 else if (FrameID != MachineOutlinerThunk && 9450 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall()) 9451 ModStackToSaveLR = true; 9452 9453 if (ModStackToSaveLR) { 9454 // We can't fix up the stack. Bail out. 9455 if (!AllStackInstrsSafe) 9456 return std::nullopt; 9457 9458 // Save + restore LR. 9459 NumBytesToCreateFrame += 8; 9460 } 9461 } 9462 9463 // If we have CFI instructions, we can only outline if the outlined section 9464 // can be a tail call 9465 if (FrameID != MachineOutlinerTailCall && CFICount > 0) 9466 return std::nullopt; 9467 9468 return std::make_unique<outliner::OutlinedFunction>( 9469 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID); 9470 } 9471 9472 void AArch64InstrInfo::mergeOutliningCandidateAttributes( 9473 Function &F, std::vector<outliner::Candidate> &Candidates) const { 9474 // If a bunch of candidates reach this point they must agree on their return 9475 // address signing. It is therefore enough to just consider the signing 9476 // behaviour of one of them 9477 const auto &CFn = Candidates.front().getMF()->getFunction(); 9478 9479 if (CFn.hasFnAttribute("ptrauth-returns")) 9480 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns")); 9481 if (CFn.hasFnAttribute("ptrauth-auth-traps")) 9482 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps")); 9483 // Since all candidates belong to the same module, just copy the 9484 // function-level attributes of an arbitrary function. 9485 if (CFn.hasFnAttribute("sign-return-address")) 9486 F.addFnAttr(CFn.getFnAttribute("sign-return-address")); 9487 if (CFn.hasFnAttribute("sign-return-address-key")) 9488 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key")); 9489 9490 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates); 9491 } 9492 9493 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom( 9494 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { 9495 const Function &F = MF.getFunction(); 9496 9497 // Can F be deduplicated by the linker? If it can, don't outline from it. 9498 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 9499 return false; 9500 9501 // Don't outline from functions with section markings; the program could 9502 // expect that all the code is in the named section. 9503 // FIXME: Allow outlining from multiple functions with the same section 9504 // marking. 9505 if (F.hasSection()) 9506 return false; 9507 9508 // Outlining from functions with redzones is unsafe since the outliner may 9509 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't 9510 // outline from it. 9511 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 9512 if (!AFI || AFI->hasRedZone().value_or(true)) 9513 return false; 9514 9515 // FIXME: Determine whether it is safe to outline from functions which contain 9516 // streaming-mode changes. 
We may need to ensure any smstart/smstop pairs are 9517 // outlined together and ensure it is safe to outline with async unwind info, 9518 // required for saving & restoring VG around calls. 9519 if (AFI->hasStreamingModeChanges()) 9520 return false; 9521 9522 // FIXME: Teach the outliner to generate/handle Windows unwind info. 9523 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) 9524 return false; 9525 9526 // It's safe to outline from MF. 9527 return true; 9528 } 9529 9530 SmallVector<std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>> 9531 AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB, 9532 unsigned &Flags) const { 9533 assert(MBB.getParent()->getRegInfo().tracksLiveness() && 9534 "Must track liveness!"); 9535 SmallVector< 9536 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>> 9537 Ranges; 9538 // According to the AArch64 Procedure Call Standard, the following are 9539 // undefined on entry/exit from a function call: 9540 // 9541 // * Registers x16, x17, (and thus w16, w17) 9542 // * Condition codes (and thus the NZCV register) 9543 // 9544 // If any of these registers are used inside or live across an outlined 9545 // function, then they may be modified later, either by the compiler or 9546 // some other tool (like the linker). 9547 // 9548 // To avoid outlining in these situations, partition each block into ranges 9549 // where these registers are dead. We will only outline from those ranges. 9550 LiveRegUnits LRU(getRegisterInfo()); 9551 auto AreAllUnsafeRegsDead = [&LRU]() { 9552 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) && 9553 LRU.available(AArch64::NZCV); 9554 }; 9555 9556 // We need to know if LR is live across an outlining boundary later on in 9557 // order to decide how we'll create the outlined call, frame, etc. 9558 // 9559 // It's pretty expensive to check this for *every candidate* within a block. 9560 // That's some potentially n^2 behaviour, since in the worst case, we'd need 9561 // to compute liveness from the end of the block for O(n) candidates within 9562 // the block. 9563 // 9564 // So, to improve the average case, let's keep track of liveness from the end 9565 // of the block to the beginning of *every outlinable range*. If we know that 9566 // LR is available in every range we could outline from, then we know that 9567 // we don't need to check liveness for any candidate within that range. 9568 bool LRAvailableEverywhere = true; 9569 // Compute liveness bottom-up. 9570 LRU.addLiveOuts(MBB); 9571 // Update flags that require info about the entire MBB. 9572 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) { 9573 if (MI.isCall() && !MI.isTerminator()) 9574 Flags |= MachineOutlinerMBBFlags::HasCalls; 9575 }; 9576 // Range: [RangeBegin, RangeEnd) 9577 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd; 9578 unsigned RangeLen; 9579 auto CreateNewRangeStartingAt = 9580 [&RangeBegin, &RangeEnd, 9581 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) { 9582 RangeBegin = NewBegin; 9583 RangeEnd = std::next(RangeBegin); 9584 RangeLen = 0; 9585 }; 9586 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() { 9587 // At least one unsafe register is not dead. We do not want to outline at 9588 // this point. If it is long enough to outline from, save the range 9589 // [RangeBegin, RangeEnd). 9590 if (RangeLen > 1) 9591 Ranges.push_back(std::make_pair(RangeBegin, RangeEnd)); 9592 }; 9593 // Find the first point where all unsafe registers are dead. 
  // FIND: <safe instr> <-- end of first potential range
  // SKIP: <unsafe def>
  // SKIP: ... everything between ...
  // SKIP: <unsafe use>
  auto FirstPossibleEndPt = MBB.instr_rbegin();
  for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
    LRU.stepBackward(*FirstPossibleEndPt);
    // Update flags that impact how we outline across the entire block,
    // regardless of safety.
    UpdateWholeMBBFlags(*FirstPossibleEndPt);
    if (AreAllUnsafeRegsDead())
      break;
  }
  // If we exhausted the entire block, we have no safe ranges to outline.
  if (FirstPossibleEndPt == MBB.instr_rend())
    return Ranges;
  // Current range.
  CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
  // FirstPossibleEndPt points to the first place where all unsafe registers
  // are dead (if there is any such point). Begin partitioning the MBB into
  // ranges.
  for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
    LRU.stepBackward(MI);
    UpdateWholeMBBFlags(MI);
    if (!AreAllUnsafeRegsDead()) {
      SaveRangeIfNonEmpty();
      CreateNewRangeStartingAt(MI.getIterator());
      continue;
    }
    LRAvailableEverywhere &= LRU.available(AArch64::LR);
    RangeBegin = MI.getIterator();
    ++RangeLen;
  }
  // The loop above misses the last (or only) range. If we are still safe,
  // save that range as well.
  if (AreAllUnsafeRegsDead())
    SaveRangeIfNonEmpty();
  if (Ranges.empty())
    return Ranges;
  // We found the ranges bottom-up, but the mapping expects them top-down, so
  // reverse the order.
  std::reverse(Ranges.begin(), Ranges.end());
  // If there is at least one outlinable range where LR is unavailable
  // somewhere, remember that.
  if (!LRAvailableEverywhere)
    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
  return Ranges;
}

outliner::InstrType
AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
                                       MachineBasicBlock::iterator &MIT,
                                       unsigned Flags) const {
  MachineInstr &MI = *MIT;

  // Don't outline anything used for return address signing. The outlined
  // function will get signed later if needed.
  switch (MI.getOpcode()) {
  case AArch64::PACM:
  case AArch64::PACIASP:
  case AArch64::PACIBSP:
  case AArch64::PACIASPPC:
  case AArch64::PACIBSPPC:
  case AArch64::AUTIASP:
  case AArch64::AUTIBSP:
  case AArch64::AUTIASPPCi:
  case AArch64::AUTIASPPCr:
  case AArch64::AUTIBSPPCi:
  case AArch64::AUTIBSPPCr:
  case AArch64::RETAA:
  case AArch64::RETAB:
  case AArch64::RETAASPPCi:
  case AArch64::RETAASPPCr:
  case AArch64::RETABSPPCi:
  case AArch64::RETABSPPCr:
  case AArch64::EMITBKEY:
  case AArch64::PAUTH_PROLOGUE:
  case AArch64::PAUTH_EPILOGUE:
    return outliner::InstrType::Illegal;
  }

  // We can only outline these if we will tail call the outlined function, or
  // fix up the CFI offsets. Currently, CFI instructions are outlined only if
  // they are in a tail call.
  //
  // FIXME: If the proper fixups for the offset are implemented, this should
  // be possible.
  if (MI.isCFIInstruction())
    return outliner::InstrType::Legal;

  // Is this a terminator for a basic block?
  if (MI.isTerminator())
    // TargetInstrInfo::getOutliningType has already filtered out anything
    // that would break this, so we can allow it here.
    return outliner::InstrType::Legal;

  // Make sure none of the operands are un-outlinable.
  for (const MachineOperand &MOP : MI.operands()) {
    // A check preventing CFI indices was here before, but only CFI
    // instructions should have those.
    assert(!MOP.isCFIIndex());

    // If it uses LR or W30 explicitly, then don't touch it.
    if (MOP.isReg() && !MOP.isImplicit() &&
        (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
      return outliner::InstrType::Illegal;
  }

  // Special cases for instructions that can always be outlined, but will
  // fail the later tests, e.g., ADRPs, which are PC-relative and use LR, but
  // can always be outlined because they don't require a *specific* value to
  // be in LR.
  if (MI.getOpcode() == AArch64::ADRP)
    return outliner::InstrType::Legal;

  // If MI is a call we might be able to outline it. We don't want to outline
  // any calls that rely on the position of items on the stack. When we
  // outline something containing a call, we have to emit a save and restore
  // of LR in the outlined function. Currently, this always happens by saving
  // LR to the stack. Thus, if we outline, say, half the parameters for a
  // function call plus the call, then we'll break the callee's expectations
  // for the layout of the stack.
  //
  // FIXME: Allow calls to functions which construct a stack frame, as long
  // as they don't access arguments on the stack.
  // FIXME: Figure out some way to analyze functions defined in other modules.
  // We should be able to compute the memory usage based on the IR calling
  // convention, even if we can't see the definition.
  if (MI.isCall()) {
    // Get the function associated with the call. Look at each operand and
    // find the one that represents the callee and get its name.
    const Function *Callee = nullptr;
    for (const MachineOperand &MOP : MI.operands()) {
      if (MOP.isGlobal()) {
        Callee = dyn_cast<Function>(MOP.getGlobal());
        break;
      }
    }

    // Never outline calls to mcount. There isn't any rule that would require
    // this, but the Linux kernel's "ftrace" feature depends on it.
    if (Callee && Callee->getName() == "\01_mcount")
      return outliner::InstrType::Illegal;

    // If we don't know anything about the callee, assume it depends on the
    // stack layout of the caller. In that case, it's only legal to outline
    // as a tail-call. Explicitly list the call instructions we know about so
    // we don't get unexpected results with call pseudo-instructions.
    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    if (MI.getOpcode() == AArch64::BLR ||
        MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;

    if (!Callee)
      return UnknownCallOutlineType;

    // We have a function we have information about. Check if it's something
    // we can safely outline.
    MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);

    // We don't know what's going on with the callee at all. Don't touch it.
    if (!CalleeMF)
      return UnknownCallOutlineType;

    // Check if we know anything about the callee saves on the function. If
    // we don't, then don't touch it, since that implies that we haven't
    // computed anything about its stack frame yet.
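    // (Illustrative example, not from the original source: a callee with
    // valid callee-saved info, a zero-sized stack frame, and no frame
    // objects passes the check below, so a call to it can be outlined
    // normally.)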
9760 MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); 9761 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || 9762 MFI.getNumObjects() > 0) 9763 return UnknownCallOutlineType; 9764 9765 // At this point, we can say that CalleeMF ought to not pass anything on the 9766 // stack. Therefore, we can outline it. 9767 return outliner::InstrType::Legal; 9768 } 9769 9770 // Don't touch the link register or W30. 9771 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) || 9772 MI.modifiesRegister(AArch64::W30, &getRegisterInfo())) 9773 return outliner::InstrType::Illegal; 9774 9775 // Don't outline BTI instructions, because that will prevent the outlining 9776 // site from being indirectly callable. 9777 if (hasBTISemantics(MI)) 9778 return outliner::InstrType::Illegal; 9779 9780 return outliner::InstrType::Legal; 9781 } 9782 9783 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { 9784 for (MachineInstr &MI : MBB) { 9785 const MachineOperand *Base; 9786 TypeSize Width(0, false); 9787 int64_t Offset; 9788 bool OffsetIsScalable; 9789 9790 // Is this a load or store with an immediate offset with SP as the base? 9791 if (!MI.mayLoadOrStore() || 9792 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width, 9793 &RI) || 9794 (Base->isReg() && Base->getReg() != AArch64::SP)) 9795 continue; 9796 9797 // It is, so we have to fix it up. 9798 TypeSize Scale(0U, false); 9799 int64_t Dummy1, Dummy2; 9800 9801 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI); 9802 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!"); 9803 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2); 9804 assert(Scale != 0 && "Unexpected opcode!"); 9805 assert(!OffsetIsScalable && "Expected offset to be a byte offset"); 9806 9807 // We've pushed the return address to the stack, so add 16 to the offset. 9808 // This is safe, since we already checked if it would overflow when we 9809 // checked if this instruction was legal to outline. 9810 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue(); 9811 StackOffsetOperand.setImm(NewImm); 9812 } 9813 } 9814 9815 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, 9816 const AArch64InstrInfo *TII, 9817 bool ShouldSignReturnAddr) { 9818 if (!ShouldSignReturnAddr) 9819 return; 9820 9821 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE)) 9822 .setMIFlag(MachineInstr::FrameSetup); 9823 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(), 9824 TII->get(AArch64::PAUTH_EPILOGUE)) 9825 .setMIFlag(MachineInstr::FrameDestroy); 9826 } 9827 9828 void AArch64InstrInfo::buildOutlinedFrame( 9829 MachineBasicBlock &MBB, MachineFunction &MF, 9830 const outliner::OutlinedFunction &OF) const { 9831 9832 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>(); 9833 9834 if (OF.FrameConstructionID == MachineOutlinerTailCall) 9835 FI->setOutliningStyle("Tail Call"); 9836 else if (OF.FrameConstructionID == MachineOutlinerThunk) { 9837 // For thunk outlining, rewrite the last instruction from a call to a 9838 // tail-call. 
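    // A sketch of the rewrite done below (mnemonics illustrative):
    //   bl  callee   ==>  b  callee   (BL          -> TCRETURNdi)
    //   blr xN       ==>  br xN       (BLR/BLRNoIP -> TCRETURNriALL)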
9839 MachineInstr *Call = &*--MBB.instr_end(); 9840 unsigned TailOpcode; 9841 if (Call->getOpcode() == AArch64::BL) { 9842 TailOpcode = AArch64::TCRETURNdi; 9843 } else { 9844 assert(Call->getOpcode() == AArch64::BLR || 9845 Call->getOpcode() == AArch64::BLRNoIP); 9846 TailOpcode = AArch64::TCRETURNriALL; 9847 } 9848 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode)) 9849 .add(Call->getOperand(0)) 9850 .addImm(0); 9851 MBB.insert(MBB.end(), TC); 9852 Call->eraseFromParent(); 9853 9854 FI->setOutliningStyle("Thunk"); 9855 } 9856 9857 bool IsLeafFunction = true; 9858 9859 // Is there a call in the outlined range? 9860 auto IsNonTailCall = [](const MachineInstr &MI) { 9861 return MI.isCall() && !MI.isReturn(); 9862 }; 9863 9864 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) { 9865 // Fix up the instructions in the range, since we're going to modify the 9866 // stack. 9867 9868 // Bugzilla ID: 46767 9869 // TODO: Check if fixing up twice is safe so we can outline these. 9870 assert(OF.FrameConstructionID != MachineOutlinerDefault && 9871 "Can only fix up stack references once"); 9872 fixupPostOutline(MBB); 9873 9874 IsLeafFunction = false; 9875 9876 // LR has to be a live in so that we can save it. 9877 if (!MBB.isLiveIn(AArch64::LR)) 9878 MBB.addLiveIn(AArch64::LR); 9879 9880 MachineBasicBlock::iterator It = MBB.begin(); 9881 MachineBasicBlock::iterator Et = MBB.end(); 9882 9883 if (OF.FrameConstructionID == MachineOutlinerTailCall || 9884 OF.FrameConstructionID == MachineOutlinerThunk) 9885 Et = std::prev(MBB.end()); 9886 9887 // Insert a save before the outlined region 9888 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) 9889 .addReg(AArch64::SP, RegState::Define) 9890 .addReg(AArch64::LR) 9891 .addReg(AArch64::SP) 9892 .addImm(-16); 9893 It = MBB.insert(It, STRXpre); 9894 9895 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) { 9896 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup); 9897 9898 // Add a CFI saying the stack was moved 16 B down. 9899 CFIBuilder.buildDefCFAOffset(16); 9900 9901 // Add a CFI saying that the LR that we want to find is now 16 B higher 9902 // than before. 9903 CFIBuilder.buildOffset(AArch64::LR, -16); 9904 } 9905 9906 // Insert a restore before the terminator for the function. 9907 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) 9908 .addReg(AArch64::SP, RegState::Define) 9909 .addReg(AArch64::LR, RegState::Define) 9910 .addReg(AArch64::SP) 9911 .addImm(16); 9912 Et = MBB.insert(Et, LDRXpost); 9913 } 9914 9915 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction); 9916 9917 // If this is a tail call outlined function, then there's already a return. 9918 if (OF.FrameConstructionID == MachineOutlinerTailCall || 9919 OF.FrameConstructionID == MachineOutlinerThunk) { 9920 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr); 9921 return; 9922 } 9923 9924 // It's not a tail call, so we have to insert the return ourselves. 9925 9926 // LR has to be a live in so that we can return to it. 9927 if (!MBB.isLiveIn(AArch64::LR)) 9928 MBB.addLiveIn(AArch64::LR); 9929 9930 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET)) 9931 .addReg(AArch64::LR); 9932 MBB.insert(MBB.end(), ret); 9933 9934 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr); 9935 9936 FI->setOutliningStyle("Function"); 9937 9938 // Did we have to modify the stack by saving the link register? 
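  // In the default case the call site spills LR around the call (see
  // insertOutlinedCall), so inside the outlined function SP is 16 bytes lower
  // than at the original code. Sketch of a call site (illustrative):
  //   str x30, [sp, #-16]!
  //   bl  OUTLINED_FUNCTION_N
  //   ldr x30, [sp], #16
  // Hence every SP-relative offset in the body must be re-biased by +16,
  // which is what fixupPostOutline does below.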
9939 if (OF.FrameConstructionID != MachineOutlinerDefault) 9940 return; 9941 9942 // We modified the stack. 9943 // Walk over the basic block and fix up all the stack accesses. 9944 fixupPostOutline(MBB); 9945 } 9946 9947 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( 9948 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, 9949 MachineFunction &MF, outliner::Candidate &C) const { 9950 9951 // Are we tail calling? 9952 if (C.CallConstructionID == MachineOutlinerTailCall) { 9953 // If yes, then we can just branch to the label. 9954 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi)) 9955 .addGlobalAddress(M.getNamedValue(MF.getName())) 9956 .addImm(0)); 9957 return It; 9958 } 9959 9960 // Are we saving the link register? 9961 if (C.CallConstructionID == MachineOutlinerNoLRSave || 9962 C.CallConstructionID == MachineOutlinerThunk) { 9963 // No, so just insert the call. 9964 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) 9965 .addGlobalAddress(M.getNamedValue(MF.getName()))); 9966 return It; 9967 } 9968 9969 // We want to return the spot where we inserted the call. 9970 MachineBasicBlock::iterator CallPt; 9971 9972 // Instructions for saving and restoring LR around the call instruction we're 9973 // going to insert. 9974 MachineInstr *Save; 9975 MachineInstr *Restore; 9976 // Can we save to a register? 9977 if (C.CallConstructionID == MachineOutlinerRegSave) { 9978 // FIXME: This logic should be sunk into a target-specific interface so that 9979 // we don't have to recompute the register. 9980 Register Reg = findRegisterToSaveLRTo(C); 9981 assert(Reg && "No callee-saved register available?"); 9982 9983 // LR has to be a live in so that we can save it. 9984 if (!MBB.isLiveIn(AArch64::LR)) 9985 MBB.addLiveIn(AArch64::LR); 9986 9987 // Save and restore LR from Reg. 9988 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg) 9989 .addReg(AArch64::XZR) 9990 .addReg(AArch64::LR) 9991 .addImm(0); 9992 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR) 9993 .addReg(AArch64::XZR) 9994 .addReg(Reg) 9995 .addImm(0); 9996 } else { 9997 // We have the default case. Save and restore from SP. 9998 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) 9999 .addReg(AArch64::SP, RegState::Define) 10000 .addReg(AArch64::LR) 10001 .addReg(AArch64::SP) 10002 .addImm(-16); 10003 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) 10004 .addReg(AArch64::SP, RegState::Define) 10005 .addReg(AArch64::LR, RegState::Define) 10006 .addReg(AArch64::SP) 10007 .addImm(16); 10008 } 10009 10010 It = MBB.insert(It, Save); 10011 It++; 10012 10013 // Insert the call. 
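  // For the MachineOutlinerRegSave case the Save/Restore pair built above
  // brackets the call roughly as follows, with xN standing for whatever
  // register findRegisterToSaveLRTo picked (a sketch):
  //   mov xN, x30      // ORRXrs xN, xzr, x30, #0
  //   bl  OUTLINED_FUNCTION_N
  //   mov x30, xN      // ORRXrs x30, xzr, xN, #0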
10014   It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10015                       .addGlobalAddress(M.getNamedValue(MF.getName())));
10016   CallPt = It;
10017   It++;
10018
10019   It = MBB.insert(It, Restore);
10020   return CallPt;
10021 }
10022
10023 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10024     MachineFunction &MF) const {
10025   return MF.getFunction().hasMinSize();
10026 }
10027
10028 void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10029                                           MachineBasicBlock::iterator Iter,
10030                                           DebugLoc &DL,
10031                                           bool AllowSideEffects) const {
10032   const MachineFunction &MF = *MBB.getParent();
10033   const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10034   const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10035
10036   if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10037     BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10038   } else if (STI.isSVEorStreamingSVEAvailable()) {
10039     BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10040         .addImm(0)
10041         .addImm(0);
10042   } else if (STI.isNeonAvailable()) {
10043     BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10044         .addImm(0);
10045   } else {
10046     // This is a streaming-compatible function without SVE. We don't have full
10047     // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10048     // Since `movi v..` would be illegal here, use `fmov d..` instead.
10049     assert(STI.hasNEON() && "Expected to have NEON.");
10050     Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10051     BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10052   }
10053 }
10054
10055 std::optional<DestSourcePair>
10056 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
10057
10058   // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register
10059   // and zero immediate operands are used as an alias for the mov instruction.
10060   if (((MI.getOpcode() == AArch64::ORRWrs &&
10061         MI.getOperand(1).getReg() == AArch64::WZR &&
10062         MI.getOperand(3).getImm() == 0x0) ||
10063        (MI.getOpcode() == AArch64::ORRWrr &&
10064         MI.getOperand(1).getReg() == AArch64::WZR)) &&
10065       // Check that the w->w move is not a zero-extending w->x mov.
10066       (!MI.getOperand(0).getReg().isVirtual() ||
10067        MI.getOperand(0).getSubReg() == 0) &&
10068       (!MI.getOperand(0).getReg().isPhysical() ||
10069        MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
10070                                     /*TRI=*/nullptr) == -1))
10071     return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10072
10073   if (MI.getOpcode() == AArch64::ORRXrs &&
10074       MI.getOperand(1).getReg() == AArch64::XZR &&
10075       MI.getOperand(3).getImm() == 0x0)
10076     return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10077
10078   return std::nullopt;
10079 }
10080
10081 std::optional<DestSourcePair>
10082 AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10083   if ((MI.getOpcode() == AArch64::ORRWrs &&
10084        MI.getOperand(1).getReg() == AArch64::WZR &&
10085        MI.getOperand(3).getImm() == 0x0) ||
10086       (MI.getOpcode() == AArch64::ORRWrr &&
10087        MI.getOperand(1).getReg() == AArch64::WZR))
10088     return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10089   return std::nullopt;
10090 }
10091
10092 std::optional<RegImmPair>
10093 AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
10094   int Sign = 1;
10095   int64_t Offset = 0;
10096
10097   // TODO: Handle cases where Reg is a super- or sub-register of the
10098   // destination register.
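  // Example (illustrative): for '$x0 = ADDXri $x1, 16, 0' and Reg == x0 this
  // returns {x1, +16}; the SUB forms handled below negate the offset, and a
  // shift operand of 12 scales the immediate by 4096 first.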
10099   const MachineOperand &Op0 = MI.getOperand(0);
10100   if (!Op0.isReg() || Reg != Op0.getReg())
10101     return std::nullopt;
10102
10103   switch (MI.getOpcode()) {
10104   default:
10105     return std::nullopt;
10106   case AArch64::SUBWri:
10107   case AArch64::SUBXri:
10108   case AArch64::SUBSWri:
10109   case AArch64::SUBSXri:
10110     Sign *= -1;
10111     [[fallthrough]];
10112   case AArch64::ADDSWri:
10113   case AArch64::ADDSXri:
10114   case AArch64::ADDWri:
10115   case AArch64::ADDXri: {
10116     // TODO: Third operand can be a global address (usually some string).
10117     if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
10118         !MI.getOperand(2).isImm())
10119       return std::nullopt;
10120     int Shift = MI.getOperand(3).getImm();
10121     assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
10122     Offset = Sign * (MI.getOperand(2).getImm() << Shift);
10123   }
10124   }
10125   return RegImmPair{MI.getOperand(1).getReg(), Offset};
10126 }
10127
10128 /// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10129 /// the destination register, then, if possible, describe the value in terms
10130 /// of the source register.
10131 static std::optional<ParamLoadedValue>
10132 describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
10133                        const TargetInstrInfo *TII,
10134                        const TargetRegisterInfo *TRI) {
10135   auto DestSrc = TII->isCopyLikeInstr(MI);
10136   if (!DestSrc)
10137     return std::nullopt;
10138
10139   Register DestReg = DestSrc->Destination->getReg();
10140   Register SrcReg = DestSrc->Source->getReg();
10141
10142   auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10143
10144   // If the described register is the destination, just return the source.
10145   if (DestReg == DescribedReg)
10146     return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10147
10148   // ORRWrs zero-extends to 64 bits, so we need to consider such cases.
10149   if (MI.getOpcode() == AArch64::ORRWrs &&
10150       TRI->isSuperRegister(DestReg, DescribedReg))
10151     return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10152
10153   // We may need to describe the lower part of an ORRXrs move.
10154   if (MI.getOpcode() == AArch64::ORRXrs &&
10155       TRI->isSubRegister(DestReg, DescribedReg)) {
10156     Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
10157     return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
10158   }
10159
10160   assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
10161          "Unhandled ORR[XW]rs copy case");
10162
10163   return std::nullopt;
10164 }
10165
10166 bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
10167   // Functions cannot be split to different sections on AArch64 if they have
10168   // a red zone. This is because relaxing a cross-section branch may require
10169   // incrementing the stack pointer to spill a register, which would overwrite
10170   // the red zone.
10171   if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
10172     return false;
10173
10174   return TargetInstrInfo::isFunctionSafeToSplit(MF);
10175 }
10176
10177 bool AArch64InstrInfo::isMBBSafeToSplitToCold(
10178     const MachineBasicBlock &MBB) const {
10179   // Asm Goto blocks can contain conditional branches to goto labels, which can
10180   // get moved out of range of the branch instruction.
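  // For instance (illustrative), with
  //   asm goto("cbz %0, %l1" : : "r"(x) : : error);
  // splitting could move the goto label beyond CBZ's roughly +/-1 MiB reach,
  // and the compiler cannot relax a branch written in inline assembly.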
10181 auto isAsmGoto = [](const MachineInstr &MI) { 10182 return MI.getOpcode() == AArch64::INLINEASM_BR; 10183 }; 10184 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget()) 10185 return false; 10186 10187 // Because jump tables are label-relative instead of table-relative, they all 10188 // must be in the same section or relocation fixup handling will fail. 10189 10190 // Check if MBB is a jump table target 10191 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo(); 10192 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) { 10193 return llvm::is_contained(JTE.MBBs, &MBB); 10194 }; 10195 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB)) 10196 return false; 10197 10198 // Check if MBB contains a jump table lookup 10199 for (const MachineInstr &MI : MBB) { 10200 switch (MI.getOpcode()) { 10201 case TargetOpcode::G_BRJT: 10202 case AArch64::JumpTableDest32: 10203 case AArch64::JumpTableDest16: 10204 case AArch64::JumpTableDest8: 10205 return false; 10206 default: 10207 continue; 10208 } 10209 } 10210 10211 // MBB isn't a special case, so it's safe to be split to the cold section. 10212 return true; 10213 } 10214 10215 std::optional<ParamLoadedValue> 10216 AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI, 10217 Register Reg) const { 10218 const MachineFunction *MF = MI.getMF(); 10219 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 10220 switch (MI.getOpcode()) { 10221 case AArch64::MOVZWi: 10222 case AArch64::MOVZXi: { 10223 // MOVZWi may be used for producing zero-extended 32-bit immediates in 10224 // 64-bit parameters, so we need to consider super-registers. 10225 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg)) 10226 return std::nullopt; 10227 10228 if (!MI.getOperand(1).isImm()) 10229 return std::nullopt; 10230 int64_t Immediate = MI.getOperand(1).getImm(); 10231 int Shift = MI.getOperand(2).getImm(); 10232 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift), 10233 nullptr); 10234 } 10235 case AArch64::ORRWrs: 10236 case AArch64::ORRXrs: 10237 return describeORRLoadedValue(MI, Reg, this, TRI); 10238 } 10239 10240 return TargetInstrInfo::describeLoadedValue(MI, Reg); 10241 } 10242 10243 bool AArch64InstrInfo::isExtendLikelyToBeFolded( 10244 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const { 10245 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT || 10246 ExtMI.getOpcode() == TargetOpcode::G_ZEXT || 10247 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT); 10248 10249 // Anyexts are nops. 10250 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT) 10251 return true; 10252 10253 Register DefReg = ExtMI.getOperand(0).getReg(); 10254 if (!MRI.hasOneNonDBGUse(DefReg)) 10255 return false; 10256 10257 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an 10258 // addressing mode. 
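  // E.g. (illustrative gMIR):
  //   %off:_(s64) = G_SEXT %idx:_(s32)
  //   %addr:_(p0) = G_PTR_ADD %base:_(p0), %off:_(s64)
  // typically selects to a single extended-register access such as
  //   ldr x0, [x1, w2, sxtw]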
10259 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg); 10260 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD; 10261 } 10262 10263 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const { 10264 return get(Opc).TSFlags & AArch64::ElementSizeMask; 10265 } 10266 10267 bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const { 10268 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike; 10269 } 10270 10271 bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const { 10272 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile; 10273 } 10274 10275 unsigned int 10276 AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const { 10277 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2; 10278 } 10279 10280 bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset, 10281 unsigned Scale) const { 10282 if (Offset && Scale) 10283 return false; 10284 10285 // Check Reg + Imm 10286 if (!Scale) { 10287 // 9-bit signed offset 10288 if (isInt<9>(Offset)) 10289 return true; 10290 10291 // 12-bit unsigned offset 10292 unsigned Shift = Log2_64(NumBytes); 10293 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 && 10294 // Must be a multiple of NumBytes (NumBytes is a power of 2) 10295 (Offset >> Shift) << Shift == Offset) 10296 return true; 10297 return false; 10298 } 10299 10300 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2 10301 return Scale == 1 || (Scale > 0 && Scale == NumBytes); 10302 } 10303 10304 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) { 10305 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr()) 10306 return AArch64::BLRNoIP; 10307 else 10308 return AArch64::BLR; 10309 } 10310 10311 MachineBasicBlock::iterator 10312 AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI, 10313 Register TargetReg, bool FrameSetup) const { 10314 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP"); 10315 10316 MachineBasicBlock &MBB = *MBBI->getParent(); 10317 MachineFunction &MF = *MBB.getParent(); 10318 const AArch64InstrInfo *TII = 10319 MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); 10320 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize(); 10321 DebugLoc DL = MBB.findDebugLoc(MBBI); 10322 10323 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); 10324 MachineBasicBlock *LoopTestMBB = 10325 MF.CreateMachineBasicBlock(MBB.getBasicBlock()); 10326 MF.insert(MBBInsertPoint, LoopTestMBB); 10327 MachineBasicBlock *LoopBodyMBB = 10328 MF.CreateMachineBasicBlock(MBB.getBasicBlock()); 10329 MF.insert(MBBInsertPoint, LoopBodyMBB); 10330 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock()); 10331 MF.insert(MBBInsertPoint, ExitMBB); 10332 MachineInstr::MIFlag Flags = 10333 FrameSetup ? 
MachineInstr::FrameSetup : MachineInstr::NoFlags; 10334 10335 // LoopTest: 10336 // SUB SP, SP, #ProbeSize 10337 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP, 10338 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags); 10339 10340 // CMP SP, TargetReg 10341 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64), 10342 AArch64::XZR) 10343 .addReg(AArch64::SP) 10344 .addReg(TargetReg) 10345 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0)) 10346 .setMIFlags(Flags); 10347 10348 // B.<Cond> LoopExit 10349 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc)) 10350 .addImm(AArch64CC::LE) 10351 .addMBB(ExitMBB) 10352 .setMIFlags(Flags); 10353 10354 // STR XZR, [SP] 10355 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui)) 10356 .addReg(AArch64::XZR) 10357 .addReg(AArch64::SP) 10358 .addImm(0) 10359 .setMIFlags(Flags); 10360 10361 // B loop 10362 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B)) 10363 .addMBB(LoopTestMBB) 10364 .setMIFlags(Flags); 10365 10366 // LoopExit: 10367 // MOV SP, TargetReg 10368 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP) 10369 .addReg(TargetReg) 10370 .addImm(0) 10371 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 10372 .setMIFlags(Flags); 10373 10374 // LDR XZR, [SP] 10375 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui)) 10376 .addReg(AArch64::XZR, RegState::Define) 10377 .addReg(AArch64::SP) 10378 .addImm(0) 10379 .setMIFlags(Flags); 10380 10381 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end()); 10382 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); 10383 10384 LoopTestMBB->addSuccessor(ExitMBB); 10385 LoopTestMBB->addSuccessor(LoopBodyMBB); 10386 LoopBodyMBB->addSuccessor(LoopTestMBB); 10387 MBB.addSuccessor(LoopTestMBB); 10388 10389 // Update liveins. 
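  // (The three blocks created above start with empty livein lists; once the
  // reserved registers are frozen they can be recomputed from the surrounding
  // code, which is what happens below.)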
10390   if (MF.getRegInfo().reservedRegsFrozen())
10391     fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
10392
10393   return ExitMBB->begin();
10394 }
10395
10396 namespace {
10397 class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10398   MachineFunction *MF;
10399   const TargetInstrInfo *TII;
10400   const TargetRegisterInfo *TRI;
10401   MachineRegisterInfo &MRI;
10402
10403   /// The block of the loop
10404   MachineBasicBlock *LoopBB;
10405   /// The conditional branch of the loop
10406   MachineInstr *CondBranch;
10407   /// The compare instruction for loop control
10408   MachineInstr *Comp;
10409   /// The number of the operand of the loop counter value in Comp
10410   unsigned CompCounterOprNum;
10411   /// The instruction that updates the loop counter value
10412   MachineInstr *Update;
10413   /// The number of the operand of the loop counter value in Update
10414   unsigned UpdateCounterOprNum;
10415   /// The initial value of the loop counter
10416   Register Init;
10417   /// True iff Update is a predecessor of Comp
10418   bool IsUpdatePriorComp;
10419
10420   /// The normalized condition used by createTripCountGreaterCondition()
10421   SmallVector<MachineOperand, 4> Cond;
10422
10423 public:
10424   AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10425                            MachineInstr *Comp, unsigned CompCounterOprNum,
10426                            MachineInstr *Update, unsigned UpdateCounterOprNum,
10427                            Register Init, bool IsUpdatePriorComp,
10428                            const SmallVectorImpl<MachineOperand> &Cond)
10429       : MF(Comp->getParent()->getParent()),
10430         TII(MF->getSubtarget().getInstrInfo()),
10431         TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10432         LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10433         CompCounterOprNum(CompCounterOprNum), Update(Update),
10434         UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10435         IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10436
10437   bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10438     // Make sure the instructions for loop control are placed in stage 0.
10439     // The predecessors of Comp are considered by the caller.
10440     return MI == Comp;
10441   }
10442
10443   std::optional<bool> createTripCountGreaterCondition(
10444       int TC, MachineBasicBlock &MBB,
10445       SmallVectorImpl<MachineOperand> &CondParam) override {
10446     // A branch instruction will be inserted as "if (Cond) goto epilogue".
10447     // Cond is normalized for such use.
10448     // The predecessors of the branch are assumed to have already been inserted.
10449     CondParam = Cond;
10450     return {};
10451   }
10452
10453   void createRemainingIterationsGreaterCondition(
10454       int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10455       DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10456
10457   void setPreheader(MachineBasicBlock *NewPreheader) override {}
10458
10459   void adjustTripCount(int TripCountAdjust) override {}
10460
10461   bool isMVEExpanderSupported() override { return true; }
10462 };
10463 } // namespace
10464
10465 /// Clone an instruction from MI. The register of the ReplaceOprNum-th operand
10466 /// is replaced by ReplaceReg. The output register is newly created.
10467 /// The other operands are unchanged from MI.
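/// For example (illustrative): cloning '%1:gpr64 = ADDXri %0, 1, 0' with
/// ReplaceOprNum == 1 and ReplaceReg == %5 inserts '%2 = ADDXri %5, 1, 0'
/// and returns %2.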
10468 static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, 10469 Register ReplaceReg, MachineBasicBlock &MBB, 10470 MachineBasicBlock::iterator InsertTo) { 10471 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 10472 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); 10473 const TargetRegisterInfo *TRI = 10474 MBB.getParent()->getSubtarget().getRegisterInfo(); 10475 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI); 10476 Register Result = 0; 10477 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) { 10478 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) { 10479 Result = MRI.createVirtualRegister( 10480 MRI.getRegClass(NewMI->getOperand(0).getReg())); 10481 NewMI->getOperand(I).setReg(Result); 10482 } else if (I == ReplaceOprNum) { 10483 MRI.constrainRegClass( 10484 ReplaceReg, 10485 TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent())); 10486 NewMI->getOperand(I).setReg(ReplaceReg); 10487 } 10488 } 10489 MBB.insert(InsertTo, NewMI); 10490 return Result; 10491 } 10492 10493 void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition( 10494 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond, 10495 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) { 10496 // Create and accumulate conditions for next TC iterations. 10497 // Example: 10498 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last 10499 // # iteration of the kernel 10500 // 10501 // # insert the following instructions 10502 // cond = CSINCXr 0, 0, C, implicit $nzcv 10503 // counter = ADDXri counter, 1 # clone from this->Update 10504 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp 10505 // cond = CSINCXr cond, cond, C, implicit $nzcv 10506 // ... (repeat TC times) 10507 // SUBSXri cond, 0, implicit-def $nzcv 10508 10509 assert(CondBranch->getOpcode() == AArch64::Bcc); 10510 // CondCode to exit the loop 10511 AArch64CC::CondCode CC = 10512 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm(); 10513 if (CondBranch->getOperand(1).getMBB() == LoopBB) 10514 CC = AArch64CC::getInvertedCondCode(CC); 10515 10516 // Accumulate conditions to exit the loop 10517 Register AccCond = AArch64::XZR; 10518 10519 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned. 
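  // (CSINC Xd, Xn, Xm, cond computes 'cond ? Xn : Xm + 1', so using the
  // inverted condition code below yields 'CC ? CurCond + 1 : CurCond'.)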
10520   auto AccumulateCond = [&](Register CurCond,
10521                             AArch64CC::CondCode CC) -> Register {
10522     Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
10523     BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
10524         .addReg(NewCond, RegState::Define)
10525         .addReg(CurCond)
10526         .addReg(CurCond)
10527         .addImm(AArch64CC::getInvertedCondCode(CC));
10528     return NewCond;
10529   };
10530
10531   if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
10532     // Update and Comp for I == 0 already exist in MBB
10533     // (MBB is an unrolled kernel).
10534     Register Counter;
10535     for (int I = 0; I <= TC; ++I) {
10536       Register NextCounter;
10537       if (I != 0)
10538         NextCounter =
10539             cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
10540
10541       AccCond = AccumulateCond(AccCond, CC);
10542
10543       if (I != TC) {
10544         if (I == 0) {
10545           if (Update != Comp && IsUpdatePriorComp) {
10546             Counter =
10547                 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
10548             NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
10549                                      MBB.end());
10550           } else {
10551             // We can use the already-calculated value.
10552             NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
10553           }
10554         } else if (Update != Comp) {
10555           NextCounter =
10556               cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
10557         }
10558       }
10559       Counter = NextCounter;
10560     }
10561   } else {
10562     Register Counter;
10563     if (LastStage0Insts.empty()) {
10564       // Use the initial counter value (testing if the trip count is sufficient
10565       // for the pipelined code to execute).
10566       Counter = Init;
10567       if (IsUpdatePriorComp)
10568         Counter =
10569             cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
10570     } else {
10571       // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
10572       Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
10573     }
10574
10575     for (int I = 0; I <= TC; ++I) {
10576       Register NextCounter;
10577       NextCounter =
10578           cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
10579       AccCond = AccumulateCond(AccCond, CC);
10580       if (I != TC && Update != Comp)
10581         NextCounter =
10582             cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
10583       Counter = NextCounter;
10584     }
10585   }
10586
10587   // If AccCond == 0, the remainder is greater than TC.
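  // The SUBS emitted below is effectively 'cmp AccCond, #0' (the XZR result
  // is dead); paired with the EQ condition pushed into Cond, it tests that no
  // exit condition fired, i.e. more than TC iterations remain.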
10588 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri)) 10589 .addReg(AArch64::XZR, RegState::Define | RegState::Dead) 10590 .addReg(AccCond) 10591 .addImm(0) 10592 .addImm(0); 10593 Cond.clear(); 10594 Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ)); 10595 } 10596 10597 static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, 10598 Register &RegMBB, Register &RegOther) { 10599 assert(Phi.getNumOperands() == 5); 10600 if (Phi.getOperand(2).getMBB() == MBB) { 10601 RegMBB = Phi.getOperand(1).getReg(); 10602 RegOther = Phi.getOperand(3).getReg(); 10603 } else { 10604 assert(Phi.getOperand(4).getMBB() == MBB); 10605 RegMBB = Phi.getOperand(3).getReg(); 10606 RegOther = Phi.getOperand(1).getReg(); 10607 } 10608 } 10609 10610 static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) { 10611 if (!Reg.isVirtual()) 10612 return false; 10613 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 10614 return MRI.getVRegDef(Reg)->getParent() != BB; 10615 } 10616 10617 /// If Reg is an induction variable, return true and set some parameters 10618 static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, 10619 MachineInstr *&UpdateInst, 10620 unsigned &UpdateCounterOprNum, Register &InitReg, 10621 bool &IsUpdatePriorComp) { 10622 // Example: 10623 // 10624 // Preheader: 10625 // InitReg = ... 10626 // LoopBB: 10627 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB) 10628 // Reg = COPY Reg0 ; COPY is ignored. 10629 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value. 10630 // ; Reg is the value calculated in the previous 10631 // ; iteration, so IsUpdatePriorComp == false. 10632 10633 if (LoopBB->pred_size() != 2) 10634 return false; 10635 if (!Reg.isVirtual()) 10636 return false; 10637 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo(); 10638 UpdateInst = nullptr; 10639 UpdateCounterOprNum = 0; 10640 InitReg = 0; 10641 IsUpdatePriorComp = true; 10642 Register CurReg = Reg; 10643 while (true) { 10644 MachineInstr *Def = MRI.getVRegDef(CurReg); 10645 if (Def->getParent() != LoopBB) 10646 return false; 10647 if (Def->isCopy()) { 10648 // Ignore copy instructions unless they contain subregisters 10649 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg()) 10650 return false; 10651 CurReg = Def->getOperand(1).getReg(); 10652 } else if (Def->isPHI()) { 10653 if (InitReg != 0) 10654 return false; 10655 if (!UpdateInst) 10656 IsUpdatePriorComp = false; 10657 extractPhiReg(*Def, LoopBB, CurReg, InitReg); 10658 } else { 10659 if (UpdateInst) 10660 return false; 10661 switch (Def->getOpcode()) { 10662 case AArch64::ADDSXri: 10663 case AArch64::ADDSWri: 10664 case AArch64::SUBSXri: 10665 case AArch64::SUBSWri: 10666 case AArch64::ADDXri: 10667 case AArch64::ADDWri: 10668 case AArch64::SUBXri: 10669 case AArch64::SUBWri: 10670 UpdateInst = Def; 10671 UpdateCounterOprNum = 1; 10672 break; 10673 case AArch64::ADDSXrr: 10674 case AArch64::ADDSWrr: 10675 case AArch64::SUBSXrr: 10676 case AArch64::SUBSWrr: 10677 case AArch64::ADDXrr: 10678 case AArch64::ADDWrr: 10679 case AArch64::SUBXrr: 10680 case AArch64::SUBWrr: 10681 UpdateInst = Def; 10682 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB)) 10683 UpdateCounterOprNum = 1; 10684 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB)) 10685 UpdateCounterOprNum = 2; 10686 else 10687 return false; 10688 break; 10689 default: 10690 return false; 10691 } 10692 CurReg = 
Def->getOperand(UpdateCounterOprNum).getReg(); 10693 } 10694 10695 if (!CurReg.isVirtual()) 10696 return false; 10697 if (Reg == CurReg) 10698 break; 10699 } 10700 10701 if (!UpdateInst) 10702 return false; 10703 10704 return true; 10705 } 10706 10707 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> 10708 AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { 10709 // Accept loops that meet the following conditions 10710 // * The conditional branch is BCC 10711 // * The compare instruction is ADDS/SUBS/WHILEXX 10712 // * One operand of the compare is an induction variable and the other is a 10713 // loop invariant value 10714 // * The induction variable is incremented/decremented by a single instruction 10715 // * Does not contain CALL or instructions which have unmodeled side effects 10716 10717 for (MachineInstr &MI : *LoopBB) 10718 if (MI.isCall() || MI.hasUnmodeledSideEffects()) 10719 // This instruction may use NZCV, which interferes with the instruction to 10720 // be inserted for loop control. 10721 return nullptr; 10722 10723 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 10724 SmallVector<MachineOperand, 4> Cond; 10725 if (analyzeBranch(*LoopBB, TBB, FBB, Cond)) 10726 return nullptr; 10727 10728 // Infinite loops are not supported 10729 if (TBB == LoopBB && FBB == LoopBB) 10730 return nullptr; 10731 10732 // Must be conditional branch 10733 if (TBB != LoopBB && FBB == nullptr) 10734 return nullptr; 10735 10736 assert((TBB == LoopBB || FBB == LoopBB) && 10737 "The Loop must be a single-basic-block loop"); 10738 10739 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator(); 10740 const TargetRegisterInfo &TRI = getRegisterInfo(); 10741 10742 if (CondBranch->getOpcode() != AArch64::Bcc) 10743 return nullptr; 10744 10745 // Normalization for createTripCountGreaterCondition() 10746 if (TBB == LoopBB) 10747 reverseBranchCondition(Cond); 10748 10749 MachineInstr *Comp = nullptr; 10750 unsigned CompCounterOprNum = 0; 10751 for (MachineInstr &MI : reverse(*LoopBB)) { 10752 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) { 10753 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the 10754 // operands is a loop invariant value 10755 10756 switch (MI.getOpcode()) { 10757 case AArch64::SUBSXri: 10758 case AArch64::SUBSWri: 10759 case AArch64::ADDSXri: 10760 case AArch64::ADDSWri: 10761 Comp = &MI; 10762 CompCounterOprNum = 1; 10763 break; 10764 case AArch64::ADDSWrr: 10765 case AArch64::ADDSXrr: 10766 case AArch64::SUBSWrr: 10767 case AArch64::SUBSXrr: 10768 Comp = &MI; 10769 break; 10770 default: 10771 if (isWhileOpcode(MI.getOpcode())) { 10772 Comp = &MI; 10773 break; 10774 } 10775 return nullptr; 10776 } 10777 10778 if (CompCounterOprNum == 0) { 10779 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB)) 10780 CompCounterOprNum = 2; 10781 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB)) 10782 CompCounterOprNum = 1; 10783 else 10784 return nullptr; 10785 } 10786 break; 10787 } 10788 } 10789 if (!Comp) 10790 return nullptr; 10791 10792 MachineInstr *Update = nullptr; 10793 Register Init; 10794 bool IsUpdatePriorComp; 10795 unsigned UpdateCounterOprNum; 10796 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB, 10797 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp)) 10798 return nullptr; 10799 10800 return std::make_unique<AArch64PipelinerLoopInfo>( 10801 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum, 10802 Init, IsUpdatePriorComp, Cond); 10803 } 10804 10805 /// 
verifyInstruction - Perform target specific instruction verification. 10806 bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI, 10807 StringRef &ErrInfo) const { 10808 10809 // Verify that immediate offsets on load/store instructions are within range. 10810 // Stack objects with an FI operand are excluded as they can be fixed up 10811 // during PEI. 10812 TypeSize Scale(0U, false), Width(0U, false); 10813 int64_t MinOffset, MaxOffset; 10814 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) { 10815 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode()); 10816 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) { 10817 int64_t Imm = MI.getOperand(ImmIdx).getImm(); 10818 if (Imm < MinOffset || Imm > MaxOffset) { 10819 ErrInfo = "Unexpected immediate on load/store instruction"; 10820 return false; 10821 } 10822 } 10823 } 10824 return true; 10825 } 10826 10827 #define GET_INSTRINFO_HELPERS 10828 #define GET_INSTRMAP_INFO 10829 #include "AArch64GenInstrInfo.inc" 10830