//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                          AArch64::CATCHRET),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  {
    auto Op = MI.getOpcode();
    if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
      return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
  }

  // Meta-instructions emit no code.
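  // (Meta-instructions are bookkeeping-only operations such as CFI directives,
  // DBG_VALUEs, IMPLICIT_DEF and KILL; they never become machine code.)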
  if (MI.isMetaInstruction())
    return 0;

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  // before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::STATEPOINT:
    NumBytes = StatepointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    // No patch bytes means a normal call inst is emitted
    if (NumBytes == 0)
      NumBytes = 4;
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes
    NumBytes = 16;
    break;
  case AArch64::SpeculationBarrierISBDSBEndBB:
    // This gets lowered to 2 4-byte instructions.
    NumBytes = 8;
    break;
  case AArch64::SpeculationBarrierSBEndBB:
    // This gets lowered to 1 4-byte instruction.
    NumBytes = 4;
    break;
  case AArch64::JumpTableDest32:
  case AArch64::JumpTableDest16:
  case AArch64::JumpTableDest8:
    NumBytes = 12;
    break;
  case AArch64::SPACE:
    NumBytes = MI.getOperand(1).getImm();
    break;
  case AArch64::StoreSwiftAsyncContext:
    NumBytes = 20;
    break;
  case TargetOpcode::BUNDLE:
    NumBytes = getInstBundleLength(MI);
    break;
  }

  return NumBytes;
}

unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
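  // The Cond vector encodes which kind of branch this is: for Bcc it holds
  // just the condition-code operand; for cb(n)z and tb(n)z it holds a -1
  // sentinel, the original opcode, the register being tested and, for
  // tb(n)z only, the bit number.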
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump"
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  // Skip over SpeculationBarrierEndBB terminators
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If we're allowed to modify and the block ends in an unconditional branch
  // which could simply fallthrough, remove the branch. (Note: This case only
  // matters when we can't understand the whole sequence, otherwise it's also
  // handled by BranchFolding.cpp.)
  if (AllowModify && isUncondBranchOpcode(LastOpc) &&
      MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
    LastInst->eraseFromParent();
    LastInst = SecondLastInst;
    LastOpc = LastInst->getOpcode();
    if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
      assert(!isUncondBranchOpcode(LastOpc) &&
             "unreachable unconditional branches removed above");

      if (isCondBranchOpcode(LastOpc)) {
        // Block ends with fall-through condbranch.
        parseCondBranch(LastInst, TBB, Cond);
        return false;
      }
      return true; // Can't handle indirect branch.
    } else {
      SecondLastInst = &*I;
      SecondLastOpc = SecondLastInst->getOpcode();
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
                                              MachineBranchPredicate &MBP,
                                              bool AllowModify) const {
  // For the moment, handle only a block which ends with a cb(n)zx followed by
  // a fallthrough. Why this? Because it is a common form.
  // TODO: Should we handle b.cc?
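  // On success the predicate is reported as "LHS <pred> 0", with TrueDest
  // taken from the branch and FalseDest assumed to be the layout fallthrough.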

  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return true;

  // Skip over SpeculationBarrierEndBB terminators
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return true;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;
  unsigned LastOpc = LastInst->getOpcode();
  if (!isCondBranchOpcode(LastOpc))
    return true;

  switch (LastOpc) {
  default:
    return true;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    break;
  };

  MBP.TrueDest = LastInst->getOperand(1).getMBB();
  assert(MBP.TrueDest && "expected!");
  MBP.FalseDest = MBB.getNextNode();

  MBP.ConditionDef = nullptr;
  MBP.SingleUseCondition = false;

  MBP.LHS = LastInst->getOperand(0);
  MBP.RHS = MachineOperand::CreateImm(0);
  MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
                                            : MachineBranchPredicate::PRED_EQ;
  return false;
}

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
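    // Cond[1] carries the original cb(n)z/tb(n)z opcode, Cond[2] the register
    // operand and, for tb(n)z, Cond[3] the bit number to test.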
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (Register::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!Register::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       Register DstReg, Register TrueReg,
                                       Register FalseReg, int &CondCycles,
                                       int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Also need to check the dest regclass, in case we're trying to optimize
  // something like:
  // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
  if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, Register DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    Register TrueReg, Register FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    Register SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  const unsigned Opcode = MI.getOpcode();

  // Firstly, check cases gated by features.

  if (Subtarget.hasZeroCycleZeroingFP()) {
    if (Opcode == AArch64::FMOVH0 ||
        Opcode == AArch64::FMOVS0 ||
        Opcode == AArch64::FMOVD0)
      return true;
  }

  if (Subtarget.hasZeroCycleZeroingGP()) {
    if (Opcode == TargetOpcode::COPY &&
        (MI.getOperand(1).getReg() == AArch64::WZR ||
         MI.getOperand(1).getReg() == AArch64::XZR))
      return true;
  }

  // Secondly, check cases specific to sub-targets.

  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosCheapAsMove(MI))
      return true;

    return MI.isAsCheapAsAMove();
  }

  // Finally, check generic cases.

  switch (Opcode) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  case AArch64::SEH_StackAlloc:
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveFPLR_X:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveReg_X:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveRegP_X:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveFReg_X:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFRegP_X:
  case AArch64::SEH_SetFP:
  case AArch64::SEH_AddFP:
  case AArch64::SEH_Nop:
  case AArch64::SEH_PrologEnd:
  case AArch64::SEH_EpilogStart:
  case AArch64::SEH_EpilogEnd:
    return true;
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             Register &SrcReg, Register &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
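    // The low 32 bits of the destination are simply a copy of the source
    // register, so report the pair together with the sub_32 index.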
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;
  bool OffsetAIsScalable = false, OffsetBIsScalable = false;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base, offset from the base and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // the bases are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  // If OffsetAIsScalable and OffsetBIsScalable are both true, they
  // are assumed to have the same scale (vscale).
  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
                                   WidthA, TRI) &&
      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
                                   WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(*BaseOpB) &&
        OffsetAIsScalable == OffsetBIsScalable) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
    return true;
  switch (MI.getOpcode()) {
  case AArch64::HINT:
    // CSDB hints are scheduling barriers.
    if (MI.getOperand(0).getImm() == 0x14)
      return true;
    break;
  case AArch64::DSB:
  case AArch64::ISB:
    // DSB and ISB also are scheduling barriers.
    return true;
  default:;
  }
  return isSEHInstruction(MI);
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                      Register &SrcReg2, int64_t &CmpMask,
                                      int64_t &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::PTEST_PP:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = MI.getOperand(1).getReg();
    // Not sure about the mask and value for now...
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = MI.getOperand(2).getImm();
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = AArch64_AM::decodeLogicalImmediate(
        MI.getOperand(2).getImm(),
        MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    Register Reg = MO.getReg();
    if (Register::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible to do the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::any_of(
      ++To.getReverse(), To->getParent()->rend(),
      [From](MachineInstr &MI) { return MI.getIterator() == From; }));

  // We iterate backward starting at \p To until we hit \p From.
  for (const MachineInstr &Instr :
       instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
/// operation which could set the flags in an identical manner.
bool AArch64InstrInfo::optimizePTestInstr(
    MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
    const MachineRegisterInfo *MRI) const {
  auto *Mask = MRI->getUniqueVRegDef(MaskReg);
  auto *Pred = MRI->getUniqueVRegDef(PredReg);
  auto NewOp = Pred->getOpcode();
  bool OpChanged = false;

  unsigned MaskOpcode = Mask->getOpcode();
  unsigned PredOpcode = Pred->getOpcode();
  bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
  bool PredIsWhileLike = isWhileOpcode(PredOpcode);

  if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
    // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
    // deactivate any lanes OTHER_INST might set.
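    // An all-active PTRUE (pattern 31, i.e. "all") of the same element size
    // guarantees that.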
    uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
    uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);

    // Must be an all active predicate of matching element size.
    if ((PredElementSize != MaskElementSize) ||
        (Mask->getOperand(1).getImm() != 31))
      return false;

    // Fall through to simply remove the PTEST.
  } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
    // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
    // instruction that sets the flags as PTEST would.

    // Fall through to simply remove the PTEST.
  } else if (PredIsPTestLike) {
    // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
    // instructions use the same predicate.
    auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
    if (Mask != PTestLikeMask)
      return false;

    // Fall through to simply remove the PTEST.
  } else {
    switch (Pred->getOpcode()) {
    case AArch64::BRKB_PPzP:
    case AArch64::BRKPB_PPzPP: {
      // Op 0 is chain, 1 is the mask, 2 the previous predicate to
      // propagate, 3 the new predicate.

      // Check to see if our mask is the same as the brkpb's. If
      // not the resulting flag bits may be different and we
      // can't remove the ptest.
      auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
      if (Mask != PredMask)
        return false;

      // Switch to the new opcode
      NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
                                                      : AArch64::BRKPBS_PPzPP;
      OpChanged = true;
      break;
    }
    case AArch64::BRKN_PPzP: {
      auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
      if (Mask != PredMask)
        return false;

      NewOp = AArch64::BRKNS_PPzP;
      OpChanged = true;
      break;
    }
    case AArch64::RDFFR_PPz: {
      // rdffr p1.b, PredMask=p0/z <--- Definition of Pred
      // ptest Mask=p0, Pred=p1.b  <--- If equal masks, remove this and use
      //                                `rdffrs p1.b, p0/z` above.
      auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
      if (Mask != PredMask)
        return false;

      NewOp = AArch64::RDFFRS_PPz;
      OpChanged = true;
      break;
    }
    default:
      // Bail out if we don't recognize the input
      return false;
    }
  }

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  // If another instruction between Pred and PTest accesses flags, don't remove
  // the ptest or update the earlier instruction to modify them.
  if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
    return false;

  // If we pass all the checks, it's safe to remove the PTEST and use the flags
  // as they are prior to PTEST. Sometimes this requires the tested PTEST
  // operand to be replaced with an equivalent instruction that also sets the
  // flags.
  Pred->setDesc(get(NewOp));
  PTest->eraseFromParent();
  if (OpChanged) {
    bool succeeded = UpdateOperandRegClass(*Pred);
    (void)succeeded;
    assert(succeeded && "Operands have incompatible register classes!");
    Pred->addRegisterDefined(AArch64::NZCV, TRI);
  }

  // Ensure that the flags def is live.
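  // (The erased PTEST may have been the only user of NZCV, in which case the
  // def is still marked dead and needs to be revived.)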
  if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
    unsigned i = 0, e = Pred->getNumOperands();
    for (; i != e; ++i) {
      MachineOperand &MO = Pred->getOperand(i);
      if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
        MO.setIsDead(false);
        break;
      }
    }
  }
  return true;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare instruction
/// when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
    int64_t CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
    return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);

  if (SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
    return true;
  return (CmpValue == 0 || CmpValue == 1) &&
         removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}

/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

/// \returns The condition code operand index for \p Instr if it is a branch
/// or select and -1 otherwise.
static int
findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return -1;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return Idx - 2;
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return Idx - 1;
  }
  }
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
  return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
                          Instr.getOperand(CCIdx).getImm())
                    : AArch64CC::Invalid;
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

/// \returns Condition flags used after \p CmpInstr in its MachineBB if they
/// do not contain C or V flags and NZCV flags are not alive in successors of
/// the same \p CmpInstr and \p MI parent. \returns None otherwise.
///
/// Collect instructions using those flags in \p CCUseInstrs if provided.
static Optional<UsedNZCV>
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
                 const TargetRegisterInfo &TRI,
                 SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
  MachineBasicBlock *CmpParent = CmpInstr.getParent();
  if (MI.getParent() != CmpParent)
    return None;

  if (areCFlagsAliveInSuccessors(CmpParent))
    return None;

  UsedNZCV NZCVUsedAfterCmp;
  for (MachineInstr &Instr : instructionsWithoutDebug(
           std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
    if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return None;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
      if (CCUseInstrs)
        CCUseInstrs->push_back(&Instr);
    }
    if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
      break;
  }
  if (NZCVUsedAfterCmp.C || NZCVUsedAfterCmp.V)
    return None;
  return NZCVUsedAfterCmp;
}

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///   MI and CmpInstr
///   or if MI opcode is not the S form there must be neither defs of flags
///   nor uses of flags between MI and CmpInstr.
/// - and C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
                                       const TargetRegisterInfo &TRI) {
  assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);

  const unsigned CmpOpcode = CmpInstr.getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (!examineCFlagsUse(MI, CmpInstr, TRI))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(MI) != MI.getOpcode())
    AccessToCheck = AK_All;
  return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo &MRI) const {
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo &TRI = getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, &TRI);
  return true;
}

/// \returns True if \p CmpInstr can be removed.
///
/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
/// codes used in \p CCUseInstrs must be inverted.
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
                                 int CmpValue, const TargetRegisterInfo &TRI,
                                 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
                                 bool &IsInvertCC) {
  assert((CmpValue == 0 || CmpValue == 1) &&
         "Only comparisons to 0 or 1 considered for removal!");

  // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
  unsigned MIOpc = MI.getOpcode();
  if (MIOpc == AArch64::CSINCWr) {
    if (MI.getOperand(1).getReg() != AArch64::WZR ||
        MI.getOperand(2).getReg() != AArch64::WZR)
      return false;
  } else if (MIOpc == AArch64::CSINCXr) {
    if (MI.getOperand(1).getReg() != AArch64::XZR ||
        MI.getOperand(2).getReg() != AArch64::XZR)
      return false;
  } else {
    return false;
  }
  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
  if (MICC == AArch64CC::Invalid)
    return false;

  // NZCV needs to be defined
  if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
    return false;

  // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
  const unsigned CmpOpcode = CmpInstr.getOpcode();
  bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
  if (CmpValue && !IsSubsRegImm)
    return false;
  if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
    return false;

  // MI conditions allowed: eq, ne, mi, pl
  UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
  if (MIUsedNZCV.C || MIUsedNZCV.V)
    return false;

  Optional<UsedNZCV> NZCVUsedAfterCmp =
      examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
  // Condition flags must not be used in CmpInstr basic block successors, and
  // only the Z or N flags are allowed to be used after CmpInstr within its
  // basic block.
  if (!NZCVUsedAfterCmp)
    return false;
  // The Z or N flag used after CmpInstr must correspond to the flag used in MI
  if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
      (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
    return false;
  // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne
  if (MIUsedNZCV.N && !CmpValue)
    return false;

  // There must be no defs of flags between MI and CmpInstr
  if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
    return false;

  // The condition code is inverted in the following cases:
  // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
  // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
  IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
               (!CmpValue && MICC == AArch64CC::NE);
  return true;
}

/// Remove the comparison in a csinc-cmp sequence.
///
/// Examples:
/// 1. \code
///   csinc w9, wzr, wzr, ne
///   cmp   w9, #0
///   b.eq
///   \endcode
/// to
///   \code
///   csinc w9, wzr, wzr, ne
///   b.ne
///   \endcode
///
/// 2.
\code 1824 /// csinc x2, xzr, xzr, mi 1825 /// cmp x2, #1 1826 /// b.pl 1827 /// \endcode 1828 /// to 1829 /// \code 1830 /// csinc x2, xzr, xzr, mi 1831 /// b.pl 1832 /// \endcode 1833 /// 1834 /// \param CmpInstr comparison instruction 1835 /// \return True when comparison removed 1836 bool AArch64InstrInfo::removeCmpToZeroOrOne( 1837 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue, 1838 const MachineRegisterInfo &MRI) const { 1839 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg); 1840 if (!MI) 1841 return false; 1842 const TargetRegisterInfo &TRI = getRegisterInfo(); 1843 SmallVector<MachineInstr *, 4> CCUseInstrs; 1844 bool IsInvertCC = false; 1845 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs, 1846 IsInvertCC)) 1847 return false; 1848 // Make transformation 1849 CmpInstr.eraseFromParent(); 1850 if (IsInvertCC) { 1851 // Invert condition codes in CmpInstr CC users 1852 for (MachineInstr *CCUseInstr : CCUseInstrs) { 1853 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr); 1854 assert(Idx >= 0 && "Unexpected instruction using CC."); 1855 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx); 1856 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode( 1857 static_cast<AArch64CC::CondCode>(CCOperand.getImm())); 1858 CCOperand.setImm(CCUse); 1859 } 1860 } 1861 return true; 1862 } 1863 1864 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { 1865 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD && 1866 MI.getOpcode() != AArch64::CATCHRET) 1867 return false; 1868 1869 MachineBasicBlock &MBB = *MI.getParent(); 1870 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>(); 1871 auto TRI = Subtarget.getRegisterInfo(); 1872 DebugLoc DL = MI.getDebugLoc(); 1873 1874 if (MI.getOpcode() == AArch64::CATCHRET) { 1875 // Skip to the first instruction before the epilog. 
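    // The backwards scan below walks over the FrameDestroy-flagged (epilog)
    // instructions so that the ADRP/ADDXri pair materializing the catchret
    // target is inserted ahead of the epilog sequence rather than after it.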
    const TargetInstrInfo *TII =
        MBB.getParent()->getSubtarget().getInstrInfo();
    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
    auto MBBI = MachineBasicBlock::iterator(MI);
    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
           FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::prev(FirstEpilogSEH);
    if (FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::next(FirstEpilogSEH);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
        .addReg(AArch64::X0, RegState::Define)
        .addMBB(TargetMBB);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
        .addReg(AArch64::X0, RegState::Define)
        .addReg(AArch64::X0)
        .addMBB(TargetMBB)
        .addImm(0);
    return true;
  }

  Register Reg = MI.getOperand(0).getReg();
  Module &M = *MBB.getParent()->getFunction().getParent();
  if (M.getStackProtectorGuard() == "sysreg") {
    const AArch64SysReg::SysReg *SrcReg =
        AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
    if (!SrcReg)
      report_fatal_error("Unknown SysReg for Stack Protector Guard Register");

    // mrs xN, sysreg
    BuildMI(MBB, MI, DL, get(AArch64::MRS))
        .addDef(Reg, RegState::Renamable)
        .addImm(SrcReg->Encoding);
    int Offset = M.getStackProtectorGuardOffset();
    if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
      // ldr xN, [xN, #offset]
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
          .addDef(Reg)
          .addUse(Reg, RegState::Kill)
          .addImm(Offset / 8);
    } else if (Offset >= -256 && Offset <= 255) {
      // ldur xN, [xN, #offset]
      BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
          .addDef(Reg)
          .addUse(Reg, RegState::Kill)
          .addImm(Offset);
    } else if (Offset >= -4095 && Offset <= 4095) {
      if (Offset > 0) {
        // add xN, xN, #offset
        BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
            .addDef(Reg)
            .addUse(Reg, RegState::Kill)
            .addImm(Offset)
            .addImm(0);
      } else {
        // sub xN, xN, #offset
        BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
            .addDef(Reg)
            .addUse(Reg, RegState::Kill)
            .addImm(-Offset)
            .addImm(0);
      }
      // ldr xN, [xN]
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
          .addDef(Reg)
          .addUse(Reg, RegState::Kill)
          .addImm(0);
    } else {
      // Cases that are larger than +/- 4095 and not a multiple of 8, or
      // larger than 32760.
      // It might be nice to use AArch64::MOVi32imm here, which would get
      // expanded in PreSched2 after PostRA, but our lone scratch Reg already
      // contains the MRS result. findScratchNonCalleeSaveRegister() in
      // AArch64FrameLowering might help us find such a scratch register
      // though. If we failed to find a scratch register, we could emit a
      // stream of add instructions to build up the immediate. Or, we could try
      // to insert an AArch64::MOVi32imm before register allocation so that we
      // didn't need to scavenge for a scratch register.
1954 report_fatal_error("Unable to encode Stack Protector Guard Offset"); 1955 } 1956 MBB.erase(MI); 1957 return true; 1958 } 1959 1960 const GlobalValue *GV = 1961 cast<GlobalValue>((*MI.memoperands_begin())->getValue()); 1962 const TargetMachine &TM = MBB.getParent()->getTarget(); 1963 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM); 1964 const unsigned char MO_NC = AArch64II::MO_NC; 1965 1966 if ((OpFlags & AArch64II::MO_GOT) != 0) { 1967 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg) 1968 .addGlobalAddress(GV, 0, OpFlags); 1969 if (Subtarget.isTargetILP32()) { 1970 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); 1971 BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) 1972 .addDef(Reg32, RegState::Dead) 1973 .addUse(Reg, RegState::Kill) 1974 .addImm(0) 1975 .addMemOperand(*MI.memoperands_begin()) 1976 .addDef(Reg, RegState::Implicit); 1977 } else { 1978 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 1979 .addReg(Reg, RegState::Kill) 1980 .addImm(0) 1981 .addMemOperand(*MI.memoperands_begin()); 1982 } 1983 } else if (TM.getCodeModel() == CodeModel::Large) { 1984 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?"); 1985 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg) 1986 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC) 1987 .addImm(0); 1988 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 1989 .addReg(Reg, RegState::Kill) 1990 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC) 1991 .addImm(16); 1992 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 1993 .addReg(Reg, RegState::Kill) 1994 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC) 1995 .addImm(32); 1996 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 1997 .addReg(Reg, RegState::Kill) 1998 .addGlobalAddress(GV, 0, AArch64II::MO_G3) 1999 .addImm(48); 2000 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 2001 .addReg(Reg, RegState::Kill) 2002 .addImm(0) 2003 .addMemOperand(*MI.memoperands_begin()); 2004 } else if (TM.getCodeModel() == CodeModel::Tiny) { 2005 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg) 2006 .addGlobalAddress(GV, 0, OpFlags); 2007 } else { 2008 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg) 2009 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE); 2010 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC; 2011 if (Subtarget.isTargetILP32()) { 2012 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); 2013 BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) 2014 .addDef(Reg32, RegState::Dead) 2015 .addUse(Reg, RegState::Kill) 2016 .addGlobalAddress(GV, 0, LoFlags) 2017 .addMemOperand(*MI.memoperands_begin()) 2018 .addDef(Reg, RegState::Implicit); 2019 } else { 2020 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 2021 .addReg(Reg, RegState::Kill) 2022 .addGlobalAddress(GV, 0, LoFlags) 2023 .addMemOperand(*MI.memoperands_begin()); 2024 } 2025 } 2026 2027 MBB.erase(MI); 2028 2029 return true; 2030 } 2031 2032 // Return true if this instruction simply sets its single destination register 2033 // to zero. This is equivalent to a register rename of the zero-register. 
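// A non-exhaustive sketch of the forms recognized below:
//   movz w0, #0            ; MOVZWi/MOVZXi with a zero immediate and LSL #0
//   and  w0, wzr, #0x1     ; ANDWri/ANDXri reading the zero register
//   %0:gpr32 = COPY $wzr   ; COPY from WZR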
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    Register DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames an FP/SIMD register without
// modifying bits.
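// For example (sketch): a COPY whose destination is in FPR128, or
// "orr v0.16b, v1.16b, v1.16b" (ORRv16i8 with identical source registers).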
2088 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) { 2089 switch (MI.getOpcode()) { 2090 default: 2091 break; 2092 case TargetOpcode::COPY: { 2093 Register DstReg = MI.getOperand(0).getReg(); 2094 return AArch64::FPR128RegClass.contains(DstReg); 2095 } 2096 case AArch64::ORRv16i8: 2097 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { 2098 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() && 2099 "invalid ORRv16i8 operands"); 2100 return true; 2101 } 2102 break; 2103 } 2104 return false; 2105 } 2106 2107 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 2108 int &FrameIndex) const { 2109 switch (MI.getOpcode()) { 2110 default: 2111 break; 2112 case AArch64::LDRWui: 2113 case AArch64::LDRXui: 2114 case AArch64::LDRBui: 2115 case AArch64::LDRHui: 2116 case AArch64::LDRSui: 2117 case AArch64::LDRDui: 2118 case AArch64::LDRQui: 2119 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && 2120 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { 2121 FrameIndex = MI.getOperand(1).getIndex(); 2122 return MI.getOperand(0).getReg(); 2123 } 2124 break; 2125 } 2126 2127 return 0; 2128 } 2129 2130 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI, 2131 int &FrameIndex) const { 2132 switch (MI.getOpcode()) { 2133 default: 2134 break; 2135 case AArch64::STRWui: 2136 case AArch64::STRXui: 2137 case AArch64::STRBui: 2138 case AArch64::STRHui: 2139 case AArch64::STRSui: 2140 case AArch64::STRDui: 2141 case AArch64::STRQui: 2142 case AArch64::LDR_PXI: 2143 case AArch64::STR_PXI: 2144 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && 2145 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { 2146 FrameIndex = MI.getOperand(1).getIndex(); 2147 return MI.getOperand(0).getReg(); 2148 } 2149 break; 2150 } 2151 return 0; 2152 } 2153 2154 /// Check all MachineMemOperands for a hint to suppress pairing. 2155 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) { 2156 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 2157 return MMO->getFlags() & MOSuppressPair; 2158 }); 2159 } 2160 2161 /// Set a flag on the first MachineMemOperand to suppress pairing. 2162 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) { 2163 if (MI.memoperands_empty()) 2164 return; 2165 (*MI.memoperands_begin())->setFlags(MOSuppressPair); 2166 } 2167 2168 /// Check all MachineMemOperands for a hint that the load/store is strided. 
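/// Like the pairing hint above, this is carried as a MachineMemOperand flag
/// rather than on the instruction itself.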
2169 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) { 2170 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 2171 return MMO->getFlags() & MOStridedAccess; 2172 }); 2173 } 2174 2175 bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) { 2176 switch (Opc) { 2177 default: 2178 return false; 2179 case AArch64::STURSi: 2180 case AArch64::STRSpre: 2181 case AArch64::STURDi: 2182 case AArch64::STRDpre: 2183 case AArch64::STURQi: 2184 case AArch64::STRQpre: 2185 case AArch64::STURBBi: 2186 case AArch64::STURHHi: 2187 case AArch64::STURWi: 2188 case AArch64::STRWpre: 2189 case AArch64::STURXi: 2190 case AArch64::STRXpre: 2191 case AArch64::LDURSi: 2192 case AArch64::LDRSpre: 2193 case AArch64::LDURDi: 2194 case AArch64::LDRDpre: 2195 case AArch64::LDURQi: 2196 case AArch64::LDRQpre: 2197 case AArch64::LDURWi: 2198 case AArch64::LDRWpre: 2199 case AArch64::LDURXi: 2200 case AArch64::LDRXpre: 2201 case AArch64::LDURSWi: 2202 case AArch64::LDURHHi: 2203 case AArch64::LDURBBi: 2204 case AArch64::LDURSBWi: 2205 case AArch64::LDURSHWi: 2206 return true; 2207 } 2208 } 2209 2210 Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) { 2211 switch (Opc) { 2212 default: return {}; 2213 case AArch64::PRFMui: return AArch64::PRFUMi; 2214 case AArch64::LDRXui: return AArch64::LDURXi; 2215 case AArch64::LDRWui: return AArch64::LDURWi; 2216 case AArch64::LDRBui: return AArch64::LDURBi; 2217 case AArch64::LDRHui: return AArch64::LDURHi; 2218 case AArch64::LDRSui: return AArch64::LDURSi; 2219 case AArch64::LDRDui: return AArch64::LDURDi; 2220 case AArch64::LDRQui: return AArch64::LDURQi; 2221 case AArch64::LDRBBui: return AArch64::LDURBBi; 2222 case AArch64::LDRHHui: return AArch64::LDURHHi; 2223 case AArch64::LDRSBXui: return AArch64::LDURSBXi; 2224 case AArch64::LDRSBWui: return AArch64::LDURSBWi; 2225 case AArch64::LDRSHXui: return AArch64::LDURSHXi; 2226 case AArch64::LDRSHWui: return AArch64::LDURSHWi; 2227 case AArch64::LDRSWui: return AArch64::LDURSWi; 2228 case AArch64::STRXui: return AArch64::STURXi; 2229 case AArch64::STRWui: return AArch64::STURWi; 2230 case AArch64::STRBui: return AArch64::STURBi; 2231 case AArch64::STRHui: return AArch64::STURHi; 2232 case AArch64::STRSui: return AArch64::STURSi; 2233 case AArch64::STRDui: return AArch64::STURDi; 2234 case AArch64::STRQui: return AArch64::STURQi; 2235 case AArch64::STRBBui: return AArch64::STURBBi; 2236 case AArch64::STRHHui: return AArch64::STURHHi; 2237 } 2238 } 2239 2240 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { 2241 switch (Opc) { 2242 default: 2243 return 2; 2244 case AArch64::LDPXi: 2245 case AArch64::LDPDi: 2246 case AArch64::STPXi: 2247 case AArch64::STPDi: 2248 case AArch64::LDNPXi: 2249 case AArch64::LDNPDi: 2250 case AArch64::STNPXi: 2251 case AArch64::STNPDi: 2252 case AArch64::LDPQi: 2253 case AArch64::STPQi: 2254 case AArch64::LDNPQi: 2255 case AArch64::STNPQi: 2256 case AArch64::LDPWi: 2257 case AArch64::LDPSi: 2258 case AArch64::STPWi: 2259 case AArch64::STPSi: 2260 case AArch64::LDNPWi: 2261 case AArch64::LDNPSi: 2262 case AArch64::STNPWi: 2263 case AArch64::STNPSi: 2264 case AArch64::LDG: 2265 case AArch64::STGPi: 2266 2267 case AArch64::LD1B_IMM: 2268 case AArch64::LD1B_H_IMM: 2269 case AArch64::LD1B_S_IMM: 2270 case AArch64::LD1B_D_IMM: 2271 case AArch64::LD1SB_H_IMM: 2272 case AArch64::LD1SB_S_IMM: 2273 case AArch64::LD1SB_D_IMM: 2274 case AArch64::LD1H_IMM: 2275 case AArch64::LD1H_S_IMM: 2276 case AArch64::LD1H_D_IMM: 2277 case AArch64::LD1SH_S_IMM: 2278 case 
AArch64::LD1SH_D_IMM: 2279 case AArch64::LD1W_IMM: 2280 case AArch64::LD1W_D_IMM: 2281 case AArch64::LD1SW_D_IMM: 2282 case AArch64::LD1D_IMM: 2283 2284 case AArch64::ST1B_IMM: 2285 case AArch64::ST1B_H_IMM: 2286 case AArch64::ST1B_S_IMM: 2287 case AArch64::ST1B_D_IMM: 2288 case AArch64::ST1H_IMM: 2289 case AArch64::ST1H_S_IMM: 2290 case AArch64::ST1H_D_IMM: 2291 case AArch64::ST1W_IMM: 2292 case AArch64::ST1W_D_IMM: 2293 case AArch64::ST1D_IMM: 2294 2295 case AArch64::LD1RB_IMM: 2296 case AArch64::LD1RB_H_IMM: 2297 case AArch64::LD1RB_S_IMM: 2298 case AArch64::LD1RB_D_IMM: 2299 case AArch64::LD1RSB_H_IMM: 2300 case AArch64::LD1RSB_S_IMM: 2301 case AArch64::LD1RSB_D_IMM: 2302 case AArch64::LD1RH_IMM: 2303 case AArch64::LD1RH_S_IMM: 2304 case AArch64::LD1RH_D_IMM: 2305 case AArch64::LD1RSH_S_IMM: 2306 case AArch64::LD1RSH_D_IMM: 2307 case AArch64::LD1RW_IMM: 2308 case AArch64::LD1RW_D_IMM: 2309 case AArch64::LD1RSW_IMM: 2310 case AArch64::LD1RD_IMM: 2311 2312 case AArch64::LDNT1B_ZRI: 2313 case AArch64::LDNT1H_ZRI: 2314 case AArch64::LDNT1W_ZRI: 2315 case AArch64::LDNT1D_ZRI: 2316 case AArch64::STNT1B_ZRI: 2317 case AArch64::STNT1H_ZRI: 2318 case AArch64::STNT1W_ZRI: 2319 case AArch64::STNT1D_ZRI: 2320 2321 case AArch64::LDNF1B_IMM: 2322 case AArch64::LDNF1B_H_IMM: 2323 case AArch64::LDNF1B_S_IMM: 2324 case AArch64::LDNF1B_D_IMM: 2325 case AArch64::LDNF1SB_H_IMM: 2326 case AArch64::LDNF1SB_S_IMM: 2327 case AArch64::LDNF1SB_D_IMM: 2328 case AArch64::LDNF1H_IMM: 2329 case AArch64::LDNF1H_S_IMM: 2330 case AArch64::LDNF1H_D_IMM: 2331 case AArch64::LDNF1SH_S_IMM: 2332 case AArch64::LDNF1SH_D_IMM: 2333 case AArch64::LDNF1W_IMM: 2334 case AArch64::LDNF1W_D_IMM: 2335 case AArch64::LDNF1SW_D_IMM: 2336 case AArch64::LDNF1D_IMM: 2337 return 3; 2338 case AArch64::ADDG: 2339 case AArch64::STGOffset: 2340 case AArch64::LDR_PXI: 2341 case AArch64::STR_PXI: 2342 return 2; 2343 } 2344 } 2345 2346 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) { 2347 switch (MI.getOpcode()) { 2348 default: 2349 return false; 2350 // Scaled instructions. 2351 case AArch64::STRSui: 2352 case AArch64::STRDui: 2353 case AArch64::STRQui: 2354 case AArch64::STRXui: 2355 case AArch64::STRWui: 2356 case AArch64::LDRSui: 2357 case AArch64::LDRDui: 2358 case AArch64::LDRQui: 2359 case AArch64::LDRXui: 2360 case AArch64::LDRWui: 2361 case AArch64::LDRSWui: 2362 // Unscaled instructions. 
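  // Pre-indexed forms are listed here as well; isCandidateToMergeOrPair
  // accounts for their extra write-back operand when checking base and offset.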
2363 case AArch64::STURSi: 2364 case AArch64::STRSpre: 2365 case AArch64::STURDi: 2366 case AArch64::STRDpre: 2367 case AArch64::STURQi: 2368 case AArch64::STRQpre: 2369 case AArch64::STURWi: 2370 case AArch64::STRWpre: 2371 case AArch64::STURXi: 2372 case AArch64::STRXpre: 2373 case AArch64::LDURSi: 2374 case AArch64::LDRSpre: 2375 case AArch64::LDURDi: 2376 case AArch64::LDRDpre: 2377 case AArch64::LDURQi: 2378 case AArch64::LDRQpre: 2379 case AArch64::LDURWi: 2380 case AArch64::LDRWpre: 2381 case AArch64::LDURXi: 2382 case AArch64::LDRXpre: 2383 case AArch64::LDURSWi: 2384 return true; 2385 } 2386 } 2387 2388 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc, 2389 bool &Is64Bit) { 2390 switch (Opc) { 2391 default: 2392 llvm_unreachable("Opcode has no flag setting equivalent!"); 2393 // 32-bit cases: 2394 case AArch64::ADDWri: 2395 Is64Bit = false; 2396 return AArch64::ADDSWri; 2397 case AArch64::ADDWrr: 2398 Is64Bit = false; 2399 return AArch64::ADDSWrr; 2400 case AArch64::ADDWrs: 2401 Is64Bit = false; 2402 return AArch64::ADDSWrs; 2403 case AArch64::ADDWrx: 2404 Is64Bit = false; 2405 return AArch64::ADDSWrx; 2406 case AArch64::ANDWri: 2407 Is64Bit = false; 2408 return AArch64::ANDSWri; 2409 case AArch64::ANDWrr: 2410 Is64Bit = false; 2411 return AArch64::ANDSWrr; 2412 case AArch64::ANDWrs: 2413 Is64Bit = false; 2414 return AArch64::ANDSWrs; 2415 case AArch64::BICWrr: 2416 Is64Bit = false; 2417 return AArch64::BICSWrr; 2418 case AArch64::BICWrs: 2419 Is64Bit = false; 2420 return AArch64::BICSWrs; 2421 case AArch64::SUBWri: 2422 Is64Bit = false; 2423 return AArch64::SUBSWri; 2424 case AArch64::SUBWrr: 2425 Is64Bit = false; 2426 return AArch64::SUBSWrr; 2427 case AArch64::SUBWrs: 2428 Is64Bit = false; 2429 return AArch64::SUBSWrs; 2430 case AArch64::SUBWrx: 2431 Is64Bit = false; 2432 return AArch64::SUBSWrx; 2433 // 64-bit cases: 2434 case AArch64::ADDXri: 2435 Is64Bit = true; 2436 return AArch64::ADDSXri; 2437 case AArch64::ADDXrr: 2438 Is64Bit = true; 2439 return AArch64::ADDSXrr; 2440 case AArch64::ADDXrs: 2441 Is64Bit = true; 2442 return AArch64::ADDSXrs; 2443 case AArch64::ADDXrx: 2444 Is64Bit = true; 2445 return AArch64::ADDSXrx; 2446 case AArch64::ANDXri: 2447 Is64Bit = true; 2448 return AArch64::ANDSXri; 2449 case AArch64::ANDXrr: 2450 Is64Bit = true; 2451 return AArch64::ANDSXrr; 2452 case AArch64::ANDXrs: 2453 Is64Bit = true; 2454 return AArch64::ANDSXrs; 2455 case AArch64::BICXrr: 2456 Is64Bit = true; 2457 return AArch64::BICSXrr; 2458 case AArch64::BICXrs: 2459 Is64Bit = true; 2460 return AArch64::BICSXrs; 2461 case AArch64::SUBXri: 2462 Is64Bit = true; 2463 return AArch64::SUBSXri; 2464 case AArch64::SUBXrr: 2465 Is64Bit = true; 2466 return AArch64::SUBSXrr; 2467 case AArch64::SUBXrs: 2468 Is64Bit = true; 2469 return AArch64::SUBSXrs; 2470 case AArch64::SUBXrx: 2471 Is64Bit = true; 2472 return AArch64::SUBSXrx; 2473 } 2474 } 2475 2476 // Is this a candidate for ld/st merging or pairing? For example, we don't 2477 // touch volatiles or load/stores that have a hint to avoid pair formation. 2478 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { 2479 2480 bool IsPreLdSt = isPreLdSt(MI); 2481 2482 // If this is a volatile load/store, don't mess with it. 2483 if (MI.hasOrderedMemoryRef()) 2484 return false; 2485 2486 // Make sure this is a reg/fi+imm (as opposed to an address reloc). 2487 // For Pre-inc LD/ST, the operand is shifted by one. 2488 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() || 2489 MI.getOperand(IsPreLdSt ? 
2 : 1).isFI()) && 2490 "Expected a reg or frame index operand."); 2491 2492 // For Pre-indexed addressing quadword instructions, the third operand is the 2493 // immediate value. 2494 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm(); 2495 2496 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt) 2497 return false; 2498 2499 // Can't merge/pair if the instruction modifies the base register. 2500 // e.g., ldr x0, [x0] 2501 // This case will never occur with an FI base. 2502 // However, if the instruction is an LDR/STR<S,D,Q,W,X>pre, it can be merged. 2503 // For example: 2504 // ldr q0, [x11, #32]! 2505 // ldr q1, [x11, #16] 2506 // to 2507 // ldp q0, q1, [x11, #32]! 2508 if (MI.getOperand(1).isReg() && !IsPreLdSt) { 2509 Register BaseReg = MI.getOperand(1).getReg(); 2510 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2511 if (MI.modifiesRegister(BaseReg, TRI)) 2512 return false; 2513 } 2514 2515 // Check if this load/store has a hint to avoid pair formation. 2516 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. 2517 if (isLdStPairSuppressed(MI)) 2518 return false; 2519 2520 // Do not pair any callee-save store/reload instructions in the 2521 // prologue/epilogue if the CFI information encoded the operations as separate 2522 // instructions, as that will cause the size of the actual prologue to mismatch 2523 // with the prologue size recorded in the Windows CFI. 2524 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo(); 2525 bool NeedsWinCFI = MAI->usesWindowsCFI() && 2526 MI.getMF()->getFunction().needsUnwindTableEntry(); 2527 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) || 2528 MI.getFlag(MachineInstr::FrameDestroy))) 2529 return false; 2530 2531 // On some CPUs quad load/store pairs are slower than two single load/stores. 2532 if (Subtarget.isPaired128Slow()) { 2533 switch (MI.getOpcode()) { 2534 default: 2535 break; 2536 case AArch64::LDURQi: 2537 case AArch64::STURQi: 2538 case AArch64::LDRQui: 2539 case AArch64::STRQui: 2540 return false; 2541 } 2542 } 2543 2544 return true; 2545 } 2546 2547 bool AArch64InstrInfo::getMemOperandsWithOffsetWidth( 2548 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, 2549 int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, 2550 const TargetRegisterInfo *TRI) const { 2551 if (!LdSt.mayLoadOrStore()) 2552 return false; 2553 2554 const MachineOperand *BaseOp; 2555 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable, 2556 Width, TRI)) 2557 return false; 2558 BaseOps.push_back(BaseOp); 2559 return true; 2560 } 2561 2562 Optional<ExtAddrMode> 2563 AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI, 2564 const TargetRegisterInfo *TRI) const { 2565 const MachineOperand *Base; // Filled with the base operand of MI. 2566 int64_t Offset; // Filled with the offset of MI. 
2567 bool OffsetIsScalable; 2568 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI)) 2569 return None; 2570 2571 if (!Base->isReg()) 2572 return None; 2573 ExtAddrMode AM; 2574 AM.BaseReg = Base->getReg(); 2575 AM.Displacement = Offset; 2576 AM.ScaledReg = 0; 2577 return AM; 2578 } 2579 2580 bool AArch64InstrInfo::getMemOperandWithOffsetWidth( 2581 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, 2582 bool &OffsetIsScalable, unsigned &Width, 2583 const TargetRegisterInfo *TRI) const { 2584 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 2585 // Handle only loads/stores with base register followed by immediate offset. 2586 if (LdSt.getNumExplicitOperands() == 3) { 2587 // Non-paired instruction (e.g., ldr x1, [x0, #8]). 2588 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || 2589 !LdSt.getOperand(2).isImm()) 2590 return false; 2591 } else if (LdSt.getNumExplicitOperands() == 4) { 2592 // Paired instruction (e.g., ldp x1, x2, [x0, #8]). 2593 if (!LdSt.getOperand(1).isReg() || 2594 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) || 2595 !LdSt.getOperand(3).isImm()) 2596 return false; 2597 } else 2598 return false; 2599 2600 // Get the scaling factor for the instruction and set the width for the 2601 // instruction. 2602 TypeSize Scale(0U, false); 2603 int64_t Dummy1, Dummy2; 2604 2605 // If this returns false, then it's an instruction we don't want to handle. 2606 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2)) 2607 return false; 2608 2609 // Compute the offset. Offset is calculated as the immediate operand 2610 // multiplied by the scaling factor. Unscaled instructions have scaling factor 2611 // set to 1. 2612 if (LdSt.getNumExplicitOperands() == 3) { 2613 BaseOp = &LdSt.getOperand(1); 2614 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize(); 2615 } else { 2616 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); 2617 BaseOp = &LdSt.getOperand(2); 2618 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize(); 2619 } 2620 OffsetIsScalable = Scale.isScalable(); 2621 2622 if (!BaseOp->isReg() && !BaseOp->isFI()) 2623 return false; 2624 2625 return true; 2626 } 2627 2628 MachineOperand & 2629 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const { 2630 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 2631 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1); 2632 assert(OfsOp.isImm() && "Offset operand wasn't immediate."); 2633 return OfsOp; 2634 } 2635 2636 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, 2637 unsigned &Width, int64_t &MinOffset, 2638 int64_t &MaxOffset) { 2639 const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8; 2640 switch (Opcode) { 2641 // Not a memory operation or something we want to handle. 
2642 default: 2643 Scale = TypeSize::Fixed(0); 2644 Width = 0; 2645 MinOffset = MaxOffset = 0; 2646 return false; 2647 case AArch64::STRWpost: 2648 case AArch64::LDRWpost: 2649 Width = 32; 2650 Scale = TypeSize::Fixed(4); 2651 MinOffset = -256; 2652 MaxOffset = 255; 2653 break; 2654 case AArch64::LDURQi: 2655 case AArch64::STURQi: 2656 Width = 16; 2657 Scale = TypeSize::Fixed(1); 2658 MinOffset = -256; 2659 MaxOffset = 255; 2660 break; 2661 case AArch64::PRFUMi: 2662 case AArch64::LDURXi: 2663 case AArch64::LDURDi: 2664 case AArch64::STURXi: 2665 case AArch64::STURDi: 2666 Width = 8; 2667 Scale = TypeSize::Fixed(1); 2668 MinOffset = -256; 2669 MaxOffset = 255; 2670 break; 2671 case AArch64::LDURWi: 2672 case AArch64::LDURSi: 2673 case AArch64::LDURSWi: 2674 case AArch64::STURWi: 2675 case AArch64::STURSi: 2676 Width = 4; 2677 Scale = TypeSize::Fixed(1); 2678 MinOffset = -256; 2679 MaxOffset = 255; 2680 break; 2681 case AArch64::LDURHi: 2682 case AArch64::LDURHHi: 2683 case AArch64::LDURSHXi: 2684 case AArch64::LDURSHWi: 2685 case AArch64::STURHi: 2686 case AArch64::STURHHi: 2687 Width = 2; 2688 Scale = TypeSize::Fixed(1); 2689 MinOffset = -256; 2690 MaxOffset = 255; 2691 break; 2692 case AArch64::LDURBi: 2693 case AArch64::LDURBBi: 2694 case AArch64::LDURSBXi: 2695 case AArch64::LDURSBWi: 2696 case AArch64::STURBi: 2697 case AArch64::STURBBi: 2698 Width = 1; 2699 Scale = TypeSize::Fixed(1); 2700 MinOffset = -256; 2701 MaxOffset = 255; 2702 break; 2703 case AArch64::LDPQi: 2704 case AArch64::LDNPQi: 2705 case AArch64::STPQi: 2706 case AArch64::STNPQi: 2707 Scale = TypeSize::Fixed(16); 2708 Width = 32; 2709 MinOffset = -64; 2710 MaxOffset = 63; 2711 break; 2712 case AArch64::LDRQui: 2713 case AArch64::STRQui: 2714 Scale = TypeSize::Fixed(16); 2715 Width = 16; 2716 MinOffset = 0; 2717 MaxOffset = 4095; 2718 break; 2719 case AArch64::LDPXi: 2720 case AArch64::LDPDi: 2721 case AArch64::LDNPXi: 2722 case AArch64::LDNPDi: 2723 case AArch64::STPXi: 2724 case AArch64::STPDi: 2725 case AArch64::STNPXi: 2726 case AArch64::STNPDi: 2727 Scale = TypeSize::Fixed(8); 2728 Width = 16; 2729 MinOffset = -64; 2730 MaxOffset = 63; 2731 break; 2732 case AArch64::PRFMui: 2733 case AArch64::LDRXui: 2734 case AArch64::LDRDui: 2735 case AArch64::STRXui: 2736 case AArch64::STRDui: 2737 Scale = TypeSize::Fixed(8); 2738 Width = 8; 2739 MinOffset = 0; 2740 MaxOffset = 4095; 2741 break; 2742 case AArch64::StoreSwiftAsyncContext: 2743 // Store is an STRXui, but there might be an ADDXri in the expansion too. 
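    // Be conservative: model it with byte granularity over the unsigned
    // 12-bit immediate range.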
2744 Scale = TypeSize::Fixed(1); 2745 Width = 8; 2746 MinOffset = 0; 2747 MaxOffset = 4095; 2748 break; 2749 case AArch64::LDPWi: 2750 case AArch64::LDPSi: 2751 case AArch64::LDNPWi: 2752 case AArch64::LDNPSi: 2753 case AArch64::STPWi: 2754 case AArch64::STPSi: 2755 case AArch64::STNPWi: 2756 case AArch64::STNPSi: 2757 Scale = TypeSize::Fixed(4); 2758 Width = 8; 2759 MinOffset = -64; 2760 MaxOffset = 63; 2761 break; 2762 case AArch64::LDRWui: 2763 case AArch64::LDRSui: 2764 case AArch64::LDRSWui: 2765 case AArch64::STRWui: 2766 case AArch64::STRSui: 2767 Scale = TypeSize::Fixed(4); 2768 Width = 4; 2769 MinOffset = 0; 2770 MaxOffset = 4095; 2771 break; 2772 case AArch64::LDRHui: 2773 case AArch64::LDRHHui: 2774 case AArch64::LDRSHWui: 2775 case AArch64::LDRSHXui: 2776 case AArch64::STRHui: 2777 case AArch64::STRHHui: 2778 Scale = TypeSize::Fixed(2); 2779 Width = 2; 2780 MinOffset = 0; 2781 MaxOffset = 4095; 2782 break; 2783 case AArch64::LDRBui: 2784 case AArch64::LDRBBui: 2785 case AArch64::LDRSBWui: 2786 case AArch64::LDRSBXui: 2787 case AArch64::STRBui: 2788 case AArch64::STRBBui: 2789 Scale = TypeSize::Fixed(1); 2790 Width = 1; 2791 MinOffset = 0; 2792 MaxOffset = 4095; 2793 break; 2794 case AArch64::STPXpre: 2795 case AArch64::LDPXpost: 2796 case AArch64::STPDpre: 2797 case AArch64::LDPDpost: 2798 Scale = TypeSize::Fixed(8); 2799 Width = 8; 2800 MinOffset = -512; 2801 MaxOffset = 504; 2802 break; 2803 case AArch64::STPQpre: 2804 case AArch64::LDPQpost: 2805 Scale = TypeSize::Fixed(16); 2806 Width = 16; 2807 MinOffset = -1024; 2808 MaxOffset = 1008; 2809 break; 2810 case AArch64::STRXpre: 2811 case AArch64::STRDpre: 2812 case AArch64::LDRXpost: 2813 case AArch64::LDRDpost: 2814 Scale = TypeSize::Fixed(1); 2815 Width = 8; 2816 MinOffset = -256; 2817 MaxOffset = 255; 2818 break; 2819 case AArch64::STRQpre: 2820 case AArch64::LDRQpost: 2821 Scale = TypeSize::Fixed(1); 2822 Width = 16; 2823 MinOffset = -256; 2824 MaxOffset = 255; 2825 break; 2826 case AArch64::ADDG: 2827 Scale = TypeSize::Fixed(16); 2828 Width = 0; 2829 MinOffset = 0; 2830 MaxOffset = 63; 2831 break; 2832 case AArch64::TAGPstack: 2833 Scale = TypeSize::Fixed(16); 2834 Width = 0; 2835 // TAGP with a negative offset turns into SUBP, which has a maximum offset 2836 // of 63 (not 64!). 
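    // Hence the asymmetric [-63, 63] range below.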
    MinOffset = -63;
    MaxOffset = 63;
    break;
  case AArch64::LDG:
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
    Scale = TypeSize::Fixed(16);
    Width = 16;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::STR_ZZZZXI:
  case AArch64::LDR_ZZZZXI:
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector * 4;
    MinOffset = -256;
    MaxOffset = 252;
    break;
  case AArch64::STR_ZZZXI:
  case AArch64::LDR_ZZZXI:
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector * 3;
    MinOffset = -256;
    MaxOffset = 253;
    break;
  case AArch64::STR_ZZXI:
  case AArch64::LDR_ZZXI:
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector * 2;
    MinOffset = -256;
    MaxOffset = 254;
    break;
  case AArch64::LDR_PXI:
  case AArch64::STR_PXI:
    Scale = TypeSize::Scalable(2);
    Width = SVEMaxBytesPerVector / 8;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDR_ZXI:
  case AArch64::STR_ZXI:
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LD1B_IMM:
  case AArch64::LD1H_IMM:
  case AArch64::LD1W_IMM:
  case AArch64::LD1D_IMM:
  case AArch64::LDNT1B_ZRI:
  case AArch64::LDNT1H_ZRI:
  case AArch64::LDNT1W_ZRI:
  case AArch64::LDNT1D_ZRI:
  case AArch64::ST1B_IMM:
  case AArch64::ST1H_IMM:
  case AArch64::ST1W_IMM:
  case AArch64::ST1D_IMM:
  case AArch64::STNT1B_ZRI:
  case AArch64::STNT1H_ZRI:
  case AArch64::STNT1W_ZRI:
  case AArch64::STNT1D_ZRI:
  case AArch64::LDNF1B_IMM:
  case AArch64::LDNF1H_IMM:
  case AArch64::LDNF1W_IMM:
  case AArch64::LDNF1D_IMM:
    // A full vector's worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector;
    MinOffset = -8;
    MaxOffset = 7;
    break;
  case AArch64::LD1B_H_IMM:
  case AArch64::LD1SB_H_IMM:
  case AArch64::LD1H_S_IMM:
  case AArch64::LD1SH_S_IMM:
  case AArch64::LD1W_D_IMM:
  case AArch64::LD1SW_D_IMM:
  case AArch64::ST1B_H_IMM:
  case AArch64::ST1H_S_IMM:
  case AArch64::ST1W_D_IMM:
  case AArch64::LDNF1B_H_IMM:
  case AArch64::LDNF1SB_H_IMM:
  case AArch64::LDNF1H_S_IMM:
  case AArch64::LDNF1SH_S_IMM:
  case AArch64::LDNF1W_D_IMM:
  case AArch64::LDNF1SW_D_IMM:
    // A half vector's worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(8);
    Width = SVEMaxBytesPerVector / 2;
    MinOffset = -8;
    MaxOffset = 7;
    break;
  case AArch64::LD1B_S_IMM:
  case AArch64::LD1SB_S_IMM:
  case AArch64::LD1H_D_IMM:
  case AArch64::LD1SH_D_IMM:
  case AArch64::ST1B_S_IMM:
  case AArch64::ST1H_D_IMM:
  case AArch64::LDNF1B_S_IMM:
  case AArch64::LDNF1SB_S_IMM:
  case AArch64::LDNF1H_D_IMM:
  case AArch64::LDNF1SH_D_IMM:
    // A quarter vector's worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(4);
    Width = SVEMaxBytesPerVector / 4;
    MinOffset = -8;
    MaxOffset = 7;
    break;
  case AArch64::LD1B_D_IMM:
  case AArch64::LD1SB_D_IMM:
  case AArch64::ST1B_D_IMM:
  case AArch64::LDNF1B_D_IMM:
  case AArch64::LDNF1SB_D_IMM:
    // An eighth vector's worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(2);
    Width = SVEMaxBytesPerVector / 8;
    MinOffset = -8;
    MaxOffset = 7;
2960 break; 2961 case AArch64::ST2GOffset: 2962 case AArch64::STZ2GOffset: 2963 Scale = TypeSize::Fixed(16); 2964 Width = 32; 2965 MinOffset = -256; 2966 MaxOffset = 255; 2967 break; 2968 case AArch64::STGPi: 2969 Scale = TypeSize::Fixed(16); 2970 Width = 16; 2971 MinOffset = -64; 2972 MaxOffset = 63; 2973 break; 2974 case AArch64::LD1RB_IMM: 2975 case AArch64::LD1RB_H_IMM: 2976 case AArch64::LD1RB_S_IMM: 2977 case AArch64::LD1RB_D_IMM: 2978 case AArch64::LD1RSB_H_IMM: 2979 case AArch64::LD1RSB_S_IMM: 2980 case AArch64::LD1RSB_D_IMM: 2981 Scale = TypeSize::Fixed(1); 2982 Width = 1; 2983 MinOffset = 0; 2984 MaxOffset = 63; 2985 break; 2986 case AArch64::LD1RH_IMM: 2987 case AArch64::LD1RH_S_IMM: 2988 case AArch64::LD1RH_D_IMM: 2989 case AArch64::LD1RSH_S_IMM: 2990 case AArch64::LD1RSH_D_IMM: 2991 Scale = TypeSize::Fixed(2); 2992 Width = 2; 2993 MinOffset = 0; 2994 MaxOffset = 63; 2995 break; 2996 case AArch64::LD1RW_IMM: 2997 case AArch64::LD1RW_D_IMM: 2998 case AArch64::LD1RSW_IMM: 2999 Scale = TypeSize::Fixed(4); 3000 Width = 4; 3001 MinOffset = 0; 3002 MaxOffset = 63; 3003 break; 3004 case AArch64::LD1RD_IMM: 3005 Scale = TypeSize::Fixed(8); 3006 Width = 8; 3007 MinOffset = 0; 3008 MaxOffset = 63; 3009 break; 3010 } 3011 3012 return true; 3013 } 3014 3015 // Scaling factor for unscaled load or store. 3016 int AArch64InstrInfo::getMemScale(unsigned Opc) { 3017 switch (Opc) { 3018 default: 3019 llvm_unreachable("Opcode has unknown scale!"); 3020 case AArch64::LDRBBui: 3021 case AArch64::LDURBBi: 3022 case AArch64::LDRSBWui: 3023 case AArch64::LDURSBWi: 3024 case AArch64::STRBBui: 3025 case AArch64::STURBBi: 3026 return 1; 3027 case AArch64::LDRHHui: 3028 case AArch64::LDURHHi: 3029 case AArch64::LDRSHWui: 3030 case AArch64::LDURSHWi: 3031 case AArch64::STRHHui: 3032 case AArch64::STURHHi: 3033 return 2; 3034 case AArch64::LDRSui: 3035 case AArch64::LDURSi: 3036 case AArch64::LDRSpre: 3037 case AArch64::LDRSWui: 3038 case AArch64::LDURSWi: 3039 case AArch64::LDRWpre: 3040 case AArch64::LDRWui: 3041 case AArch64::LDURWi: 3042 case AArch64::STRSui: 3043 case AArch64::STURSi: 3044 case AArch64::STRSpre: 3045 case AArch64::STRWui: 3046 case AArch64::STURWi: 3047 case AArch64::STRWpre: 3048 case AArch64::LDPSi: 3049 case AArch64::LDPSWi: 3050 case AArch64::LDPWi: 3051 case AArch64::STPSi: 3052 case AArch64::STPWi: 3053 return 4; 3054 case AArch64::LDRDui: 3055 case AArch64::LDURDi: 3056 case AArch64::LDRDpre: 3057 case AArch64::LDRXui: 3058 case AArch64::LDURXi: 3059 case AArch64::LDRXpre: 3060 case AArch64::STRDui: 3061 case AArch64::STURDi: 3062 case AArch64::STRDpre: 3063 case AArch64::STRXui: 3064 case AArch64::STURXi: 3065 case AArch64::STRXpre: 3066 case AArch64::LDPDi: 3067 case AArch64::LDPXi: 3068 case AArch64::STPDi: 3069 case AArch64::STPXi: 3070 return 8; 3071 case AArch64::LDRQui: 3072 case AArch64::LDURQi: 3073 case AArch64::STRQui: 3074 case AArch64::STURQi: 3075 case AArch64::STRQpre: 3076 case AArch64::LDPQi: 3077 case AArch64::LDRQpre: 3078 case AArch64::STPQi: 3079 case AArch64::STGOffset: 3080 case AArch64::STZGOffset: 3081 case AArch64::ST2GOffset: 3082 case AArch64::STZ2GOffset: 3083 case AArch64::STGPi: 3084 return 16; 3085 } 3086 } 3087 3088 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) { 3089 switch (MI.getOpcode()) { 3090 default: 3091 return false; 3092 case AArch64::LDRWpre: 3093 case AArch64::LDRXpre: 3094 case AArch64::LDRSpre: 3095 case AArch64::LDRDpre: 3096 case AArch64::LDRQpre: 3097 return true; 3098 } 3099 } 3100 3101 bool 
AArch64InstrInfo::isPreSt(const MachineInstr &MI) { 3102 switch (MI.getOpcode()) { 3103 default: 3104 return false; 3105 case AArch64::STRWpre: 3106 case AArch64::STRXpre: 3107 case AArch64::STRSpre: 3108 case AArch64::STRDpre: 3109 case AArch64::STRQpre: 3110 return true; 3111 } 3112 } 3113 3114 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) { 3115 return isPreLd(MI) || isPreSt(MI); 3116 } 3117 3118 // Scale the unscaled offsets. Returns false if the unscaled offset can't be 3119 // scaled. 3120 static bool scaleOffset(unsigned Opc, int64_t &Offset) { 3121 int Scale = AArch64InstrInfo::getMemScale(Opc); 3122 3123 // If the byte-offset isn't a multiple of the stride, we can't scale this 3124 // offset. 3125 if (Offset % Scale != 0) 3126 return false; 3127 3128 // Convert the byte-offset used by unscaled into an "element" offset used 3129 // by the scaled pair load/store instructions. 3130 Offset /= Scale; 3131 return true; 3132 } 3133 3134 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { 3135 if (FirstOpc == SecondOpc) 3136 return true; 3137 // We can also pair sign-ext and zero-ext instructions. 3138 switch (FirstOpc) { 3139 default: 3140 return false; 3141 case AArch64::LDRWui: 3142 case AArch64::LDURWi: 3143 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; 3144 case AArch64::LDRSWui: 3145 case AArch64::LDURSWi: 3146 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi; 3147 } 3148 // These instructions can't be paired based on their opcodes. 3149 return false; 3150 } 3151 3152 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, 3153 int64_t Offset1, unsigned Opcode1, int FI2, 3154 int64_t Offset2, unsigned Opcode2) { 3155 // Accesses through fixed stack object frame indices may access a different 3156 // fixed stack slot. Check that the object offsets + offsets match. 3157 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) { 3158 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1); 3159 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2); 3160 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered."); 3161 // Convert to scaled object offsets. 3162 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1); 3163 if (ObjectOffset1 % Scale1 != 0) 3164 return false; 3165 ObjectOffset1 /= Scale1; 3166 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2); 3167 if (ObjectOffset2 % Scale2 != 0) 3168 return false; 3169 ObjectOffset2 /= Scale2; 3170 ObjectOffset1 += Offset1; 3171 ObjectOffset2 += Offset2; 3172 return ObjectOffset1 + 1 == ObjectOffset2; 3173 } 3174 3175 return FI1 == FI2; 3176 } 3177 3178 /// Detect opportunities for ldp/stp formation. 3179 /// 3180 /// Only called for LdSt for which getMemOperandWithOffset returns true. 3181 bool AArch64InstrInfo::shouldClusterMemOps( 3182 ArrayRef<const MachineOperand *> BaseOps1, 3183 ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads, 3184 unsigned NumBytes) const { 3185 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); 3186 const MachineOperand &BaseOp1 = *BaseOps1.front(); 3187 const MachineOperand &BaseOp2 = *BaseOps2.front(); 3188 const MachineInstr &FirstLdSt = *BaseOp1.getParent(); 3189 const MachineInstr &SecondLdSt = *BaseOp2.getParent(); 3190 if (BaseOp1.getType() != BaseOp2.getType()) 3191 return false; 3192 3193 assert((BaseOp1.isReg() || BaseOp1.isFI()) && 3194 "Only base registers and frame indices are supported."); 3195 3196 // Check for both base regs and base FI. 
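  // (A mixed register/frame-index pair has already been rejected by the
  // getType() comparison above.)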
3197 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) 3198 return false; 3199 3200 // Only cluster up to a single pair. 3201 if (NumLoads > 2) 3202 return false; 3203 3204 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt)) 3205 return false; 3206 3207 // Can we pair these instructions based on their opcodes? 3208 unsigned FirstOpc = FirstLdSt.getOpcode(); 3209 unsigned SecondOpc = SecondLdSt.getOpcode(); 3210 if (!canPairLdStOpc(FirstOpc, SecondOpc)) 3211 return false; 3212 3213 // Can't merge volatiles or load/stores that have a hint to avoid pair 3214 // formation, for example. 3215 if (!isCandidateToMergeOrPair(FirstLdSt) || 3216 !isCandidateToMergeOrPair(SecondLdSt)) 3217 return false; 3218 3219 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate. 3220 int64_t Offset1 = FirstLdSt.getOperand(2).getImm(); 3221 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1)) 3222 return false; 3223 3224 int64_t Offset2 = SecondLdSt.getOperand(2).getImm(); 3225 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2)) 3226 return false; 3227 3228 // Pairwise instructions have a 7-bit signed offset field. 3229 if (Offset1 > 63 || Offset1 < -64) 3230 return false; 3231 3232 // The caller should already have ordered First/SecondLdSt by offset. 3233 // Note: except for non-equal frame index bases 3234 if (BaseOp1.isFI()) { 3235 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) && 3236 "Caller should have ordered offsets."); 3237 3238 const MachineFrameInfo &MFI = 3239 FirstLdSt.getParent()->getParent()->getFrameInfo(); 3240 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc, 3241 BaseOp2.getIndex(), Offset2, SecondOpc); 3242 } 3243 3244 assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); 3245 3246 return Offset1 + 1 == Offset2; 3247 } 3248 3249 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, 3250 unsigned Reg, unsigned SubIdx, 3251 unsigned State, 3252 const TargetRegisterInfo *TRI) { 3253 if (!SubIdx) 3254 return MIB.addReg(Reg, State); 3255 3256 if (Register::isPhysicalRegister(Reg)) 3257 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 3258 return MIB.addReg(Reg, State, SubIdx); 3259 } 3260 3261 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, 3262 unsigned NumRegs) { 3263 // We really want the positive remainder mod 32 here, that happens to be 3264 // easily obtainable with a mask. 
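  // For example, with NumRegs == 2, DestEncoding == 0 and SrcEncoding == 31
  // (a tuple that wraps around): (0 - 31) & 0x1f == 1, which is < 2, so a
  // forward copy would overwrite the second source register (encoding 0)
  // before it is read; copyPhysRegTuple then copies the tuple in reverse.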
3265 return ((DestReg - SrcReg) & 0x1f) < NumRegs; 3266 } 3267 3268 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, 3269 MachineBasicBlock::iterator I, 3270 const DebugLoc &DL, MCRegister DestReg, 3271 MCRegister SrcReg, bool KillSrc, 3272 unsigned Opcode, 3273 ArrayRef<unsigned> Indices) const { 3274 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); 3275 const TargetRegisterInfo *TRI = &getRegisterInfo(); 3276 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 3277 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 3278 unsigned NumRegs = Indices.size(); 3279 3280 int SubReg = 0, End = NumRegs, Incr = 1; 3281 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { 3282 SubReg = NumRegs - 1; 3283 End = -1; 3284 Incr = -1; 3285 } 3286 3287 for (; SubReg != End; SubReg += Incr) { 3288 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 3289 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 3290 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); 3291 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 3292 } 3293 } 3294 3295 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB, 3296 MachineBasicBlock::iterator I, 3297 DebugLoc DL, unsigned DestReg, 3298 unsigned SrcReg, bool KillSrc, 3299 unsigned Opcode, unsigned ZeroReg, 3300 llvm::ArrayRef<unsigned> Indices) const { 3301 const TargetRegisterInfo *TRI = &getRegisterInfo(); 3302 unsigned NumRegs = Indices.size(); 3303 3304 #ifndef NDEBUG 3305 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 3306 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 3307 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 && 3308 "GPR reg sequences should not be able to overlap"); 3309 #endif 3310 3311 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) { 3312 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 3313 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 3314 MIB.addReg(ZeroReg); 3315 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 3316 MIB.addImm(0); 3317 } 3318 } 3319 3320 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 3321 MachineBasicBlock::iterator I, 3322 const DebugLoc &DL, MCRegister DestReg, 3323 MCRegister SrcReg, bool KillSrc) const { 3324 if (AArch64::GPR32spRegClass.contains(DestReg) && 3325 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { 3326 const TargetRegisterInfo *TRI = &getRegisterInfo(); 3327 3328 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { 3329 // If either operand is WSP, expand to ADD #0. 3330 if (Subtarget.hasZeroCycleRegMove()) { 3331 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 3332 MCRegister DestRegX = TRI->getMatchingSuperReg( 3333 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 3334 MCRegister SrcRegX = TRI->getMatchingSuperReg( 3335 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 3336 // This instruction is reading and writing X registers. This may upset 3337 // the register scavenger and machine verifier, so we need to indicate 3338 // that we are reading an undefined value from SrcRegX, but a proper 3339 // value from SrcReg. 
3340 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) 3341 .addReg(SrcRegX, RegState::Undef) 3342 .addImm(0) 3343 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 3344 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 3345 } else { 3346 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) 3347 .addReg(SrcReg, getKillRegState(KillSrc)) 3348 .addImm(0) 3349 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 3350 } 3351 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) { 3352 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) 3353 .addImm(0) 3354 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 3355 } else { 3356 if (Subtarget.hasZeroCycleRegMove()) { 3357 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. 3358 MCRegister DestRegX = TRI->getMatchingSuperReg( 3359 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 3360 MCRegister SrcRegX = TRI->getMatchingSuperReg( 3361 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 3362 // This instruction is reading and writing X registers. This may upset 3363 // the register scavenger and machine verifier, so we need to indicate 3364 // that we are reading an undefined value from SrcRegX, but a proper 3365 // value from SrcReg. 3366 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) 3367 .addReg(AArch64::XZR) 3368 .addReg(SrcRegX, RegState::Undef) 3369 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 3370 } else { 3371 // Otherwise, expand to ORR WZR. 3372 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) 3373 .addReg(AArch64::WZR) 3374 .addReg(SrcReg, getKillRegState(KillSrc)); 3375 } 3376 } 3377 return; 3378 } 3379 3380 // Copy a Predicate register by ORRing with itself. 3381 if (AArch64::PPRRegClass.contains(DestReg) && 3382 AArch64::PPRRegClass.contains(SrcReg)) { 3383 assert(Subtarget.hasSVE() && "Unexpected SVE register."); 3384 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg) 3385 .addReg(SrcReg) // Pg 3386 .addReg(SrcReg) 3387 .addReg(SrcReg, getKillRegState(KillSrc)); 3388 return; 3389 } 3390 3391 // Copy a Z register by ORRing with itself. 3392 if (AArch64::ZPRRegClass.contains(DestReg) && 3393 AArch64::ZPRRegClass.contains(SrcReg)) { 3394 assert(Subtarget.hasSVE() && "Unexpected SVE register."); 3395 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg) 3396 .addReg(SrcReg) 3397 .addReg(SrcReg, getKillRegState(KillSrc)); 3398 return; 3399 } 3400 3401 // Copy a Z register pair by copying the individual sub-registers. 3402 if (AArch64::ZPR2RegClass.contains(DestReg) && 3403 AArch64::ZPR2RegClass.contains(SrcReg)) { 3404 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1}; 3405 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, 3406 Indices); 3407 return; 3408 } 3409 3410 // Copy a Z register triple by copying the individual sub-registers. 3411 if (AArch64::ZPR3RegClass.contains(DestReg) && 3412 AArch64::ZPR3RegClass.contains(SrcReg)) { 3413 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, 3414 AArch64::zsub2}; 3415 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, 3416 Indices); 3417 return; 3418 } 3419 3420 // Copy a Z register quad by copying the individual sub-registers. 
3421 if (AArch64::ZPR4RegClass.contains(DestReg) && 3422 AArch64::ZPR4RegClass.contains(SrcReg)) { 3423 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, 3424 AArch64::zsub2, AArch64::zsub3}; 3425 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, 3426 Indices); 3427 return; 3428 } 3429 3430 if (AArch64::GPR64spRegClass.contains(DestReg) && 3431 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { 3432 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { 3433 // If either operand is SP, expand to ADD #0. 3434 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) 3435 .addReg(SrcReg, getKillRegState(KillSrc)) 3436 .addImm(0) 3437 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 3438 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) { 3439 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg) 3440 .addImm(0) 3441 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 3442 } else { 3443 // Otherwise, expand to ORR XZR. 3444 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) 3445 .addReg(AArch64::XZR) 3446 .addReg(SrcReg, getKillRegState(KillSrc)); 3447 } 3448 return; 3449 } 3450 3451 // Copy a DDDD register quad by copying the individual sub-registers. 3452 if (AArch64::DDDDRegClass.contains(DestReg) && 3453 AArch64::DDDDRegClass.contains(SrcReg)) { 3454 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 3455 AArch64::dsub2, AArch64::dsub3}; 3456 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 3457 Indices); 3458 return; 3459 } 3460 3461 // Copy a DDD register triple by copying the individual sub-registers. 3462 if (AArch64::DDDRegClass.contains(DestReg) && 3463 AArch64::DDDRegClass.contains(SrcReg)) { 3464 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 3465 AArch64::dsub2}; 3466 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 3467 Indices); 3468 return; 3469 } 3470 3471 // Copy a DD register pair by copying the individual sub-registers. 3472 if (AArch64::DDRegClass.contains(DestReg) && 3473 AArch64::DDRegClass.contains(SrcReg)) { 3474 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1}; 3475 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 3476 Indices); 3477 return; 3478 } 3479 3480 // Copy a QQQQ register quad by copying the individual sub-registers. 3481 if (AArch64::QQQQRegClass.contains(DestReg) && 3482 AArch64::QQQQRegClass.contains(SrcReg)) { 3483 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 3484 AArch64::qsub2, AArch64::qsub3}; 3485 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 3486 Indices); 3487 return; 3488 } 3489 3490 // Copy a QQQ register triple by copying the individual sub-registers. 3491 if (AArch64::QQQRegClass.contains(DestReg) && 3492 AArch64::QQQRegClass.contains(SrcReg)) { 3493 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 3494 AArch64::qsub2}; 3495 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 3496 Indices); 3497 return; 3498 } 3499 3500 // Copy a QQ register pair by copying the individual sub-registers. 
3501 if (AArch64::QQRegClass.contains(DestReg) && 3502 AArch64::QQRegClass.contains(SrcReg)) { 3503 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1}; 3504 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 3505 Indices); 3506 return; 3507 } 3508 3509 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) && 3510 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) { 3511 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64}; 3512 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs, 3513 AArch64::XZR, Indices); 3514 return; 3515 } 3516 3517 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) && 3518 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) { 3519 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32}; 3520 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs, 3521 AArch64::WZR, Indices); 3522 return; 3523 } 3524 3525 if (AArch64::FPR128RegClass.contains(DestReg) && 3526 AArch64::FPR128RegClass.contains(SrcReg)) { 3527 if (Subtarget.hasNEON()) { 3528 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 3529 .addReg(SrcReg) 3530 .addReg(SrcReg, getKillRegState(KillSrc)); 3531 } else { 3532 BuildMI(MBB, I, DL, get(AArch64::STRQpre)) 3533 .addReg(AArch64::SP, RegState::Define) 3534 .addReg(SrcReg, getKillRegState(KillSrc)) 3535 .addReg(AArch64::SP) 3536 .addImm(-16); 3537 BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) 3538 .addReg(AArch64::SP, RegState::Define) 3539 .addReg(DestReg, RegState::Define) 3540 .addReg(AArch64::SP) 3541 .addImm(16); 3542 } 3543 return; 3544 } 3545 3546 if (AArch64::FPR64RegClass.contains(DestReg) && 3547 AArch64::FPR64RegClass.contains(SrcReg)) { 3548 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) 3549 .addReg(SrcReg, getKillRegState(KillSrc)); 3550 return; 3551 } 3552 3553 if (AArch64::FPR32RegClass.contains(DestReg) && 3554 AArch64::FPR32RegClass.contains(SrcReg)) { 3555 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 3556 .addReg(SrcReg, getKillRegState(KillSrc)); 3557 return; 3558 } 3559 3560 if (AArch64::FPR16RegClass.contains(DestReg) && 3561 AArch64::FPR16RegClass.contains(SrcReg)) { 3562 DestReg = 3563 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass); 3564 SrcReg = 3565 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass); 3566 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 3567 .addReg(SrcReg, getKillRegState(KillSrc)); 3568 return; 3569 } 3570 3571 if (AArch64::FPR8RegClass.contains(DestReg) && 3572 AArch64::FPR8RegClass.contains(SrcReg)) { 3573 DestReg = 3574 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass); 3575 SrcReg = 3576 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass); 3577 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 3578 .addReg(SrcReg, getKillRegState(KillSrc)); 3579 return; 3580 } 3581 3582 // Copies between GPR64 and FPR64. 3583 if (AArch64::FPR64RegClass.contains(DestReg) && 3584 AArch64::GPR64RegClass.contains(SrcReg)) { 3585 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) 3586 .addReg(SrcReg, getKillRegState(KillSrc)); 3587 return; 3588 } 3589 if (AArch64::GPR64RegClass.contains(DestReg) && 3590 AArch64::FPR64RegClass.contains(SrcReg)) { 3591 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) 3592 .addReg(SrcReg, getKillRegState(KillSrc)); 3593 return; 3594 } 3595 // Copies between GPR32 and FPR32. 
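// (FMOVWSr moves a W register into an S register; FMOVSWr is the reverse
// direction, matching the two cases below.)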
3596 if (AArch64::FPR32RegClass.contains(DestReg) && 3597 AArch64::GPR32RegClass.contains(SrcReg)) { 3598 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) 3599 .addReg(SrcReg, getKillRegState(KillSrc)); 3600 return; 3601 } 3602 if (AArch64::GPR32RegClass.contains(DestReg) && 3603 AArch64::FPR32RegClass.contains(SrcReg)) { 3604 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) 3605 .addReg(SrcReg, getKillRegState(KillSrc)); 3606 return; 3607 } 3608 3609 if (DestReg == AArch64::NZCV) { 3610 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); 3611 BuildMI(MBB, I, DL, get(AArch64::MSR)) 3612 .addImm(AArch64SysReg::NZCV) 3613 .addReg(SrcReg, getKillRegState(KillSrc)) 3614 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); 3615 return; 3616 } 3617 3618 if (SrcReg == AArch64::NZCV) { 3619 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); 3620 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg) 3621 .addImm(AArch64SysReg::NZCV) 3622 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); 3623 return; 3624 } 3625 3626 #ifndef NDEBUG 3627 const TargetRegisterInfo &TRI = getRegisterInfo(); 3628 errs() << TRI.getRegAsmName(DestReg) << " = COPY " 3629 << TRI.getRegAsmName(SrcReg) << "\n"; 3630 #endif 3631 llvm_unreachable("unimplemented reg-to-reg copy"); 3632 } 3633 3634 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, 3635 MachineBasicBlock &MBB, 3636 MachineBasicBlock::iterator InsertBefore, 3637 const MCInstrDesc &MCID, 3638 Register SrcReg, bool IsKill, 3639 unsigned SubIdx0, unsigned SubIdx1, int FI, 3640 MachineMemOperand *MMO) { 3641 Register SrcReg0 = SrcReg; 3642 Register SrcReg1 = SrcReg; 3643 if (Register::isPhysicalRegister(SrcReg)) { 3644 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0); 3645 SubIdx0 = 0; 3646 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1); 3647 SubIdx1 = 0; 3648 } 3649 BuildMI(MBB, InsertBefore, DebugLoc(), MCID) 3650 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0) 3651 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1) 3652 .addFrameIndex(FI) 3653 .addImm(0) 3654 .addMemOperand(MMO); 3655 } 3656 3657 void AArch64InstrInfo::storeRegToStackSlot( 3658 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, 3659 bool isKill, int FI, const TargetRegisterClass *RC, 3660 const TargetRegisterInfo *TRI) const { 3661 MachineFunction &MF = *MBB.getParent(); 3662 MachineFrameInfo &MFI = MF.getFrameInfo(); 3663 3664 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 3665 MachineMemOperand *MMO = 3666 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 3667 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 3668 unsigned Opc = 0; 3669 bool Offset = true; 3670 unsigned StackID = TargetStackID::Default; 3671 switch (TRI->getSpillSize(*RC)) { 3672 case 1: 3673 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 3674 Opc = AArch64::STRBui; 3675 break; 3676 case 2: 3677 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 3678 Opc = AArch64::STRHui; 3679 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { 3680 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3681 Opc = AArch64::STR_PXI; 3682 StackID = TargetStackID::ScalableVector; 3683 } 3684 break; 3685 case 4: 3686 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 3687 Opc = AArch64::STRWui; 3688 if (Register::isVirtualRegister(SrcReg)) 3689 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); 3690 else 3691 assert(SrcReg != AArch64::WSP); 3692 } else if 
(AArch64::FPR32RegClass.hasSubClassEq(RC)) 3693 Opc = AArch64::STRSui; 3694 break; 3695 case 8: 3696 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 3697 Opc = AArch64::STRXui; 3698 if (Register::isVirtualRegister(SrcReg)) 3699 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 3700 else 3701 assert(SrcReg != AArch64::SP); 3702 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) { 3703 Opc = AArch64::STRDui; 3704 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) { 3705 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI, 3706 get(AArch64::STPWi), SrcReg, isKill, 3707 AArch64::sube32, AArch64::subo32, FI, MMO); 3708 return; 3709 } 3710 break; 3711 case 16: 3712 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 3713 Opc = AArch64::STRQui; 3714 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 3715 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3716 Opc = AArch64::ST1Twov1d; 3717 Offset = false; 3718 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 3719 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI, 3720 get(AArch64::STPXi), SrcReg, isKill, 3721 AArch64::sube64, AArch64::subo64, FI, MMO); 3722 return; 3723 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { 3724 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3725 Opc = AArch64::STR_ZXI; 3726 StackID = TargetStackID::ScalableVector; 3727 } 3728 break; 3729 case 24: 3730 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 3731 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3732 Opc = AArch64::ST1Threev1d; 3733 Offset = false; 3734 } 3735 break; 3736 case 32: 3737 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 3738 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3739 Opc = AArch64::ST1Fourv1d; 3740 Offset = false; 3741 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 3742 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3743 Opc = AArch64::ST1Twov2d; 3744 Offset = false; 3745 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { 3746 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3747 Opc = AArch64::STR_ZZXI; 3748 StackID = TargetStackID::ScalableVector; 3749 } 3750 break; 3751 case 48: 3752 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 3753 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3754 Opc = AArch64::ST1Threev2d; 3755 Offset = false; 3756 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { 3757 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3758 Opc = AArch64::STR_ZZZXI; 3759 StackID = TargetStackID::ScalableVector; 3760 } 3761 break; 3762 case 64: 3763 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 3764 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3765 Opc = AArch64::ST1Fourv2d; 3766 Offset = false; 3767 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { 3768 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3769 Opc = AArch64::STR_ZZZZXI; 3770 StackID = TargetStackID::ScalableVector; 3771 } 3772 break; 3773 } 3774 assert(Opc && "Unknown register class"); 3775 MFI.setStackID(FI, StackID); 3776 3777 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc)) 3778 .addReg(SrcReg, getKillRegState(isKill)) 3779 .addFrameIndex(FI); 3780 3781 if (Offset) 3782 MI.addImm(0); 3783 MI.addMemOperand(MMO); 3784 } 3785 3786 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, 3787 MachineBasicBlock &MBB, 3788 
MachineBasicBlock::iterator InsertBefore, 3789 const MCInstrDesc &MCID, 3790 Register DestReg, unsigned SubIdx0, 3791 unsigned SubIdx1, int FI, 3792 MachineMemOperand *MMO) { 3793 Register DestReg0 = DestReg; 3794 Register DestReg1 = DestReg; 3795 bool IsUndef = true; 3796 if (Register::isPhysicalRegister(DestReg)) { 3797 DestReg0 = TRI.getSubReg(DestReg, SubIdx0); 3798 SubIdx0 = 0; 3799 DestReg1 = TRI.getSubReg(DestReg, SubIdx1); 3800 SubIdx1 = 0; 3801 IsUndef = false; 3802 } 3803 BuildMI(MBB, InsertBefore, DebugLoc(), MCID) 3804 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0) 3805 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1) 3806 .addFrameIndex(FI) 3807 .addImm(0) 3808 .addMemOperand(MMO); 3809 } 3810 3811 void AArch64InstrInfo::loadRegFromStackSlot( 3812 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, 3813 int FI, const TargetRegisterClass *RC, 3814 const TargetRegisterInfo *TRI) const { 3815 MachineFunction &MF = *MBB.getParent(); 3816 MachineFrameInfo &MFI = MF.getFrameInfo(); 3817 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 3818 MachineMemOperand *MMO = 3819 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, 3820 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 3821 3822 unsigned Opc = 0; 3823 bool Offset = true; 3824 unsigned StackID = TargetStackID::Default; 3825 switch (TRI->getSpillSize(*RC)) { 3826 case 1: 3827 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 3828 Opc = AArch64::LDRBui; 3829 break; 3830 case 2: 3831 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 3832 Opc = AArch64::LDRHui; 3833 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { 3834 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3835 Opc = AArch64::LDR_PXI; 3836 StackID = TargetStackID::ScalableVector; 3837 } 3838 break; 3839 case 4: 3840 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 3841 Opc = AArch64::LDRWui; 3842 if (Register::isVirtualRegister(DestReg)) 3843 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); 3844 else 3845 assert(DestReg != AArch64::WSP); 3846 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 3847 Opc = AArch64::LDRSui; 3848 break; 3849 case 8: 3850 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 3851 Opc = AArch64::LDRXui; 3852 if (Register::isVirtualRegister(DestReg)) 3853 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); 3854 else 3855 assert(DestReg != AArch64::SP); 3856 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) { 3857 Opc = AArch64::LDRDui; 3858 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) { 3859 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI, 3860 get(AArch64::LDPWi), DestReg, AArch64::sube32, 3861 AArch64::subo32, FI, MMO); 3862 return; 3863 } 3864 break; 3865 case 16: 3866 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 3867 Opc = AArch64::LDRQui; 3868 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 3869 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3870 Opc = AArch64::LD1Twov1d; 3871 Offset = false; 3872 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 3873 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI, 3874 get(AArch64::LDPXi), DestReg, AArch64::sube64, 3875 AArch64::subo64, FI, MMO); 3876 return; 3877 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { 3878 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3879 Opc = AArch64::LDR_ZXI; 3880 StackID = TargetStackID::ScalableVector; 3881 } 3882 
break; 3883 case 24: 3884 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 3885 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3886 Opc = AArch64::LD1Threev1d; 3887 Offset = false; 3888 } 3889 break; 3890 case 32: 3891 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 3892 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3893 Opc = AArch64::LD1Fourv1d; 3894 Offset = false; 3895 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 3896 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3897 Opc = AArch64::LD1Twov2d; 3898 Offset = false; 3899 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { 3900 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3901 Opc = AArch64::LDR_ZZXI; 3902 StackID = TargetStackID::ScalableVector; 3903 } 3904 break; 3905 case 48: 3906 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 3907 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3908 Opc = AArch64::LD1Threev2d; 3909 Offset = false; 3910 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { 3911 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3912 Opc = AArch64::LDR_ZZZXI; 3913 StackID = TargetStackID::ScalableVector; 3914 } 3915 break; 3916 case 64: 3917 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 3918 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3919 Opc = AArch64::LD1Fourv2d; 3920 Offset = false; 3921 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { 3922 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3923 Opc = AArch64::LDR_ZZZZXI; 3924 StackID = TargetStackID::ScalableVector; 3925 } 3926 break; 3927 } 3928 3929 assert(Opc && "Unknown register class"); 3930 MFI.setStackID(FI, StackID); 3931 3932 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc)) 3933 .addReg(DestReg, getDefRegState(true)) 3934 .addFrameIndex(FI); 3935 if (Offset) 3936 MI.addImm(0); 3937 MI.addMemOperand(MMO); 3938 } 3939 3940 bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, 3941 const MachineInstr &UseMI, 3942 const TargetRegisterInfo *TRI) { 3943 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()), 3944 UseMI.getIterator()), 3945 [TRI](const MachineInstr &I) { 3946 return I.modifiesRegister(AArch64::NZCV, TRI) || 3947 I.readsRegister(AArch64::NZCV, TRI); 3948 }); 3949 } 3950 3951 void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( 3952 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) { 3953 // The smallest scalable element supported by scaled SVE addressing 3954 // modes are predicates, which are 2 scalable bytes in size. So the scalable 3955 // byte offset must always be a multiple of 2. 3956 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset"); 3957 3958 // VGSized offsets are divided by '2', because the VG register is the 3959 // the number of 64bit granules as opposed to 128bit vector chunks, 3960 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled. 3961 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes. 3962 // VG = n * 2 and the dwarf offset must be VG * 8 bytes. 3963 ByteSized = Offset.getFixed(); 3964 VGSized = Offset.getScalable() / 2; 3965 } 3966 3967 /// Returns the offset in parts to which this frame offset can be 3968 /// decomposed for the purpose of describing a frame offset. 3969 /// For non-scalable offsets this is simply its byte size. 
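/// For example (illustrative values): a StackOffset of 16 fixed bytes plus 80
/// scalable bytes decomposes into NumBytes = 16, NumDataVectors = 5 and
/// NumPredicateVectors = 0, i.e. one immediate ADD plus one ADDVL #5.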
3970 void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( 3971 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, 3972 int64_t &NumDataVectors) { 3973 // The smallest scalable element supported by scaled SVE addressing 3974 // modes are predicates, which are 2 scalable bytes in size. So the scalable 3975 // byte offset must always be a multiple of 2. 3976 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset"); 3977 3978 NumBytes = Offset.getFixed(); 3979 NumDataVectors = 0; 3980 NumPredicateVectors = Offset.getScalable() / 2; 3981 // This method is used to get the offsets to adjust the frame offset. 3982 // If the function requires ADDPL to be used and needs more than two ADDPL 3983 // instructions, part of the offset is folded into NumDataVectors so that it 3984 // uses ADDVL for part of it, reducing the number of ADDPL instructions. 3985 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 || 3986 NumPredicateVectors > 62) { 3987 NumDataVectors = NumPredicateVectors / 8; 3988 NumPredicateVectors -= NumDataVectors * 8; 3989 } 3990 } 3991 3992 // Helper function to emit a frame offset adjustment from a given 3993 // pointer (SrcReg), stored into DestReg. This function is explicit 3994 // in that it requires the opcode. 3995 static void emitFrameOffsetAdj(MachineBasicBlock &MBB, 3996 MachineBasicBlock::iterator MBBI, 3997 const DebugLoc &DL, unsigned DestReg, 3998 unsigned SrcReg, int64_t Offset, unsigned Opc, 3999 const TargetInstrInfo *TII, 4000 MachineInstr::MIFlag Flag, bool NeedsWinCFI, 4001 bool *HasWinCFI) { 4002 int Sign = 1; 4003 unsigned MaxEncoding, ShiftSize; 4004 switch (Opc) { 4005 case AArch64::ADDXri: 4006 case AArch64::ADDSXri: 4007 case AArch64::SUBXri: 4008 case AArch64::SUBSXri: 4009 MaxEncoding = 0xfff; 4010 ShiftSize = 12; 4011 break; 4012 case AArch64::ADDVL_XXI: 4013 case AArch64::ADDPL_XXI: 4014 MaxEncoding = 31; 4015 ShiftSize = 0; 4016 if (Offset < 0) { 4017 MaxEncoding = 32; 4018 Sign = -1; 4019 Offset = -Offset; 4020 } 4021 break; 4022 default: 4023 llvm_unreachable("Unsupported opcode"); 4024 } 4025 4026 // FIXME: If the offset won't fit in 24-bits, compute the offset into a 4027 // scratch register. If DestReg is a virtual register, use it as the 4028 // scratch register; otherwise, create a new virtual register (to be 4029 // replaced by the scavenger at the end of PEI). That case can be optimized 4030 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch 4031 // register can be loaded with offset%8 and the add/sub can use an extending 4032 // instruction with LSL#3. 4033 // Currently the function handles any offsets but generates a poor sequence 4034 // of code. 
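// Illustrative example for the ADDXri case (register names are placeholders):
// an offset of 0x45678 is split into a shifted and an unshifted chunk,
//   add xD, xN, #0x45, lsl #12
//   add xD, xD, #0x678
// with one add/sub emitted per iteration of the loop below.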
4035 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); 4036 4037 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; 4038 Register TmpReg = DestReg; 4039 if (TmpReg == AArch64::XZR) 4040 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister( 4041 &AArch64::GPR64RegClass); 4042 do { 4043 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue); 4044 unsigned LocalShiftSize = 0; 4045 if (ThisVal > MaxEncoding) { 4046 ThisVal = ThisVal >> ShiftSize; 4047 LocalShiftSize = ShiftSize; 4048 } 4049 assert((ThisVal >> ShiftSize) <= MaxEncoding && 4050 "Encoding cannot handle value that big"); 4051 4052 Offset -= ThisVal << LocalShiftSize; 4053 if (Offset == 0) 4054 TmpReg = DestReg; 4055 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg) 4056 .addReg(SrcReg) 4057 .addImm(Sign * (int)ThisVal); 4058 if (ShiftSize) 4059 MBI = MBI.addImm( 4060 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize)); 4061 MBI = MBI.setMIFlag(Flag); 4062 4063 if (NeedsWinCFI) { 4064 assert(Sign == 1 && "SEH directives should always have a positive sign"); 4065 int Imm = (int)(ThisVal << LocalShiftSize); 4066 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || 4067 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { 4068 if (HasWinCFI) 4069 *HasWinCFI = true; 4070 if (Imm == 0) 4071 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag); 4072 else 4073 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)) 4074 .addImm(Imm) 4075 .setMIFlag(Flag); 4076 assert(Offset == 0 && "Expected remaining offset to be zero to " 4077 "emit a single SEH directive"); 4078 } else if (DestReg == AArch64::SP) { 4079 if (HasWinCFI) 4080 *HasWinCFI = true; 4081 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc"); 4082 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) 4083 .addImm(Imm) 4084 .setMIFlag(Flag); 4085 } 4086 if (HasWinCFI) 4087 *HasWinCFI = true; 4088 } 4089 4090 SrcReg = TmpReg; 4091 } while (Offset); 4092 } 4093 4094 void llvm::emitFrameOffset(MachineBasicBlock &MBB, 4095 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 4096 unsigned DestReg, unsigned SrcReg, 4097 StackOffset Offset, const TargetInstrInfo *TII, 4098 MachineInstr::MIFlag Flag, bool SetNZCV, 4099 bool NeedsWinCFI, bool *HasWinCFI) { 4100 int64_t Bytes, NumPredicateVectors, NumDataVectors; 4101 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( 4102 Offset, Bytes, NumPredicateVectors, NumDataVectors); 4103 4104 // First emit non-scalable frame offsets, or a simple 'mov'. 4105 if (Bytes || (!Offset && SrcReg != DestReg)) { 4106 assert((DestReg != AArch64::SP || Bytes % 8 == 0) && 4107 "SP increment/decrement not 8-byte aligned"); 4108 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri; 4109 if (Bytes < 0) { 4110 Bytes = -Bytes; 4111 Opc = SetNZCV ? 
AArch64::SUBSXri : AArch64::SUBXri; 4112 } 4113 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag, 4114 NeedsWinCFI, HasWinCFI); 4115 SrcReg = DestReg; 4116 } 4117 4118 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) && 4119 "SetNZCV not supported with SVE vectors"); 4120 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) && 4121 "WinCFI not supported with SVE vectors"); 4122 4123 if (NumDataVectors) { 4124 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors, 4125 AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr); 4126 SrcReg = DestReg; 4127 } 4128 4129 if (NumPredicateVectors) { 4130 assert(DestReg != AArch64::SP && "Unaligned access to SP"); 4131 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors, 4132 AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr); 4133 } 4134 } 4135 4136 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( 4137 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 4138 MachineBasicBlock::iterator InsertPt, int FrameIndex, 4139 LiveIntervals *LIS, VirtRegMap *VRM) const { 4140 // This is a bit of a hack. Consider this instruction: 4141 // 4142 // %0 = COPY %sp; GPR64all:%0 4143 // 4144 // We explicitly chose GPR64all for the virtual register so such a copy might 4145 // be eliminated by RegisterCoalescer. However, that may not be possible, and 4146 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all 4147 // register class, TargetInstrInfo::foldMemoryOperand() is going to try. 4148 // 4149 // To prevent that, we are going to constrain the %0 register class here. 4150 // 4151 // <rdar://problem/11522048> 4152 // 4153 if (MI.isFullCopy()) { 4154 Register DstReg = MI.getOperand(0).getReg(); 4155 Register SrcReg = MI.getOperand(1).getReg(); 4156 if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) { 4157 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); 4158 return nullptr; 4159 } 4160 if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) { 4161 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 4162 return nullptr; 4163 } 4164 } 4165 4166 // Handle the case where a copy is being spilled or filled but the source 4167 // and destination register class don't match. For example: 4168 // 4169 // %0 = COPY %xzr; GPR64common:%0 4170 // 4171 // In this case we can still safely fold away the COPY and generate the 4172 // following spill code: 4173 // 4174 // STRXui %xzr, %stack.0 4175 // 4176 // This also eliminates spilled cross register class COPYs (e.g. between x and 4177 // d regs) of the same size. For example: 4178 // 4179 // %0 = COPY %1; GPR64:%0, FPR64:%1 4180 // 4181 // will be filled as 4182 // 4183 // LDRDui %0, fi<#0> 4184 // 4185 // instead of 4186 // 4187 // LDRXui %Temp, fi<#0> 4188 // %0 = FMOV %Temp 4189 // 4190 if (MI.isCopy() && Ops.size() == 1 && 4191 // Make sure we're only folding the explicit COPY defs/uses. 4192 (Ops[0] == 0 || Ops[0] == 1)) { 4193 bool IsSpill = Ops[0] == 0; 4194 bool IsFill = !IsSpill; 4195 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 4196 const MachineRegisterInfo &MRI = MF.getRegInfo(); 4197 MachineBasicBlock &MBB = *MI.getParent(); 4198 const MachineOperand &DstMO = MI.getOperand(0); 4199 const MachineOperand &SrcMO = MI.getOperand(1); 4200 Register DstReg = DstMO.getReg(); 4201 Register SrcReg = SrcMO.getReg(); 4202 // This is slightly expensive to compute for physical regs since 4203 // getMinimalPhysRegClass is slow. 
4204 auto getRegClass = [&](unsigned Reg) { 4205 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg) 4206 : TRI.getMinimalPhysRegClass(Reg); 4207 }; 4208 4209 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { 4210 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == 4211 TRI.getRegSizeInBits(*getRegClass(SrcReg)) && 4212 "Mismatched register size in non subreg COPY"); 4213 if (IsSpill) 4214 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, 4215 getRegClass(SrcReg), &TRI); 4216 else 4217 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, 4218 getRegClass(DstReg), &TRI); 4219 return &*--InsertPt; 4220 } 4221 4222 // Handle cases like spilling def of: 4223 // 4224 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0 4225 // 4226 // where the physical register source can be widened and stored to the full 4227 // virtual reg destination stack slot, in this case producing: 4228 // 4229 // STRXui %xzr, %stack.0 4230 // 4231 if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) { 4232 assert(SrcMO.getSubReg() == 0 && 4233 "Unexpected subreg on physical register"); 4234 const TargetRegisterClass *SpillRC; 4235 unsigned SpillSubreg; 4236 switch (DstMO.getSubReg()) { 4237 default: 4238 SpillRC = nullptr; 4239 break; 4240 case AArch64::sub_32: 4241 case AArch64::ssub: 4242 if (AArch64::GPR32RegClass.contains(SrcReg)) { 4243 SpillRC = &AArch64::GPR64RegClass; 4244 SpillSubreg = AArch64::sub_32; 4245 } else if (AArch64::FPR32RegClass.contains(SrcReg)) { 4246 SpillRC = &AArch64::FPR64RegClass; 4247 SpillSubreg = AArch64::ssub; 4248 } else 4249 SpillRC = nullptr; 4250 break; 4251 case AArch64::dsub: 4252 if (AArch64::FPR64RegClass.contains(SrcReg)) { 4253 SpillRC = &AArch64::FPR128RegClass; 4254 SpillSubreg = AArch64::dsub; 4255 } else 4256 SpillRC = nullptr; 4257 break; 4258 } 4259 4260 if (SpillRC) 4261 if (unsigned WidenedSrcReg = 4262 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) { 4263 storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(), 4264 FrameIndex, SpillRC, &TRI); 4265 return &*--InsertPt; 4266 } 4267 } 4268 4269 // Handle cases like filling use of: 4270 // 4271 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1 4272 // 4273 // where we can load the full virtual reg source stack slot, into the subreg 4274 // destination, in this case producing: 4275 // 4276 // LDRWui %0:sub_32<def,read-undef>, %stack.0 4277 // 4278 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) { 4279 const TargetRegisterClass *FillRC; 4280 switch (DstMO.getSubReg()) { 4281 default: 4282 FillRC = nullptr; 4283 break; 4284 case AArch64::sub_32: 4285 FillRC = &AArch64::GPR32RegClass; 4286 break; 4287 case AArch64::ssub: 4288 FillRC = &AArch64::FPR32RegClass; 4289 break; 4290 case AArch64::dsub: 4291 FillRC = &AArch64::FPR64RegClass; 4292 break; 4293 } 4294 4295 if (FillRC) { 4296 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == 4297 TRI.getRegSizeInBits(*FillRC) && 4298 "Mismatched regclass size on folded subreg COPY"); 4299 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI); 4300 MachineInstr &LoadMI = *--InsertPt; 4301 MachineOperand &LoadDst = LoadMI.getOperand(0); 4302 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load"); 4303 LoadDst.setSubReg(DstMO.getSubReg()); 4304 LoadDst.setIsUndef(); 4305 return &LoadMI; 4306 } 4307 } 4308 } 4309 4310 // Cannot fold. 
4311 return nullptr; 4312 } 4313 4314 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, 4315 StackOffset &SOffset, 4316 bool *OutUseUnscaledOp, 4317 unsigned *OutUnscaledOp, 4318 int64_t *EmittableOffset) { 4319 // Set output values in case of early exit. 4320 if (EmittableOffset) 4321 *EmittableOffset = 0; 4322 if (OutUseUnscaledOp) 4323 *OutUseUnscaledOp = false; 4324 if (OutUnscaledOp) 4325 *OutUnscaledOp = 0; 4326 4327 // Exit early for structured vector spills/fills as they can't take an 4328 // immediate offset. 4329 switch (MI.getOpcode()) { 4330 default: 4331 break; 4332 case AArch64::LD1Twov2d: 4333 case AArch64::LD1Threev2d: 4334 case AArch64::LD1Fourv2d: 4335 case AArch64::LD1Twov1d: 4336 case AArch64::LD1Threev1d: 4337 case AArch64::LD1Fourv1d: 4338 case AArch64::ST1Twov2d: 4339 case AArch64::ST1Threev2d: 4340 case AArch64::ST1Fourv2d: 4341 case AArch64::ST1Twov1d: 4342 case AArch64::ST1Threev1d: 4343 case AArch64::ST1Fourv1d: 4344 case AArch64::ST1i8: 4345 case AArch64::ST1i16: 4346 case AArch64::ST1i32: 4347 case AArch64::ST1i64: 4348 case AArch64::IRG: 4349 case AArch64::IRGstack: 4350 case AArch64::STGloop: 4351 case AArch64::STZGloop: 4352 return AArch64FrameOffsetCannotUpdate; 4353 } 4354 4355 // Get the min/max offset and the scale. 4356 TypeSize ScaleValue(0U, false); 4357 unsigned Width; 4358 int64_t MinOff, MaxOff; 4359 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff, 4360 MaxOff)) 4361 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); 4362 4363 // Construct the complete offset. 4364 bool IsMulVL = ScaleValue.isScalable(); 4365 unsigned Scale = ScaleValue.getKnownMinSize(); 4366 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed(); 4367 4368 const MachineOperand &ImmOpnd = 4369 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); 4370 Offset += ImmOpnd.getImm() * Scale; 4371 4372 // If the offset doesn't match the scale, we rewrite the instruction to 4373 // use the unscaled instruction instead. Likewise, if we have a negative 4374 // offset and there is an unscaled op to use. 4375 Optional<unsigned> UnscaledOp = 4376 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode()); 4377 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0); 4378 if (useUnscaledOp && 4379 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff, 4380 MaxOff)) 4381 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); 4382 4383 Scale = ScaleValue.getKnownMinSize(); 4384 assert(IsMulVL == ScaleValue.isScalable() && 4385 "Unscaled opcode has different value for scalable"); 4386 4387 int64_t Remainder = Offset % Scale; 4388 assert(!(Remainder && useUnscaledOp) && 4389 "Cannot have remainder when using unscaled op"); 4390 4391 assert(MinOff < MaxOff && "Unexpected Min/Max offsets"); 4392 int64_t NewOffset = Offset / Scale; 4393 if (MinOff <= NewOffset && NewOffset <= MaxOff) 4394 Offset = Remainder; 4395 else { 4396 NewOffset = NewOffset < 0 ? MinOff : MaxOff; 4397 Offset = Offset - NewOffset * Scale + Remainder; 4398 } 4399 4400 if (EmittableOffset) 4401 *EmittableOffset = NewOffset; 4402 if (OutUseUnscaledOp) 4403 *OutUseUnscaledOp = useUnscaledOp; 4404 if (OutUnscaledOp && UnscaledOp) 4405 *OutUnscaledOp = *UnscaledOp; 4406 4407 if (IsMulVL) 4408 SOffset = StackOffset::get(SOffset.getFixed(), Offset); 4409 else 4410 SOffset = StackOffset::get(Offset, SOffset.getScalable()); 4411 return AArch64FrameOffsetCanUpdate | 4412 (SOffset ? 
0 : AArch64FrameOffsetIsLegal); 4413 } 4414 4415 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 4416 unsigned FrameReg, StackOffset &Offset, 4417 const AArch64InstrInfo *TII) { 4418 unsigned Opcode = MI.getOpcode(); 4419 unsigned ImmIdx = FrameRegIdx + 1; 4420 4421 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { 4422 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm()); 4423 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), 4424 MI.getOperand(0).getReg(), FrameReg, Offset, TII, 4425 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); 4426 MI.eraseFromParent(); 4427 Offset = StackOffset(); 4428 return true; 4429 } 4430 4431 int64_t NewOffset; 4432 unsigned UnscaledOp; 4433 bool UseUnscaledOp; 4434 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, 4435 &UnscaledOp, &NewOffset); 4436 if (Status & AArch64FrameOffsetCanUpdate) { 4437 if (Status & AArch64FrameOffsetIsLegal) 4438 // Replace the FrameIndex with FrameReg. 4439 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 4440 if (UseUnscaledOp) 4441 MI.setDesc(TII->get(UnscaledOp)); 4442 4443 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); 4444 return !Offset; 4445 } 4446 4447 return false; 4448 } 4449 4450 MCInst AArch64InstrInfo::getNop() const { 4451 return MCInstBuilder(AArch64::HINT).addImm(0); 4452 } 4453 4454 // AArch64 supports MachineCombiner. 4455 bool AArch64InstrInfo::useMachineCombiner() const { return true; } 4456 4457 // True when Opc sets flag 4458 static bool isCombineInstrSettingFlag(unsigned Opc) { 4459 switch (Opc) { 4460 case AArch64::ADDSWrr: 4461 case AArch64::ADDSWri: 4462 case AArch64::ADDSXrr: 4463 case AArch64::ADDSXri: 4464 case AArch64::SUBSWrr: 4465 case AArch64::SUBSXrr: 4466 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 4467 case AArch64::SUBSWri: 4468 case AArch64::SUBSXri: 4469 return true; 4470 default: 4471 break; 4472 } 4473 return false; 4474 } 4475 4476 // 32b Opcodes that can be combined with a MUL 4477 static bool isCombineInstrCandidate32(unsigned Opc) { 4478 switch (Opc) { 4479 case AArch64::ADDWrr: 4480 case AArch64::ADDWri: 4481 case AArch64::SUBWrr: 4482 case AArch64::ADDSWrr: 4483 case AArch64::ADDSWri: 4484 case AArch64::SUBSWrr: 4485 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 4486 case AArch64::SUBWri: 4487 case AArch64::SUBSWri: 4488 return true; 4489 default: 4490 break; 4491 } 4492 return false; 4493 } 4494 4495 // 64b Opcodes that can be combined with a MUL 4496 static bool isCombineInstrCandidate64(unsigned Opc) { 4497 switch (Opc) { 4498 case AArch64::ADDXrr: 4499 case AArch64::ADDXri: 4500 case AArch64::SUBXrr: 4501 case AArch64::ADDSXrr: 4502 case AArch64::ADDSXri: 4503 case AArch64::SUBSXrr: 4504 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 4505 case AArch64::SUBXri: 4506 case AArch64::SUBSXri: 4507 case AArch64::ADDv8i8: 4508 case AArch64::ADDv16i8: 4509 case AArch64::ADDv4i16: 4510 case AArch64::ADDv8i16: 4511 case AArch64::ADDv2i32: 4512 case AArch64::ADDv4i32: 4513 case AArch64::SUBv8i8: 4514 case AArch64::SUBv16i8: 4515 case AArch64::SUBv4i16: 4516 case AArch64::SUBv8i16: 4517 case AArch64::SUBv2i32: 4518 case AArch64::SUBv4i32: 4519 return true; 4520 default: 4521 break; 4522 } 4523 return false; 4524 } 4525 4526 // FP Opcodes that can be combined with a FMUL. 
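// For example, (fadd (fmul a, b), c) may be contracted into a single fused
// multiply-add, but only when UnsafeFPMath or FPOpFusion::Fast is in effect,
// or the FADD carries the contract fast-math flag (checked below).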
4527 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { 4528 switch (Inst.getOpcode()) { 4529 default: 4530 break; 4531 case AArch64::FADDHrr: 4532 case AArch64::FADDSrr: 4533 case AArch64::FADDDrr: 4534 case AArch64::FADDv4f16: 4535 case AArch64::FADDv8f16: 4536 case AArch64::FADDv2f32: 4537 case AArch64::FADDv2f64: 4538 case AArch64::FADDv4f32: 4539 case AArch64::FSUBHrr: 4540 case AArch64::FSUBSrr: 4541 case AArch64::FSUBDrr: 4542 case AArch64::FSUBv4f16: 4543 case AArch64::FSUBv8f16: 4544 case AArch64::FSUBv2f32: 4545 case AArch64::FSUBv2f64: 4546 case AArch64::FSUBv4f32: 4547 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; 4548 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by 4549 // the target options or if FADD/FSUB has the contract fast-math flag. 4550 return Options.UnsafeFPMath || 4551 Options.AllowFPOpFusion == FPOpFusion::Fast || 4552 Inst.getFlag(MachineInstr::FmContract); 4554 } 4555 return false; 4556 } 4557 4558 // Opcodes that can be combined with a MUL 4559 static bool isCombineInstrCandidate(unsigned Opc) { 4560 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc)); 4561 } 4562 4563 // 4564 // Utility routine that checks if \param MO is defined by an 4565 // \param CombineOpc instruction in the basic block \param MBB 4566 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, 4567 unsigned CombineOpc, unsigned ZeroReg = 0, 4568 bool CheckZeroReg = false) { 4569 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 4570 MachineInstr *MI = nullptr; 4571 4572 if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) 4573 MI = MRI.getUniqueVRegDef(MO.getReg()); 4574 // And it needs to be in the trace (otherwise, it won't have a depth). 4575 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc) 4576 return false; 4577 // Must only be used by the user we combine with. 4578 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 4579 return false; 4580 4581 if (CheckZeroReg) { 4582 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() && 4583 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() && 4584 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs"); 4585 // The third input reg must be zero. 4586 if (MI->getOperand(3).getReg() != ZeroReg) 4587 return false; 4588 } 4589 4590 return true; 4591 } 4592 4593 // 4594 // Is \param MO defined by an integer multiply and can be combined? 4595 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, 4596 unsigned MulOpc, unsigned ZeroReg) { 4597 return canCombine(MBB, MO, MulOpc, ZeroReg, true); 4598 } 4599 4600 // 4601 // Is \param MO defined by a floating-point multiply and can be combined? 4602 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, 4603 unsigned MulOpc) { 4604 return canCombine(MBB, MO, MulOpc); 4605 } 4606 4607 // TODO: There are many more machine instruction opcodes to match: 4608 // 1. Other data types (integer, vectors) 4609 // 2. Other math / logic operations (xor, or) 4610 // 3.
Other forms of the same operation (intrinsics and other variants) 4611 bool AArch64InstrInfo::isAssociativeAndCommutative( 4612 const MachineInstr &Inst) const { 4613 switch (Inst.getOpcode()) { 4614 case AArch64::FADDDrr: 4615 case AArch64::FADDSrr: 4616 case AArch64::FADDv2f32: 4617 case AArch64::FADDv2f64: 4618 case AArch64::FADDv4f32: 4619 case AArch64::FMULDrr: 4620 case AArch64::FMULSrr: 4621 case AArch64::FMULX32: 4622 case AArch64::FMULX64: 4623 case AArch64::FMULXv2f32: 4624 case AArch64::FMULXv2f64: 4625 case AArch64::FMULXv4f32: 4626 case AArch64::FMULv2f32: 4627 case AArch64::FMULv2f64: 4628 case AArch64::FMULv4f32: 4629 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; 4630 default: 4631 return false; 4632 } 4633 } 4634 4635 /// Find instructions that can be turned into madd. 4636 static bool getMaddPatterns(MachineInstr &Root, 4637 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 4638 unsigned Opc = Root.getOpcode(); 4639 MachineBasicBlock &MBB = *Root.getParent(); 4640 bool Found = false; 4641 4642 if (!isCombineInstrCandidate(Opc)) 4643 return false; 4644 if (isCombineInstrSettingFlag(Opc)) { 4645 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true); 4646 // When NZCV is live bail out. 4647 if (Cmp_NZCV == -1) 4648 return false; 4649 unsigned NewOpc = convertToNonFlagSettingOpc(Root); 4650 // When opcode can't change bail out. 4651 // CHECKME: do we miss any cases for opcode conversion? 4652 if (NewOpc == Opc) 4653 return false; 4654 Opc = NewOpc; 4655 } 4656 4657 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg, 4658 MachineCombinerPattern Pattern) { 4659 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) { 4660 Patterns.push_back(Pattern); 4661 Found = true; 4662 } 4663 }; 4664 4665 auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) { 4666 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) { 4667 Patterns.push_back(Pattern); 4668 Found = true; 4669 } 4670 }; 4671 4672 typedef MachineCombinerPattern MCP; 4673 4674 switch (Opc) { 4675 default: 4676 break; 4677 case AArch64::ADDWrr: 4678 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 4679 "ADDWrr does not have register operands"); 4680 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1); 4681 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2); 4682 break; 4683 case AArch64::ADDXrr: 4684 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1); 4685 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2); 4686 break; 4687 case AArch64::SUBWrr: 4688 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1); 4689 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2); 4690 break; 4691 case AArch64::SUBXrr: 4692 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1); 4693 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2); 4694 break; 4695 case AArch64::ADDWri: 4696 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1); 4697 break; 4698 case AArch64::ADDXri: 4699 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1); 4700 break; 4701 case AArch64::SUBWri: 4702 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1); 4703 break; 4704 case AArch64::SUBXri: 4705 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1); 4706 break; 4707 case AArch64::ADDv8i8: 4708 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1); 4709 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2); 4710 break; 4711 case 
AArch64::ADDv16i8: 4712 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1); 4713 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2); 4714 break; 4715 case AArch64::ADDv4i16: 4716 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1); 4717 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2); 4718 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1); 4719 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2); 4720 break; 4721 case AArch64::ADDv8i16: 4722 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1); 4723 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2); 4724 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1); 4725 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2); 4726 break; 4727 case AArch64::ADDv2i32: 4728 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1); 4729 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2); 4730 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1); 4731 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2); 4732 break; 4733 case AArch64::ADDv4i32: 4734 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1); 4735 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2); 4736 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1); 4737 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2); 4738 break; 4739 case AArch64::SUBv8i8: 4740 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1); 4741 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2); 4742 break; 4743 case AArch64::SUBv16i8: 4744 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1); 4745 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2); 4746 break; 4747 case AArch64::SUBv4i16: 4748 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1); 4749 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2); 4750 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1); 4751 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2); 4752 break; 4753 case AArch64::SUBv8i16: 4754 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1); 4755 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2); 4756 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1); 4757 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2); 4758 break; 4759 case AArch64::SUBv2i32: 4760 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1); 4761 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2); 4762 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1); 4763 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2); 4764 break; 4765 case AArch64::SUBv4i32: 4766 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1); 4767 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2); 4768 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1); 4769 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2); 4770 break; 4771 } 4772 return Found; 4773 } 4774 /// Floating-Point Support 4775 4776 /// Find instructions that can be turned into madd. 
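/// For example, when an FMUL with no other users feeds operand 1 of an FADD,
/// an *_OP1 pattern is recorded; if it feeds operand 2, the *_OP2 form is used
/// instead (see the Match calls below).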
4777 static bool getFMAPatterns(MachineInstr &Root, 4778 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 4779 4780 if (!isCombineInstrCandidateFP(Root)) 4781 return false; 4782 4783 MachineBasicBlock &MBB = *Root.getParent(); 4784 bool Found = false; 4785 4786 auto Match = [&](int Opcode, int Operand, 4787 MachineCombinerPattern Pattern) -> bool { 4788 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) { 4789 Patterns.push_back(Pattern); 4790 return true; 4791 } 4792 return false; 4793 }; 4794 4795 typedef MachineCombinerPattern MCP; 4796 4797 switch (Root.getOpcode()) { 4798 default: 4799 assert(false && "Unsupported FP instruction in combiner\n"); 4800 break; 4801 case AArch64::FADDHrr: 4802 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 4803 "FADDHrr does not have register operands"); 4804 4805 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1); 4806 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2); 4807 break; 4808 case AArch64::FADDSrr: 4809 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 4810 "FADDSrr does not have register operands"); 4811 4812 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) || 4813 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1); 4814 4815 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) || 4816 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2); 4817 break; 4818 case AArch64::FADDDrr: 4819 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) || 4820 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1); 4821 4822 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) || 4823 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2); 4824 break; 4825 case AArch64::FADDv4f16: 4826 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) || 4827 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1); 4828 4829 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) || 4830 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2); 4831 break; 4832 case AArch64::FADDv8f16: 4833 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) || 4834 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1); 4835 4836 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) || 4837 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2); 4838 break; 4839 case AArch64::FADDv2f32: 4840 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) || 4841 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1); 4842 4843 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) || 4844 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2); 4845 break; 4846 case AArch64::FADDv2f64: 4847 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) || 4848 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1); 4849 4850 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) || 4851 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2); 4852 break; 4853 case AArch64::FADDv4f32: 4854 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) || 4855 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1); 4856 4857 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) || 4858 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2); 4859 break; 4860 case AArch64::FSUBHrr: 4861 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1); 4862 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2); 4863 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1); 4864 break; 
4865 case AArch64::FSUBSrr: 4866 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1); 4867 4868 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) || 4869 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2); 4870 4871 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1); 4872 break; 4873 case AArch64::FSUBDrr: 4874 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1); 4875 4876 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) || 4877 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2); 4878 4879 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1); 4880 break; 4881 case AArch64::FSUBv4f16: 4882 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) || 4883 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2); 4884 4885 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) || 4886 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1); 4887 break; 4888 case AArch64::FSUBv8f16: 4889 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) || 4890 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2); 4891 4892 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) || 4893 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1); 4894 break; 4895 case AArch64::FSUBv2f32: 4896 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) || 4897 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2); 4898 4899 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) || 4900 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1); 4901 break; 4902 case AArch64::FSUBv2f64: 4903 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) || 4904 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2); 4905 4906 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) || 4907 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1); 4908 break; 4909 case AArch64::FSUBv4f32: 4910 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) || 4911 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2); 4912 4913 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) || 4914 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1); 4915 break; 4916 } 4917 return Found; 4918 } 4919 4920 static bool getFMULPatterns(MachineInstr &Root, 4921 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 4922 MachineBasicBlock &MBB = *Root.getParent(); 4923 bool Found = false; 4924 4925 auto Match = [&](unsigned Opcode, int Operand, 4926 MachineCombinerPattern Pattern) -> bool { 4927 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 4928 MachineOperand &MO = Root.getOperand(Operand); 4929 MachineInstr *MI = nullptr; 4930 if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) 4931 MI = MRI.getUniqueVRegDef(MO.getReg()); 4932 if (MI && MI->getOpcode() == Opcode) { 4933 Patterns.push_back(Pattern); 4934 return true; 4935 } 4936 return false; 4937 }; 4938 4939 typedef MachineCombinerPattern MCP; 4940 4941 switch (Root.getOpcode()) { 4942 default: 4943 return false; 4944 case AArch64::FMULv2f32: 4945 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1); 4946 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2); 4947 break; 4948 case AArch64::FMULv2f64: 4949 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1); 4950 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2); 4951 break; 4952 case AArch64::FMULv4f16: 4953 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1); 4954 Found |= 
Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2); 4955 break; 4956 case AArch64::FMULv4f32: 4957 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1); 4958 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2); 4959 break; 4960 case AArch64::FMULv8f16: 4961 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1); 4962 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2); 4963 break; 4964 } 4965 4966 return Found; 4967 } 4968 4969 /// Return true when a code sequence can improve throughput. It 4970 /// should be called only for instructions in loops. 4971 /// \param Pattern - combiner pattern 4972 bool AArch64InstrInfo::isThroughputPattern( 4973 MachineCombinerPattern Pattern) const { 4974 switch (Pattern) { 4975 default: 4976 break; 4977 case MachineCombinerPattern::FMULADDH_OP1: 4978 case MachineCombinerPattern::FMULADDH_OP2: 4979 case MachineCombinerPattern::FMULSUBH_OP1: 4980 case MachineCombinerPattern::FMULSUBH_OP2: 4981 case MachineCombinerPattern::FMULADDS_OP1: 4982 case MachineCombinerPattern::FMULADDS_OP2: 4983 case MachineCombinerPattern::FMULSUBS_OP1: 4984 case MachineCombinerPattern::FMULSUBS_OP2: 4985 case MachineCombinerPattern::FMULADDD_OP1: 4986 case MachineCombinerPattern::FMULADDD_OP2: 4987 case MachineCombinerPattern::FMULSUBD_OP1: 4988 case MachineCombinerPattern::FMULSUBD_OP2: 4989 case MachineCombinerPattern::FNMULSUBH_OP1: 4990 case MachineCombinerPattern::FNMULSUBS_OP1: 4991 case MachineCombinerPattern::FNMULSUBD_OP1: 4992 case MachineCombinerPattern::FMLAv4i16_indexed_OP1: 4993 case MachineCombinerPattern::FMLAv4i16_indexed_OP2: 4994 case MachineCombinerPattern::FMLAv8i16_indexed_OP1: 4995 case MachineCombinerPattern::FMLAv8i16_indexed_OP2: 4996 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 4997 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 4998 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 4999 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 5000 case MachineCombinerPattern::FMLAv4f16_OP2: 5001 case MachineCombinerPattern::FMLAv4f16_OP1: 5002 case MachineCombinerPattern::FMLAv8f16_OP1: 5003 case MachineCombinerPattern::FMLAv8f16_OP2: 5004 case MachineCombinerPattern::FMLAv2f32_OP2: 5005 case MachineCombinerPattern::FMLAv2f32_OP1: 5006 case MachineCombinerPattern::FMLAv2f64_OP1: 5007 case MachineCombinerPattern::FMLAv2f64_OP2: 5008 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 5009 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 5010 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 5011 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 5012 case MachineCombinerPattern::FMLAv4f32_OP1: 5013 case MachineCombinerPattern::FMLAv4f32_OP2: 5014 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 5015 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 5016 case MachineCombinerPattern::FMLSv4i16_indexed_OP1: 5017 case MachineCombinerPattern::FMLSv4i16_indexed_OP2: 5018 case MachineCombinerPattern::FMLSv8i16_indexed_OP1: 5019 case MachineCombinerPattern::FMLSv8i16_indexed_OP2: 5020 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 5021 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 5022 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 5023 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 5024 case MachineCombinerPattern::FMLSv4f16_OP1: 5025 case MachineCombinerPattern::FMLSv4f16_OP2: 5026 case MachineCombinerPattern::FMLSv8f16_OP1: 5027 case MachineCombinerPattern::FMLSv8f16_OP2: 5028 case MachineCombinerPattern::FMLSv2f32_OP2: 5029 case 
MachineCombinerPattern::FMLSv2f64_OP2: 5030 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 5031 case MachineCombinerPattern::FMLSv4f32_OP2: 5032 case MachineCombinerPattern::FMULv2i32_indexed_OP1: 5033 case MachineCombinerPattern::FMULv2i32_indexed_OP2: 5034 case MachineCombinerPattern::FMULv2i64_indexed_OP1: 5035 case MachineCombinerPattern::FMULv2i64_indexed_OP2: 5036 case MachineCombinerPattern::FMULv4i16_indexed_OP1: 5037 case MachineCombinerPattern::FMULv4i16_indexed_OP2: 5038 case MachineCombinerPattern::FMULv4i32_indexed_OP1: 5039 case MachineCombinerPattern::FMULv4i32_indexed_OP2: 5040 case MachineCombinerPattern::FMULv8i16_indexed_OP1: 5041 case MachineCombinerPattern::FMULv8i16_indexed_OP2: 5042 case MachineCombinerPattern::MULADDv8i8_OP1: 5043 case MachineCombinerPattern::MULADDv8i8_OP2: 5044 case MachineCombinerPattern::MULADDv16i8_OP1: 5045 case MachineCombinerPattern::MULADDv16i8_OP2: 5046 case MachineCombinerPattern::MULADDv4i16_OP1: 5047 case MachineCombinerPattern::MULADDv4i16_OP2: 5048 case MachineCombinerPattern::MULADDv8i16_OP1: 5049 case MachineCombinerPattern::MULADDv8i16_OP2: 5050 case MachineCombinerPattern::MULADDv2i32_OP1: 5051 case MachineCombinerPattern::MULADDv2i32_OP2: 5052 case MachineCombinerPattern::MULADDv4i32_OP1: 5053 case MachineCombinerPattern::MULADDv4i32_OP2: 5054 case MachineCombinerPattern::MULSUBv8i8_OP1: 5055 case MachineCombinerPattern::MULSUBv8i8_OP2: 5056 case MachineCombinerPattern::MULSUBv16i8_OP1: 5057 case MachineCombinerPattern::MULSUBv16i8_OP2: 5058 case MachineCombinerPattern::MULSUBv4i16_OP1: 5059 case MachineCombinerPattern::MULSUBv4i16_OP2: 5060 case MachineCombinerPattern::MULSUBv8i16_OP1: 5061 case MachineCombinerPattern::MULSUBv8i16_OP2: 5062 case MachineCombinerPattern::MULSUBv2i32_OP1: 5063 case MachineCombinerPattern::MULSUBv2i32_OP2: 5064 case MachineCombinerPattern::MULSUBv4i32_OP1: 5065 case MachineCombinerPattern::MULSUBv4i32_OP2: 5066 case MachineCombinerPattern::MULADDv4i16_indexed_OP1: 5067 case MachineCombinerPattern::MULADDv4i16_indexed_OP2: 5068 case MachineCombinerPattern::MULADDv8i16_indexed_OP1: 5069 case MachineCombinerPattern::MULADDv8i16_indexed_OP2: 5070 case MachineCombinerPattern::MULADDv2i32_indexed_OP1: 5071 case MachineCombinerPattern::MULADDv2i32_indexed_OP2: 5072 case MachineCombinerPattern::MULADDv4i32_indexed_OP1: 5073 case MachineCombinerPattern::MULADDv4i32_indexed_OP2: 5074 case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: 5075 case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: 5076 case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: 5077 case MachineCombinerPattern::MULSUBv8i16_indexed_OP2: 5078 case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: 5079 case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: 5080 case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: 5081 case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: 5082 return true; 5083 } // end switch (Pattern) 5084 return false; 5085 } 5086 /// Return true when there is potentially a faster code sequence for an 5087 /// instruction chain ending in \p Root. All potential patterns are listed in 5088 /// the \p Pattern vector. Pattern should be sorted in priority order since the 5089 /// pattern evaluator stops checking as soon as it finds a faster sequence. 
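/// \return true when at least one pattern was added to \p Patterns.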
bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
    bool DoRegPressureReduce) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMULPatterns(Root, Patterns))
    return true;
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
                                                     DoRegPressureReduce);
}

enum class FMAInstKind { Default, Indexed, Accumulator };
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B,0
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind of fma instruction (addressing mode) to be generated
/// \param ReplacedAddend is the result register from the instruction
/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default,
                 const Register *ReplacedAddend = nullptr) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  Register ResultReg = Root.getOperand(0).getReg();
  Register SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  Register SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  unsigned SrcReg2;
  bool Src2IsKill;
  if (ReplacedAddend) {
    // If we just generated a new addend, we must be its only use.
5148 SrcReg2 = *ReplacedAddend; 5149 Src2IsKill = true; 5150 } else { 5151 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg(); 5152 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); 5153 } 5154 5155 if (Register::isVirtualRegister(ResultReg)) 5156 MRI.constrainRegClass(ResultReg, RC); 5157 if (Register::isVirtualRegister(SrcReg0)) 5158 MRI.constrainRegClass(SrcReg0, RC); 5159 if (Register::isVirtualRegister(SrcReg1)) 5160 MRI.constrainRegClass(SrcReg1, RC); 5161 if (Register::isVirtualRegister(SrcReg2)) 5162 MRI.constrainRegClass(SrcReg2, RC); 5163 5164 MachineInstrBuilder MIB; 5165 if (kind == FMAInstKind::Default) 5166 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 5167 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 5168 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 5169 .addReg(SrcReg2, getKillRegState(Src2IsKill)); 5170 else if (kind == FMAInstKind::Indexed) 5171 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 5172 .addReg(SrcReg2, getKillRegState(Src2IsKill)) 5173 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 5174 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 5175 .addImm(MUL->getOperand(3).getImm()); 5176 else if (kind == FMAInstKind::Accumulator) 5177 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 5178 .addReg(SrcReg2, getKillRegState(Src2IsKill)) 5179 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 5180 .addReg(SrcReg1, getKillRegState(Src1IsKill)); 5181 else 5182 assert(false && "Invalid FMA instruction kind \n"); 5183 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL) 5184 InsInstrs.push_back(MIB); 5185 return MUL; 5186 } 5187 5188 /// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane) 5189 static MachineInstr * 5190 genIndexedMultiply(MachineInstr &Root, 5191 SmallVectorImpl<MachineInstr *> &InsInstrs, 5192 unsigned IdxDupOp, unsigned MulOpc, 5193 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) { 5194 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) && 5195 "Invalid index of FMUL operand"); 5196 5197 MachineFunction &MF = *Root.getMF(); 5198 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 5199 5200 MachineInstr *Dup = 5201 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg()); 5202 5203 Register DupSrcReg = Dup->getOperand(1).getReg(); 5204 MRI.clearKillFlags(DupSrcReg); 5205 MRI.constrainRegClass(DupSrcReg, RC); 5206 5207 unsigned DupSrcLane = Dup->getOperand(2).getImm(); 5208 5209 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1; 5210 MachineOperand &MulOp = Root.getOperand(IdxMulOp); 5211 5212 Register ResultReg = Root.getOperand(0).getReg(); 5213 5214 MachineInstrBuilder MIB; 5215 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MulOpc), ResultReg) 5216 .add(MulOp) 5217 .addReg(DupSrcReg) 5218 .addImm(DupSrcLane); 5219 5220 InsInstrs.push_back(MIB); 5221 return &Root; 5222 } 5223 5224 /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate 5225 /// instructions. 
5226 /// 5227 /// \see genFusedMultiply 5228 static MachineInstr *genFusedMultiplyAcc( 5229 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, 5230 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, 5231 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) { 5232 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, 5233 FMAInstKind::Accumulator); 5234 } 5235 5236 /// genNeg - Helper to generate an intermediate negation of the second operand 5237 /// of Root 5238 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, 5239 const TargetInstrInfo *TII, MachineInstr &Root, 5240 SmallVectorImpl<MachineInstr *> &InsInstrs, 5241 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, 5242 unsigned MnegOpc, const TargetRegisterClass *RC) { 5243 Register NewVR = MRI.createVirtualRegister(RC); 5244 MachineInstrBuilder MIB = 5245 BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR) 5246 .add(Root.getOperand(2)); 5247 InsInstrs.push_back(MIB); 5248 5249 assert(InstrIdxForVirtReg.empty()); 5250 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5251 5252 return NewVR; 5253 } 5254 5255 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate 5256 /// instructions with an additional negation of the accumulator 5257 static MachineInstr *genFusedMultiplyAccNeg( 5258 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, 5259 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, 5260 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd, 5261 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) { 5262 assert(IdxMulOpd == 1); 5263 5264 Register NewVR = 5265 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC); 5266 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, 5267 FMAInstKind::Accumulator, &NewVR); 5268 } 5269 5270 /// genFusedMultiplyIdx - Helper to generate fused multiply accumulate 5271 /// instructions. 
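/// The lane immediate of the indexed multiply is carried over to the fused
/// instruction.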
///
/// \see genFusedMultiply
static MachineInstr *genFusedMultiplyIdx(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Indexed);
}

/// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
/// instructions with an additional negation of the accumulator
static MachineInstr *genFusedMultiplyIdxNeg(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
    unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1);

  Register NewVR =
      genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);

  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Indexed, &NewVR);
}

/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
5316 /// \param RC Register class of operands 5317 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, 5318 const TargetInstrInfo *TII, MachineInstr &Root, 5319 SmallVectorImpl<MachineInstr *> &InsInstrs, 5320 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, 5321 const TargetRegisterClass *RC) { 5322 assert(IdxMulOpd == 1 || IdxMulOpd == 2); 5323 5324 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); 5325 Register ResultReg = Root.getOperand(0).getReg(); 5326 Register SrcReg0 = MUL->getOperand(1).getReg(); 5327 bool Src0IsKill = MUL->getOperand(1).isKill(); 5328 Register SrcReg1 = MUL->getOperand(2).getReg(); 5329 bool Src1IsKill = MUL->getOperand(2).isKill(); 5330 5331 if (Register::isVirtualRegister(ResultReg)) 5332 MRI.constrainRegClass(ResultReg, RC); 5333 if (Register::isVirtualRegister(SrcReg0)) 5334 MRI.constrainRegClass(SrcReg0, RC); 5335 if (Register::isVirtualRegister(SrcReg1)) 5336 MRI.constrainRegClass(SrcReg1, RC); 5337 if (Register::isVirtualRegister(VR)) 5338 MRI.constrainRegClass(VR, RC); 5339 5340 MachineInstrBuilder MIB = 5341 BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 5342 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 5343 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 5344 .addReg(VR); 5345 // Insert the MADD 5346 InsInstrs.push_back(MIB); 5347 return MUL; 5348 } 5349 5350 /// When getMachineCombinerPatterns() finds potential patterns, 5351 /// this function generates the instructions that could replace the 5352 /// original code sequence 5353 void AArch64InstrInfo::genAlternativeCodeSequence( 5354 MachineInstr &Root, MachineCombinerPattern Pattern, 5355 SmallVectorImpl<MachineInstr *> &InsInstrs, 5356 SmallVectorImpl<MachineInstr *> &DelInstrs, 5357 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { 5358 MachineBasicBlock &MBB = *Root.getParent(); 5359 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 5360 MachineFunction &MF = *MBB.getParent(); 5361 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 5362 5363 MachineInstr *MUL = nullptr; 5364 const TargetRegisterClass *RC; 5365 unsigned Opc; 5366 switch (Pattern) { 5367 default: 5368 // Reassociate instructions. 
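    // Patterns not handled here are delegated to the generic
    // TargetInstrInfo implementation.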
5369 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 5370 DelInstrs, InstrIdxForVirtReg); 5371 return; 5372 case MachineCombinerPattern::MULADDW_OP1: 5373 case MachineCombinerPattern::MULADDX_OP1: 5374 // MUL I=A,B,0 5375 // ADD R,I,C 5376 // ==> MADD R,A,B,C 5377 // --- Create(MADD); 5378 if (Pattern == MachineCombinerPattern::MULADDW_OP1) { 5379 Opc = AArch64::MADDWrrr; 5380 RC = &AArch64::GPR32RegClass; 5381 } else { 5382 Opc = AArch64::MADDXrrr; 5383 RC = &AArch64::GPR64RegClass; 5384 } 5385 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5386 break; 5387 case MachineCombinerPattern::MULADDW_OP2: 5388 case MachineCombinerPattern::MULADDX_OP2: 5389 // MUL I=A,B,0 5390 // ADD R,C,I 5391 // ==> MADD R,A,B,C 5392 // --- Create(MADD); 5393 if (Pattern == MachineCombinerPattern::MULADDW_OP2) { 5394 Opc = AArch64::MADDWrrr; 5395 RC = &AArch64::GPR32RegClass; 5396 } else { 5397 Opc = AArch64::MADDXrrr; 5398 RC = &AArch64::GPR64RegClass; 5399 } 5400 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5401 break; 5402 case MachineCombinerPattern::MULADDWI_OP1: 5403 case MachineCombinerPattern::MULADDXI_OP1: { 5404 // MUL I=A,B,0 5405 // ADD R,I,Imm 5406 // ==> ORR V, ZR, Imm 5407 // ==> MADD R,A,B,V 5408 // --- Create(MADD); 5409 const TargetRegisterClass *OrrRC; 5410 unsigned BitSize, OrrOpc, ZeroReg; 5411 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) { 5412 OrrOpc = AArch64::ORRWri; 5413 OrrRC = &AArch64::GPR32spRegClass; 5414 BitSize = 32; 5415 ZeroReg = AArch64::WZR; 5416 Opc = AArch64::MADDWrrr; 5417 RC = &AArch64::GPR32RegClass; 5418 } else { 5419 OrrOpc = AArch64::ORRXri; 5420 OrrRC = &AArch64::GPR64spRegClass; 5421 BitSize = 64; 5422 ZeroReg = AArch64::XZR; 5423 Opc = AArch64::MADDXrrr; 5424 RC = &AArch64::GPR64RegClass; 5425 } 5426 Register NewVR = MRI.createVirtualRegister(OrrRC); 5427 uint64_t Imm = Root.getOperand(2).getImm(); 5428 5429 if (Root.getOperand(3).isImm()) { 5430 unsigned Val = Root.getOperand(3).getImm(); 5431 Imm = Imm << Val; 5432 } 5433 uint64_t UImm = SignExtend64(Imm, BitSize); 5434 uint64_t Encoding; 5435 if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) 5436 return; 5437 MachineInstrBuilder MIB1 = 5438 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 5439 .addReg(ZeroReg) 5440 .addImm(Encoding); 5441 InsInstrs.push_back(MIB1); 5442 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5443 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 5444 break; 5445 } 5446 case MachineCombinerPattern::MULSUBW_OP1: 5447 case MachineCombinerPattern::MULSUBX_OP1: { 5448 // MUL I=A,B,0 5449 // SUB R,I, C 5450 // ==> SUB V, 0, C 5451 // ==> MADD R,A,B,V // = -C + A*B 5452 // --- Create(MADD); 5453 const TargetRegisterClass *SubRC; 5454 unsigned SubOpc, ZeroReg; 5455 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) { 5456 SubOpc = AArch64::SUBWrr; 5457 SubRC = &AArch64::GPR32spRegClass; 5458 ZeroReg = AArch64::WZR; 5459 Opc = AArch64::MADDWrrr; 5460 RC = &AArch64::GPR32RegClass; 5461 } else { 5462 SubOpc = AArch64::SUBXrr; 5463 SubRC = &AArch64::GPR64spRegClass; 5464 ZeroReg = AArch64::XZR; 5465 Opc = AArch64::MADDXrrr; 5466 RC = &AArch64::GPR64RegClass; 5467 } 5468 Register NewVR = MRI.createVirtualRegister(SubRC); 5469 // SUB NewVR, 0, C 5470 MachineInstrBuilder MIB1 = 5471 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) 5472 .addReg(ZeroReg) 5473 .add(Root.getOperand(2)); 5474 InsInstrs.push_back(MIB1); 5475 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5476 
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 5477 break; 5478 } 5479 case MachineCombinerPattern::MULSUBW_OP2: 5480 case MachineCombinerPattern::MULSUBX_OP2: 5481 // MUL I=A,B,0 5482 // SUB R,C,I 5483 // ==> MSUB R,A,B,C (computes C - A*B) 5484 // --- Create(MSUB); 5485 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) { 5486 Opc = AArch64::MSUBWrrr; 5487 RC = &AArch64::GPR32RegClass; 5488 } else { 5489 Opc = AArch64::MSUBXrrr; 5490 RC = &AArch64::GPR64RegClass; 5491 } 5492 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5493 break; 5494 case MachineCombinerPattern::MULSUBWI_OP1: 5495 case MachineCombinerPattern::MULSUBXI_OP1: { 5496 // MUL I=A,B,0 5497 // SUB R,I, Imm 5498 // ==> ORR V, ZR, -Imm 5499 // ==> MADD R,A,B,V // = -Imm + A*B 5500 // --- Create(MADD); 5501 const TargetRegisterClass *OrrRC; 5502 unsigned BitSize, OrrOpc, ZeroReg; 5503 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) { 5504 OrrOpc = AArch64::ORRWri; 5505 OrrRC = &AArch64::GPR32spRegClass; 5506 BitSize = 32; 5507 ZeroReg = AArch64::WZR; 5508 Opc = AArch64::MADDWrrr; 5509 RC = &AArch64::GPR32RegClass; 5510 } else { 5511 OrrOpc = AArch64::ORRXri; 5512 OrrRC = &AArch64::GPR64spRegClass; 5513 BitSize = 64; 5514 ZeroReg = AArch64::XZR; 5515 Opc = AArch64::MADDXrrr; 5516 RC = &AArch64::GPR64RegClass; 5517 } 5518 Register NewVR = MRI.createVirtualRegister(OrrRC); 5519 uint64_t Imm = Root.getOperand(2).getImm(); 5520 if (Root.getOperand(3).isImm()) { 5521 unsigned Val = Root.getOperand(3).getImm(); 5522 Imm = Imm << Val; 5523 } 5524 uint64_t UImm = SignExtend64(-Imm, BitSize); 5525 uint64_t Encoding; 5526 if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) 5527 return; 5528 MachineInstrBuilder MIB1 = 5529 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 5530 .addReg(ZeroReg) 5531 .addImm(Encoding); 5532 InsInstrs.push_back(MIB1); 5533 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5534 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 5535 break; 5536 } 5537 5538 case MachineCombinerPattern::MULADDv8i8_OP1: 5539 Opc = AArch64::MLAv8i8; 5540 RC = &AArch64::FPR64RegClass; 5541 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5542 break; 5543 case MachineCombinerPattern::MULADDv8i8_OP2: 5544 Opc = AArch64::MLAv8i8; 5545 RC = &AArch64::FPR64RegClass; 5546 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5547 break; 5548 case MachineCombinerPattern::MULADDv16i8_OP1: 5549 Opc = AArch64::MLAv16i8; 5550 RC = &AArch64::FPR128RegClass; 5551 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5552 break; 5553 case MachineCombinerPattern::MULADDv16i8_OP2: 5554 Opc = AArch64::MLAv16i8; 5555 RC = &AArch64::FPR128RegClass; 5556 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5557 break; 5558 case MachineCombinerPattern::MULADDv4i16_OP1: 5559 Opc = AArch64::MLAv4i16; 5560 RC = &AArch64::FPR64RegClass; 5561 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5562 break; 5563 case MachineCombinerPattern::MULADDv4i16_OP2: 5564 Opc = AArch64::MLAv4i16; 5565 RC = &AArch64::FPR64RegClass; 5566 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5567 break; 5568 case MachineCombinerPattern::MULADDv8i16_OP1: 5569 Opc = AArch64::MLAv8i16; 5570 RC = &AArch64::FPR128RegClass; 5571 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5572 break; 5573 case MachineCombinerPattern::MULADDv8i16_OP2: 5574 Opc = AArch64::MLAv8i16; 
5575 RC = &AArch64::FPR128RegClass; 5576 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5577 break; 5578 case MachineCombinerPattern::MULADDv2i32_OP1: 5579 Opc = AArch64::MLAv2i32; 5580 RC = &AArch64::FPR64RegClass; 5581 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5582 break; 5583 case MachineCombinerPattern::MULADDv2i32_OP2: 5584 Opc = AArch64::MLAv2i32; 5585 RC = &AArch64::FPR64RegClass; 5586 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5587 break; 5588 case MachineCombinerPattern::MULADDv4i32_OP1: 5589 Opc = AArch64::MLAv4i32; 5590 RC = &AArch64::FPR128RegClass; 5591 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5592 break; 5593 case MachineCombinerPattern::MULADDv4i32_OP2: 5594 Opc = AArch64::MLAv4i32; 5595 RC = &AArch64::FPR128RegClass; 5596 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5597 break; 5598 5599 case MachineCombinerPattern::MULSUBv8i8_OP1: 5600 Opc = AArch64::MLAv8i8; 5601 RC = &AArch64::FPR64RegClass; 5602 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 5603 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8, 5604 RC); 5605 break; 5606 case MachineCombinerPattern::MULSUBv8i8_OP2: 5607 Opc = AArch64::MLSv8i8; 5608 RC = &AArch64::FPR64RegClass; 5609 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5610 break; 5611 case MachineCombinerPattern::MULSUBv16i8_OP1: 5612 Opc = AArch64::MLAv16i8; 5613 RC = &AArch64::FPR128RegClass; 5614 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 5615 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8, 5616 RC); 5617 break; 5618 case MachineCombinerPattern::MULSUBv16i8_OP2: 5619 Opc = AArch64::MLSv16i8; 5620 RC = &AArch64::FPR128RegClass; 5621 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5622 break; 5623 case MachineCombinerPattern::MULSUBv4i16_OP1: 5624 Opc = AArch64::MLAv4i16; 5625 RC = &AArch64::FPR64RegClass; 5626 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 5627 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16, 5628 RC); 5629 break; 5630 case MachineCombinerPattern::MULSUBv4i16_OP2: 5631 Opc = AArch64::MLSv4i16; 5632 RC = &AArch64::FPR64RegClass; 5633 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5634 break; 5635 case MachineCombinerPattern::MULSUBv8i16_OP1: 5636 Opc = AArch64::MLAv8i16; 5637 RC = &AArch64::FPR128RegClass; 5638 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 5639 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16, 5640 RC); 5641 break; 5642 case MachineCombinerPattern::MULSUBv8i16_OP2: 5643 Opc = AArch64::MLSv8i16; 5644 RC = &AArch64::FPR128RegClass; 5645 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5646 break; 5647 case MachineCombinerPattern::MULSUBv2i32_OP1: 5648 Opc = AArch64::MLAv2i32; 5649 RC = &AArch64::FPR64RegClass; 5650 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 5651 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32, 5652 RC); 5653 break; 5654 case MachineCombinerPattern::MULSUBv2i32_OP2: 5655 Opc = AArch64::MLSv2i32; 5656 RC = &AArch64::FPR64RegClass; 5657 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5658 break; 5659 case MachineCombinerPattern::MULSUBv4i32_OP1: 5660 Opc = AArch64::MLAv4i32; 5661 RC = &AArch64::FPR128RegClass; 5662 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 5663 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32, 5664 RC); 5665 break; 5666 case MachineCombinerPattern::MULSUBv4i32_OP2: 5667 Opc = 
AArch64::MLSv4i32; 5668 RC = &AArch64::FPR128RegClass; 5669 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5670 break; 5671 5672 case MachineCombinerPattern::MULADDv4i16_indexed_OP1: 5673 Opc = AArch64::MLAv4i16_indexed; 5674 RC = &AArch64::FPR64RegClass; 5675 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5676 break; 5677 case MachineCombinerPattern::MULADDv4i16_indexed_OP2: 5678 Opc = AArch64::MLAv4i16_indexed; 5679 RC = &AArch64::FPR64RegClass; 5680 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5681 break; 5682 case MachineCombinerPattern::MULADDv8i16_indexed_OP1: 5683 Opc = AArch64::MLAv8i16_indexed; 5684 RC = &AArch64::FPR128RegClass; 5685 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5686 break; 5687 case MachineCombinerPattern::MULADDv8i16_indexed_OP2: 5688 Opc = AArch64::MLAv8i16_indexed; 5689 RC = &AArch64::FPR128RegClass; 5690 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5691 break; 5692 case MachineCombinerPattern::MULADDv2i32_indexed_OP1: 5693 Opc = AArch64::MLAv2i32_indexed; 5694 RC = &AArch64::FPR64RegClass; 5695 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5696 break; 5697 case MachineCombinerPattern::MULADDv2i32_indexed_OP2: 5698 Opc = AArch64::MLAv2i32_indexed; 5699 RC = &AArch64::FPR64RegClass; 5700 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5701 break; 5702 case MachineCombinerPattern::MULADDv4i32_indexed_OP1: 5703 Opc = AArch64::MLAv4i32_indexed; 5704 RC = &AArch64::FPR128RegClass; 5705 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5706 break; 5707 case MachineCombinerPattern::MULADDv4i32_indexed_OP2: 5708 Opc = AArch64::MLAv4i32_indexed; 5709 RC = &AArch64::FPR128RegClass; 5710 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5711 break; 5712 5713 case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: 5714 Opc = AArch64::MLAv4i16_indexed; 5715 RC = &AArch64::FPR64RegClass; 5716 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 5717 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16, 5718 RC); 5719 break; 5720 case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: 5721 Opc = AArch64::MLSv4i16_indexed; 5722 RC = &AArch64::FPR64RegClass; 5723 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5724 break; 5725 case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: 5726 Opc = AArch64::MLAv8i16_indexed; 5727 RC = &AArch64::FPR128RegClass; 5728 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 5729 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16, 5730 RC); 5731 break; 5732 case MachineCombinerPattern::MULSUBv8i16_indexed_OP2: 5733 Opc = AArch64::MLSv8i16_indexed; 5734 RC = &AArch64::FPR128RegClass; 5735 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5736 break; 5737 case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: 5738 Opc = AArch64::MLAv2i32_indexed; 5739 RC = &AArch64::FPR64RegClass; 5740 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 5741 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32, 5742 RC); 5743 break; 5744 case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: 5745 Opc = AArch64::MLSv2i32_indexed; 5746 RC = &AArch64::FPR64RegClass; 5747 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5748 break; 5749 case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: 5750 Opc = AArch64::MLAv4i32_indexed; 5751 RC = &AArch64::FPR128RegClass; 5752 MUL = genFusedMultiplyIdxNeg(MF, 
MRI, TII, Root, InsInstrs, 5753 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32, 5754 RC); 5755 break; 5756 case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: 5757 Opc = AArch64::MLSv4i32_indexed; 5758 RC = &AArch64::FPR128RegClass; 5759 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5760 break; 5761 5762 // Floating Point Support 5763 case MachineCombinerPattern::FMULADDH_OP1: 5764 Opc = AArch64::FMADDHrrr; 5765 RC = &AArch64::FPR16RegClass; 5766 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5767 break; 5768 case MachineCombinerPattern::FMULADDS_OP1: 5769 Opc = AArch64::FMADDSrrr; 5770 RC = &AArch64::FPR32RegClass; 5771 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5772 break; 5773 case MachineCombinerPattern::FMULADDD_OP1: 5774 Opc = AArch64::FMADDDrrr; 5775 RC = &AArch64::FPR64RegClass; 5776 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5777 break; 5778 5779 case MachineCombinerPattern::FMULADDH_OP2: 5780 Opc = AArch64::FMADDHrrr; 5781 RC = &AArch64::FPR16RegClass; 5782 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5783 break; 5784 case MachineCombinerPattern::FMULADDS_OP2: 5785 Opc = AArch64::FMADDSrrr; 5786 RC = &AArch64::FPR32RegClass; 5787 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5788 break; 5789 case MachineCombinerPattern::FMULADDD_OP2: 5790 Opc = AArch64::FMADDDrrr; 5791 RC = &AArch64::FPR64RegClass; 5792 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5793 break; 5794 5795 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 5796 Opc = AArch64::FMLAv1i32_indexed; 5797 RC = &AArch64::FPR32RegClass; 5798 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5799 FMAInstKind::Indexed); 5800 break; 5801 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 5802 Opc = AArch64::FMLAv1i32_indexed; 5803 RC = &AArch64::FPR32RegClass; 5804 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5805 FMAInstKind::Indexed); 5806 break; 5807 5808 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 5809 Opc = AArch64::FMLAv1i64_indexed; 5810 RC = &AArch64::FPR64RegClass; 5811 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5812 FMAInstKind::Indexed); 5813 break; 5814 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 5815 Opc = AArch64::FMLAv1i64_indexed; 5816 RC = &AArch64::FPR64RegClass; 5817 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5818 FMAInstKind::Indexed); 5819 break; 5820 5821 case MachineCombinerPattern::FMLAv4i16_indexed_OP1: 5822 RC = &AArch64::FPR64RegClass; 5823 Opc = AArch64::FMLAv4i16_indexed; 5824 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5825 FMAInstKind::Indexed); 5826 break; 5827 case MachineCombinerPattern::FMLAv4f16_OP1: 5828 RC = &AArch64::FPR64RegClass; 5829 Opc = AArch64::FMLAv4f16; 5830 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5831 FMAInstKind::Accumulator); 5832 break; 5833 case MachineCombinerPattern::FMLAv4i16_indexed_OP2: 5834 RC = &AArch64::FPR64RegClass; 5835 Opc = AArch64::FMLAv4i16_indexed; 5836 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5837 FMAInstKind::Indexed); 5838 break; 5839 case MachineCombinerPattern::FMLAv4f16_OP2: 5840 RC = &AArch64::FPR64RegClass; 5841 Opc = AArch64::FMLAv4f16; 5842 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5843 FMAInstKind::Accumulator); 5844 break; 5845 5846 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 5847 case 
MachineCombinerPattern::FMLAv2f32_OP1: 5848 RC = &AArch64::FPR64RegClass; 5849 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) { 5850 Opc = AArch64::FMLAv2i32_indexed; 5851 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5852 FMAInstKind::Indexed); 5853 } else { 5854 Opc = AArch64::FMLAv2f32; 5855 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5856 FMAInstKind::Accumulator); 5857 } 5858 break; 5859 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 5860 case MachineCombinerPattern::FMLAv2f32_OP2: 5861 RC = &AArch64::FPR64RegClass; 5862 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) { 5863 Opc = AArch64::FMLAv2i32_indexed; 5864 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5865 FMAInstKind::Indexed); 5866 } else { 5867 Opc = AArch64::FMLAv2f32; 5868 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5869 FMAInstKind::Accumulator); 5870 } 5871 break; 5872 5873 case MachineCombinerPattern::FMLAv8i16_indexed_OP1: 5874 RC = &AArch64::FPR128RegClass; 5875 Opc = AArch64::FMLAv8i16_indexed; 5876 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5877 FMAInstKind::Indexed); 5878 break; 5879 case MachineCombinerPattern::FMLAv8f16_OP1: 5880 RC = &AArch64::FPR128RegClass; 5881 Opc = AArch64::FMLAv8f16; 5882 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5883 FMAInstKind::Accumulator); 5884 break; 5885 case MachineCombinerPattern::FMLAv8i16_indexed_OP2: 5886 RC = &AArch64::FPR128RegClass; 5887 Opc = AArch64::FMLAv8i16_indexed; 5888 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5889 FMAInstKind::Indexed); 5890 break; 5891 case MachineCombinerPattern::FMLAv8f16_OP2: 5892 RC = &AArch64::FPR128RegClass; 5893 Opc = AArch64::FMLAv8f16; 5894 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5895 FMAInstKind::Accumulator); 5896 break; 5897 5898 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 5899 case MachineCombinerPattern::FMLAv2f64_OP1: 5900 RC = &AArch64::FPR128RegClass; 5901 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) { 5902 Opc = AArch64::FMLAv2i64_indexed; 5903 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5904 FMAInstKind::Indexed); 5905 } else { 5906 Opc = AArch64::FMLAv2f64; 5907 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5908 FMAInstKind::Accumulator); 5909 } 5910 break; 5911 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 5912 case MachineCombinerPattern::FMLAv2f64_OP2: 5913 RC = &AArch64::FPR128RegClass; 5914 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) { 5915 Opc = AArch64::FMLAv2i64_indexed; 5916 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5917 FMAInstKind::Indexed); 5918 } else { 5919 Opc = AArch64::FMLAv2f64; 5920 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5921 FMAInstKind::Accumulator); 5922 } 5923 break; 5924 5925 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 5926 case MachineCombinerPattern::FMLAv4f32_OP1: 5927 RC = &AArch64::FPR128RegClass; 5928 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) { 5929 Opc = AArch64::FMLAv4i32_indexed; 5930 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5931 FMAInstKind::Indexed); 5932 } else { 5933 Opc = AArch64::FMLAv4f32; 5934 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5935 FMAInstKind::Accumulator); 5936 } 5937 break; 5938 5939 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 5940 case 
MachineCombinerPattern::FMLAv4f32_OP2: 5941 RC = &AArch64::FPR128RegClass; 5942 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) { 5943 Opc = AArch64::FMLAv4i32_indexed; 5944 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5945 FMAInstKind::Indexed); 5946 } else { 5947 Opc = AArch64::FMLAv4f32; 5948 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5949 FMAInstKind::Accumulator); 5950 } 5951 break; 5952 5953 case MachineCombinerPattern::FMULSUBH_OP1: 5954 Opc = AArch64::FNMSUBHrrr; 5955 RC = &AArch64::FPR16RegClass; 5956 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5957 break; 5958 case MachineCombinerPattern::FMULSUBS_OP1: 5959 Opc = AArch64::FNMSUBSrrr; 5960 RC = &AArch64::FPR32RegClass; 5961 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5962 break; 5963 case MachineCombinerPattern::FMULSUBD_OP1: 5964 Opc = AArch64::FNMSUBDrrr; 5965 RC = &AArch64::FPR64RegClass; 5966 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5967 break; 5968 5969 case MachineCombinerPattern::FNMULSUBH_OP1: 5970 Opc = AArch64::FNMADDHrrr; 5971 RC = &AArch64::FPR16RegClass; 5972 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5973 break; 5974 case MachineCombinerPattern::FNMULSUBS_OP1: 5975 Opc = AArch64::FNMADDSrrr; 5976 RC = &AArch64::FPR32RegClass; 5977 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5978 break; 5979 case MachineCombinerPattern::FNMULSUBD_OP1: 5980 Opc = AArch64::FNMADDDrrr; 5981 RC = &AArch64::FPR64RegClass; 5982 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5983 break; 5984 5985 case MachineCombinerPattern::FMULSUBH_OP2: 5986 Opc = AArch64::FMSUBHrrr; 5987 RC = &AArch64::FPR16RegClass; 5988 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5989 break; 5990 case MachineCombinerPattern::FMULSUBS_OP2: 5991 Opc = AArch64::FMSUBSrrr; 5992 RC = &AArch64::FPR32RegClass; 5993 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5994 break; 5995 case MachineCombinerPattern::FMULSUBD_OP2: 5996 Opc = AArch64::FMSUBDrrr; 5997 RC = &AArch64::FPR64RegClass; 5998 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5999 break; 6000 6001 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 6002 Opc = AArch64::FMLSv1i32_indexed; 6003 RC = &AArch64::FPR32RegClass; 6004 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6005 FMAInstKind::Indexed); 6006 break; 6007 6008 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 6009 Opc = AArch64::FMLSv1i64_indexed; 6010 RC = &AArch64::FPR64RegClass; 6011 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6012 FMAInstKind::Indexed); 6013 break; 6014 6015 case MachineCombinerPattern::FMLSv4f16_OP1: 6016 case MachineCombinerPattern::FMLSv4i16_indexed_OP1: { 6017 RC = &AArch64::FPR64RegClass; 6018 Register NewVR = MRI.createVirtualRegister(RC); 6019 MachineInstrBuilder MIB1 = 6020 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR) 6021 .add(Root.getOperand(2)); 6022 InsInstrs.push_back(MIB1); 6023 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 6024 if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) { 6025 Opc = AArch64::FMLAv4f16; 6026 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6027 FMAInstKind::Accumulator, &NewVR); 6028 } else { 6029 Opc = AArch64::FMLAv4i16_indexed; 6030 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6031 FMAInstKind::Indexed, &NewVR); 6032 } 6033 
break; 6034 } 6035 case MachineCombinerPattern::FMLSv4f16_OP2: 6036 RC = &AArch64::FPR64RegClass; 6037 Opc = AArch64::FMLSv4f16; 6038 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6039 FMAInstKind::Accumulator); 6040 break; 6041 case MachineCombinerPattern::FMLSv4i16_indexed_OP2: 6042 RC = &AArch64::FPR64RegClass; 6043 Opc = AArch64::FMLSv4i16_indexed; 6044 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6045 FMAInstKind::Indexed); 6046 break; 6047 6048 case MachineCombinerPattern::FMLSv2f32_OP2: 6049 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 6050 RC = &AArch64::FPR64RegClass; 6051 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) { 6052 Opc = AArch64::FMLSv2i32_indexed; 6053 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6054 FMAInstKind::Indexed); 6055 } else { 6056 Opc = AArch64::FMLSv2f32; 6057 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6058 FMAInstKind::Accumulator); 6059 } 6060 break; 6061 6062 case MachineCombinerPattern::FMLSv8f16_OP1: 6063 case MachineCombinerPattern::FMLSv8i16_indexed_OP1: { 6064 RC = &AArch64::FPR128RegClass; 6065 Register NewVR = MRI.createVirtualRegister(RC); 6066 MachineInstrBuilder MIB1 = 6067 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR) 6068 .add(Root.getOperand(2)); 6069 InsInstrs.push_back(MIB1); 6070 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 6071 if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) { 6072 Opc = AArch64::FMLAv8f16; 6073 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6074 FMAInstKind::Accumulator, &NewVR); 6075 } else { 6076 Opc = AArch64::FMLAv8i16_indexed; 6077 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6078 FMAInstKind::Indexed, &NewVR); 6079 } 6080 break; 6081 } 6082 case MachineCombinerPattern::FMLSv8f16_OP2: 6083 RC = &AArch64::FPR128RegClass; 6084 Opc = AArch64::FMLSv8f16; 6085 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6086 FMAInstKind::Accumulator); 6087 break; 6088 case MachineCombinerPattern::FMLSv8i16_indexed_OP2: 6089 RC = &AArch64::FPR128RegClass; 6090 Opc = AArch64::FMLSv8i16_indexed; 6091 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6092 FMAInstKind::Indexed); 6093 break; 6094 6095 case MachineCombinerPattern::FMLSv2f64_OP2: 6096 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 6097 RC = &AArch64::FPR128RegClass; 6098 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) { 6099 Opc = AArch64::FMLSv2i64_indexed; 6100 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6101 FMAInstKind::Indexed); 6102 } else { 6103 Opc = AArch64::FMLSv2f64; 6104 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6105 FMAInstKind::Accumulator); 6106 } 6107 break; 6108 6109 case MachineCombinerPattern::FMLSv4f32_OP2: 6110 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 6111 RC = &AArch64::FPR128RegClass; 6112 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) { 6113 Opc = AArch64::FMLSv4i32_indexed; 6114 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6115 FMAInstKind::Indexed); 6116 } else { 6117 Opc = AArch64::FMLSv4f32; 6118 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6119 FMAInstKind::Accumulator); 6120 } 6121 break; 6122 case MachineCombinerPattern::FMLSv2f32_OP1: 6123 case MachineCombinerPattern::FMLSv2i32_indexed_OP1: { 6124 RC = &AArch64::FPR64RegClass; 6125 Register NewVR = MRI.createVirtualRegister(RC); 6126 
MachineInstrBuilder MIB1 = 6127 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR) 6128 .add(Root.getOperand(2)); 6129 InsInstrs.push_back(MIB1); 6130 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 6131 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) { 6132 Opc = AArch64::FMLAv2i32_indexed; 6133 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6134 FMAInstKind::Indexed, &NewVR); 6135 } else { 6136 Opc = AArch64::FMLAv2f32; 6137 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6138 FMAInstKind::Accumulator, &NewVR); 6139 } 6140 break; 6141 } 6142 case MachineCombinerPattern::FMLSv4f32_OP1: 6143 case MachineCombinerPattern::FMLSv4i32_indexed_OP1: { 6144 RC = &AArch64::FPR128RegClass; 6145 Register NewVR = MRI.createVirtualRegister(RC); 6146 MachineInstrBuilder MIB1 = 6147 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR) 6148 .add(Root.getOperand(2)); 6149 InsInstrs.push_back(MIB1); 6150 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 6151 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) { 6152 Opc = AArch64::FMLAv4i32_indexed; 6153 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6154 FMAInstKind::Indexed, &NewVR); 6155 } else { 6156 Opc = AArch64::FMLAv4f32; 6157 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6158 FMAInstKind::Accumulator, &NewVR); 6159 } 6160 break; 6161 } 6162 case MachineCombinerPattern::FMLSv2f64_OP1: 6163 case MachineCombinerPattern::FMLSv2i64_indexed_OP1: { 6164 RC = &AArch64::FPR128RegClass; 6165 Register NewVR = MRI.createVirtualRegister(RC); 6166 MachineInstrBuilder MIB1 = 6167 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR) 6168 .add(Root.getOperand(2)); 6169 InsInstrs.push_back(MIB1); 6170 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 6171 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) { 6172 Opc = AArch64::FMLAv2i64_indexed; 6173 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6174 FMAInstKind::Indexed, &NewVR); 6175 } else { 6176 Opc = AArch64::FMLAv2f64; 6177 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6178 FMAInstKind::Accumulator, &NewVR); 6179 } 6180 break; 6181 } 6182 case MachineCombinerPattern::FMULv2i32_indexed_OP1: 6183 case MachineCombinerPattern::FMULv2i32_indexed_OP2: { 6184 unsigned IdxDupOp = 6185 (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2; 6186 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed, 6187 &AArch64::FPR128RegClass, MRI); 6188 break; 6189 } 6190 case MachineCombinerPattern::FMULv2i64_indexed_OP1: 6191 case MachineCombinerPattern::FMULv2i64_indexed_OP2: { 6192 unsigned IdxDupOp = 6193 (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2; 6194 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed, 6195 &AArch64::FPR128RegClass, MRI); 6196 break; 6197 } 6198 case MachineCombinerPattern::FMULv4i16_indexed_OP1: 6199 case MachineCombinerPattern::FMULv4i16_indexed_OP2: { 6200 unsigned IdxDupOp = 6201 (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2; 6202 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed, 6203 &AArch64::FPR128_loRegClass, MRI); 6204 break; 6205 } 6206 case MachineCombinerPattern::FMULv4i32_indexed_OP1: 6207 case MachineCombinerPattern::FMULv4i32_indexed_OP2: { 6208 unsigned IdxDupOp = 6209 (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 
            1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
                       &AArch64::FPR128RegClass, MRI);
    break;
  }
  case MachineCombinerPattern::FMULv8i16_indexed_OP1:
  case MachineCombinerPattern::FMULv8i16_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
                       &AArch64::FPR128_loRegClass, MRI);
    break;
  }
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion
  if (MUL)
    DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}

/// Replace csinc-branch sequence by simple conditional branch
///
/// Examples:
/// 1. \code
///   csinc  w9, wzr, wzr, <condition code>
///   tbnz   w9, #0, 0x44
///    \endcode
/// to
///    \code
///   b.<inverted condition code>
///    \endcode
///
/// 2. \code
///   csinc w9, wzr, wzr, <condition code>
///   tbz   w9, #0, 0x44
///    \endcode
/// to
///    \code
///   b.<condition code>
///    \endcode
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is a power of 2.
///
/// Examples:
///    \code
///   and  w8, w8, #0x400
///   cbnz w8, L1
///    \endcode
/// to
///    \code
///   tbnz w8, #10, L1
///    \endcode
///
/// \param  MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  Register VReg = MI.getOperand(0).getReg();
  if (!Register::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find definition.
  while (DefMI->isCopy()) {
    Register CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if constant operand is a power of 2.
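  // For example:  and w8, w8, #0x400; cbnz w8, L1  ==>  tbnz w8, #10, L1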
6327 case AArch64::ANDWri: 6328 case AArch64::ANDXri: { 6329 if (IsTestAndBranch) 6330 return false; 6331 if (DefMI->getParent() != MBB) 6332 return false; 6333 if (!MRI->hasOneNonDBGUse(VReg)) 6334 return false; 6335 6336 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri); 6337 uint64_t Mask = AArch64_AM::decodeLogicalImmediate( 6338 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64); 6339 if (!isPowerOf2_64(Mask)) 6340 return false; 6341 6342 MachineOperand &MO = DefMI->getOperand(1); 6343 Register NewReg = MO.getReg(); 6344 if (!Register::isVirtualRegister(NewReg)) 6345 return false; 6346 6347 assert(!MRI->def_empty(NewReg) && "Register must be defined."); 6348 6349 MachineBasicBlock &RefToMBB = *MBB; 6350 MachineBasicBlock *TBB = MI.getOperand(1).getMBB(); 6351 DebugLoc DL = MI.getDebugLoc(); 6352 unsigned Imm = Log2_64(Mask); 6353 unsigned Opc = (Imm < 32) 6354 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW) 6355 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX); 6356 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc)) 6357 .addReg(NewReg) 6358 .addImm(Imm) 6359 .addMBB(TBB); 6360 // Register lives on to the CBZ now. 6361 MO.setIsKill(false); 6362 6363 // For immediate smaller than 32, we need to use the 32-bit 6364 // variant (W) in all cases. Indeed the 64-bit variant does not 6365 // allow to encode them. 6366 // Therefore, if the input register is 64-bit, we need to take the 6367 // 32-bit sub-part. 6368 if (!Is32Bit && Imm < 32) 6369 NewMI->getOperand(0).setSubReg(AArch64::sub_32); 6370 MI.eraseFromParent(); 6371 return true; 6372 } 6373 // Look for CSINC 6374 case AArch64::CSINCWr: 6375 case AArch64::CSINCXr: { 6376 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR && 6377 DefMI->getOperand(2).getReg() == AArch64::WZR) && 6378 !(DefMI->getOperand(1).getReg() == AArch64::XZR && 6379 DefMI->getOperand(2).getReg() == AArch64::XZR)) 6380 return false; 6381 6382 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1) 6383 return false; 6384 6385 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm(); 6386 // Convert only when the condition code is not modified between 6387 // the CSINC and the branch. The CC may be used by other 6388 // instructions in between. 
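    // Only writes to NZCV matter here; reads of the flags between the CSINC
    // and the branch do not block the transformation.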
6389 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write)) 6390 return false; 6391 MachineBasicBlock &RefToMBB = *MBB; 6392 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB(); 6393 DebugLoc DL = MI.getDebugLoc(); 6394 if (IsNegativeBranch) 6395 CC = AArch64CC::getInvertedCondCode(CC); 6396 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB); 6397 MI.eraseFromParent(); 6398 return true; 6399 } 6400 } 6401 } 6402 6403 std::pair<unsigned, unsigned> 6404 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 6405 const unsigned Mask = AArch64II::MO_FRAGMENT; 6406 return std::make_pair(TF & Mask, TF & ~Mask); 6407 } 6408 6409 ArrayRef<std::pair<unsigned, const char *>> 6410 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 6411 using namespace AArch64II; 6412 6413 static const std::pair<unsigned, const char *> TargetFlags[] = { 6414 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"}, 6415 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"}, 6416 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"}, 6417 {MO_HI12, "aarch64-hi12"}}; 6418 return makeArrayRef(TargetFlags); 6419 } 6420 6421 ArrayRef<std::pair<unsigned, const char *>> 6422 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { 6423 using namespace AArch64II; 6424 6425 static const std::pair<unsigned, const char *> TargetFlags[] = { 6426 {MO_COFFSTUB, "aarch64-coffstub"}, 6427 {MO_GOT, "aarch64-got"}, 6428 {MO_NC, "aarch64-nc"}, 6429 {MO_S, "aarch64-s"}, 6430 {MO_TLS, "aarch64-tls"}, 6431 {MO_DLLIMPORT, "aarch64-dllimport"}, 6432 {MO_PREL, "aarch64-prel"}, 6433 {MO_TAGGED, "aarch64-tagged"}}; 6434 return makeArrayRef(TargetFlags); 6435 } 6436 6437 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 6438 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { 6439 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 6440 {{MOSuppressPair, "aarch64-suppress-pair"}, 6441 {MOStridedAccess, "aarch64-strided-access"}}; 6442 return makeArrayRef(TargetFlags); 6443 } 6444 6445 /// Constants defining how certain sequences should be outlined. 6446 /// This encompasses how an outlined function should be called, and what kind of 6447 /// frame should be emitted for that outlined function. 6448 /// 6449 /// \p MachineOutlinerDefault implies that the function should be called with 6450 /// a save and restore of LR to the stack. 6451 /// 6452 /// That is, 6453 /// 6454 /// I1 Save LR OUTLINED_FUNCTION: 6455 /// I2 --> BL OUTLINED_FUNCTION I1 6456 /// I3 Restore LR I2 6457 /// I3 6458 /// RET 6459 /// 6460 /// * Call construction overhead: 3 (save + BL + restore) 6461 /// * Frame construction overhead: 1 (ret) 6462 /// * Requires stack fixups? Yes 6463 /// 6464 /// \p MachineOutlinerTailCall implies that the function is being created from 6465 /// a sequence of instructions ending in a return. 6466 /// 6467 /// That is, 6468 /// 6469 /// I1 OUTLINED_FUNCTION: 6470 /// I2 --> B OUTLINED_FUNCTION I1 6471 /// RET I2 6472 /// RET 6473 /// 6474 /// * Call construction overhead: 1 (B) 6475 /// * Frame construction overhead: 0 (Return included in sequence) 6476 /// * Requires stack fixups? No 6477 /// 6478 /// \p MachineOutlinerNoLRSave implies that the function should be called using 6479 /// a BL instruction, but doesn't require LR to be saved and restored. This 6480 /// happens when LR is known to be dead. 
6481 /// 6482 /// That is, 6483 /// 6484 /// I1 OUTLINED_FUNCTION: 6485 /// I2 --> BL OUTLINED_FUNCTION I1 6486 /// I3 I2 6487 /// I3 6488 /// RET 6489 /// 6490 /// * Call construction overhead: 1 (BL) 6491 /// * Frame construction overhead: 1 (RET) 6492 /// * Requires stack fixups? No 6493 /// 6494 /// \p MachineOutlinerThunk implies that the function is being created from 6495 /// a sequence of instructions ending in a call. The outlined function is 6496 /// called with a BL instruction, and the outlined function tail-calls the 6497 /// original call destination. 6498 /// 6499 /// That is, 6500 /// 6501 /// I1 OUTLINED_FUNCTION: 6502 /// I2 --> BL OUTLINED_FUNCTION I1 6503 /// BL f I2 6504 /// B f 6505 /// * Call construction overhead: 1 (BL) 6506 /// * Frame construction overhead: 0 6507 /// * Requires stack fixups? No 6508 /// 6509 /// \p MachineOutlinerRegSave implies that the function should be called with a 6510 /// save and restore of LR to an available register. This allows us to avoid 6511 /// stack fixups. Note that this outlining variant is compatible with the 6512 /// NoLRSave case. 6513 /// 6514 /// That is, 6515 /// 6516 /// I1 Save LR OUTLINED_FUNCTION: 6517 /// I2 --> BL OUTLINED_FUNCTION I1 6518 /// I3 Restore LR I2 6519 /// I3 6520 /// RET 6521 /// 6522 /// * Call construction overhead: 3 (save + BL + restore) 6523 /// * Frame construction overhead: 1 (ret) 6524 /// * Requires stack fixups? No 6525 enum MachineOutlinerClass { 6526 MachineOutlinerDefault, /// Emit a save, restore, call, and return. 6527 MachineOutlinerTailCall, /// Only emit a branch. 6528 MachineOutlinerNoLRSave, /// Emit a call and return. 6529 MachineOutlinerThunk, /// Emit a call and tail-call. 6530 MachineOutlinerRegSave /// Same as default, but save to a register. 6531 }; 6532 6533 enum MachineOutlinerMBBFlags { 6534 LRUnavailableSomewhere = 0x2, 6535 HasCalls = 0x4, 6536 UnsafeRegsDead = 0x8 6537 }; 6538 6539 unsigned 6540 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { 6541 assert(C.LRUWasSet && "LRU wasn't set?"); 6542 MachineFunction *MF = C.getMF(); 6543 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>( 6544 MF->getSubtarget().getRegisterInfo()); 6545 6546 // Check if there is an available register across the sequence that we can 6547 // use. 6548 for (unsigned Reg : AArch64::GPR64RegClass) { 6549 if (!ARI->isReservedReg(*MF, Reg) && 6550 Reg != AArch64::LR && // LR is not reserved, but don't use it. 6551 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved. 6552 Reg != AArch64::X17 && // Ditto for X17. 6553 C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) 6554 return Reg; 6555 } 6556 6557 // No suitable register. Return 0. 
6558   return 0u;
6559 }
6560 
6561 static bool
6562 outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
6563                                          const outliner::Candidate &b) {
6564   const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
6565   const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
6566 
6567   return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
6568          MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
6569 }
6570 
6571 static bool
6572 outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
6573                                        const outliner::Candidate &b) {
6574   const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
6575   const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
6576 
6577   return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
6578 }
6579 
6580 static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
6581                                                 const outliner::Candidate &b) {
6582   const AArch64Subtarget &SubtargetA =
6583       a.getMF()->getSubtarget<AArch64Subtarget>();
6584   const AArch64Subtarget &SubtargetB =
6585       b.getMF()->getSubtarget<AArch64Subtarget>();
6586   return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
6587 }
6588 
6589 outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
6590     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
6591   outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
6592   unsigned SequenceSize =
6593       std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
6594                       [this](unsigned Sum, const MachineInstr &MI) {
6595                         return Sum + getInstSizeInBytes(MI);
6596                       });
6597   unsigned NumBytesToCreateFrame = 0;
6598 
6599   // We only allow outlining for functions having exactly matching return
6600   // address signing attributes, i.e., all share the same value for the
6601   // attribute "sign-return-address" and all share the same type of key they
6602   // are signed with.
6603   // Additionally we require all functions to simultaneously either support
6604   // v8.3a features or not. Otherwise an outlined function could get signed
6605   // using dedicated v8.3 instructions and a call from a function that doesn't
6606   // support v8.3 instructions would therefore be invalid.
6607   if (std::adjacent_find(
6608           RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
6609           [](const outliner::Candidate &a, const outliner::Candidate &b) {
6610             // Return true if a and b are non-equal w.r.t. return address
6611             // signing or support of v8.3a features.
6612             if (outliningCandidatesSigningScopeConsensus(a, b) &&
6613                 outliningCandidatesSigningKeyConsensus(a, b) &&
6614                 outliningCandidatesV8_3OpsConsensus(a, b)) {
6615               return false;
6616             }
6617             return true;
6618           }) != RepeatedSequenceLocs.end()) {
6619     return outliner::OutlinedFunction();
6620   }
6621 
6622   // Since at this point all candidates agree on their return address signing,
6623   // picking just one is fine. If the candidate functions potentially sign their
6624   // return addresses, the outlined function should do the same. Note that in
6625   // the case of "sign-return-address"="non-leaf" this is an assumption: It is
6626   // not certainly true that the outlined function will have to sign its return
6627   // address but this decision is made later, when the decision to outline
6628   // has already been made.
6629   // The same holds for the number of additional instructions we need: On
6630   // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
6631   // necessary.
However, at this point we don't know if the outlined function 6632 // will have a RET instruction so we assume the worst. 6633 const TargetRegisterInfo &TRI = getRegisterInfo(); 6634 if (FirstCand.getMF() 6635 ->getInfo<AArch64FunctionInfo>() 6636 ->shouldSignReturnAddress(true)) { 6637 // One PAC and one AUT instructions 6638 NumBytesToCreateFrame += 8; 6639 6640 // We have to check if sp modifying instructions would get outlined. 6641 // If so we only allow outlining if sp is unchanged overall, so matching 6642 // sub and add instructions are okay to outline, all other sp modifications 6643 // are not 6644 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) { 6645 int SPValue = 0; 6646 MachineBasicBlock::iterator MBBI = C.front(); 6647 for (;;) { 6648 if (MBBI->modifiesRegister(AArch64::SP, &TRI)) { 6649 switch (MBBI->getOpcode()) { 6650 case AArch64::ADDXri: 6651 case AArch64::ADDWri: 6652 assert(MBBI->getNumOperands() == 4 && "Wrong number of operands"); 6653 assert(MBBI->getOperand(2).isImm() && 6654 "Expected operand to be immediate"); 6655 assert(MBBI->getOperand(1).isReg() && 6656 "Expected operand to be a register"); 6657 // Check if the add just increments sp. If so, we search for 6658 // matching sub instructions that decrement sp. If not, the 6659 // modification is illegal 6660 if (MBBI->getOperand(1).getReg() == AArch64::SP) 6661 SPValue += MBBI->getOperand(2).getImm(); 6662 else 6663 return true; 6664 break; 6665 case AArch64::SUBXri: 6666 case AArch64::SUBWri: 6667 assert(MBBI->getNumOperands() == 4 && "Wrong number of operands"); 6668 assert(MBBI->getOperand(2).isImm() && 6669 "Expected operand to be immediate"); 6670 assert(MBBI->getOperand(1).isReg() && 6671 "Expected operand to be a register"); 6672 // Check if the sub just decrements sp. If so, we search for 6673 // matching add instructions that increment sp. If not, the 6674 // modification is illegal 6675 if (MBBI->getOperand(1).getReg() == AArch64::SP) 6676 SPValue -= MBBI->getOperand(2).getImm(); 6677 else 6678 return true; 6679 break; 6680 default: 6681 return true; 6682 } 6683 } 6684 if (MBBI == C.back()) 6685 break; 6686 ++MBBI; 6687 } 6688 if (SPValue) 6689 return true; 6690 return false; 6691 }; 6692 // Remove candidates with illegal stack modifying instructions 6693 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification); 6694 6695 // If the sequence doesn't have enough candidates left, then we're done. 6696 if (RepeatedSequenceLocs.size() < 2) 6697 return outliner::OutlinedFunction(); 6698 } 6699 6700 // Properties about candidate MBBs that hold for all of them. 6701 unsigned FlagsSetInAll = 0xF; 6702 6703 // Compute liveness information for each candidate, and set FlagsSetInAll. 6704 std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), 6705 [&FlagsSetInAll](outliner::Candidate &C) { 6706 FlagsSetInAll &= C.Flags; 6707 }); 6708 6709 // According to the AArch64 Procedure Call Standard, the following are 6710 // undefined on entry/exit from a function call: 6711 // 6712 // * Registers x16, x17, (and thus w16, w17) 6713 // * Condition codes (and thus the NZCV register) 6714 // 6715 // Because if this, we can't outline any sequence of instructions where 6716 // one 6717 // of these registers is live into/across it. Thus, we need to delete 6718 // those 6719 // candidates. 6720 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { 6721 // If the unsafe registers in this block are all dead, then we don't need 6722 // to compute liveness here. 
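    // Checking the 32-bit views W16/W17 is enough here: LiveRegUnits works on
    // register units, and X16/X17 share their units with W16/W17, so a live
    // X16 also makes W16 unavailable.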
6723     if (C.Flags & UnsafeRegsDead)
6724       return false;
6725     C.initLRU(TRI);
6726     LiveRegUnits LRU = C.LRU;
6727     return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
6728             !LRU.available(AArch64::NZCV));
6729   };
6730 
6731   // Are there any candidates where those registers are live?
6732   if (!(FlagsSetInAll & UnsafeRegsDead)) {
6733     // Erase every candidate that violates the restrictions above. (It could be
6734     // true that we have viable candidates, so it's not worth bailing out in
6735     // the case that, say, 1 out of 20 candidates violates the restrictions.)
6736     llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
6737 
6738     // If the sequence doesn't have enough candidates left, then we're done.
6739     if (RepeatedSequenceLocs.size() < 2)
6740       return outliner::OutlinedFunction();
6741   }
6742 
6743   // At this point, we have only "safe" candidates to outline. Figure out
6744   // frame + call instruction information.
6745 
6746   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
6747 
6748   // Helper lambda which sets call information for every candidate.
6749   auto SetCandidateCallInfo =
6750       [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
6751         for (outliner::Candidate &C : RepeatedSequenceLocs)
6752           C.setCallInfo(CallID, NumBytesForCall);
6753       };
6754 
6755   unsigned FrameID = MachineOutlinerDefault;
6756   NumBytesToCreateFrame += 4;
6757 
6758   bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
6759     return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
6760   });
6761 
6762   // We check to see if CFI Instructions are present, and if they are
6763   // we find the number of CFI Instructions in the candidates.
6764   unsigned CFICount = 0;
6765   MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
6766   for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
6767        Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
6768     if (MBBI->isCFIInstruction())
6769       CFICount++;
6770     MBBI++;
6771   }
6772 
6773   // We compare the number of found CFI Instructions to the number of CFI
6774   // instructions in the parent function for each candidate. We must check this
6775   // since if we outline one of the CFI instructions in a function, we have to
6776   // outline them all for correctness. If we do not, the address offsets will be
6777   // incorrect between the two sections of the program.
6778   for (outliner::Candidate &C : RepeatedSequenceLocs) {
6779     std::vector<MCCFIInstruction> CFIInstructions =
6780         C.getMF()->getFrameInstructions();
6781 
6782     if (CFICount > 0 && CFICount != CFIInstructions.size())
6783       return outliner::OutlinedFunction();
6784   }
6785 
6786   // Returns true if an instruction is safe to fix up, false otherwise.
6787   auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
6788     if (MI.isCall())
6789       return true;
6790 
6791     if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
6792         !MI.readsRegister(AArch64::SP, &TRI))
6793       return true;
6794 
6795     // Any modification of SP will break our code to save/restore LR.
6796     // FIXME: We could handle some instructions which add a constant
6797     // offset to SP, with a bit more work.
6798     if (MI.modifiesRegister(AArch64::SP, &TRI))
6799       return false;
6800 
6801     // At this point, we have a stack instruction that we might need to
6802     // fix up. We'll handle it if it's a load or store.
6803     if (MI.mayLoadOrStore()) {
6804       const MachineOperand *Base; // Filled with the base operand of MI.
6805       int64_t Offset;             // Filled with the offset of MI.
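      // For illustration of the checks below: for LDRXui the metadata from
      // getMemOpInfo is Scale == 8, MinOffset == 0, MaxOffset == 4095, so an
      // SP-relative load stays outlinable only if its byte offset plus the
      // 16-byte LR spill still falls inside [0, 4095 * 8].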
6806 bool OffsetIsScalable; 6807 6808 // Does it allow us to offset the base operand and is the base the 6809 // register SP? 6810 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) || 6811 !Base->isReg() || Base->getReg() != AArch64::SP) 6812 return false; 6813 6814 // Fixe-up code below assumes bytes. 6815 if (OffsetIsScalable) 6816 return false; 6817 6818 // Find the minimum/maximum offset for this instruction and check 6819 // if fixing it up would be in range. 6820 int64_t MinOffset, 6821 MaxOffset; // Unscaled offsets for the instruction. 6822 TypeSize Scale(0U, false); // The scale to multiply the offsets by. 6823 unsigned DummyWidth; 6824 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset); 6825 6826 Offset += 16; // Update the offset to what it would be if we outlined. 6827 if (Offset < MinOffset * (int64_t)Scale.getFixedSize() || 6828 Offset > MaxOffset * (int64_t)Scale.getFixedSize()) 6829 return false; 6830 6831 // It's in range, so we can outline it. 6832 return true; 6833 } 6834 6835 // FIXME: Add handling for instructions like "add x0, sp, #8". 6836 6837 // We can't fix it up, so don't outline it. 6838 return false; 6839 }; 6840 6841 // True if it's possible to fix up each stack instruction in this sequence. 6842 // Important for frames/call variants that modify the stack. 6843 bool AllStackInstrsSafe = std::all_of( 6844 FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup); 6845 6846 // If the last instruction in any candidate is a terminator, then we should 6847 // tail call all of the candidates. 6848 if (RepeatedSequenceLocs[0].back()->isTerminator()) { 6849 FrameID = MachineOutlinerTailCall; 6850 NumBytesToCreateFrame = 0; 6851 SetCandidateCallInfo(MachineOutlinerTailCall, 4); 6852 } 6853 6854 else if (LastInstrOpcode == AArch64::BL || 6855 ((LastInstrOpcode == AArch64::BLR || 6856 LastInstrOpcode == AArch64::BLRNoIP) && 6857 !HasBTI)) { 6858 // FIXME: Do we need to check if the code after this uses the value of LR? 6859 FrameID = MachineOutlinerThunk; 6860 NumBytesToCreateFrame = 0; 6861 SetCandidateCallInfo(MachineOutlinerThunk, 4); 6862 } 6863 6864 else { 6865 // We need to decide how to emit calls + frames. We can always emit the same 6866 // frame if we don't need to save to the stack. If we have to save to the 6867 // stack, then we need a different frame. 6868 unsigned NumBytesNoStackCalls = 0; 6869 std::vector<outliner::Candidate> CandidatesWithoutStackFixups; 6870 6871 // Check if we have to save LR. 6872 for (outliner::Candidate &C : RepeatedSequenceLocs) { 6873 C.initLRU(TRI); 6874 6875 // If we have a noreturn caller, then we're going to be conservative and 6876 // say that we have to save LR. If we don't have a ret at the end of the 6877 // block, then we can't reason about liveness accurately. 6878 // 6879 // FIXME: We can probably do better than always disabling this in 6880 // noreturn functions by fixing up the liveness info. 6881 bool IsNoReturn = 6882 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn); 6883 6884 // Is LR available? If so, we don't need a save. 6885 if (C.LRU.available(AArch64::LR) && !IsNoReturn) { 6886 NumBytesNoStackCalls += 4; 6887 C.setCallInfo(MachineOutlinerNoLRSave, 4); 6888 CandidatesWithoutStackFixups.push_back(C); 6889 } 6890 6891 // Is an unused register available? If so, we won't modify the stack, so 6892 // we can outline with the same frame type as those that don't save LR. 
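      // The 12 bytes used below are the register move that saves LR, the BL,
      // and the move that restores LR (three 4-byte instructions).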
6893 else if (findRegisterToSaveLRTo(C)) { 6894 NumBytesNoStackCalls += 12; 6895 C.setCallInfo(MachineOutlinerRegSave, 12); 6896 CandidatesWithoutStackFixups.push_back(C); 6897 } 6898 6899 // Is SP used in the sequence at all? If not, we don't have to modify 6900 // the stack, so we are guaranteed to get the same frame. 6901 else if (C.UsedInSequence.available(AArch64::SP)) { 6902 NumBytesNoStackCalls += 12; 6903 C.setCallInfo(MachineOutlinerDefault, 12); 6904 CandidatesWithoutStackFixups.push_back(C); 6905 } 6906 6907 // If we outline this, we need to modify the stack. Pretend we don't 6908 // outline this by saving all of its bytes. 6909 else { 6910 NumBytesNoStackCalls += SequenceSize; 6911 } 6912 } 6913 6914 // If there are no places where we have to save LR, then note that we 6915 // don't have to update the stack. Otherwise, give every candidate the 6916 // default call type, as long as it's safe to do so. 6917 if (!AllStackInstrsSafe || 6918 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) { 6919 RepeatedSequenceLocs = CandidatesWithoutStackFixups; 6920 FrameID = MachineOutlinerNoLRSave; 6921 } else { 6922 SetCandidateCallInfo(MachineOutlinerDefault, 12); 6923 6924 // Bugzilla ID: 46767 6925 // TODO: Check if fixing up the stack more than once is safe so we can 6926 // outline these. 6927 // 6928 // An outline resulting in a caller that requires stack fixups at the 6929 // callsite to a callee that also requires stack fixups can happen when 6930 // there are no available registers at the candidate callsite for a 6931 // candidate that itself also has calls. 6932 // 6933 // In other words if function_containing_sequence in the following pseudo 6934 // assembly requires that we save LR at the point of the call, but there 6935 // are no available registers: in this case we save using SP and as a 6936 // result the SP offsets requires stack fixups by multiples of 16. 6937 // 6938 // function_containing_sequence: 6939 // ... 6940 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N 6941 // call OUTLINED_FUNCTION_N 6942 // restore LR from SP 6943 // ... 6944 // 6945 // OUTLINED_FUNCTION_N: 6946 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N 6947 // ... 6948 // bl foo 6949 // restore LR from SP 6950 // ret 6951 // 6952 // Because the code to handle more than one stack fixup does not 6953 // currently have the proper checks for legality, these cases will assert 6954 // in the AArch64 MachineOutliner. This is because the code to do this 6955 // needs more hardening, testing, better checks that generated code is 6956 // legal, etc and because it is only verified to handle a single pass of 6957 // stack fixup. 6958 // 6959 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch 6960 // these cases until they are known to be handled. Bugzilla 46767 is 6961 // referenced in comments at the assert site. 6962 // 6963 // To avoid asserting (or generating non-legal code on noassert builds) 6964 // we remove all candidates which would need more than one stack fixup by 6965 // pruning the cases where the candidate has calls while also having no 6966 // available LR and having no available general purpose registers to copy 6967 // LR to (ie one extra stack save/restore). 
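      // Concretely, the erase_if below keeps a candidate that contains calls
      // only if LR is free across the sequence and a scratch register is also
      // available, so no second round of stack fixups can become necessary.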
6968 // 6969 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { 6970 erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) { 6971 return (std::any_of( 6972 C.front(), std::next(C.back()), 6973 [](const MachineInstr &MI) { return MI.isCall(); })) && 6974 (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C)); 6975 }); 6976 } 6977 } 6978 6979 // If we dropped all of the candidates, bail out here. 6980 if (RepeatedSequenceLocs.size() < 2) { 6981 RepeatedSequenceLocs.clear(); 6982 return outliner::OutlinedFunction(); 6983 } 6984 } 6985 6986 // Does every candidate's MBB contain a call? If so, then we might have a call 6987 // in the range. 6988 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { 6989 // Check if the range contains a call. These require a save + restore of the 6990 // link register. 6991 bool ModStackToSaveLR = false; 6992 if (std::any_of(FirstCand.front(), FirstCand.back(), 6993 [](const MachineInstr &MI) { return MI.isCall(); })) 6994 ModStackToSaveLR = true; 6995 6996 // Handle the last instruction separately. If this is a tail call, then the 6997 // last instruction is a call. We don't want to save + restore in this case. 6998 // However, it could be possible that the last instruction is a call without 6999 // it being valid to tail call this sequence. We should consider this as 7000 // well. 7001 else if (FrameID != MachineOutlinerThunk && 7002 FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall()) 7003 ModStackToSaveLR = true; 7004 7005 if (ModStackToSaveLR) { 7006 // We can't fix up the stack. Bail out. 7007 if (!AllStackInstrsSafe) { 7008 RepeatedSequenceLocs.clear(); 7009 return outliner::OutlinedFunction(); 7010 } 7011 7012 // Save + restore LR. 7013 NumBytesToCreateFrame += 8; 7014 } 7015 } 7016 7017 // If we have CFI instructions, we can only outline if the outlined section 7018 // can be a tail call 7019 if (FrameID != MachineOutlinerTailCall && CFICount > 0) 7020 return outliner::OutlinedFunction(); 7021 7022 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 7023 NumBytesToCreateFrame, FrameID); 7024 } 7025 7026 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom( 7027 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { 7028 const Function &F = MF.getFunction(); 7029 7030 // Can F be deduplicated by the linker? If it can, don't outline from it. 7031 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 7032 return false; 7033 7034 // Don't outline from functions with section markings; the program could 7035 // expect that all the code is in the named section. 7036 // FIXME: Allow outlining from multiple functions with the same section 7037 // marking. 7038 if (F.hasSection()) 7039 return false; 7040 7041 // Outlining from functions with redzones is unsafe since the outliner may 7042 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't 7043 // outline from it. 7044 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 7045 if (!AFI || AFI->hasRedZone().getValueOr(true)) 7046 return false; 7047 7048 // FIXME: Teach the outliner to generate/handle Windows unwind info. 7049 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) 7050 return false; 7051 7052 // It's safe to outline from MF. 7053 return true; 7054 } 7055 7056 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, 7057 unsigned &Flags) const { 7058 // Check if LR is available through all of the MBB. If it's not, then set 7059 // a flag. 
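  // This hook runs once per basic block, before any candidates exist, so it
  // only records block-level facts in Flags (MachineOutlinerMBBFlags);
  // getOutliningCandidateInfo later ANDs these flags across all candidates
  // into FlagsSetInAll.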
7060 assert(MBB.getParent()->getRegInfo().tracksLiveness() && 7061 "Suitable Machine Function for outlining must track liveness"); 7062 LiveRegUnits LRU(getRegisterInfo()); 7063 7064 std::for_each(MBB.rbegin(), MBB.rend(), 7065 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); 7066 7067 // Check if each of the unsafe registers are available... 7068 bool W16AvailableInBlock = LRU.available(AArch64::W16); 7069 bool W17AvailableInBlock = LRU.available(AArch64::W17); 7070 bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV); 7071 7072 // If all of these are dead (and not live out), we know we don't have to check 7073 // them later. 7074 if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock) 7075 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; 7076 7077 // Now, add the live outs to the set. 7078 LRU.addLiveOuts(MBB); 7079 7080 // If any of these registers is available in the MBB, but also a live out of 7081 // the block, then we know outlining is unsafe. 7082 if (W16AvailableInBlock && !LRU.available(AArch64::W16)) 7083 return false; 7084 if (W17AvailableInBlock && !LRU.available(AArch64::W17)) 7085 return false; 7086 if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV)) 7087 return false; 7088 7089 // Check if there's a call inside this MachineBasicBlock. If there is, then 7090 // set a flag. 7091 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) 7092 Flags |= MachineOutlinerMBBFlags::HasCalls; 7093 7094 MachineFunction *MF = MBB.getParent(); 7095 7096 // In the event that we outline, we may have to save LR. If there is an 7097 // available register in the MBB, then we'll always save LR there. Check if 7098 // this is true. 7099 bool CanSaveLR = false; 7100 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>( 7101 MF->getSubtarget().getRegisterInfo()); 7102 7103 // Check if there is an available register across the sequence that we can 7104 // use. 7105 for (unsigned Reg : AArch64::GPR64RegClass) { 7106 if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR && 7107 Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) { 7108 CanSaveLR = true; 7109 break; 7110 } 7111 } 7112 7113 // Check if we have a register we can save LR to, and if LR was used 7114 // somewhere. If both of those things are true, then we need to evaluate the 7115 // safety of outlining stack instructions later. 7116 if (!CanSaveLR && !LRU.available(AArch64::LR)) 7117 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; 7118 7119 return true; 7120 } 7121 7122 outliner::InstrType 7123 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, 7124 unsigned Flags) const { 7125 MachineInstr &MI = *MIT; 7126 MachineBasicBlock *MBB = MI.getParent(); 7127 MachineFunction *MF = MBB->getParent(); 7128 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>(); 7129 7130 // Don't outline anything used for return address signing. The outlined 7131 // function will get signed later if needed 7132 switch (MI.getOpcode()) { 7133 case AArch64::PACIASP: 7134 case AArch64::PACIBSP: 7135 case AArch64::AUTIASP: 7136 case AArch64::AUTIBSP: 7137 case AArch64::RETAA: 7138 case AArch64::RETAB: 7139 case AArch64::EMITBKEY: 7140 return outliner::InstrType::Illegal; 7141 } 7142 7143 // Don't outline LOHs. 7144 if (FuncInfo->getLOHRelated().count(&MI)) 7145 return outliner::InstrType::Illegal; 7146 7147 // We can only outline these if we will tail call the outlined function, or 7148 // fix up the CFI offsets. 
Currently, CFI instructions are outlined only if 7149 // in a tail call. 7150 // 7151 // FIXME: If the proper fixups for the offset are implemented, this should be 7152 // possible. 7153 if (MI.isCFIInstruction()) 7154 return outliner::InstrType::Legal; 7155 7156 // Don't allow debug values to impact outlining type. 7157 if (MI.isDebugInstr() || MI.isIndirectDebugValue()) 7158 return outliner::InstrType::Invisible; 7159 7160 // At this point, KILL instructions don't really tell us much so we can go 7161 // ahead and skip over them. 7162 if (MI.isKill()) 7163 return outliner::InstrType::Invisible; 7164 7165 // Is this a terminator for a basic block? 7166 if (MI.isTerminator()) { 7167 7168 // Is this the end of a function? 7169 if (MI.getParent()->succ_empty()) 7170 return outliner::InstrType::Legal; 7171 7172 // It's not, so don't outline it. 7173 return outliner::InstrType::Illegal; 7174 } 7175 7176 // Make sure none of the operands are un-outlinable. 7177 for (const MachineOperand &MOP : MI.operands()) { 7178 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || 7179 MOP.isTargetIndex()) 7180 return outliner::InstrType::Illegal; 7181 7182 // If it uses LR or W30 explicitly, then don't touch it. 7183 if (MOP.isReg() && !MOP.isImplicit() && 7184 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30)) 7185 return outliner::InstrType::Illegal; 7186 } 7187 7188 // Special cases for instructions that can always be outlined, but will fail 7189 // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always 7190 // be outlined because they don't require a *specific* value to be in LR. 7191 if (MI.getOpcode() == AArch64::ADRP) 7192 return outliner::InstrType::Legal; 7193 7194 // If MI is a call we might be able to outline it. We don't want to outline 7195 // any calls that rely on the position of items on the stack. When we outline 7196 // something containing a call, we have to emit a save and restore of LR in 7197 // the outlined function. Currently, this always happens by saving LR to the 7198 // stack. Thus, if we outline, say, half the parameters for a function call 7199 // plus the call, then we'll break the callee's expectations for the layout 7200 // of the stack. 7201 // 7202 // FIXME: Allow calls to functions which construct a stack frame, as long 7203 // as they don't access arguments on the stack. 7204 // FIXME: Figure out some way to analyze functions defined in other modules. 7205 // We should be able to compute the memory usage based on the IR calling 7206 // convention, even if we can't see the definition. 7207 if (MI.isCall()) { 7208 // Get the function associated with the call. Look at each operand and find 7209 // the one that represents the callee and get its name. 7210 const Function *Callee = nullptr; 7211 for (const MachineOperand &MOP : MI.operands()) { 7212 if (MOP.isGlobal()) { 7213 Callee = dyn_cast<Function>(MOP.getGlobal()); 7214 break; 7215 } 7216 } 7217 7218 // Never outline calls to mcount. There isn't any rule that would require 7219 // this, but the Linux kernel's "ftrace" feature depends on it. 7220 if (Callee && Callee->getName() == "\01_mcount") 7221 return outliner::InstrType::Illegal; 7222 7223 // If we don't know anything about the callee, assume it depends on the 7224 // stack layout of the caller. In that case, it's only legal to outline 7225 // as a tail-call. Explicitly list the call instructions we know about so we 7226 // don't get unexpected results with call pseudo-instructions. 
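    // LegalTerminator means such a call may only end an outlined sequence,
    // never sit in the middle of one; that is what later allows the
    // MachineOutlinerThunk / MachineOutlinerTailCall frames instead of
    // outlining code that would run after the unknown callee returns.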
7227     auto UnknownCallOutlineType = outliner::InstrType::Illegal;
7228     if (MI.getOpcode() == AArch64::BLR ||
7229         MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
7230       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
7231 
7232     if (!Callee)
7233       return UnknownCallOutlineType;
7234 
7235     // We have a function we have information about. Check if it's something we
7236     // can safely outline.
7237     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
7238 
7239     // We don't know what's going on with the callee at all. Don't touch it.
7240     if (!CalleeMF)
7241       return UnknownCallOutlineType;
7242 
7243     // Check if we know anything about the callee saves on the function. If we
7244     // don't, then don't touch it, since that implies that we haven't
7245     // computed anything about its stack frame yet.
7246     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
7247     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
7248         MFI.getNumObjects() > 0)
7249       return UnknownCallOutlineType;
7250 
7251     // At this point, we can say that CalleeMF ought to not pass anything on the
7252     // stack. Therefore, we can outline it.
7253     return outliner::InstrType::Legal;
7254   }
7255 
7256   // Don't outline positions.
7257   if (MI.isPosition())
7258     return outliner::InstrType::Illegal;
7259 
7260   // Don't touch the link register or W30.
7261   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
7262       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
7263     return outliner::InstrType::Illegal;
7264 
7265   // Don't outline BTI instructions, because that will prevent the outlining
7266   // site from being indirectly callable.
7267   if (MI.getOpcode() == AArch64::HINT) {
7268     int64_t Imm = MI.getOperand(0).getImm();
7269     if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
7270       return outliner::InstrType::Illegal;
7271   }
7272 
7273   return outliner::InstrType::Legal;
7274 }
7275 
7276 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
7277   for (MachineInstr &MI : MBB) {
7278     const MachineOperand *Base;
7279     unsigned Width;
7280     int64_t Offset;
7281     bool OffsetIsScalable;
7282 
7283     // Is this a load or store with an immediate offset with SP as the base?
7284     if (!MI.mayLoadOrStore() ||
7285         !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
7286                                       &RI) ||
7287         (Base->isReg() && Base->getReg() != AArch64::SP))
7288       continue;
7289 
7290     // It is, so we have to fix it up.
7291     TypeSize Scale(0U, false);
7292     int64_t Dummy1, Dummy2;
7293 
7294     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
7295     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
7296     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
7297     assert(Scale != 0 && "Unexpected opcode!");
7298     assert(!OffsetIsScalable && "Expected offset to be a byte offset");
7299 
7300     // We've pushed the return address to the stack, so add 16 to the offset.
7301     // This is safe, since we already checked if it would overflow when we
7302     // checked if this instruction was legal to outline.
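    // For illustration: "ldr x0, [sp, #24]" has Scale == 8 and a byte Offset
    // of 24; once the outlined frame has pushed LR (SP dropped by 16), the
    // same slot sits at byte offset 40 from the new SP, so the re-encoded
    // immediate is (24 + 16) / 8 == 5.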
7303 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize(); 7304 StackOffsetOperand.setImm(NewImm); 7305 } 7306 } 7307 7308 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, 7309 bool ShouldSignReturnAddr, 7310 bool ShouldSignReturnAddrWithAKey) { 7311 if (ShouldSignReturnAddr) { 7312 MachineBasicBlock::iterator MBBPAC = MBB.begin(); 7313 MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator(); 7314 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 7315 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 7316 DebugLoc DL; 7317 7318 if (MBBAUT != MBB.end()) 7319 DL = MBBAUT->getDebugLoc(); 7320 7321 // At the very beginning of the basic block we insert the following 7322 // depending on the key type 7323 // 7324 // a_key: b_key: 7325 // PACIASP EMITBKEY 7326 // CFI_INSTRUCTION PACIBSP 7327 // CFI_INSTRUCTION 7328 unsigned PACI; 7329 if (ShouldSignReturnAddrWithAKey) { 7330 PACI = Subtarget.hasPAuth() ? AArch64::PACIA : AArch64::PACIASP; 7331 } else { 7332 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY)) 7333 .setMIFlag(MachineInstr::FrameSetup); 7334 PACI = Subtarget.hasPAuth() ? AArch64::PACIB : AArch64::PACIBSP; 7335 } 7336 7337 auto MI = BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(PACI)); 7338 if (Subtarget.hasPAuth()) 7339 MI.addReg(AArch64::LR, RegState::Define) 7340 .addReg(AArch64::LR) 7341 .addReg(AArch64::SP, RegState::InternalRead); 7342 MI.setMIFlag(MachineInstr::FrameSetup); 7343 7344 unsigned CFIIndex = 7345 MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); 7346 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION)) 7347 .addCFIIndex(CFIIndex) 7348 .setMIFlags(MachineInstr::FrameSetup); 7349 7350 // If v8.3a features are available we can replace a RET instruction by 7351 // RETAA or RETAB and omit the AUT instructions. In this case the 7352 // DW_CFA_AARCH64_negate_ra_state can't be emitted. 7353 if (Subtarget.hasPAuth() && MBBAUT != MBB.end() && 7354 MBBAUT->getOpcode() == AArch64::RET) { 7355 BuildMI(MBB, MBBAUT, DL, 7356 TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA 7357 : AArch64::RETAB)) 7358 .copyImplicitOps(*MBBAUT); 7359 MBB.erase(MBBAUT); 7360 } else { 7361 BuildMI(MBB, MBBAUT, DL, 7362 TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP 7363 : AArch64::AUTIBSP)) 7364 .setMIFlag(MachineInstr::FrameDestroy); 7365 unsigned CFIIndexAuth = 7366 MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); 7367 BuildMI(MBB, MBBAUT, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 7368 .addCFIIndex(CFIIndexAuth) 7369 .setMIFlags(MachineInstr::FrameDestroy); 7370 } 7371 } 7372 } 7373 7374 void AArch64InstrInfo::buildOutlinedFrame( 7375 MachineBasicBlock &MBB, MachineFunction &MF, 7376 const outliner::OutlinedFunction &OF) const { 7377 7378 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>(); 7379 7380 if (OF.FrameConstructionID == MachineOutlinerTailCall) 7381 FI->setOutliningStyle("Tail Call"); 7382 else if (OF.FrameConstructionID == MachineOutlinerThunk) { 7383 // For thunk outlining, rewrite the last instruction from a call to a 7384 // tail-call. 
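    // For illustration, after later tail-call expansion this rewrite amounts
    // to
    //
    //   bl  f    -->  b  f     (BL          -> TCRETURNdi)
    //   blr x8   -->  br x8    (BLR/BLRNoIP -> TCRETURNriALL)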
7385 MachineInstr *Call = &*--MBB.instr_end(); 7386 unsigned TailOpcode; 7387 if (Call->getOpcode() == AArch64::BL) { 7388 TailOpcode = AArch64::TCRETURNdi; 7389 } else { 7390 assert(Call->getOpcode() == AArch64::BLR || 7391 Call->getOpcode() == AArch64::BLRNoIP); 7392 TailOpcode = AArch64::TCRETURNriALL; 7393 } 7394 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode)) 7395 .add(Call->getOperand(0)) 7396 .addImm(0); 7397 MBB.insert(MBB.end(), TC); 7398 Call->eraseFromParent(); 7399 7400 FI->setOutliningStyle("Thunk"); 7401 } 7402 7403 bool IsLeafFunction = true; 7404 7405 // Is there a call in the outlined range? 7406 auto IsNonTailCall = [](const MachineInstr &MI) { 7407 return MI.isCall() && !MI.isReturn(); 7408 }; 7409 7410 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) { 7411 // Fix up the instructions in the range, since we're going to modify the 7412 // stack. 7413 7414 // Bugzilla ID: 46767 7415 // TODO: Check if fixing up twice is safe so we can outline these. 7416 assert(OF.FrameConstructionID != MachineOutlinerDefault && 7417 "Can only fix up stack references once"); 7418 fixupPostOutline(MBB); 7419 7420 IsLeafFunction = false; 7421 7422 // LR has to be a live in so that we can save it. 7423 if (!MBB.isLiveIn(AArch64::LR)) 7424 MBB.addLiveIn(AArch64::LR); 7425 7426 MachineBasicBlock::iterator It = MBB.begin(); 7427 MachineBasicBlock::iterator Et = MBB.end(); 7428 7429 if (OF.FrameConstructionID == MachineOutlinerTailCall || 7430 OF.FrameConstructionID == MachineOutlinerThunk) 7431 Et = std::prev(MBB.end()); 7432 7433 // Insert a save before the outlined region 7434 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) 7435 .addReg(AArch64::SP, RegState::Define) 7436 .addReg(AArch64::LR) 7437 .addReg(AArch64::SP) 7438 .addImm(-16); 7439 It = MBB.insert(It, STRXpre); 7440 7441 const TargetSubtargetInfo &STI = MF.getSubtarget(); 7442 const MCRegisterInfo *MRI = STI.getRegisterInfo(); 7443 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true); 7444 7445 // Add a CFI saying the stack was moved 16 B down. 7446 int64_t StackPosEntry = 7447 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16)); 7448 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) 7449 .addCFIIndex(StackPosEntry) 7450 .setMIFlags(MachineInstr::FrameSetup); 7451 7452 // Add a CFI saying that the LR that we want to find is now 16 B higher than 7453 // before. 7454 int64_t LRPosEntry = 7455 MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16)); 7456 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) 7457 .addCFIIndex(LRPosEntry) 7458 .setMIFlags(MachineInstr::FrameSetup); 7459 7460 // Insert a restore before the terminator for the function. 7461 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) 7462 .addReg(AArch64::SP, RegState::Define) 7463 .addReg(AArch64::LR, RegState::Define) 7464 .addReg(AArch64::SP) 7465 .addImm(16); 7466 Et = MBB.insert(Et, LDRXpost); 7467 } 7468 7469 // If a bunch of candidates reach this point they must agree on their return 7470 // address signing. It is therefore enough to just consider the signing 7471 // behaviour of one of them 7472 const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>(); 7473 bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction); 7474 7475 // a_key is the default 7476 bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey(); 7477 7478 // If this is a tail call outlined function, then there's already a return. 
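  // With the default a_key this produces a PACIASP/AUTIASP (or, with PAuth,
  // PACIA/RETAA) pair; with b_key an EMITBKEY is emitted first and the B-key
  // equivalents are used, as built in signOutlinedFunction above.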
7479 if (OF.FrameConstructionID == MachineOutlinerTailCall || 7480 OF.FrameConstructionID == MachineOutlinerThunk) { 7481 signOutlinedFunction(MF, MBB, ShouldSignReturnAddr, 7482 ShouldSignReturnAddrWithAKey); 7483 return; 7484 } 7485 7486 // It's not a tail call, so we have to insert the return ourselves. 7487 7488 // LR has to be a live in so that we can return to it. 7489 if (!MBB.isLiveIn(AArch64::LR)) 7490 MBB.addLiveIn(AArch64::LR); 7491 7492 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET)) 7493 .addReg(AArch64::LR); 7494 MBB.insert(MBB.end(), ret); 7495 7496 signOutlinedFunction(MF, MBB, ShouldSignReturnAddr, 7497 ShouldSignReturnAddrWithAKey); 7498 7499 FI->setOutliningStyle("Function"); 7500 7501 // Did we have to modify the stack by saving the link register? 7502 if (OF.FrameConstructionID != MachineOutlinerDefault) 7503 return; 7504 7505 // We modified the stack. 7506 // Walk over the basic block and fix up all the stack accesses. 7507 fixupPostOutline(MBB); 7508 } 7509 7510 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( 7511 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, 7512 MachineFunction &MF, const outliner::Candidate &C) const { 7513 7514 // Are we tail calling? 7515 if (C.CallConstructionID == MachineOutlinerTailCall) { 7516 // If yes, then we can just branch to the label. 7517 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi)) 7518 .addGlobalAddress(M.getNamedValue(MF.getName())) 7519 .addImm(0)); 7520 return It; 7521 } 7522 7523 // Are we saving the link register? 7524 if (C.CallConstructionID == MachineOutlinerNoLRSave || 7525 C.CallConstructionID == MachineOutlinerThunk) { 7526 // No, so just insert the call. 7527 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) 7528 .addGlobalAddress(M.getNamedValue(MF.getName()))); 7529 return It; 7530 } 7531 7532 // We want to return the spot where we inserted the call. 7533 MachineBasicBlock::iterator CallPt; 7534 7535 // Instructions for saving and restoring LR around the call instruction we're 7536 // going to insert. 7537 MachineInstr *Save; 7538 MachineInstr *Restore; 7539 // Can we save to a register? 7540 if (C.CallConstructionID == MachineOutlinerRegSave) { 7541 // FIXME: This logic should be sunk into a target-specific interface so that 7542 // we don't have to recompute the register. 7543 unsigned Reg = findRegisterToSaveLRTo(C); 7544 assert(Reg != 0 && "No callee-saved register available?"); 7545 7546 // LR has to be a live in so that we can save it. 7547 if (!MBB.isLiveIn(AArch64::LR)) 7548 MBB.addLiveIn(AArch64::LR); 7549 7550 // Save and restore LR from Reg. 7551 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg) 7552 .addReg(AArch64::XZR) 7553 .addReg(AArch64::LR) 7554 .addImm(0); 7555 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR) 7556 .addReg(AArch64::XZR) 7557 .addReg(Reg) 7558 .addImm(0); 7559 } else { 7560 // We have the default case. Save and restore from SP. 7561 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) 7562 .addReg(AArch64::SP, RegState::Define) 7563 .addReg(AArch64::LR) 7564 .addReg(AArch64::SP) 7565 .addImm(-16); 7566 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) 7567 .addReg(AArch64::SP, RegState::Define) 7568 .addReg(AArch64::LR, RegState::Define) 7569 .addReg(AArch64::SP) 7570 .addImm(16); 7571 } 7572 7573 It = MBB.insert(It, Save); 7574 It++; 7575 7576 // Insert the call. 
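  // At this point the call site looks like, for example,
  //
  //   RegSave:                     Default:
  //     mov x20, x30                 str x30, [sp, #-16]!
  //     bl  OUTLINED_FUNCTION_N      bl  OUTLINED_FUNCTION_N
  //     mov x30, x20                 ldr x30, [sp], #16
  //
  // where x20 stands for whatever register findRegisterToSaveLRTo picked.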
7577 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) 7578 .addGlobalAddress(M.getNamedValue(MF.getName()))); 7579 CallPt = It; 7580 It++; 7581 7582 It = MBB.insert(It, Restore); 7583 return CallPt; 7584 } 7585 7586 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault( 7587 MachineFunction &MF) const { 7588 return MF.getFunction().hasMinSize(); 7589 } 7590 7591 Optional<DestSourcePair> 7592 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { 7593 7594 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg 7595 // and zero immediate operands used as an alias for mov instruction. 7596 if (MI.getOpcode() == AArch64::ORRWrs && 7597 MI.getOperand(1).getReg() == AArch64::WZR && 7598 MI.getOperand(3).getImm() == 0x0) { 7599 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)}; 7600 } 7601 7602 if (MI.getOpcode() == AArch64::ORRXrs && 7603 MI.getOperand(1).getReg() == AArch64::XZR && 7604 MI.getOperand(3).getImm() == 0x0) { 7605 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)}; 7606 } 7607 7608 return None; 7609 } 7610 7611 Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, 7612 Register Reg) const { 7613 int Sign = 1; 7614 int64_t Offset = 0; 7615 7616 // TODO: Handle cases where Reg is a super- or sub-register of the 7617 // destination register. 7618 const MachineOperand &Op0 = MI.getOperand(0); 7619 if (!Op0.isReg() || Reg != Op0.getReg()) 7620 return None; 7621 7622 switch (MI.getOpcode()) { 7623 default: 7624 return None; 7625 case AArch64::SUBWri: 7626 case AArch64::SUBXri: 7627 case AArch64::SUBSWri: 7628 case AArch64::SUBSXri: 7629 Sign *= -1; 7630 LLVM_FALLTHROUGH; 7631 case AArch64::ADDSWri: 7632 case AArch64::ADDSXri: 7633 case AArch64::ADDWri: 7634 case AArch64::ADDXri: { 7635 // TODO: Third operand can be global address (usually some string). 7636 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() || 7637 !MI.getOperand(2).isImm()) 7638 return None; 7639 int Shift = MI.getOperand(3).getImm(); 7640 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12"); 7641 Offset = Sign * (MI.getOperand(2).getImm() << Shift); 7642 } 7643 } 7644 return RegImmPair{MI.getOperand(1).getReg(), Offset}; 7645 } 7646 7647 /// If the given ORR instruction is a copy, and \p DescribedReg overlaps with 7648 /// the destination register then, if possible, describe the value in terms of 7649 /// the source register. 7650 static Optional<ParamLoadedValue> 7651 describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, 7652 const TargetInstrInfo *TII, 7653 const TargetRegisterInfo *TRI) { 7654 auto DestSrc = TII->isCopyInstr(MI); 7655 if (!DestSrc) 7656 return None; 7657 7658 Register DestReg = DestSrc->Destination->getReg(); 7659 Register SrcReg = DestSrc->Source->getReg(); 7660 7661 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {}); 7662 7663 // If the described register is the destination, just return the source. 7664 if (DestReg == DescribedReg) 7665 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); 7666 7667 // ORRWrs zero-extends to 64-bits, so we need to consider such cases. 7668 if (MI.getOpcode() == AArch64::ORRWrs && 7669 TRI->isSuperRegister(DestReg, DescribedReg)) 7670 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); 7671 7672 // We may need to describe the lower part of a ORRXrs move. 
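  // e.g. for "mov x1, x0" (ORRXrs x1, xzr, x0, #0) a query about w1 is
  // answered with w0, the sub_32 piece of the source register.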
7673 if (MI.getOpcode() == AArch64::ORRXrs && 7674 TRI->isSubRegister(DestReg, DescribedReg)) { 7675 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32); 7676 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr); 7677 } 7678 7679 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) && 7680 "Unhandled ORR[XW]rs copy case"); 7681 7682 return None; 7683 } 7684 7685 Optional<ParamLoadedValue> 7686 AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI, 7687 Register Reg) const { 7688 const MachineFunction *MF = MI.getMF(); 7689 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 7690 switch (MI.getOpcode()) { 7691 case AArch64::MOVZWi: 7692 case AArch64::MOVZXi: { 7693 // MOVZWi may be used for producing zero-extended 32-bit immediates in 7694 // 64-bit parameters, so we need to consider super-registers. 7695 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg)) 7696 return None; 7697 7698 if (!MI.getOperand(1).isImm()) 7699 return None; 7700 int64_t Immediate = MI.getOperand(1).getImm(); 7701 int Shift = MI.getOperand(2).getImm(); 7702 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift), 7703 nullptr); 7704 } 7705 case AArch64::ORRWrs: 7706 case AArch64::ORRXrs: 7707 return describeORRLoadedValue(MI, Reg, this, TRI); 7708 } 7709 7710 return TargetInstrInfo::describeLoadedValue(MI, Reg); 7711 } 7712 7713 bool AArch64InstrInfo::isExtendLikelyToBeFolded( 7714 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const { 7715 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT || 7716 ExtMI.getOpcode() == TargetOpcode::G_ZEXT || 7717 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT); 7718 7719 // Anyexts are nops. 7720 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT) 7721 return true; 7722 7723 Register DefReg = ExtMI.getOperand(0).getReg(); 7724 if (!MRI.hasOneNonDBGUse(DefReg)) 7725 return false; 7726 7727 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an 7728 // addressing mode. 7729 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg); 7730 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD; 7731 } 7732 7733 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const { 7734 return get(Opc).TSFlags & AArch64::ElementSizeMask; 7735 } 7736 7737 bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const { 7738 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike; 7739 } 7740 7741 bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const { 7742 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile; 7743 } 7744 7745 unsigned int 7746 AArch64InstrInfo::getTailDuplicateSize(CodeGenOpt::Level OptLevel) const { 7747 return OptLevel >= CodeGenOpt::Aggressive ? 6 : 2; 7748 } 7749 7750 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) { 7751 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr()) 7752 return AArch64::BLRNoIP; 7753 else 7754 return AArch64::BLR; 7755 } 7756 7757 #define GET_INSTRINFO_HELPERS 7758 #define GET_INSTRMAP_INFO 7759 #include "AArch64GenInstrInfo.inc" 7760
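// The following is an illustrative sketch only and is deliberately kept out
// of the build with "#if 0": it shows, for a hypothetical debugging helper
// that is not part of LLVM, how the copy and add-immediate hooks implemented
// above could be queried. The helper name and output format are made up for
// the example.
#if 0
#include "llvm/Support/Debug.h"

static void dumpCopyAndAddInfo(const AArch64InstrInfo &TII,
                               const MachineInstr &MI) {
  // "mov x1, x0" (ORRXrs x1, xzr, x0, #0) is reported as a copy of x0 into
  // x1 by the isCopyInstrImpl override above (reached via isCopyInstr).
  if (Optional<DestSourcePair> DestSrc = TII.isCopyInstr(MI))
    dbgs() << "copy: " << *DestSrc->Destination << " <- " << *DestSrc->Source
           << '\n';

  // "add x1, x2, #8" queried for x1 yields {x2, +8}; the shifted form
  // "add x1, x2, #1, lsl #12" yields {x2, +4096}; "sub x1, x2, #8" yields
  // {x2, -8}, matching isAddImmediate above.
  if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg()) {
    if (Optional<RegImmPair> RegImm =
            TII.isAddImmediate(MI, MI.getOperand(0).getReg()))
      dbgs() << "add-imm: base " << printReg(RegImm->Reg) << " offset "
             << RegImm->Offset << '\n';
  }
}
#endif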