//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                          AArch64::CATCHRET),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  {
    auto Op = MI.getOpcode();
    if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
      return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
  }

  // Meta-instructions emit no code.
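  // (e.g. DBG_VALUE, IMPLICIT_DEF, KILL, CFI_INSTRUCTION and similar
  // directives that have no encoding).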
  if (MI.isMetaInstruction())
    return 0;

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  // before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes
    NumBytes = 16;
    break;
  case AArch64::SpeculationBarrierISBDSBEndBB:
    // This gets lowered to 2 4-byte instructions.
    NumBytes = 8;
    break;
  case AArch64::SpeculationBarrierSBEndBB:
    // This gets lowered to 1 4-byte instruction.
    NumBytes = 4;
    break;
  case AArch64::JumpTableDest32:
  case AArch64::JumpTableDest16:
  case AArch64::JumpTableDest8:
    NumBytes = 12;
    break;
  case AArch64::SPACE:
    NumBytes = MI.getOperand(1).getImm();
    break;
  case TargetOpcode::BUNDLE:
    NumBytes = getInstBundleLength(MI);
    break;
  }

  return NumBytes;
}

unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
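  // The Cond vector is encoded as follows (consumed again by insertBranch and
  // insertSelect):
  //   Bcc:       Cond = { cc }
  //   CB(N)Z:    Cond = { -1, opcode, reg }
  //   TB(N)Z:    Cond = { -1, opcode, reg, bit }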
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  // Skip over SpeculationBarrierEndBB terminators
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
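  // Erase the trailing branch first; a conditional branch preceding it, if
  // any, is removed below as well.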
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (Register::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!Register::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
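    // Only fold when the first source operand really is the zero register
    // (looking through full copies).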
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       Register DstReg, Register TrueReg,
                                       Register FalseReg, int &CondCycles,
                                       int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Also need to check the dest regclass, in case we're trying to optimize
  // something like:
  //   %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
  if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, Register DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    Register TrueReg, Register FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
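    // For example, a cbz on a W register becomes 'subs wzr, reg, #0' and the
    // select below then uses the EQ condition.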
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    Register SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
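  // DstReg = CC ? TrueReg : FalseReg, using either the plain csel/fcsel or the
  // folded csinc/csinv/csneg opcode chosen above.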
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  const unsigned Opcode = MI.getOpcode();

  // Firstly, check cases gated by features.

  if (Subtarget.hasZeroCycleZeroingFP()) {
    if (Opcode == AArch64::FMOVH0 ||
        Opcode == AArch64::FMOVS0 ||
        Opcode == AArch64::FMOVD0)
      return true;
  }

  if (Subtarget.hasZeroCycleZeroingGP()) {
    if (Opcode == TargetOpcode::COPY &&
        (MI.getOperand(1).getReg() == AArch64::WZR ||
         MI.getOperand(1).getReg() == AArch64::XZR))
      return true;
  }

  // Secondly, check cases specific to sub-targets.

  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosCheapAsMove(MI))
      return true;

    return MI.isAsCheapAsAMove();
  }

  // Finally, check generic cases.

  switch (Opcode) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  case AArch64::SEH_StackAlloc:
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveFPLR_X:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveReg_X:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveRegP_X:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveFReg_X:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFRegP_X:
  case AArch64::SEH_SetFP:
  case AArch64::SEH_AddFP:
  case AArch64::SEH_Nop:
  case AArch64::SEH_PrologEnd:
  case AArch64::SEH_EpilogStart:
  case AArch64::SEH_EpilogEnd:
    return true;
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             Register &SrcReg, Register &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;
  bool OffsetAIsScalable = false, OffsetBIsScalable = false;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base, offset from the base and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // the bases are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  // If OffsetAIsScalable and OffsetBIsScalable are both true, they
  // are assumed to have the same scale (vscale).
  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
                                   WidthA, TRI) &&
      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
                                   WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(*BaseOpB) &&
        OffsetAIsScalable == OffsetBIsScalable) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
    return true;
  switch (MI.getOpcode()) {
  case AArch64::HINT:
    // CSDB hints are scheduling barriers.
    if (MI.getOperand(0).getImm() == 0x14)
      return true;
    break;
  case AArch64::DSB:
  case AArch64::ISB:
    // DSB and ISB also are scheduling barriers.
    return true;
  default:;
  }
  return isSEHInstruction(MI);
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                      Register &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: In order to convert CmpValue to 0 or 1
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return val type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk
    // CmpValue is only used to compare with zero in OptimizeCompareInstr
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    Register Reg = MO.getReg();
    if (Register::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
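/// For example, SUBSWri maps to SUBWri and ADDSXrr maps to ADDXrr. For the
/// immediate and shifted-register forms, an instruction whose destination is
/// WZR/XZR keeps its flag-setting opcode, since dropping the S would turn the
/// zero-register encoding into SP.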
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed the condition
/// flags are accessed on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting at \p To until we hit \p From.
  for (const MachineInstr &Instr :
       instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare instruction
/// only when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare
  // function.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
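/// Conservative: only the successors' live-in lists are consulted.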
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr
///        or if MI opcode is not the S form there must be neither defs of flags
///        nor uses of flags between MI and CmpInstr.
/// - and  C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (const MachineInstr &Instr :
       instructionsWithoutDebug(std::next(CmpInstr->getIterator()),
                                CmpInstr->getParent()->instr_end())) {
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
      MI.getOpcode() != AArch64::CATCHRET)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
  auto TRI = Subtarget.getRegisterInfo();
  DebugLoc DL = MI.getDebugLoc();

  if (MI.getOpcode() == AArch64::CATCHRET) {
    // Skip to the first instruction before the epilog.
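    // That is, walk backwards over the FrameDestroy-flagged instructions so
    // that the ADRP/ADD pair built below is placed ahead of the epilogue.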
    const TargetInstrInfo *TII =
        MBB.getParent()->getSubtarget().getInstrInfo();
    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
    auto MBBI = MachineBasicBlock::iterator(MI);
    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
           FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::prev(FirstEpilogSEH);
    if (FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::next(FirstEpilogSEH);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
        .addReg(AArch64::X0, RegState::Define)
        .addMBB(TargetMBB);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
        .addReg(AArch64::X0, RegState::Define)
        .addReg(AArch64::X0)
        .addMBB(TargetMBB)
        .addImm(0);
    return true;
  }

  Register Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
    if (Subtarget.isTargetILP32()) {
      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
          .addDef(Reg32, RegState::Dead)
          .addUse(Reg, RegState::Kill)
          .addImm(0)
          .addMemOperand(*MI.memoperands_begin())
          .addDef(Reg, RegState::Implicit);
    } else {
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(0)
          .addMemOperand(*MI.memoperands_begin());
    }
  } else if (TM.getCodeModel() == CodeModel::Large) {
    assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Tiny) {
    BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    if (Subtarget.isTargetILP32()) {
      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
          .addDef(Reg32, RegState::Dead)
          .addUse(Reg, RegState::Kill)
          .addGlobalAddress(GV, 0, LoFlags)
          .addMemOperand(*MI.memoperands_begin())
          .addDef(Reg, RegState::Implicit);
    } else {
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
          .addReg(Reg, RegState::Kill)
          .addGlobalAddress(GV, 0, LoFlags)
          .addMemOperand(*MI.memoperands_begin());
    }
  }

  MBB.erase(MI);

  return true;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    Register DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
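// (e.g. a plain COPY between FPR64/FPR128 registers, or an ORRv16i8 whose two
// source operands are the same register).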
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    Register DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::LDR_PXI:
  case AArch64::STR_PXI:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}

/// Check all MachineMemOperands for a hint that the load/store is strided.
1753 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) { 1754 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 1755 return MMO->getFlags() & MOStridedAccess; 1756 }); 1757 } 1758 1759 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) { 1760 switch (Opc) { 1761 default: 1762 return false; 1763 case AArch64::STURSi: 1764 case AArch64::STURDi: 1765 case AArch64::STURQi: 1766 case AArch64::STURBBi: 1767 case AArch64::STURHHi: 1768 case AArch64::STURWi: 1769 case AArch64::STURXi: 1770 case AArch64::LDURSi: 1771 case AArch64::LDURDi: 1772 case AArch64::LDURQi: 1773 case AArch64::LDURWi: 1774 case AArch64::LDURXi: 1775 case AArch64::LDURSWi: 1776 case AArch64::LDURHHi: 1777 case AArch64::LDURBBi: 1778 case AArch64::LDURSBWi: 1779 case AArch64::LDURSHWi: 1780 return true; 1781 } 1782 } 1783 1784 Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) { 1785 switch (Opc) { 1786 default: return {}; 1787 case AArch64::PRFMui: return AArch64::PRFUMi; 1788 case AArch64::LDRXui: return AArch64::LDURXi; 1789 case AArch64::LDRWui: return AArch64::LDURWi; 1790 case AArch64::LDRBui: return AArch64::LDURBi; 1791 case AArch64::LDRHui: return AArch64::LDURHi; 1792 case AArch64::LDRSui: return AArch64::LDURSi; 1793 case AArch64::LDRDui: return AArch64::LDURDi; 1794 case AArch64::LDRQui: return AArch64::LDURQi; 1795 case AArch64::LDRBBui: return AArch64::LDURBBi; 1796 case AArch64::LDRHHui: return AArch64::LDURHHi; 1797 case AArch64::LDRSBXui: return AArch64::LDURSBXi; 1798 case AArch64::LDRSBWui: return AArch64::LDURSBWi; 1799 case AArch64::LDRSHXui: return AArch64::LDURSHXi; 1800 case AArch64::LDRSHWui: return AArch64::LDURSHWi; 1801 case AArch64::LDRSWui: return AArch64::LDURSWi; 1802 case AArch64::STRXui: return AArch64::STURXi; 1803 case AArch64::STRWui: return AArch64::STURWi; 1804 case AArch64::STRBui: return AArch64::STURBi; 1805 case AArch64::STRHui: return AArch64::STURHi; 1806 case AArch64::STRSui: return AArch64::STURSi; 1807 case AArch64::STRDui: return AArch64::STURDi; 1808 case AArch64::STRQui: return AArch64::STURQi; 1809 case AArch64::STRBBui: return AArch64::STURBBi; 1810 case AArch64::STRHHui: return AArch64::STURHHi; 1811 } 1812 } 1813 1814 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { 1815 switch (Opc) { 1816 default: 1817 return 2; 1818 case AArch64::LDPXi: 1819 case AArch64::LDPDi: 1820 case AArch64::STPXi: 1821 case AArch64::STPDi: 1822 case AArch64::LDNPXi: 1823 case AArch64::LDNPDi: 1824 case AArch64::STNPXi: 1825 case AArch64::STNPDi: 1826 case AArch64::LDPQi: 1827 case AArch64::STPQi: 1828 case AArch64::LDNPQi: 1829 case AArch64::STNPQi: 1830 case AArch64::LDPWi: 1831 case AArch64::LDPSi: 1832 case AArch64::STPWi: 1833 case AArch64::STPSi: 1834 case AArch64::LDNPWi: 1835 case AArch64::LDNPSi: 1836 case AArch64::STNPWi: 1837 case AArch64::STNPSi: 1838 case AArch64::LDG: 1839 case AArch64::STGPi: 1840 case AArch64::LD1B_IMM: 1841 case AArch64::LD1H_IMM: 1842 case AArch64::LD1W_IMM: 1843 case AArch64::LD1D_IMM: 1844 case AArch64::ST1B_IMM: 1845 case AArch64::ST1H_IMM: 1846 case AArch64::ST1W_IMM: 1847 case AArch64::ST1D_IMM: 1848 case AArch64::LD1B_H_IMM: 1849 case AArch64::LD1SB_H_IMM: 1850 case AArch64::LD1H_S_IMM: 1851 case AArch64::LD1SH_S_IMM: 1852 case AArch64::LD1W_D_IMM: 1853 case AArch64::LD1SW_D_IMM: 1854 case AArch64::ST1B_H_IMM: 1855 case AArch64::ST1H_S_IMM: 1856 case AArch64::ST1W_D_IMM: 1857 case AArch64::LD1B_S_IMM: 1858 case AArch64::LD1SB_S_IMM: 1859 case AArch64::LD1H_D_IMM: 1860 case AArch64::LD1SH_D_IMM: 
1861 case AArch64::ST1B_S_IMM: 1862 case AArch64::ST1H_D_IMM: 1863 case AArch64::LD1B_D_IMM: 1864 case AArch64::LD1SB_D_IMM: 1865 case AArch64::ST1B_D_IMM: 1866 return 3; 1867 case AArch64::ADDG: 1868 case AArch64::STGOffset: 1869 case AArch64::LDR_PXI: 1870 case AArch64::STR_PXI: 1871 return 2; 1872 } 1873 } 1874 1875 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) { 1876 switch (MI.getOpcode()) { 1877 default: 1878 return false; 1879 // Scaled instructions. 1880 case AArch64::STRSui: 1881 case AArch64::STRDui: 1882 case AArch64::STRQui: 1883 case AArch64::STRXui: 1884 case AArch64::STRWui: 1885 case AArch64::LDRSui: 1886 case AArch64::LDRDui: 1887 case AArch64::LDRQui: 1888 case AArch64::LDRXui: 1889 case AArch64::LDRWui: 1890 case AArch64::LDRSWui: 1891 // Unscaled instructions. 1892 case AArch64::STURSi: 1893 case AArch64::STURDi: 1894 case AArch64::STURQi: 1895 case AArch64::STURWi: 1896 case AArch64::STURXi: 1897 case AArch64::LDURSi: 1898 case AArch64::LDURDi: 1899 case AArch64::LDURQi: 1900 case AArch64::LDURWi: 1901 case AArch64::LDURXi: 1902 case AArch64::LDURSWi: 1903 return true; 1904 } 1905 } 1906 1907 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc, 1908 bool &Is64Bit) { 1909 switch (Opc) { 1910 default: 1911 llvm_unreachable("Opcode has no flag setting equivalent!"); 1912 // 32-bit cases: 1913 case AArch64::ADDWri: 1914 Is64Bit = false; 1915 return AArch64::ADDSWri; 1916 case AArch64::ADDWrr: 1917 Is64Bit = false; 1918 return AArch64::ADDSWrr; 1919 case AArch64::ADDWrs: 1920 Is64Bit = false; 1921 return AArch64::ADDSWrs; 1922 case AArch64::ADDWrx: 1923 Is64Bit = false; 1924 return AArch64::ADDSWrx; 1925 case AArch64::ANDWri: 1926 Is64Bit = false; 1927 return AArch64::ANDSWri; 1928 case AArch64::ANDWrr: 1929 Is64Bit = false; 1930 return AArch64::ANDSWrr; 1931 case AArch64::ANDWrs: 1932 Is64Bit = false; 1933 return AArch64::ANDSWrs; 1934 case AArch64::BICWrr: 1935 Is64Bit = false; 1936 return AArch64::BICSWrr; 1937 case AArch64::BICWrs: 1938 Is64Bit = false; 1939 return AArch64::BICSWrs; 1940 case AArch64::SUBWri: 1941 Is64Bit = false; 1942 return AArch64::SUBSWri; 1943 case AArch64::SUBWrr: 1944 Is64Bit = false; 1945 return AArch64::SUBSWrr; 1946 case AArch64::SUBWrs: 1947 Is64Bit = false; 1948 return AArch64::SUBSWrs; 1949 case AArch64::SUBWrx: 1950 Is64Bit = false; 1951 return AArch64::SUBSWrx; 1952 // 64-bit cases: 1953 case AArch64::ADDXri: 1954 Is64Bit = true; 1955 return AArch64::ADDSXri; 1956 case AArch64::ADDXrr: 1957 Is64Bit = true; 1958 return AArch64::ADDSXrr; 1959 case AArch64::ADDXrs: 1960 Is64Bit = true; 1961 return AArch64::ADDSXrs; 1962 case AArch64::ADDXrx: 1963 Is64Bit = true; 1964 return AArch64::ADDSXrx; 1965 case AArch64::ANDXri: 1966 Is64Bit = true; 1967 return AArch64::ANDSXri; 1968 case AArch64::ANDXrr: 1969 Is64Bit = true; 1970 return AArch64::ANDSXrr; 1971 case AArch64::ANDXrs: 1972 Is64Bit = true; 1973 return AArch64::ANDSXrs; 1974 case AArch64::BICXrr: 1975 Is64Bit = true; 1976 return AArch64::BICSXrr; 1977 case AArch64::BICXrs: 1978 Is64Bit = true; 1979 return AArch64::BICSXrs; 1980 case AArch64::SUBXri: 1981 Is64Bit = true; 1982 return AArch64::SUBSXri; 1983 case AArch64::SUBXrr: 1984 Is64Bit = true; 1985 return AArch64::SUBSXrr; 1986 case AArch64::SUBXrs: 1987 Is64Bit = true; 1988 return AArch64::SUBSXrs; 1989 case AArch64::SUBXrx: 1990 Is64Bit = true; 1991 return AArch64::SUBSXrx; 1992 } 1993 } 1994 1995 // Is this a candidate for ld/st merging or pairing? 
For example, we don't 1996 // touch volatiles or load/stores that have a hint to avoid pair formation. 1997 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { 1998 // If this is a volatile load/store, don't mess with it. 1999 if (MI.hasOrderedMemoryRef()) 2000 return false; 2001 2002 // Make sure this is a reg/fi+imm (as opposed to an address reloc). 2003 assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) && 2004 "Expected a reg or frame index operand."); 2005 if (!MI.getOperand(2).isImm()) 2006 return false; 2007 2008 // Can't merge/pair if the instruction modifies the base register. 2009 // e.g., ldr x0, [x0] 2010 // This case will never occur with an FI base. 2011 if (MI.getOperand(1).isReg()) { 2012 Register BaseReg = MI.getOperand(1).getReg(); 2013 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2014 if (MI.modifiesRegister(BaseReg, TRI)) 2015 return false; 2016 } 2017 2018 // Check if this load/store has a hint to avoid pair formation. 2019 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. 2020 if (isLdStPairSuppressed(MI)) 2021 return false; 2022 2023 // Do not pair any callee-save store/reload instructions in the 2024 // prologue/epilogue if the CFI information encoded the operations as separate 2025 // instructions, as that will cause the size of the actual prologue to mismatch 2026 // with the prologue size recorded in the Windows CFI. 2027 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo(); 2028 bool NeedsWinCFI = MAI->usesWindowsCFI() && 2029 MI.getMF()->getFunction().needsUnwindTableEntry(); 2030 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) || 2031 MI.getFlag(MachineInstr::FrameDestroy))) 2032 return false; 2033 2034 // On some CPUs quad load/store pairs are slower than two single load/stores. 2035 if (Subtarget.isPaired128Slow()) { 2036 switch (MI.getOpcode()) { 2037 default: 2038 break; 2039 case AArch64::LDURQi: 2040 case AArch64::STURQi: 2041 case AArch64::LDRQui: 2042 case AArch64::STRQui: 2043 return false; 2044 } 2045 } 2046 2047 return true; 2048 } 2049 2050 bool AArch64InstrInfo::getMemOperandsWithOffsetWidth( 2051 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, 2052 int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, 2053 const TargetRegisterInfo *TRI) const { 2054 if (!LdSt.mayLoadOrStore()) 2055 return false; 2056 2057 const MachineOperand *BaseOp; 2058 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable, 2059 Width, TRI)) 2060 return false; 2061 BaseOps.push_back(BaseOp); 2062 return true; 2063 } 2064 2065 bool AArch64InstrInfo::getMemOperandWithOffsetWidth( 2066 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, 2067 bool &OffsetIsScalable, unsigned &Width, 2068 const TargetRegisterInfo *TRI) const { 2069 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 2070 // Handle only loads/stores with base register followed by immediate offset. 2071 if (LdSt.getNumExplicitOperands() == 3) { 2072 // Non-paired instruction (e.g., ldr x1, [x0, #8]). 2073 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || 2074 !LdSt.getOperand(2).isImm()) 2075 return false; 2076 } else if (LdSt.getNumExplicitOperands() == 4) { 2077 // Paired instruction (e.g., ldp x1, x2, [x0, #8]). 
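    // (In the paired form, operands 0 and 1 are the two transfer registers;
    // the base is operand 2 and the immediate offset operand 3, which is what
    // the checks below and the offset computation later in this function use.)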
2078 if (!LdSt.getOperand(1).isReg() || 2079 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) || 2080 !LdSt.getOperand(3).isImm()) 2081 return false; 2082 } else 2083 return false; 2084 2085 // Get the scaling factor for the instruction and set the width for the 2086 // instruction. 2087 TypeSize Scale(0U, false); 2088 int64_t Dummy1, Dummy2; 2089 2090 // If this returns false, then it's an instruction we don't want to handle. 2091 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2)) 2092 return false; 2093 2094 // Compute the offset. Offset is calculated as the immediate operand 2095 // multiplied by the scaling factor. Unscaled instructions have scaling factor 2096 // set to 1. 2097 if (LdSt.getNumExplicitOperands() == 3) { 2098 BaseOp = &LdSt.getOperand(1); 2099 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize(); 2100 } else { 2101 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); 2102 BaseOp = &LdSt.getOperand(2); 2103 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize(); 2104 } 2105 OffsetIsScalable = Scale.isScalable(); 2106 2107 if (!BaseOp->isReg() && !BaseOp->isFI()) 2108 return false; 2109 2110 return true; 2111 } 2112 2113 MachineOperand & 2114 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const { 2115 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 2116 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1); 2117 assert(OfsOp.isImm() && "Offset operand wasn't immediate."); 2118 return OfsOp; 2119 } 2120 2121 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, 2122 unsigned &Width, int64_t &MinOffset, 2123 int64_t &MaxOffset) { 2124 const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8; 2125 switch (Opcode) { 2126 // Not a memory operation or something we want to handle. 
2127 default: 2128 Scale = TypeSize::Fixed(0); 2129 Width = 0; 2130 MinOffset = MaxOffset = 0; 2131 return false; 2132 case AArch64::STRWpost: 2133 case AArch64::LDRWpost: 2134 Width = 32; 2135 Scale = TypeSize::Fixed(4); 2136 MinOffset = -256; 2137 MaxOffset = 255; 2138 break; 2139 case AArch64::LDURQi: 2140 case AArch64::STURQi: 2141 Width = 16; 2142 Scale = TypeSize::Fixed(1); 2143 MinOffset = -256; 2144 MaxOffset = 255; 2145 break; 2146 case AArch64::PRFUMi: 2147 case AArch64::LDURXi: 2148 case AArch64::LDURDi: 2149 case AArch64::STURXi: 2150 case AArch64::STURDi: 2151 Width = 8; 2152 Scale = TypeSize::Fixed(1); 2153 MinOffset = -256; 2154 MaxOffset = 255; 2155 break; 2156 case AArch64::LDURWi: 2157 case AArch64::LDURSi: 2158 case AArch64::LDURSWi: 2159 case AArch64::STURWi: 2160 case AArch64::STURSi: 2161 Width = 4; 2162 Scale = TypeSize::Fixed(1); 2163 MinOffset = -256; 2164 MaxOffset = 255; 2165 break; 2166 case AArch64::LDURHi: 2167 case AArch64::LDURHHi: 2168 case AArch64::LDURSHXi: 2169 case AArch64::LDURSHWi: 2170 case AArch64::STURHi: 2171 case AArch64::STURHHi: 2172 Width = 2; 2173 Scale = TypeSize::Fixed(1); 2174 MinOffset = -256; 2175 MaxOffset = 255; 2176 break; 2177 case AArch64::LDURBi: 2178 case AArch64::LDURBBi: 2179 case AArch64::LDURSBXi: 2180 case AArch64::LDURSBWi: 2181 case AArch64::STURBi: 2182 case AArch64::STURBBi: 2183 Width = 1; 2184 Scale = TypeSize::Fixed(1); 2185 MinOffset = -256; 2186 MaxOffset = 255; 2187 break; 2188 case AArch64::LDPQi: 2189 case AArch64::LDNPQi: 2190 case AArch64::STPQi: 2191 case AArch64::STNPQi: 2192 Scale = TypeSize::Fixed(16); 2193 Width = 32; 2194 MinOffset = -64; 2195 MaxOffset = 63; 2196 break; 2197 case AArch64::LDRQui: 2198 case AArch64::STRQui: 2199 Scale = TypeSize::Fixed(16); 2200 Width = 16; 2201 MinOffset = 0; 2202 MaxOffset = 4095; 2203 break; 2204 case AArch64::LDPXi: 2205 case AArch64::LDPDi: 2206 case AArch64::LDNPXi: 2207 case AArch64::LDNPDi: 2208 case AArch64::STPXi: 2209 case AArch64::STPDi: 2210 case AArch64::STNPXi: 2211 case AArch64::STNPDi: 2212 Scale = TypeSize::Fixed(8); 2213 Width = 16; 2214 MinOffset = -64; 2215 MaxOffset = 63; 2216 break; 2217 case AArch64::PRFMui: 2218 case AArch64::LDRXui: 2219 case AArch64::LDRDui: 2220 case AArch64::STRXui: 2221 case AArch64::STRDui: 2222 Scale = TypeSize::Fixed(8); 2223 Width = 8; 2224 MinOffset = 0; 2225 MaxOffset = 4095; 2226 break; 2227 case AArch64::LDPWi: 2228 case AArch64::LDPSi: 2229 case AArch64::LDNPWi: 2230 case AArch64::LDNPSi: 2231 case AArch64::STPWi: 2232 case AArch64::STPSi: 2233 case AArch64::STNPWi: 2234 case AArch64::STNPSi: 2235 Scale = TypeSize::Fixed(4); 2236 Width = 8; 2237 MinOffset = -64; 2238 MaxOffset = 63; 2239 break; 2240 case AArch64::LDRWui: 2241 case AArch64::LDRSui: 2242 case AArch64::LDRSWui: 2243 case AArch64::STRWui: 2244 case AArch64::STRSui: 2245 Scale = TypeSize::Fixed(4); 2246 Width = 4; 2247 MinOffset = 0; 2248 MaxOffset = 4095; 2249 break; 2250 case AArch64::LDRHui: 2251 case AArch64::LDRHHui: 2252 case AArch64::LDRSHWui: 2253 case AArch64::LDRSHXui: 2254 case AArch64::STRHui: 2255 case AArch64::STRHHui: 2256 Scale = TypeSize::Fixed(2); 2257 Width = 2; 2258 MinOffset = 0; 2259 MaxOffset = 4095; 2260 break; 2261 case AArch64::LDRBui: 2262 case AArch64::LDRBBui: 2263 case AArch64::LDRSBWui: 2264 case AArch64::LDRSBXui: 2265 case AArch64::STRBui: 2266 case AArch64::STRBBui: 2267 Scale = TypeSize::Fixed(1); 2268 Width = 1; 2269 MinOffset = 0; 2270 MaxOffset = 4095; 2271 break; 2272 case AArch64::ADDG: 2273 Scale = 
TypeSize::Fixed(16);
2274     Width = 0;
2275     MinOffset = 0;
2276     MaxOffset = 63;
2277     break;
2278   case AArch64::TAGPstack:
2279     Scale = TypeSize::Fixed(16);
2280     Width = 0;
2281     // TAGP with a negative offset turns into SUBP, which has a maximum offset
2282     // of 63 (not 64!).
2283     MinOffset = -63;
2284     MaxOffset = 63;
2285     break;
2286   case AArch64::LDG:
2287   case AArch64::STGOffset:
2288   case AArch64::STZGOffset:
2289     Scale = TypeSize::Fixed(16);
2290     Width = 16;
2291     MinOffset = -256;
2292     MaxOffset = 255;
2293     break;
2294   case AArch64::STR_ZZZZXI:
2295   case AArch64::LDR_ZZZZXI:
2296     Scale = TypeSize::Scalable(16);
2297     Width = SVEMaxBytesPerVector * 4;
2298     MinOffset = -256;
2299     MaxOffset = 252;
2300     break;
2301   case AArch64::STR_ZZZXI:
2302   case AArch64::LDR_ZZZXI:
2303     Scale = TypeSize::Scalable(16);
2304     Width = SVEMaxBytesPerVector * 3;
2305     MinOffset = -256;
2306     MaxOffset = 253;
2307     break;
2308   case AArch64::STR_ZZXI:
2309   case AArch64::LDR_ZZXI:
2310     Scale = TypeSize::Scalable(16);
2311     Width = SVEMaxBytesPerVector * 2;
2312     MinOffset = -256;
2313     MaxOffset = 254;
2314     break;
2315   case AArch64::LDR_PXI:
2316   case AArch64::STR_PXI:
2317     Scale = TypeSize::Scalable(2);
2318     Width = SVEMaxBytesPerVector / 8;
2319     MinOffset = -256;
2320     MaxOffset = 255;
2321     break;
2322   case AArch64::LDR_ZXI:
2323   case AArch64::STR_ZXI:
2324     Scale = TypeSize::Scalable(16);
2325     Width = SVEMaxBytesPerVector;
2326     MinOffset = -256;
2327     MaxOffset = 255;
2328     break;
2329   case AArch64::LD1B_IMM:
2330   case AArch64::LD1H_IMM:
2331   case AArch64::LD1W_IMM:
2332   case AArch64::LD1D_IMM:
2333   case AArch64::ST1B_IMM:
2334   case AArch64::ST1H_IMM:
2335   case AArch64::ST1W_IMM:
2336   case AArch64::ST1D_IMM:
2337     // A full vector's worth of data
2338     // Width = mbytes * elements
2339     Scale = TypeSize::Scalable(16);
2340     Width = SVEMaxBytesPerVector;
2341     MinOffset = -8;
2342     MaxOffset = 7;
2343     break;
2344   case AArch64::LD1B_H_IMM:
2345   case AArch64::LD1SB_H_IMM:
2346   case AArch64::LD1H_S_IMM:
2347   case AArch64::LD1SH_S_IMM:
2348   case AArch64::LD1W_D_IMM:
2349   case AArch64::LD1SW_D_IMM:
2350   case AArch64::ST1B_H_IMM:
2351   case AArch64::ST1H_S_IMM:
2352   case AArch64::ST1W_D_IMM:
2353     // A half vector's worth of data
2354     // Width = mbytes * elements
2355     Scale = TypeSize::Scalable(8);
2356     Width = SVEMaxBytesPerVector / 2;
2357     MinOffset = -8;
2358     MaxOffset = 7;
2359     break;
2360   case AArch64::LD1B_S_IMM:
2361   case AArch64::LD1SB_S_IMM:
2362   case AArch64::LD1H_D_IMM:
2363   case AArch64::LD1SH_D_IMM:
2364   case AArch64::ST1B_S_IMM:
2365   case AArch64::ST1H_D_IMM:
2366     // A quarter vector's worth of data
2367     // Width = mbytes * elements
2368     Scale = TypeSize::Scalable(4);
2369     Width = SVEMaxBytesPerVector / 4;
2370     MinOffset = -8;
2371     MaxOffset = 7;
2372     break;
2373   case AArch64::LD1B_D_IMM:
2374   case AArch64::LD1SB_D_IMM:
2375   case AArch64::ST1B_D_IMM:
2376     // An eighth vector's worth of data
2377     // Width = mbytes * elements
2378     Scale = TypeSize::Scalable(2);
2379     Width = SVEMaxBytesPerVector / 8;
2380     MinOffset = -8;
2381     MaxOffset = 7;
2382     break;
2383   case AArch64::ST2GOffset:
2384   case AArch64::STZ2GOffset:
2385     Scale = TypeSize::Fixed(16);
2386     Width = 32;
2387     MinOffset = -256;
2388     MaxOffset = 255;
2389     break;
2390   case AArch64::STGPi:
2391     Scale = TypeSize::Fixed(16);
2392     Width = 16;
2393     MinOffset = -64;
2394     MaxOffset = 63;
2395     break;
2396   }
2397
2398   return true;
2399 }
2400
2401 // Scaling factor (memory access size in bytes) for a load or store opcode.
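// For example, getMemScale(AArch64::LDURXi) == 8, so an unscaled byte offset
// of 16 on an LDURXi corresponds to an element offset of 2 once the access is
// rewritten in scaled (e.g. paired) form; see scaleOffset below.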
2402 int AArch64InstrInfo::getMemScale(unsigned Opc) { 2403 switch (Opc) { 2404 default: 2405 llvm_unreachable("Opcode has unknown scale!"); 2406 case AArch64::LDRBBui: 2407 case AArch64::LDURBBi: 2408 case AArch64::LDRSBWui: 2409 case AArch64::LDURSBWi: 2410 case AArch64::STRBBui: 2411 case AArch64::STURBBi: 2412 return 1; 2413 case AArch64::LDRHHui: 2414 case AArch64::LDURHHi: 2415 case AArch64::LDRSHWui: 2416 case AArch64::LDURSHWi: 2417 case AArch64::STRHHui: 2418 case AArch64::STURHHi: 2419 return 2; 2420 case AArch64::LDRSui: 2421 case AArch64::LDURSi: 2422 case AArch64::LDRSWui: 2423 case AArch64::LDURSWi: 2424 case AArch64::LDRWui: 2425 case AArch64::LDURWi: 2426 case AArch64::STRSui: 2427 case AArch64::STURSi: 2428 case AArch64::STRWui: 2429 case AArch64::STURWi: 2430 case AArch64::LDPSi: 2431 case AArch64::LDPSWi: 2432 case AArch64::LDPWi: 2433 case AArch64::STPSi: 2434 case AArch64::STPWi: 2435 return 4; 2436 case AArch64::LDRDui: 2437 case AArch64::LDURDi: 2438 case AArch64::LDRXui: 2439 case AArch64::LDURXi: 2440 case AArch64::STRDui: 2441 case AArch64::STURDi: 2442 case AArch64::STRXui: 2443 case AArch64::STURXi: 2444 case AArch64::LDPDi: 2445 case AArch64::LDPXi: 2446 case AArch64::STPDi: 2447 case AArch64::STPXi: 2448 return 8; 2449 case AArch64::LDRQui: 2450 case AArch64::LDURQi: 2451 case AArch64::STRQui: 2452 case AArch64::STURQi: 2453 case AArch64::LDPQi: 2454 case AArch64::STPQi: 2455 case AArch64::STGOffset: 2456 case AArch64::STZGOffset: 2457 case AArch64::ST2GOffset: 2458 case AArch64::STZ2GOffset: 2459 case AArch64::STGPi: 2460 return 16; 2461 } 2462 } 2463 2464 // Scale the unscaled offsets. Returns false if the unscaled offset can't be 2465 // scaled. 2466 static bool scaleOffset(unsigned Opc, int64_t &Offset) { 2467 int Scale = AArch64InstrInfo::getMemScale(Opc); 2468 2469 // If the byte-offset isn't a multiple of the stride, we can't scale this 2470 // offset. 2471 if (Offset % Scale != 0) 2472 return false; 2473 2474 // Convert the byte-offset used by unscaled into an "element" offset used 2475 // by the scaled pair load/store instructions. 2476 Offset /= Scale; 2477 return true; 2478 } 2479 2480 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { 2481 if (FirstOpc == SecondOpc) 2482 return true; 2483 // We can also pair sign-ext and zero-ext instructions. 2484 switch (FirstOpc) { 2485 default: 2486 return false; 2487 case AArch64::LDRWui: 2488 case AArch64::LDURWi: 2489 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; 2490 case AArch64::LDRSWui: 2491 case AArch64::LDURSWi: 2492 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi; 2493 } 2494 // These instructions can't be paired based on their opcodes. 2495 return false; 2496 } 2497 2498 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, 2499 int64_t Offset1, unsigned Opcode1, int FI2, 2500 int64_t Offset2, unsigned Opcode2) { 2501 // Accesses through fixed stack object frame indices may access a different 2502 // fixed stack slot. Check that the object offsets + offsets match. 2503 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) { 2504 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1); 2505 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2); 2506 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered."); 2507 // Convert to scaled object offsets. 
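    // For example, two 8-byte accesses to fixed objects at byte offsets -16
    // and -8 (each with an instruction offset of 0) become scaled object
    // offsets -2 and -1, which are adjacent and can therefore be clustered.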
2508 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1); 2509 if (ObjectOffset1 % Scale1 != 0) 2510 return false; 2511 ObjectOffset1 /= Scale1; 2512 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2); 2513 if (ObjectOffset2 % Scale2 != 0) 2514 return false; 2515 ObjectOffset2 /= Scale2; 2516 ObjectOffset1 += Offset1; 2517 ObjectOffset2 += Offset2; 2518 return ObjectOffset1 + 1 == ObjectOffset2; 2519 } 2520 2521 return FI1 == FI2; 2522 } 2523 2524 /// Detect opportunities for ldp/stp formation. 2525 /// 2526 /// Only called for LdSt for which getMemOperandWithOffset returns true. 2527 bool AArch64InstrInfo::shouldClusterMemOps( 2528 ArrayRef<const MachineOperand *> BaseOps1, 2529 ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads, 2530 unsigned NumBytes) const { 2531 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); 2532 const MachineOperand &BaseOp1 = *BaseOps1.front(); 2533 const MachineOperand &BaseOp2 = *BaseOps2.front(); 2534 const MachineInstr &FirstLdSt = *BaseOp1.getParent(); 2535 const MachineInstr &SecondLdSt = *BaseOp2.getParent(); 2536 if (BaseOp1.getType() != BaseOp2.getType()) 2537 return false; 2538 2539 assert((BaseOp1.isReg() || BaseOp1.isFI()) && 2540 "Only base registers and frame indices are supported."); 2541 2542 // Check for both base regs and base FI. 2543 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) 2544 return false; 2545 2546 // Only cluster up to a single pair. 2547 if (NumLoads > 2) 2548 return false; 2549 2550 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt)) 2551 return false; 2552 2553 // Can we pair these instructions based on their opcodes? 2554 unsigned FirstOpc = FirstLdSt.getOpcode(); 2555 unsigned SecondOpc = SecondLdSt.getOpcode(); 2556 if (!canPairLdStOpc(FirstOpc, SecondOpc)) 2557 return false; 2558 2559 // Can't merge volatiles or load/stores that have a hint to avoid pair 2560 // formation, for example. 2561 if (!isCandidateToMergeOrPair(FirstLdSt) || 2562 !isCandidateToMergeOrPair(SecondLdSt)) 2563 return false; 2564 2565 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate. 2566 int64_t Offset1 = FirstLdSt.getOperand(2).getImm(); 2567 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1)) 2568 return false; 2569 2570 int64_t Offset2 = SecondLdSt.getOperand(2).getImm(); 2571 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2)) 2572 return false; 2573 2574 // Pairwise instructions have a 7-bit signed offset field. 2575 if (Offset1 > 63 || Offset1 < -64) 2576 return false; 2577 2578 // The caller should already have ordered First/SecondLdSt by offset. 
2579 // Note: except for non-equal frame index bases 2580 if (BaseOp1.isFI()) { 2581 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) && 2582 "Caller should have ordered offsets."); 2583 2584 const MachineFrameInfo &MFI = 2585 FirstLdSt.getParent()->getParent()->getFrameInfo(); 2586 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc, 2587 BaseOp2.getIndex(), Offset2, SecondOpc); 2588 } 2589 2590 assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); 2591 2592 return Offset1 + 1 == Offset2; 2593 } 2594 2595 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, 2596 unsigned Reg, unsigned SubIdx, 2597 unsigned State, 2598 const TargetRegisterInfo *TRI) { 2599 if (!SubIdx) 2600 return MIB.addReg(Reg, State); 2601 2602 if (Register::isPhysicalRegister(Reg)) 2603 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 2604 return MIB.addReg(Reg, State, SubIdx); 2605 } 2606 2607 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, 2608 unsigned NumRegs) { 2609 // We really want the positive remainder mod 32 here, that happens to be 2610 // easily obtainable with a mask. 2611 return ((DestReg - SrcReg) & 0x1f) < NumRegs; 2612 } 2613 2614 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, 2615 MachineBasicBlock::iterator I, 2616 const DebugLoc &DL, MCRegister DestReg, 2617 MCRegister SrcReg, bool KillSrc, 2618 unsigned Opcode, 2619 ArrayRef<unsigned> Indices) const { 2620 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); 2621 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2622 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 2623 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 2624 unsigned NumRegs = Indices.size(); 2625 2626 int SubReg = 0, End = NumRegs, Incr = 1; 2627 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { 2628 SubReg = NumRegs - 1; 2629 End = -1; 2630 Incr = -1; 2631 } 2632 2633 for (; SubReg != End; SubReg += Incr) { 2634 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 2635 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 2636 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); 2637 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 2638 } 2639 } 2640 2641 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB, 2642 MachineBasicBlock::iterator I, 2643 DebugLoc DL, unsigned DestReg, 2644 unsigned SrcReg, bool KillSrc, 2645 unsigned Opcode, unsigned ZeroReg, 2646 llvm::ArrayRef<unsigned> Indices) const { 2647 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2648 unsigned NumRegs = Indices.size(); 2649 2650 #ifndef NDEBUG 2651 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 2652 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 2653 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 && 2654 "GPR reg sequences should not be able to overlap"); 2655 #endif 2656 2657 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) { 2658 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 2659 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 2660 MIB.addReg(ZeroReg); 2661 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 2662 MIB.addImm(0); 2663 } 2664 } 2665 2666 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 2667 MachineBasicBlock::iterator I, 2668 const DebugLoc &DL, MCRegister DestReg, 2669 MCRegister SrcReg, bool KillSrc) const { 2670 if (AArch64::GPR32spRegClass.contains(DestReg) && 
2671 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { 2672 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2673 2674 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { 2675 // If either operand is WSP, expand to ADD #0. 2676 if (Subtarget.hasZeroCycleRegMove()) { 2677 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 2678 MCRegister DestRegX = TRI->getMatchingSuperReg( 2679 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 2680 MCRegister SrcRegX = TRI->getMatchingSuperReg( 2681 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 2682 // This instruction is reading and writing X registers. This may upset 2683 // the register scavenger and machine verifier, so we need to indicate 2684 // that we are reading an undefined value from SrcRegX, but a proper 2685 // value from SrcReg. 2686 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) 2687 .addReg(SrcRegX, RegState::Undef) 2688 .addImm(0) 2689 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 2690 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 2691 } else { 2692 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) 2693 .addReg(SrcReg, getKillRegState(KillSrc)) 2694 .addImm(0) 2695 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2696 } 2697 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) { 2698 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) 2699 .addImm(0) 2700 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2701 } else { 2702 if (Subtarget.hasZeroCycleRegMove()) { 2703 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. 2704 MCRegister DestRegX = TRI->getMatchingSuperReg( 2705 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 2706 MCRegister SrcRegX = TRI->getMatchingSuperReg( 2707 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 2708 // This instruction is reading and writing X registers. This may upset 2709 // the register scavenger and machine verifier, so we need to indicate 2710 // that we are reading an undefined value from SrcRegX, but a proper 2711 // value from SrcReg. 2712 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) 2713 .addReg(AArch64::XZR) 2714 .addReg(SrcRegX, RegState::Undef) 2715 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 2716 } else { 2717 // Otherwise, expand to ORR WZR. 2718 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) 2719 .addReg(AArch64::WZR) 2720 .addReg(SrcReg, getKillRegState(KillSrc)); 2721 } 2722 } 2723 return; 2724 } 2725 2726 // Copy a Predicate register by ORRing with itself. 2727 if (AArch64::PPRRegClass.contains(DestReg) && 2728 AArch64::PPRRegClass.contains(SrcReg)) { 2729 assert(Subtarget.hasSVE() && "Unexpected SVE register."); 2730 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg) 2731 .addReg(SrcReg) // Pg 2732 .addReg(SrcReg) 2733 .addReg(SrcReg, getKillRegState(KillSrc)); 2734 return; 2735 } 2736 2737 // Copy a Z register by ORRing with itself. 2738 if (AArch64::ZPRRegClass.contains(DestReg) && 2739 AArch64::ZPRRegClass.contains(SrcReg)) { 2740 assert(Subtarget.hasSVE() && "Unexpected SVE register."); 2741 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg) 2742 .addReg(SrcReg) 2743 .addReg(SrcReg, getKillRegState(KillSrc)); 2744 return; 2745 } 2746 2747 // Copy a Z register pair by copying the individual sub-registers. 
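// Each zsub sub-register is moved with an unpredicated ORR_ZZZ whose two
// source operands are the same register, i.e. the same "mov zN.d, zM.d" idiom
// used for the single Z register copy above.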
2748 if (AArch64::ZPR2RegClass.contains(DestReg) && 2749 AArch64::ZPR2RegClass.contains(SrcReg)) { 2750 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1}; 2751 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, 2752 Indices); 2753 return; 2754 } 2755 2756 // Copy a Z register triple by copying the individual sub-registers. 2757 if (AArch64::ZPR3RegClass.contains(DestReg) && 2758 AArch64::ZPR3RegClass.contains(SrcReg)) { 2759 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, 2760 AArch64::zsub2}; 2761 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, 2762 Indices); 2763 return; 2764 } 2765 2766 // Copy a Z register quad by copying the individual sub-registers. 2767 if (AArch64::ZPR4RegClass.contains(DestReg) && 2768 AArch64::ZPR4RegClass.contains(SrcReg)) { 2769 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, 2770 AArch64::zsub2, AArch64::zsub3}; 2771 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, 2772 Indices); 2773 return; 2774 } 2775 2776 if (AArch64::GPR64spRegClass.contains(DestReg) && 2777 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { 2778 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { 2779 // If either operand is SP, expand to ADD #0. 2780 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) 2781 .addReg(SrcReg, getKillRegState(KillSrc)) 2782 .addImm(0) 2783 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2784 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) { 2785 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg) 2786 .addImm(0) 2787 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2788 } else { 2789 // Otherwise, expand to ORR XZR. 2790 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) 2791 .addReg(AArch64::XZR) 2792 .addReg(SrcReg, getKillRegState(KillSrc)); 2793 } 2794 return; 2795 } 2796 2797 // Copy a DDDD register quad by copying the individual sub-registers. 2798 if (AArch64::DDDDRegClass.contains(DestReg) && 2799 AArch64::DDDDRegClass.contains(SrcReg)) { 2800 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 2801 AArch64::dsub2, AArch64::dsub3}; 2802 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2803 Indices); 2804 return; 2805 } 2806 2807 // Copy a DDD register triple by copying the individual sub-registers. 2808 if (AArch64::DDDRegClass.contains(DestReg) && 2809 AArch64::DDDRegClass.contains(SrcReg)) { 2810 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 2811 AArch64::dsub2}; 2812 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2813 Indices); 2814 return; 2815 } 2816 2817 // Copy a DD register pair by copying the individual sub-registers. 2818 if (AArch64::DDRegClass.contains(DestReg) && 2819 AArch64::DDRegClass.contains(SrcReg)) { 2820 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1}; 2821 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2822 Indices); 2823 return; 2824 } 2825 2826 // Copy a QQQQ register quad by copying the individual sub-registers. 2827 if (AArch64::QQQQRegClass.contains(DestReg) && 2828 AArch64::QQQQRegClass.contains(SrcReg)) { 2829 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 2830 AArch64::qsub2, AArch64::qsub3}; 2831 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2832 Indices); 2833 return; 2834 } 2835 2836 // Copy a QQQ register triple by copying the individual sub-registers. 
2837 if (AArch64::QQQRegClass.contains(DestReg) && 2838 AArch64::QQQRegClass.contains(SrcReg)) { 2839 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 2840 AArch64::qsub2}; 2841 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2842 Indices); 2843 return; 2844 } 2845 2846 // Copy a QQ register pair by copying the individual sub-registers. 2847 if (AArch64::QQRegClass.contains(DestReg) && 2848 AArch64::QQRegClass.contains(SrcReg)) { 2849 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1}; 2850 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2851 Indices); 2852 return; 2853 } 2854 2855 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) && 2856 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) { 2857 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64}; 2858 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs, 2859 AArch64::XZR, Indices); 2860 return; 2861 } 2862 2863 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) && 2864 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) { 2865 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32}; 2866 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs, 2867 AArch64::WZR, Indices); 2868 return; 2869 } 2870 2871 if (AArch64::FPR128RegClass.contains(DestReg) && 2872 AArch64::FPR128RegClass.contains(SrcReg)) { 2873 if (Subtarget.hasNEON()) { 2874 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2875 .addReg(SrcReg) 2876 .addReg(SrcReg, getKillRegState(KillSrc)); 2877 } else { 2878 BuildMI(MBB, I, DL, get(AArch64::STRQpre)) 2879 .addReg(AArch64::SP, RegState::Define) 2880 .addReg(SrcReg, getKillRegState(KillSrc)) 2881 .addReg(AArch64::SP) 2882 .addImm(-16); 2883 BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) 2884 .addReg(AArch64::SP, RegState::Define) 2885 .addReg(DestReg, RegState::Define) 2886 .addReg(AArch64::SP) 2887 .addImm(16); 2888 } 2889 return; 2890 } 2891 2892 if (AArch64::FPR64RegClass.contains(DestReg) && 2893 AArch64::FPR64RegClass.contains(SrcReg)) { 2894 if (Subtarget.hasNEON()) { 2895 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, 2896 &AArch64::FPR128RegClass); 2897 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, 2898 &AArch64::FPR128RegClass); 2899 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2900 .addReg(SrcReg) 2901 .addReg(SrcReg, getKillRegState(KillSrc)); 2902 } else { 2903 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) 2904 .addReg(SrcReg, getKillRegState(KillSrc)); 2905 } 2906 return; 2907 } 2908 2909 if (AArch64::FPR32RegClass.contains(DestReg) && 2910 AArch64::FPR32RegClass.contains(SrcReg)) { 2911 if (Subtarget.hasNEON()) { 2912 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, 2913 &AArch64::FPR128RegClass); 2914 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, 2915 &AArch64::FPR128RegClass); 2916 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2917 .addReg(SrcReg) 2918 .addReg(SrcReg, getKillRegState(KillSrc)); 2919 } else { 2920 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2921 .addReg(SrcReg, getKillRegState(KillSrc)); 2922 } 2923 return; 2924 } 2925 2926 if (AArch64::FPR16RegClass.contains(DestReg) && 2927 AArch64::FPR16RegClass.contains(SrcReg)) { 2928 if (Subtarget.hasNEON()) { 2929 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2930 &AArch64::FPR128RegClass); 2931 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2932 &AArch64::FPR128RegClass); 2933 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 
2934 .addReg(SrcReg) 2935 .addReg(SrcReg, getKillRegState(KillSrc)); 2936 } else { 2937 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2938 &AArch64::FPR32RegClass); 2939 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2940 &AArch64::FPR32RegClass); 2941 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2942 .addReg(SrcReg, getKillRegState(KillSrc)); 2943 } 2944 return; 2945 } 2946 2947 if (AArch64::FPR8RegClass.contains(DestReg) && 2948 AArch64::FPR8RegClass.contains(SrcReg)) { 2949 if (Subtarget.hasNEON()) { 2950 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2951 &AArch64::FPR128RegClass); 2952 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2953 &AArch64::FPR128RegClass); 2954 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2955 .addReg(SrcReg) 2956 .addReg(SrcReg, getKillRegState(KillSrc)); 2957 } else { 2958 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2959 &AArch64::FPR32RegClass); 2960 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2961 &AArch64::FPR32RegClass); 2962 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2963 .addReg(SrcReg, getKillRegState(KillSrc)); 2964 } 2965 return; 2966 } 2967 2968 // Copies between GPR64 and FPR64. 2969 if (AArch64::FPR64RegClass.contains(DestReg) && 2970 AArch64::GPR64RegClass.contains(SrcReg)) { 2971 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) 2972 .addReg(SrcReg, getKillRegState(KillSrc)); 2973 return; 2974 } 2975 if (AArch64::GPR64RegClass.contains(DestReg) && 2976 AArch64::FPR64RegClass.contains(SrcReg)) { 2977 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) 2978 .addReg(SrcReg, getKillRegState(KillSrc)); 2979 return; 2980 } 2981 // Copies between GPR32 and FPR32. 2982 if (AArch64::FPR32RegClass.contains(DestReg) && 2983 AArch64::GPR32RegClass.contains(SrcReg)) { 2984 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) 2985 .addReg(SrcReg, getKillRegState(KillSrc)); 2986 return; 2987 } 2988 if (AArch64::GPR32RegClass.contains(DestReg) && 2989 AArch64::FPR32RegClass.contains(SrcReg)) { 2990 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) 2991 .addReg(SrcReg, getKillRegState(KillSrc)); 2992 return; 2993 } 2994 2995 if (DestReg == AArch64::NZCV) { 2996 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); 2997 BuildMI(MBB, I, DL, get(AArch64::MSR)) 2998 .addImm(AArch64SysReg::NZCV) 2999 .addReg(SrcReg, getKillRegState(KillSrc)) 3000 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); 3001 return; 3002 } 3003 3004 if (SrcReg == AArch64::NZCV) { 3005 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); 3006 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg) 3007 .addImm(AArch64SysReg::NZCV) 3008 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); 3009 return; 3010 } 3011 3012 llvm_unreachable("unimplemented reg-to-reg copy"); 3013 } 3014 3015 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, 3016 MachineBasicBlock &MBB, 3017 MachineBasicBlock::iterator InsertBefore, 3018 const MCInstrDesc &MCID, 3019 Register SrcReg, bool IsKill, 3020 unsigned SubIdx0, unsigned SubIdx1, int FI, 3021 MachineMemOperand *MMO) { 3022 Register SrcReg0 = SrcReg; 3023 Register SrcReg1 = SrcReg; 3024 if (Register::isPhysicalRegister(SrcReg)) { 3025 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0); 3026 SubIdx0 = 0; 3027 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1); 3028 SubIdx1 = 0; 3029 } 3030 BuildMI(MBB, InsertBefore, DebugLoc(), MCID) 3031 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0) 3032 .addReg(SrcReg1, 
getKillRegState(IsKill), SubIdx1) 3033 .addFrameIndex(FI) 3034 .addImm(0) 3035 .addMemOperand(MMO); 3036 } 3037 3038 void AArch64InstrInfo::storeRegToStackSlot( 3039 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, 3040 bool isKill, int FI, const TargetRegisterClass *RC, 3041 const TargetRegisterInfo *TRI) const { 3042 MachineFunction &MF = *MBB.getParent(); 3043 MachineFrameInfo &MFI = MF.getFrameInfo(); 3044 3045 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 3046 MachineMemOperand *MMO = 3047 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 3048 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 3049 unsigned Opc = 0; 3050 bool Offset = true; 3051 unsigned StackID = TargetStackID::Default; 3052 switch (TRI->getSpillSize(*RC)) { 3053 case 1: 3054 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 3055 Opc = AArch64::STRBui; 3056 break; 3057 case 2: 3058 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 3059 Opc = AArch64::STRHui; 3060 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { 3061 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3062 Opc = AArch64::STR_PXI; 3063 StackID = TargetStackID::SVEVector; 3064 } 3065 break; 3066 case 4: 3067 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 3068 Opc = AArch64::STRWui; 3069 if (Register::isVirtualRegister(SrcReg)) 3070 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); 3071 else 3072 assert(SrcReg != AArch64::WSP); 3073 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 3074 Opc = AArch64::STRSui; 3075 break; 3076 case 8: 3077 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 3078 Opc = AArch64::STRXui; 3079 if (Register::isVirtualRegister(SrcReg)) 3080 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 3081 else 3082 assert(SrcReg != AArch64::SP); 3083 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) { 3084 Opc = AArch64::STRDui; 3085 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) { 3086 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI, 3087 get(AArch64::STPWi), SrcReg, isKill, 3088 AArch64::sube32, AArch64::subo32, FI, MMO); 3089 return; 3090 } 3091 break; 3092 case 16: 3093 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 3094 Opc = AArch64::STRQui; 3095 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 3096 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3097 Opc = AArch64::ST1Twov1d; 3098 Offset = false; 3099 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 3100 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI, 3101 get(AArch64::STPXi), SrcReg, isKill, 3102 AArch64::sube64, AArch64::subo64, FI, MMO); 3103 return; 3104 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { 3105 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3106 Opc = AArch64::STR_ZXI; 3107 StackID = TargetStackID::SVEVector; 3108 } 3109 break; 3110 case 24: 3111 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 3112 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3113 Opc = AArch64::ST1Threev1d; 3114 Offset = false; 3115 } 3116 break; 3117 case 32: 3118 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 3119 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3120 Opc = AArch64::ST1Fourv1d; 3121 Offset = false; 3122 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 3123 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3124 Opc = AArch64::ST1Twov2d; 3125 Offset = false; 3126 } else if 
(AArch64::ZPR2RegClass.hasSubClassEq(RC)) { 3127 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3128 Opc = AArch64::STR_ZZXI; 3129 StackID = TargetStackID::SVEVector; 3130 } 3131 break; 3132 case 48: 3133 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 3134 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3135 Opc = AArch64::ST1Threev2d; 3136 Offset = false; 3137 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { 3138 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3139 Opc = AArch64::STR_ZZZXI; 3140 StackID = TargetStackID::SVEVector; 3141 } 3142 break; 3143 case 64: 3144 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 3145 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3146 Opc = AArch64::ST1Fourv2d; 3147 Offset = false; 3148 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { 3149 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3150 Opc = AArch64::STR_ZZZZXI; 3151 StackID = TargetStackID::SVEVector; 3152 } 3153 break; 3154 } 3155 assert(Opc && "Unknown register class"); 3156 MFI.setStackID(FI, StackID); 3157 3158 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc)) 3159 .addReg(SrcReg, getKillRegState(isKill)) 3160 .addFrameIndex(FI); 3161 3162 if (Offset) 3163 MI.addImm(0); 3164 MI.addMemOperand(MMO); 3165 } 3166 3167 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, 3168 MachineBasicBlock &MBB, 3169 MachineBasicBlock::iterator InsertBefore, 3170 const MCInstrDesc &MCID, 3171 Register DestReg, unsigned SubIdx0, 3172 unsigned SubIdx1, int FI, 3173 MachineMemOperand *MMO) { 3174 Register DestReg0 = DestReg; 3175 Register DestReg1 = DestReg; 3176 bool IsUndef = true; 3177 if (Register::isPhysicalRegister(DestReg)) { 3178 DestReg0 = TRI.getSubReg(DestReg, SubIdx0); 3179 SubIdx0 = 0; 3180 DestReg1 = TRI.getSubReg(DestReg, SubIdx1); 3181 SubIdx1 = 0; 3182 IsUndef = false; 3183 } 3184 BuildMI(MBB, InsertBefore, DebugLoc(), MCID) 3185 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0) 3186 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1) 3187 .addFrameIndex(FI) 3188 .addImm(0) 3189 .addMemOperand(MMO); 3190 } 3191 3192 void AArch64InstrInfo::loadRegFromStackSlot( 3193 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, 3194 int FI, const TargetRegisterClass *RC, 3195 const TargetRegisterInfo *TRI) const { 3196 MachineFunction &MF = *MBB.getParent(); 3197 MachineFrameInfo &MFI = MF.getFrameInfo(); 3198 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 3199 MachineMemOperand *MMO = 3200 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, 3201 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 3202 3203 unsigned Opc = 0; 3204 bool Offset = true; 3205 unsigned StackID = TargetStackID::Default; 3206 switch (TRI->getSpillSize(*RC)) { 3207 case 1: 3208 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 3209 Opc = AArch64::LDRBui; 3210 break; 3211 case 2: 3212 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 3213 Opc = AArch64::LDRHui; 3214 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { 3215 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3216 Opc = AArch64::LDR_PXI; 3217 StackID = TargetStackID::SVEVector; 3218 } 3219 break; 3220 case 4: 3221 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 3222 Opc = AArch64::LDRWui; 3223 if (Register::isVirtualRegister(DestReg)) 3224 MF.getRegInfo().constrainRegClass(DestReg, 
&AArch64::GPR32RegClass); 3225 else 3226 assert(DestReg != AArch64::WSP); 3227 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 3228 Opc = AArch64::LDRSui; 3229 break; 3230 case 8: 3231 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 3232 Opc = AArch64::LDRXui; 3233 if (Register::isVirtualRegister(DestReg)) 3234 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); 3235 else 3236 assert(DestReg != AArch64::SP); 3237 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) { 3238 Opc = AArch64::LDRDui; 3239 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) { 3240 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI, 3241 get(AArch64::LDPWi), DestReg, AArch64::sube32, 3242 AArch64::subo32, FI, MMO); 3243 return; 3244 } 3245 break; 3246 case 16: 3247 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 3248 Opc = AArch64::LDRQui; 3249 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 3250 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3251 Opc = AArch64::LD1Twov1d; 3252 Offset = false; 3253 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 3254 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI, 3255 get(AArch64::LDPXi), DestReg, AArch64::sube64, 3256 AArch64::subo64, FI, MMO); 3257 return; 3258 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { 3259 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3260 Opc = AArch64::LDR_ZXI; 3261 StackID = TargetStackID::SVEVector; 3262 } 3263 break; 3264 case 24: 3265 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 3266 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3267 Opc = AArch64::LD1Threev1d; 3268 Offset = false; 3269 } 3270 break; 3271 case 32: 3272 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 3273 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3274 Opc = AArch64::LD1Fourv1d; 3275 Offset = false; 3276 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 3277 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3278 Opc = AArch64::LD1Twov2d; 3279 Offset = false; 3280 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { 3281 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3282 Opc = AArch64::LDR_ZZXI; 3283 StackID = TargetStackID::SVEVector; 3284 } 3285 break; 3286 case 48: 3287 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 3288 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3289 Opc = AArch64::LD1Threev2d; 3290 Offset = false; 3291 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { 3292 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3293 Opc = AArch64::LDR_ZZZXI; 3294 StackID = TargetStackID::SVEVector; 3295 } 3296 break; 3297 case 64: 3298 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 3299 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3300 Opc = AArch64::LD1Fourv2d; 3301 Offset = false; 3302 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { 3303 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3304 Opc = AArch64::LDR_ZZZZXI; 3305 StackID = TargetStackID::SVEVector; 3306 } 3307 break; 3308 } 3309 3310 assert(Opc && "Unknown register class"); 3311 MFI.setStackID(FI, StackID); 3312 3313 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc)) 3314 .addReg(DestReg, getDefRegState(true)) 3315 .addFrameIndex(FI); 3316 if (Offset) 3317 MI.addImm(0); 3318 MI.addMemOperand(MMO); 3319 } 3320 3321 bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, 
3322 const MachineInstr &UseMI, 3323 const TargetRegisterInfo *TRI) { 3324 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()), 3325 UseMI.getIterator()), 3326 [TRI](const MachineInstr &I) { 3327 return I.modifiesRegister(AArch64::NZCV, TRI) || 3328 I.readsRegister(AArch64::NZCV, TRI); 3329 }); 3330 } 3331 3332 // Helper function to emit a frame offset adjustment from a given 3333 // pointer (SrcReg), stored into DestReg. This function is explicit 3334 // in that it requires the opcode. 3335 static void emitFrameOffsetAdj(MachineBasicBlock &MBB, 3336 MachineBasicBlock::iterator MBBI, 3337 const DebugLoc &DL, unsigned DestReg, 3338 unsigned SrcReg, int64_t Offset, unsigned Opc, 3339 const TargetInstrInfo *TII, 3340 MachineInstr::MIFlag Flag, bool NeedsWinCFI, 3341 bool *HasWinCFI) { 3342 int Sign = 1; 3343 unsigned MaxEncoding, ShiftSize; 3344 switch (Opc) { 3345 case AArch64::ADDXri: 3346 case AArch64::ADDSXri: 3347 case AArch64::SUBXri: 3348 case AArch64::SUBSXri: 3349 MaxEncoding = 0xfff; 3350 ShiftSize = 12; 3351 break; 3352 case AArch64::ADDVL_XXI: 3353 case AArch64::ADDPL_XXI: 3354 MaxEncoding = 31; 3355 ShiftSize = 0; 3356 if (Offset < 0) { 3357 MaxEncoding = 32; 3358 Sign = -1; 3359 Offset = -Offset; 3360 } 3361 break; 3362 default: 3363 llvm_unreachable("Unsupported opcode"); 3364 } 3365 3366 // FIXME: If the offset won't fit in 24-bits, compute the offset into a 3367 // scratch register. If DestReg is a virtual register, use it as the 3368 // scratch register; otherwise, create a new virtual register (to be 3369 // replaced by the scavenger at the end of PEI). That case can be optimized 3370 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch 3371 // register can be loaded with offset%8 and the add/sub can use an extending 3372 // instruction with LSL#3. 3373 // Currently the function handles any offsets but generates a poor sequence 3374 // of code. 
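// For example, with Opc == AArch64::ADDXri and Offset == 0x12345, the loop
// below emits:
//   add xD, xN, #0x12, lsl #12
//   add xD, xD, #0x345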
3375 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); 3376 3377 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; 3378 Register TmpReg = DestReg; 3379 if (TmpReg == AArch64::XZR) 3380 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister( 3381 &AArch64::GPR64RegClass); 3382 do { 3383 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue); 3384 unsigned LocalShiftSize = 0; 3385 if (ThisVal > MaxEncoding) { 3386 ThisVal = ThisVal >> ShiftSize; 3387 LocalShiftSize = ShiftSize; 3388 } 3389 assert((ThisVal >> ShiftSize) <= MaxEncoding && 3390 "Encoding cannot handle value that big"); 3391 3392 Offset -= ThisVal << LocalShiftSize; 3393 if (Offset == 0) 3394 TmpReg = DestReg; 3395 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg) 3396 .addReg(SrcReg) 3397 .addImm(Sign * (int)ThisVal); 3398 if (ShiftSize) 3399 MBI = MBI.addImm( 3400 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize)); 3401 MBI = MBI.setMIFlag(Flag); 3402 3403 if (NeedsWinCFI) { 3404 assert(Sign == 1 && "SEH directives should always have a positive sign"); 3405 int Imm = (int)(ThisVal << LocalShiftSize); 3406 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || 3407 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { 3408 if (HasWinCFI) 3409 *HasWinCFI = true; 3410 if (Imm == 0) 3411 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag); 3412 else 3413 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)) 3414 .addImm(Imm) 3415 .setMIFlag(Flag); 3416 assert(Offset == 0 && "Expected remaining offset to be zero to " 3417 "emit a single SEH directive"); 3418 } else if (DestReg == AArch64::SP) { 3419 if (HasWinCFI) 3420 *HasWinCFI = true; 3421 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc"); 3422 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) 3423 .addImm(Imm) 3424 .setMIFlag(Flag); 3425 } 3426 if (HasWinCFI) 3427 *HasWinCFI = true; 3428 } 3429 3430 SrcReg = TmpReg; 3431 } while (Offset); 3432 } 3433 3434 void llvm::emitFrameOffset(MachineBasicBlock &MBB, 3435 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 3436 unsigned DestReg, unsigned SrcReg, 3437 StackOffset Offset, const TargetInstrInfo *TII, 3438 MachineInstr::MIFlag Flag, bool SetNZCV, 3439 bool NeedsWinCFI, bool *HasWinCFI) { 3440 int64_t Bytes, NumPredicateVectors, NumDataVectors; 3441 Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors); 3442 3443 // First emit non-scalable frame offsets, or a simple 'mov'. 3444 if (Bytes || (!Offset && SrcReg != DestReg)) { 3445 assert((DestReg != AArch64::SP || Bytes % 16 == 0) && 3446 "SP increment/decrement not 16-byte aligned"); 3447 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri; 3448 if (Bytes < 0) { 3449 Bytes = -Bytes; 3450 Opc = SetNZCV ? 
AArch64::SUBSXri : AArch64::SUBXri; 3451 } 3452 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag, 3453 NeedsWinCFI, HasWinCFI); 3454 SrcReg = DestReg; 3455 } 3456 3457 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) && 3458 "SetNZCV not supported with SVE vectors"); 3459 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) && 3460 "WinCFI not supported with SVE vectors"); 3461 3462 if (NumDataVectors) { 3463 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors, 3464 AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr); 3465 SrcReg = DestReg; 3466 } 3467 3468 if (NumPredicateVectors) { 3469 assert(DestReg != AArch64::SP && "Unaligned access to SP"); 3470 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors, 3471 AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr); 3472 } 3473 } 3474 3475 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( 3476 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 3477 MachineBasicBlock::iterator InsertPt, int FrameIndex, 3478 LiveIntervals *LIS, VirtRegMap *VRM) const { 3479 // This is a bit of a hack. Consider this instruction: 3480 // 3481 // %0 = COPY %sp; GPR64all:%0 3482 // 3483 // We explicitly chose GPR64all for the virtual register so such a copy might 3484 // be eliminated by RegisterCoalescer. However, that may not be possible, and 3485 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all 3486 // register class, TargetInstrInfo::foldMemoryOperand() is going to try. 3487 // 3488 // To prevent that, we are going to constrain the %0 register class here. 3489 // 3490 // <rdar://problem/11522048> 3491 // 3492 if (MI.isFullCopy()) { 3493 Register DstReg = MI.getOperand(0).getReg(); 3494 Register SrcReg = MI.getOperand(1).getReg(); 3495 if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) { 3496 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); 3497 return nullptr; 3498 } 3499 if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) { 3500 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 3501 return nullptr; 3502 } 3503 } 3504 3505 // Handle the case where a copy is being spilled or filled but the source 3506 // and destination register class don't match. For example: 3507 // 3508 // %0 = COPY %xzr; GPR64common:%0 3509 // 3510 // In this case we can still safely fold away the COPY and generate the 3511 // following spill code: 3512 // 3513 // STRXui %xzr, %stack.0 3514 // 3515 // This also eliminates spilled cross register class COPYs (e.g. between x and 3516 // d regs) of the same size. For example: 3517 // 3518 // %0 = COPY %1; GPR64:%0, FPR64:%1 3519 // 3520 // will be filled as 3521 // 3522 // LDRDui %0, fi<#0> 3523 // 3524 // instead of 3525 // 3526 // LDRXui %Temp, fi<#0> 3527 // %0 = FMOV %Temp 3528 // 3529 if (MI.isCopy() && Ops.size() == 1 && 3530 // Make sure we're only folding the explicit COPY defs/uses. 3531 (Ops[0] == 0 || Ops[0] == 1)) { 3532 bool IsSpill = Ops[0] == 0; 3533 bool IsFill = !IsSpill; 3534 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 3535 const MachineRegisterInfo &MRI = MF.getRegInfo(); 3536 MachineBasicBlock &MBB = *MI.getParent(); 3537 const MachineOperand &DstMO = MI.getOperand(0); 3538 const MachineOperand &SrcMO = MI.getOperand(1); 3539 Register DstReg = DstMO.getReg(); 3540 Register SrcReg = SrcMO.getReg(); 3541 // This is slightly expensive to compute for physical regs since 3542 // getMinimalPhysRegClass is slow. 
3543 auto getRegClass = [&](unsigned Reg) { 3544 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg) 3545 : TRI.getMinimalPhysRegClass(Reg); 3546 }; 3547 3548 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { 3549 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == 3550 TRI.getRegSizeInBits(*getRegClass(SrcReg)) && 3551 "Mismatched register size in non subreg COPY"); 3552 if (IsSpill) 3553 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, 3554 getRegClass(SrcReg), &TRI); 3555 else 3556 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, 3557 getRegClass(DstReg), &TRI); 3558 return &*--InsertPt; 3559 } 3560 3561 // Handle cases like spilling def of: 3562 // 3563 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0 3564 // 3565 // where the physical register source can be widened and stored to the full 3566 // virtual reg destination stack slot, in this case producing: 3567 // 3568 // STRXui %xzr, %stack.0 3569 // 3570 if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) { 3571 assert(SrcMO.getSubReg() == 0 && 3572 "Unexpected subreg on physical register"); 3573 const TargetRegisterClass *SpillRC; 3574 unsigned SpillSubreg; 3575 switch (DstMO.getSubReg()) { 3576 default: 3577 SpillRC = nullptr; 3578 break; 3579 case AArch64::sub_32: 3580 case AArch64::ssub: 3581 if (AArch64::GPR32RegClass.contains(SrcReg)) { 3582 SpillRC = &AArch64::GPR64RegClass; 3583 SpillSubreg = AArch64::sub_32; 3584 } else if (AArch64::FPR32RegClass.contains(SrcReg)) { 3585 SpillRC = &AArch64::FPR64RegClass; 3586 SpillSubreg = AArch64::ssub; 3587 } else 3588 SpillRC = nullptr; 3589 break; 3590 case AArch64::dsub: 3591 if (AArch64::FPR64RegClass.contains(SrcReg)) { 3592 SpillRC = &AArch64::FPR128RegClass; 3593 SpillSubreg = AArch64::dsub; 3594 } else 3595 SpillRC = nullptr; 3596 break; 3597 } 3598 3599 if (SpillRC) 3600 if (unsigned WidenedSrcReg = 3601 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) { 3602 storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(), 3603 FrameIndex, SpillRC, &TRI); 3604 return &*--InsertPt; 3605 } 3606 } 3607 3608 // Handle cases like filling use of: 3609 // 3610 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1 3611 // 3612 // where we can load the full virtual reg source stack slot, into the subreg 3613 // destination, in this case producing: 3614 // 3615 // LDRWui %0:sub_32<def,read-undef>, %stack.0 3616 // 3617 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) { 3618 const TargetRegisterClass *FillRC; 3619 switch (DstMO.getSubReg()) { 3620 default: 3621 FillRC = nullptr; 3622 break; 3623 case AArch64::sub_32: 3624 FillRC = &AArch64::GPR32RegClass; 3625 break; 3626 case AArch64::ssub: 3627 FillRC = &AArch64::FPR32RegClass; 3628 break; 3629 case AArch64::dsub: 3630 FillRC = &AArch64::FPR64RegClass; 3631 break; 3632 } 3633 3634 if (FillRC) { 3635 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == 3636 TRI.getRegSizeInBits(*FillRC) && 3637 "Mismatched regclass size on folded subreg COPY"); 3638 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI); 3639 MachineInstr &LoadMI = *--InsertPt; 3640 MachineOperand &LoadDst = LoadMI.getOperand(0); 3641 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load"); 3642 LoadDst.setSubReg(DstMO.getSubReg()); 3643 LoadDst.setIsUndef(); 3644 return &LoadMI; 3645 } 3646 } 3647 } 3648 3649 // Cannot fold. 
3650 return nullptr; 3651 } 3652 3653 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, 3654 StackOffset &SOffset, 3655 bool *OutUseUnscaledOp, 3656 unsigned *OutUnscaledOp, 3657 int64_t *EmittableOffset) { 3658 // Set output values in case of early exit. 3659 if (EmittableOffset) 3660 *EmittableOffset = 0; 3661 if (OutUseUnscaledOp) 3662 *OutUseUnscaledOp = false; 3663 if (OutUnscaledOp) 3664 *OutUnscaledOp = 0; 3665 3666 // Exit early for structured vector spills/fills as they can't take an 3667 // immediate offset. 3668 switch (MI.getOpcode()) { 3669 default: 3670 break; 3671 case AArch64::LD1Twov2d: 3672 case AArch64::LD1Threev2d: 3673 case AArch64::LD1Fourv2d: 3674 case AArch64::LD1Twov1d: 3675 case AArch64::LD1Threev1d: 3676 case AArch64::LD1Fourv1d: 3677 case AArch64::ST1Twov2d: 3678 case AArch64::ST1Threev2d: 3679 case AArch64::ST1Fourv2d: 3680 case AArch64::ST1Twov1d: 3681 case AArch64::ST1Threev1d: 3682 case AArch64::ST1Fourv1d: 3683 case AArch64::IRG: 3684 case AArch64::IRGstack: 3685 case AArch64::STGloop: 3686 case AArch64::STZGloop: 3687 return AArch64FrameOffsetCannotUpdate; 3688 } 3689 3690 // Get the min/max offset and the scale. 3691 TypeSize ScaleValue(0U, false); 3692 unsigned Width; 3693 int64_t MinOff, MaxOff; 3694 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff, 3695 MaxOff)) 3696 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); 3697 3698 // Construct the complete offset. 3699 bool IsMulVL = ScaleValue.isScalable(); 3700 unsigned Scale = ScaleValue.getKnownMinSize(); 3701 int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes(); 3702 3703 const MachineOperand &ImmOpnd = 3704 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); 3705 Offset += ImmOpnd.getImm() * Scale; 3706 3707 // If the offset doesn't match the scale, we rewrite the instruction to 3708 // use the unscaled instruction instead. Likewise, if we have a negative 3709 // offset and there is an unscaled op to use. 3710 Optional<unsigned> UnscaledOp = 3711 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode()); 3712 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0); 3713 if (useUnscaledOp && 3714 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff, 3715 MaxOff)) 3716 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); 3717 3718 Scale = ScaleValue.getKnownMinSize(); 3719 assert(IsMulVL == ScaleValue.isScalable() && 3720 "Unscaled opcode has different value for scalable"); 3721 3722 int64_t Remainder = Offset % Scale; 3723 assert(!(Remainder && useUnscaledOp) && 3724 "Cannot have remainder when using unscaled op"); 3725 3726 assert(MinOff < MaxOff && "Unexpected Min/Max offsets"); 3727 int64_t NewOffset = Offset / Scale; 3728 if (MinOff <= NewOffset && NewOffset <= MaxOff) 3729 Offset = Remainder; 3730 else { 3731 NewOffset = NewOffset < 0 ? MinOff : MaxOff; 3732 Offset = Offset - NewOffset * Scale + Remainder; 3733 } 3734 3735 if (EmittableOffset) 3736 *EmittableOffset = NewOffset; 3737 if (OutUseUnscaledOp) 3738 *OutUseUnscaledOp = useUnscaledOp; 3739 if (OutUnscaledOp && UnscaledOp) 3740 *OutUnscaledOp = *UnscaledOp; 3741 3742 if (IsMulVL) 3743 SOffset = StackOffset(Offset, MVT::nxv1i8) + 3744 StackOffset(SOffset.getBytes(), MVT::i8); 3745 else 3746 SOffset = StackOffset(Offset, MVT::i8) + 3747 StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8); 3748 return AArch64FrameOffsetCanUpdate | 3749 (SOffset ? 
0 : AArch64FrameOffsetIsLegal); 3750 } 3751 3752 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 3753 unsigned FrameReg, StackOffset &Offset, 3754 const AArch64InstrInfo *TII) { 3755 unsigned Opcode = MI.getOpcode(); 3756 unsigned ImmIdx = FrameRegIdx + 1; 3757 3758 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { 3759 Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8); 3760 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), 3761 MI.getOperand(0).getReg(), FrameReg, Offset, TII, 3762 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); 3763 MI.eraseFromParent(); 3764 Offset = StackOffset(); 3765 return true; 3766 } 3767 3768 int64_t NewOffset; 3769 unsigned UnscaledOp; 3770 bool UseUnscaledOp; 3771 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, 3772 &UnscaledOp, &NewOffset); 3773 if (Status & AArch64FrameOffsetCanUpdate) { 3774 if (Status & AArch64FrameOffsetIsLegal) 3775 // Replace the FrameIndex with FrameReg. 3776 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 3777 if (UseUnscaledOp) 3778 MI.setDesc(TII->get(UnscaledOp)); 3779 3780 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); 3781 return !Offset; 3782 } 3783 3784 return false; 3785 } 3786 3787 void AArch64InstrInfo::getNoop(MCInst &NopInst) const { 3788 NopInst.setOpcode(AArch64::HINT); 3789 NopInst.addOperand(MCOperand::createImm(0)); 3790 } 3791 3792 // AArch64 supports MachineCombiner. 3793 bool AArch64InstrInfo::useMachineCombiner() const { return true; } 3794 3795 // True when Opc sets flag 3796 static bool isCombineInstrSettingFlag(unsigned Opc) { 3797 switch (Opc) { 3798 case AArch64::ADDSWrr: 3799 case AArch64::ADDSWri: 3800 case AArch64::ADDSXrr: 3801 case AArch64::ADDSXri: 3802 case AArch64::SUBSWrr: 3803 case AArch64::SUBSXrr: 3804 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 3805 case AArch64::SUBSWri: 3806 case AArch64::SUBSXri: 3807 return true; 3808 default: 3809 break; 3810 } 3811 return false; 3812 } 3813 3814 // 32b Opcodes that can be combined with a MUL 3815 static bool isCombineInstrCandidate32(unsigned Opc) { 3816 switch (Opc) { 3817 case AArch64::ADDWrr: 3818 case AArch64::ADDWri: 3819 case AArch64::SUBWrr: 3820 case AArch64::ADDSWrr: 3821 case AArch64::ADDSWri: 3822 case AArch64::SUBSWrr: 3823 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 3824 case AArch64::SUBWri: 3825 case AArch64::SUBSWri: 3826 return true; 3827 default: 3828 break; 3829 } 3830 return false; 3831 } 3832 3833 // 64b Opcodes that can be combined with a MUL 3834 static bool isCombineInstrCandidate64(unsigned Opc) { 3835 switch (Opc) { 3836 case AArch64::ADDXrr: 3837 case AArch64::ADDXri: 3838 case AArch64::SUBXrr: 3839 case AArch64::ADDSXrr: 3840 case AArch64::ADDSXri: 3841 case AArch64::SUBSXrr: 3842 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 
3843 case AArch64::SUBXri:
3844 case AArch64::SUBSXri:
3845 case AArch64::ADDv8i8:
3846 case AArch64::ADDv16i8:
3847 case AArch64::ADDv4i16:
3848 case AArch64::ADDv8i16:
3849 case AArch64::ADDv2i32:
3850 case AArch64::ADDv4i32:
3851 case AArch64::SUBv8i8:
3852 case AArch64::SUBv16i8:
3853 case AArch64::SUBv4i16:
3854 case AArch64::SUBv8i16:
3855 case AArch64::SUBv2i32:
3856 case AArch64::SUBv4i32:
3857 return true;
3858 default:
3859 break;
3860 }
3861 return false;
3862 }
3863
3864 // FP Opcodes that can be combined with an FMUL
3865 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3866 switch (Inst.getOpcode()) {
3867 default:
3868 break;
3869 case AArch64::FADDHrr:
3870 case AArch64::FADDSrr:
3871 case AArch64::FADDDrr:
3872 case AArch64::FADDv4f16:
3873 case AArch64::FADDv8f16:
3874 case AArch64::FADDv2f32:
3875 case AArch64::FADDv2f64:
3876 case AArch64::FADDv4f32:
3877 case AArch64::FSUBHrr:
3878 case AArch64::FSUBSrr:
3879 case AArch64::FSUBDrr:
3880 case AArch64::FSUBv4f16:
3881 case AArch64::FSUBv8f16:
3882 case AArch64::FSUBv2f32:
3883 case AArch64::FSUBv2f64:
3884 case AArch64::FSUBv4f32:
3885 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3886 return (Options.UnsafeFPMath ||
3887 Options.AllowFPOpFusion == FPOpFusion::Fast);
3888 }
3889 return false;
3890 }
3891
3892 // Opcodes that can be combined with a MUL
3893 static bool isCombineInstrCandidate(unsigned Opc) {
3894 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3895 }
3896
3897 //
3898 // Utility routine that checks if \param MO is defined by an
3899 // \param CombineOpc instruction in the basic block \param MBB
3900 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3901 unsigned CombineOpc, unsigned ZeroReg = 0,
3902 bool CheckZeroReg = false) {
3903 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3904 MachineInstr *MI = nullptr;
3905
3906 if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
3907 MI = MRI.getUniqueVRegDef(MO.getReg());
3908 // And it needs to be in the trace (otherwise, it won't have a depth).
3909 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3910 return false;
3911 // Must only be used by the user we combine with.
3912 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3913 return false;
3914
3915 if (CheckZeroReg) {
3916 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3917 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3918 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
3919 // The third input reg must be zero.
3920 if (MI->getOperand(3).getReg() != ZeroReg)
3921 return false;
3922 }
3923
3924 return true;
3925 }
3926
3927 //
3928 // Is \param MO defined by an integer multiply and can it be combined?
3929 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3930 unsigned MulOpc, unsigned ZeroReg) {
3931 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3932 }
3933
3934 //
3935 // Is \param MO defined by a floating-point multiply and can it be combined?
3936 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3937 unsigned MulOpc) {
3938 return canCombine(MBB, MO, MulOpc);
3939 }
3940
3941 // TODO: There are many more machine instruction opcodes to match:
3942 // 1. Other data types (integer, vectors)
3943 // 2. Other math / logic operations (xor, or)
3944 // 3.
Other forms of the same operation (intrinsics and other variants) 3945 bool AArch64InstrInfo::isAssociativeAndCommutative( 3946 const MachineInstr &Inst) const { 3947 switch (Inst.getOpcode()) { 3948 case AArch64::FADDDrr: 3949 case AArch64::FADDSrr: 3950 case AArch64::FADDv2f32: 3951 case AArch64::FADDv2f64: 3952 case AArch64::FADDv4f32: 3953 case AArch64::FMULDrr: 3954 case AArch64::FMULSrr: 3955 case AArch64::FMULX32: 3956 case AArch64::FMULX64: 3957 case AArch64::FMULXv2f32: 3958 case AArch64::FMULXv2f64: 3959 case AArch64::FMULXv4f32: 3960 case AArch64::FMULv2f32: 3961 case AArch64::FMULv2f64: 3962 case AArch64::FMULv4f32: 3963 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; 3964 default: 3965 return false; 3966 } 3967 } 3968 3969 /// Find instructions that can be turned into madd. 3970 static bool getMaddPatterns(MachineInstr &Root, 3971 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 3972 unsigned Opc = Root.getOpcode(); 3973 MachineBasicBlock &MBB = *Root.getParent(); 3974 bool Found = false; 3975 3976 if (!isCombineInstrCandidate(Opc)) 3977 return false; 3978 if (isCombineInstrSettingFlag(Opc)) { 3979 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true); 3980 // When NZCV is live bail out. 3981 if (Cmp_NZCV == -1) 3982 return false; 3983 unsigned NewOpc = convertToNonFlagSettingOpc(Root); 3984 // When opcode can't change bail out. 3985 // CHECKME: do we miss any cases for opcode conversion? 3986 if (NewOpc == Opc) 3987 return false; 3988 Opc = NewOpc; 3989 } 3990 3991 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg, 3992 MachineCombinerPattern Pattern) { 3993 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) { 3994 Patterns.push_back(Pattern); 3995 Found = true; 3996 } 3997 }; 3998 3999 auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) { 4000 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) { 4001 Patterns.push_back(Pattern); 4002 Found = true; 4003 } 4004 }; 4005 4006 typedef MachineCombinerPattern MCP; 4007 4008 switch (Opc) { 4009 default: 4010 break; 4011 case AArch64::ADDWrr: 4012 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 4013 "ADDWrr does not have register operands"); 4014 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1); 4015 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2); 4016 break; 4017 case AArch64::ADDXrr: 4018 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1); 4019 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2); 4020 break; 4021 case AArch64::SUBWrr: 4022 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1); 4023 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2); 4024 break; 4025 case AArch64::SUBXrr: 4026 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1); 4027 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2); 4028 break; 4029 case AArch64::ADDWri: 4030 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1); 4031 break; 4032 case AArch64::ADDXri: 4033 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1); 4034 break; 4035 case AArch64::SUBWri: 4036 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1); 4037 break; 4038 case AArch64::SUBXri: 4039 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1); 4040 break; 4041 case AArch64::ADDv8i8: 4042 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1); 4043 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2); 4044 break; 4045 case 
AArch64::ADDv16i8: 4046 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1); 4047 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2); 4048 break; 4049 case AArch64::ADDv4i16: 4050 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1); 4051 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2); 4052 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1); 4053 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2); 4054 break; 4055 case AArch64::ADDv8i16: 4056 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1); 4057 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2); 4058 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1); 4059 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2); 4060 break; 4061 case AArch64::ADDv2i32: 4062 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1); 4063 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2); 4064 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1); 4065 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2); 4066 break; 4067 case AArch64::ADDv4i32: 4068 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1); 4069 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2); 4070 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1); 4071 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2); 4072 break; 4073 case AArch64::SUBv8i8: 4074 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1); 4075 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2); 4076 break; 4077 case AArch64::SUBv16i8: 4078 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1); 4079 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2); 4080 break; 4081 case AArch64::SUBv4i16: 4082 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1); 4083 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2); 4084 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1); 4085 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2); 4086 break; 4087 case AArch64::SUBv8i16: 4088 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1); 4089 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2); 4090 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1); 4091 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2); 4092 break; 4093 case AArch64::SUBv2i32: 4094 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1); 4095 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2); 4096 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1); 4097 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2); 4098 break; 4099 case AArch64::SUBv4i32: 4100 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1); 4101 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2); 4102 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1); 4103 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2); 4104 break; 4105 } 4106 return Found; 4107 } 4108 /// Floating-Point Support 4109 4110 /// Find instructions that can be turned into madd. 
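/// Illustrative example (hypothetical virtual registers; the root is only
/// considered when UnsafeFPMath or FPOpFusion::Fast is in effect, see
/// isCombineInstrCandidateFP):
///   %3:fpr32 = FMULSrr %1, %2
///   %4:fpr32 = FADDSrr killed %3, %0
/// is recorded as MachineCombinerPattern::FMULADDS_OP1 and later rewritten
/// by genAlternativeCodeSequence into a single FMADDSrrr.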
4111 static bool getFMAPatterns(MachineInstr &Root, 4112 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 4113 4114 if (!isCombineInstrCandidateFP(Root)) 4115 return false; 4116 4117 MachineBasicBlock &MBB = *Root.getParent(); 4118 bool Found = false; 4119 4120 auto Match = [&](int Opcode, int Operand, 4121 MachineCombinerPattern Pattern) -> bool { 4122 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) { 4123 Patterns.push_back(Pattern); 4124 return true; 4125 } 4126 return false; 4127 }; 4128 4129 typedef MachineCombinerPattern MCP; 4130 4131 switch (Root.getOpcode()) { 4132 default: 4133 assert(false && "Unsupported FP instruction in combiner\n"); 4134 break; 4135 case AArch64::FADDHrr: 4136 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 4137 "FADDHrr does not have register operands"); 4138 4139 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1); 4140 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2); 4141 break; 4142 case AArch64::FADDSrr: 4143 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 4144 "FADDSrr does not have register operands"); 4145 4146 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) || 4147 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1); 4148 4149 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) || 4150 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2); 4151 break; 4152 case AArch64::FADDDrr: 4153 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) || 4154 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1); 4155 4156 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) || 4157 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2); 4158 break; 4159 case AArch64::FADDv4f16: 4160 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) || 4161 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1); 4162 4163 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) || 4164 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2); 4165 break; 4166 case AArch64::FADDv8f16: 4167 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) || 4168 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1); 4169 4170 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) || 4171 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2); 4172 break; 4173 case AArch64::FADDv2f32: 4174 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) || 4175 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1); 4176 4177 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) || 4178 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2); 4179 break; 4180 case AArch64::FADDv2f64: 4181 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) || 4182 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1); 4183 4184 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) || 4185 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2); 4186 break; 4187 case AArch64::FADDv4f32: 4188 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) || 4189 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1); 4190 4191 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) || 4192 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2); 4193 break; 4194 case AArch64::FSUBHrr: 4195 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1); 4196 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2); 4197 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1); 4198 break; 
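// Note for the FSUB cases (illustrative): a multiply feeding operand 1
// corresponds to (a * b) - c, a multiply feeding operand 2 to c - (a * b),
// and an FNMUL feeding operand 1 to -(a * b) - c. The fused opcode for each
// pattern is chosen later in genAlternativeCodeSequence.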
4199 case AArch64::FSUBSrr: 4200 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1); 4201 4202 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) || 4203 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2); 4204 4205 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1); 4206 break; 4207 case AArch64::FSUBDrr: 4208 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1); 4209 4210 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) || 4211 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2); 4212 4213 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1); 4214 break; 4215 case AArch64::FSUBv4f16: 4216 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) || 4217 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2); 4218 4219 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) || 4220 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1); 4221 break; 4222 case AArch64::FSUBv8f16: 4223 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) || 4224 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2); 4225 4226 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) || 4227 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1); 4228 break; 4229 case AArch64::FSUBv2f32: 4230 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) || 4231 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2); 4232 4233 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) || 4234 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1); 4235 break; 4236 case AArch64::FSUBv2f64: 4237 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) || 4238 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2); 4239 4240 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) || 4241 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1); 4242 break; 4243 case AArch64::FSUBv4f32: 4244 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) || 4245 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2); 4246 4247 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) || 4248 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1); 4249 break; 4250 } 4251 return Found; 4252 } 4253 4254 /// Return true when a code sequence can improve throughput. It 4255 /// should be called only for instructions in loops. 
4256 /// \param Pattern - combiner pattern 4257 bool AArch64InstrInfo::isThroughputPattern( 4258 MachineCombinerPattern Pattern) const { 4259 switch (Pattern) { 4260 default: 4261 break; 4262 case MachineCombinerPattern::FMULADDH_OP1: 4263 case MachineCombinerPattern::FMULADDH_OP2: 4264 case MachineCombinerPattern::FMULSUBH_OP1: 4265 case MachineCombinerPattern::FMULSUBH_OP2: 4266 case MachineCombinerPattern::FMULADDS_OP1: 4267 case MachineCombinerPattern::FMULADDS_OP2: 4268 case MachineCombinerPattern::FMULSUBS_OP1: 4269 case MachineCombinerPattern::FMULSUBS_OP2: 4270 case MachineCombinerPattern::FMULADDD_OP1: 4271 case MachineCombinerPattern::FMULADDD_OP2: 4272 case MachineCombinerPattern::FMULSUBD_OP1: 4273 case MachineCombinerPattern::FMULSUBD_OP2: 4274 case MachineCombinerPattern::FNMULSUBH_OP1: 4275 case MachineCombinerPattern::FNMULSUBS_OP1: 4276 case MachineCombinerPattern::FNMULSUBD_OP1: 4277 case MachineCombinerPattern::FMLAv4i16_indexed_OP1: 4278 case MachineCombinerPattern::FMLAv4i16_indexed_OP2: 4279 case MachineCombinerPattern::FMLAv8i16_indexed_OP1: 4280 case MachineCombinerPattern::FMLAv8i16_indexed_OP2: 4281 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 4282 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 4283 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 4284 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 4285 case MachineCombinerPattern::FMLAv4f16_OP2: 4286 case MachineCombinerPattern::FMLAv4f16_OP1: 4287 case MachineCombinerPattern::FMLAv8f16_OP1: 4288 case MachineCombinerPattern::FMLAv8f16_OP2: 4289 case MachineCombinerPattern::FMLAv2f32_OP2: 4290 case MachineCombinerPattern::FMLAv2f32_OP1: 4291 case MachineCombinerPattern::FMLAv2f64_OP1: 4292 case MachineCombinerPattern::FMLAv2f64_OP2: 4293 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 4294 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 4295 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 4296 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 4297 case MachineCombinerPattern::FMLAv4f32_OP1: 4298 case MachineCombinerPattern::FMLAv4f32_OP2: 4299 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 4300 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 4301 case MachineCombinerPattern::FMLSv4i16_indexed_OP1: 4302 case MachineCombinerPattern::FMLSv4i16_indexed_OP2: 4303 case MachineCombinerPattern::FMLSv8i16_indexed_OP1: 4304 case MachineCombinerPattern::FMLSv8i16_indexed_OP2: 4305 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 4306 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 4307 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 4308 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 4309 case MachineCombinerPattern::FMLSv4f16_OP1: 4310 case MachineCombinerPattern::FMLSv4f16_OP2: 4311 case MachineCombinerPattern::FMLSv8f16_OP1: 4312 case MachineCombinerPattern::FMLSv8f16_OP2: 4313 case MachineCombinerPattern::FMLSv2f32_OP2: 4314 case MachineCombinerPattern::FMLSv2f64_OP2: 4315 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 4316 case MachineCombinerPattern::FMLSv4f32_OP2: 4317 case MachineCombinerPattern::MULADDv8i8_OP1: 4318 case MachineCombinerPattern::MULADDv8i8_OP2: 4319 case MachineCombinerPattern::MULADDv16i8_OP1: 4320 case MachineCombinerPattern::MULADDv16i8_OP2: 4321 case MachineCombinerPattern::MULADDv4i16_OP1: 4322 case MachineCombinerPattern::MULADDv4i16_OP2: 4323 case MachineCombinerPattern::MULADDv8i16_OP1: 4324 case MachineCombinerPattern::MULADDv8i16_OP2: 4325 case MachineCombinerPattern::MULADDv2i32_OP1: 4326 case 
MachineCombinerPattern::MULADDv2i32_OP2: 4327 case MachineCombinerPattern::MULADDv4i32_OP1: 4328 case MachineCombinerPattern::MULADDv4i32_OP2: 4329 case MachineCombinerPattern::MULSUBv8i8_OP1: 4330 case MachineCombinerPattern::MULSUBv8i8_OP2: 4331 case MachineCombinerPattern::MULSUBv16i8_OP1: 4332 case MachineCombinerPattern::MULSUBv16i8_OP2: 4333 case MachineCombinerPattern::MULSUBv4i16_OP1: 4334 case MachineCombinerPattern::MULSUBv4i16_OP2: 4335 case MachineCombinerPattern::MULSUBv8i16_OP1: 4336 case MachineCombinerPattern::MULSUBv8i16_OP2: 4337 case MachineCombinerPattern::MULSUBv2i32_OP1: 4338 case MachineCombinerPattern::MULSUBv2i32_OP2: 4339 case MachineCombinerPattern::MULSUBv4i32_OP1: 4340 case MachineCombinerPattern::MULSUBv4i32_OP2: 4341 case MachineCombinerPattern::MULADDv4i16_indexed_OP1: 4342 case MachineCombinerPattern::MULADDv4i16_indexed_OP2: 4343 case MachineCombinerPattern::MULADDv8i16_indexed_OP1: 4344 case MachineCombinerPattern::MULADDv8i16_indexed_OP2: 4345 case MachineCombinerPattern::MULADDv2i32_indexed_OP1: 4346 case MachineCombinerPattern::MULADDv2i32_indexed_OP2: 4347 case MachineCombinerPattern::MULADDv4i32_indexed_OP1: 4348 case MachineCombinerPattern::MULADDv4i32_indexed_OP2: 4349 case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: 4350 case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: 4351 case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: 4352 case MachineCombinerPattern::MULSUBv8i16_indexed_OP2: 4353 case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: 4354 case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: 4355 case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: 4356 case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: 4357 return true; 4358 } // end switch (Pattern) 4359 return false; 4360 } 4361 /// Return true when there is potentially a faster code sequence for an 4362 /// instruction chain ending in \p Root. All potential patterns are listed in 4363 /// the \p Pattern vector. Pattern should be sorted in priority order since the 4364 /// pattern evaluator stops checking as soon as it finds a faster sequence. 4365 4366 bool AArch64InstrInfo::getMachineCombinerPatterns( 4367 MachineInstr &Root, 4368 SmallVectorImpl<MachineCombinerPattern> &Patterns) const { 4369 // Integer patterns 4370 if (getMaddPatterns(Root, Patterns)) 4371 return true; 4372 // Floating point patterns 4373 if (getFMAPatterns(Root, Patterns)) 4374 return true; 4375 4376 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); 4377 } 4378 4379 enum class FMAInstKind { Default, Indexed, Accumulator }; 4380 /// genFusedMultiply - Generate fused multiply instructions. 4381 /// This function supports both integer and floating point instructions. 4382 /// A typical example: 4383 /// F|MUL I=A,B,0 4384 /// F|ADD R,I,C 4385 /// ==> F|MADD R,A,B,C 4386 /// \param MF Containing MachineFunction 4387 /// \param MRI Register information 4388 /// \param TII Target information 4389 /// \param Root is the F|ADD instruction 4390 /// \param [out] InsInstrs is a vector of machine instructions and will 4391 /// contain the generated madd instruction 4392 /// \param IdxMulOpd is index of operand in Root that is the result of 4393 /// the F|MUL. In the example above IdxMulOpd is 1. 
4394 /// \param MaddOpc the opcode of the f|madd instruction
4395 /// \param RC Register class of operands
4396 /// \param kind Kind of FMA instruction (addressing mode) to be generated
4397 /// \param ReplacedAddend is the result register from the instruction
4398 /// replacing the non-combined operand, if any.
4399 static MachineInstr *
4400 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
4401 const TargetInstrInfo *TII, MachineInstr &Root,
4402 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
4403 unsigned MaddOpc, const TargetRegisterClass *RC,
4404 FMAInstKind kind = FMAInstKind::Default,
4405 const Register *ReplacedAddend = nullptr) {
4406 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4407
4408 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
4409 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4410 Register ResultReg = Root.getOperand(0).getReg();
4411 Register SrcReg0 = MUL->getOperand(1).getReg();
4412 bool Src0IsKill = MUL->getOperand(1).isKill();
4413 Register SrcReg1 = MUL->getOperand(2).getReg();
4414 bool Src1IsKill = MUL->getOperand(2).isKill();
4415
4416 unsigned SrcReg2;
4417 bool Src2IsKill;
4418 if (ReplacedAddend) {
4419 // If we just generated a new addend, we must be its only use.
4420 SrcReg2 = *ReplacedAddend;
4421 Src2IsKill = true;
4422 } else {
4423 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4424 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4425 }
4426
4427 if (Register::isVirtualRegister(ResultReg))
4428 MRI.constrainRegClass(ResultReg, RC);
4429 if (Register::isVirtualRegister(SrcReg0))
4430 MRI.constrainRegClass(SrcReg0, RC);
4431 if (Register::isVirtualRegister(SrcReg1))
4432 MRI.constrainRegClass(SrcReg1, RC);
4433 if (Register::isVirtualRegister(SrcReg2))
4434 MRI.constrainRegClass(SrcReg2, RC);
4435
4436 MachineInstrBuilder MIB;
4437 if (kind == FMAInstKind::Default)
4438 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4439 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4440 .addReg(SrcReg1, getKillRegState(Src1IsKill))
4441 .addReg(SrcReg2, getKillRegState(Src2IsKill));
4442 else if (kind == FMAInstKind::Indexed)
4443 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4444 .addReg(SrcReg2, getKillRegState(Src2IsKill))
4445 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4446 .addReg(SrcReg1, getKillRegState(Src1IsKill))
4447 .addImm(MUL->getOperand(3).getImm());
4448 else if (kind == FMAInstKind::Accumulator)
4449 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4450 .addReg(SrcReg2, getKillRegState(Src2IsKill))
4451 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4452 .addReg(SrcReg1, getKillRegState(Src1IsKill));
4453 else
4454 assert(false && "Invalid FMA instruction kind \n");
4455 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
4456 InsInstrs.push_back(MIB);
4457 return MUL;
4458 }
4459
4460 /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
4461 /// instructions.
4462 /// 4463 /// \see genFusedMultiply 4464 static MachineInstr *genFusedMultiplyAcc( 4465 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, 4466 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, 4467 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) { 4468 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, 4469 FMAInstKind::Accumulator); 4470 } 4471 4472 /// genNeg - Helper to generate an intermediate negation of the second operand 4473 /// of Root 4474 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, 4475 const TargetInstrInfo *TII, MachineInstr &Root, 4476 SmallVectorImpl<MachineInstr *> &InsInstrs, 4477 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, 4478 unsigned MnegOpc, const TargetRegisterClass *RC) { 4479 Register NewVR = MRI.createVirtualRegister(RC); 4480 MachineInstrBuilder MIB = 4481 BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR) 4482 .add(Root.getOperand(2)); 4483 InsInstrs.push_back(MIB); 4484 4485 assert(InstrIdxForVirtReg.empty()); 4486 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4487 4488 return NewVR; 4489 } 4490 4491 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate 4492 /// instructions with an additional negation of the accumulator 4493 static MachineInstr *genFusedMultiplyAccNeg( 4494 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, 4495 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, 4496 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd, 4497 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) { 4498 assert(IdxMulOpd == 1); 4499 4500 Register NewVR = 4501 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC); 4502 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, 4503 FMAInstKind::Accumulator, &NewVR); 4504 } 4505 4506 /// genFusedMultiplyIdx - Helper to generate fused multiply accumulate 4507 /// instructions. 
4508 ///
4509 /// \see genFusedMultiply
4510 static MachineInstr *genFusedMultiplyIdx(
4511 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4512 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4513 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
4514 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4515 FMAInstKind::Indexed);
4516 }
4517
4518 /// genFusedMultiplyIdxNeg - Helper to generate fused multiply indexed
4519 /// instructions with an additional negation of the accumulator
4520 static MachineInstr *genFusedMultiplyIdxNeg(
4521 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4522 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4523 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
4524 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
4525 assert(IdxMulOpd == 1);
4526
4527 Register NewVR =
4528 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
4529
4530 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4531 FMAInstKind::Indexed, &NewVR);
4532 }
4533
4534 /// genMaddR - Generate madd instruction and combine mul and add using
4535 /// an extra virtual register
4536 /// Example - an ADD intermediate needs to be stored in a register:
4537 /// MUL I=A,B,0
4538 /// ADD R,I,Imm
4539 /// ==> ORR V, ZR, Imm
4540 /// ==> MADD R,A,B,V
4541 /// \param MF Containing MachineFunction
4542 /// \param MRI Register information
4543 /// \param TII Target information
4544 /// \param Root is the ADD instruction
4545 /// \param [out] InsInstrs is a vector of machine instructions and will
4546 /// contain the generated madd instruction
4547 /// \param IdxMulOpd is index of operand in Root that is the result of
4548 /// the MUL. In the example above IdxMulOpd is 1.
4549 /// \param MaddOpc the opcode of the madd instruction
4550 /// \param VR is a virtual register that holds the value of an ADD operand
4551 /// (V in the example above).
4552 /// \param RC Register class of operands 4553 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, 4554 const TargetInstrInfo *TII, MachineInstr &Root, 4555 SmallVectorImpl<MachineInstr *> &InsInstrs, 4556 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, 4557 const TargetRegisterClass *RC) { 4558 assert(IdxMulOpd == 1 || IdxMulOpd == 2); 4559 4560 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); 4561 Register ResultReg = Root.getOperand(0).getReg(); 4562 Register SrcReg0 = MUL->getOperand(1).getReg(); 4563 bool Src0IsKill = MUL->getOperand(1).isKill(); 4564 Register SrcReg1 = MUL->getOperand(2).getReg(); 4565 bool Src1IsKill = MUL->getOperand(2).isKill(); 4566 4567 if (Register::isVirtualRegister(ResultReg)) 4568 MRI.constrainRegClass(ResultReg, RC); 4569 if (Register::isVirtualRegister(SrcReg0)) 4570 MRI.constrainRegClass(SrcReg0, RC); 4571 if (Register::isVirtualRegister(SrcReg1)) 4572 MRI.constrainRegClass(SrcReg1, RC); 4573 if (Register::isVirtualRegister(VR)) 4574 MRI.constrainRegClass(VR, RC); 4575 4576 MachineInstrBuilder MIB = 4577 BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 4578 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 4579 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 4580 .addReg(VR); 4581 // Insert the MADD 4582 InsInstrs.push_back(MIB); 4583 return MUL; 4584 } 4585 4586 /// When getMachineCombinerPatterns() finds potential patterns, 4587 /// this function generates the instructions that could replace the 4588 /// original code sequence 4589 void AArch64InstrInfo::genAlternativeCodeSequence( 4590 MachineInstr &Root, MachineCombinerPattern Pattern, 4591 SmallVectorImpl<MachineInstr *> &InsInstrs, 4592 SmallVectorImpl<MachineInstr *> &DelInstrs, 4593 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { 4594 MachineBasicBlock &MBB = *Root.getParent(); 4595 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 4596 MachineFunction &MF = *MBB.getParent(); 4597 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 4598 4599 MachineInstr *MUL; 4600 const TargetRegisterClass *RC; 4601 unsigned Opc; 4602 switch (Pattern) { 4603 default: 4604 // Reassociate instructions. 
4605 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 4606 DelInstrs, InstrIdxForVirtReg); 4607 return; 4608 case MachineCombinerPattern::MULADDW_OP1: 4609 case MachineCombinerPattern::MULADDX_OP1: 4610 // MUL I=A,B,0 4611 // ADD R,I,C 4612 // ==> MADD R,A,B,C 4613 // --- Create(MADD); 4614 if (Pattern == MachineCombinerPattern::MULADDW_OP1) { 4615 Opc = AArch64::MADDWrrr; 4616 RC = &AArch64::GPR32RegClass; 4617 } else { 4618 Opc = AArch64::MADDXrrr; 4619 RC = &AArch64::GPR64RegClass; 4620 } 4621 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4622 break; 4623 case MachineCombinerPattern::MULADDW_OP2: 4624 case MachineCombinerPattern::MULADDX_OP2: 4625 // MUL I=A,B,0 4626 // ADD R,C,I 4627 // ==> MADD R,A,B,C 4628 // --- Create(MADD); 4629 if (Pattern == MachineCombinerPattern::MULADDW_OP2) { 4630 Opc = AArch64::MADDWrrr; 4631 RC = &AArch64::GPR32RegClass; 4632 } else { 4633 Opc = AArch64::MADDXrrr; 4634 RC = &AArch64::GPR64RegClass; 4635 } 4636 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4637 break; 4638 case MachineCombinerPattern::MULADDWI_OP1: 4639 case MachineCombinerPattern::MULADDXI_OP1: { 4640 // MUL I=A,B,0 4641 // ADD R,I,Imm 4642 // ==> ORR V, ZR, Imm 4643 // ==> MADD R,A,B,V 4644 // --- Create(MADD); 4645 const TargetRegisterClass *OrrRC; 4646 unsigned BitSize, OrrOpc, ZeroReg; 4647 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) { 4648 OrrOpc = AArch64::ORRWri; 4649 OrrRC = &AArch64::GPR32spRegClass; 4650 BitSize = 32; 4651 ZeroReg = AArch64::WZR; 4652 Opc = AArch64::MADDWrrr; 4653 RC = &AArch64::GPR32RegClass; 4654 } else { 4655 OrrOpc = AArch64::ORRXri; 4656 OrrRC = &AArch64::GPR64spRegClass; 4657 BitSize = 64; 4658 ZeroReg = AArch64::XZR; 4659 Opc = AArch64::MADDXrrr; 4660 RC = &AArch64::GPR64RegClass; 4661 } 4662 Register NewVR = MRI.createVirtualRegister(OrrRC); 4663 uint64_t Imm = Root.getOperand(2).getImm(); 4664 4665 if (Root.getOperand(3).isImm()) { 4666 unsigned Val = Root.getOperand(3).getImm(); 4667 Imm = Imm << Val; 4668 } 4669 uint64_t UImm = SignExtend64(Imm, BitSize); 4670 uint64_t Encoding; 4671 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { 4672 MachineInstrBuilder MIB1 = 4673 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 4674 .addReg(ZeroReg) 4675 .addImm(Encoding); 4676 InsInstrs.push_back(MIB1); 4677 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4678 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4679 } 4680 break; 4681 } 4682 case MachineCombinerPattern::MULSUBW_OP1: 4683 case MachineCombinerPattern::MULSUBX_OP1: { 4684 // MUL I=A,B,0 4685 // SUB R,I, C 4686 // ==> SUB V, 0, C 4687 // ==> MADD R,A,B,V // = -C + A*B 4688 // --- Create(MADD); 4689 const TargetRegisterClass *SubRC; 4690 unsigned SubOpc, ZeroReg; 4691 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) { 4692 SubOpc = AArch64::SUBWrr; 4693 SubRC = &AArch64::GPR32spRegClass; 4694 ZeroReg = AArch64::WZR; 4695 Opc = AArch64::MADDWrrr; 4696 RC = &AArch64::GPR32RegClass; 4697 } else { 4698 SubOpc = AArch64::SUBXrr; 4699 SubRC = &AArch64::GPR64spRegClass; 4700 ZeroReg = AArch64::XZR; 4701 Opc = AArch64::MADDXrrr; 4702 RC = &AArch64::GPR64RegClass; 4703 } 4704 Register NewVR = MRI.createVirtualRegister(SubRC); 4705 // SUB NewVR, 0, C 4706 MachineInstrBuilder MIB1 = 4707 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) 4708 .addReg(ZeroReg) 4709 .add(Root.getOperand(2)); 4710 InsInstrs.push_back(MIB1); 4711 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4712 MUL 
= genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4713 break; 4714 } 4715 case MachineCombinerPattern::MULSUBW_OP2: 4716 case MachineCombinerPattern::MULSUBX_OP2: 4717 // MUL I=A,B,0 4718 // SUB R,C,I 4719 // ==> MSUB R,A,B,C (computes C - A*B) 4720 // --- Create(MSUB); 4721 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) { 4722 Opc = AArch64::MSUBWrrr; 4723 RC = &AArch64::GPR32RegClass; 4724 } else { 4725 Opc = AArch64::MSUBXrrr; 4726 RC = &AArch64::GPR64RegClass; 4727 } 4728 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4729 break; 4730 case MachineCombinerPattern::MULSUBWI_OP1: 4731 case MachineCombinerPattern::MULSUBXI_OP1: { 4732 // MUL I=A,B,0 4733 // SUB R,I, Imm 4734 // ==> ORR V, ZR, -Imm 4735 // ==> MADD R,A,B,V // = -Imm + A*B 4736 // --- Create(MADD); 4737 const TargetRegisterClass *OrrRC; 4738 unsigned BitSize, OrrOpc, ZeroReg; 4739 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) { 4740 OrrOpc = AArch64::ORRWri; 4741 OrrRC = &AArch64::GPR32spRegClass; 4742 BitSize = 32; 4743 ZeroReg = AArch64::WZR; 4744 Opc = AArch64::MADDWrrr; 4745 RC = &AArch64::GPR32RegClass; 4746 } else { 4747 OrrOpc = AArch64::ORRXri; 4748 OrrRC = &AArch64::GPR64spRegClass; 4749 BitSize = 64; 4750 ZeroReg = AArch64::XZR; 4751 Opc = AArch64::MADDXrrr; 4752 RC = &AArch64::GPR64RegClass; 4753 } 4754 Register NewVR = MRI.createVirtualRegister(OrrRC); 4755 uint64_t Imm = Root.getOperand(2).getImm(); 4756 if (Root.getOperand(3).isImm()) { 4757 unsigned Val = Root.getOperand(3).getImm(); 4758 Imm = Imm << Val; 4759 } 4760 uint64_t UImm = SignExtend64(-Imm, BitSize); 4761 uint64_t Encoding; 4762 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { 4763 MachineInstrBuilder MIB1 = 4764 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 4765 .addReg(ZeroReg) 4766 .addImm(Encoding); 4767 InsInstrs.push_back(MIB1); 4768 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4769 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4770 } 4771 break; 4772 } 4773 4774 case MachineCombinerPattern::MULADDv8i8_OP1: 4775 Opc = AArch64::MLAv8i8; 4776 RC = &AArch64::FPR64RegClass; 4777 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4778 break; 4779 case MachineCombinerPattern::MULADDv8i8_OP2: 4780 Opc = AArch64::MLAv8i8; 4781 RC = &AArch64::FPR64RegClass; 4782 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4783 break; 4784 case MachineCombinerPattern::MULADDv16i8_OP1: 4785 Opc = AArch64::MLAv16i8; 4786 RC = &AArch64::FPR128RegClass; 4787 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4788 break; 4789 case MachineCombinerPattern::MULADDv16i8_OP2: 4790 Opc = AArch64::MLAv16i8; 4791 RC = &AArch64::FPR128RegClass; 4792 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4793 break; 4794 case MachineCombinerPattern::MULADDv4i16_OP1: 4795 Opc = AArch64::MLAv4i16; 4796 RC = &AArch64::FPR64RegClass; 4797 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4798 break; 4799 case MachineCombinerPattern::MULADDv4i16_OP2: 4800 Opc = AArch64::MLAv4i16; 4801 RC = &AArch64::FPR64RegClass; 4802 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4803 break; 4804 case MachineCombinerPattern::MULADDv8i16_OP1: 4805 Opc = AArch64::MLAv8i16; 4806 RC = &AArch64::FPR128RegClass; 4807 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4808 break; 4809 case MachineCombinerPattern::MULADDv8i16_OP2: 4810 Opc = AArch64::MLAv8i16; 4811 RC = 
&AArch64::FPR128RegClass; 4812 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4813 break; 4814 case MachineCombinerPattern::MULADDv2i32_OP1: 4815 Opc = AArch64::MLAv2i32; 4816 RC = &AArch64::FPR64RegClass; 4817 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4818 break; 4819 case MachineCombinerPattern::MULADDv2i32_OP2: 4820 Opc = AArch64::MLAv2i32; 4821 RC = &AArch64::FPR64RegClass; 4822 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4823 break; 4824 case MachineCombinerPattern::MULADDv4i32_OP1: 4825 Opc = AArch64::MLAv4i32; 4826 RC = &AArch64::FPR128RegClass; 4827 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4828 break; 4829 case MachineCombinerPattern::MULADDv4i32_OP2: 4830 Opc = AArch64::MLAv4i32; 4831 RC = &AArch64::FPR128RegClass; 4832 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4833 break; 4834 4835 case MachineCombinerPattern::MULSUBv8i8_OP1: 4836 Opc = AArch64::MLAv8i8; 4837 RC = &AArch64::FPR64RegClass; 4838 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 4839 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8, 4840 RC); 4841 break; 4842 case MachineCombinerPattern::MULSUBv8i8_OP2: 4843 Opc = AArch64::MLSv8i8; 4844 RC = &AArch64::FPR64RegClass; 4845 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4846 break; 4847 case MachineCombinerPattern::MULSUBv16i8_OP1: 4848 Opc = AArch64::MLAv16i8; 4849 RC = &AArch64::FPR128RegClass; 4850 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 4851 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8, 4852 RC); 4853 break; 4854 case MachineCombinerPattern::MULSUBv16i8_OP2: 4855 Opc = AArch64::MLSv16i8; 4856 RC = &AArch64::FPR128RegClass; 4857 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4858 break; 4859 case MachineCombinerPattern::MULSUBv4i16_OP1: 4860 Opc = AArch64::MLAv4i16; 4861 RC = &AArch64::FPR64RegClass; 4862 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 4863 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16, 4864 RC); 4865 break; 4866 case MachineCombinerPattern::MULSUBv4i16_OP2: 4867 Opc = AArch64::MLSv4i16; 4868 RC = &AArch64::FPR64RegClass; 4869 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4870 break; 4871 case MachineCombinerPattern::MULSUBv8i16_OP1: 4872 Opc = AArch64::MLAv8i16; 4873 RC = &AArch64::FPR128RegClass; 4874 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 4875 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16, 4876 RC); 4877 break; 4878 case MachineCombinerPattern::MULSUBv8i16_OP2: 4879 Opc = AArch64::MLSv8i16; 4880 RC = &AArch64::FPR128RegClass; 4881 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4882 break; 4883 case MachineCombinerPattern::MULSUBv2i32_OP1: 4884 Opc = AArch64::MLAv2i32; 4885 RC = &AArch64::FPR64RegClass; 4886 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 4887 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32, 4888 RC); 4889 break; 4890 case MachineCombinerPattern::MULSUBv2i32_OP2: 4891 Opc = AArch64::MLSv2i32; 4892 RC = &AArch64::FPR64RegClass; 4893 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4894 break; 4895 case MachineCombinerPattern::MULSUBv4i32_OP1: 4896 Opc = AArch64::MLAv4i32; 4897 RC = &AArch64::FPR128RegClass; 4898 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 4899 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32, 4900 RC); 4901 break; 4902 case MachineCombinerPattern::MULSUBv4i32_OP2: 4903 Opc = 
AArch64::MLSv4i32; 4904 RC = &AArch64::FPR128RegClass; 4905 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4906 break; 4907 4908 case MachineCombinerPattern::MULADDv4i16_indexed_OP1: 4909 Opc = AArch64::MLAv4i16_indexed; 4910 RC = &AArch64::FPR64RegClass; 4911 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4912 break; 4913 case MachineCombinerPattern::MULADDv4i16_indexed_OP2: 4914 Opc = AArch64::MLAv4i16_indexed; 4915 RC = &AArch64::FPR64RegClass; 4916 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4917 break; 4918 case MachineCombinerPattern::MULADDv8i16_indexed_OP1: 4919 Opc = AArch64::MLAv8i16_indexed; 4920 RC = &AArch64::FPR128RegClass; 4921 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4922 break; 4923 case MachineCombinerPattern::MULADDv8i16_indexed_OP2: 4924 Opc = AArch64::MLAv8i16_indexed; 4925 RC = &AArch64::FPR128RegClass; 4926 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4927 break; 4928 case MachineCombinerPattern::MULADDv2i32_indexed_OP1: 4929 Opc = AArch64::MLAv2i32_indexed; 4930 RC = &AArch64::FPR64RegClass; 4931 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4932 break; 4933 case MachineCombinerPattern::MULADDv2i32_indexed_OP2: 4934 Opc = AArch64::MLAv2i32_indexed; 4935 RC = &AArch64::FPR64RegClass; 4936 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4937 break; 4938 case MachineCombinerPattern::MULADDv4i32_indexed_OP1: 4939 Opc = AArch64::MLAv4i32_indexed; 4940 RC = &AArch64::FPR128RegClass; 4941 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4942 break; 4943 case MachineCombinerPattern::MULADDv4i32_indexed_OP2: 4944 Opc = AArch64::MLAv4i32_indexed; 4945 RC = &AArch64::FPR128RegClass; 4946 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4947 break; 4948 4949 case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: 4950 Opc = AArch64::MLAv4i16_indexed; 4951 RC = &AArch64::FPR64RegClass; 4952 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 4953 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16, 4954 RC); 4955 break; 4956 case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: 4957 Opc = AArch64::MLSv4i16_indexed; 4958 RC = &AArch64::FPR64RegClass; 4959 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4960 break; 4961 case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: 4962 Opc = AArch64::MLAv8i16_indexed; 4963 RC = &AArch64::FPR128RegClass; 4964 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 4965 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16, 4966 RC); 4967 break; 4968 case MachineCombinerPattern::MULSUBv8i16_indexed_OP2: 4969 Opc = AArch64::MLSv8i16_indexed; 4970 RC = &AArch64::FPR128RegClass; 4971 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4972 break; 4973 case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: 4974 Opc = AArch64::MLAv2i32_indexed; 4975 RC = &AArch64::FPR64RegClass; 4976 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 4977 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32, 4978 RC); 4979 break; 4980 case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: 4981 Opc = AArch64::MLSv2i32_indexed; 4982 RC = &AArch64::FPR64RegClass; 4983 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4984 break; 4985 case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: 4986 Opc = AArch64::MLAv4i32_indexed; 4987 RC = &AArch64::FPR128RegClass; 4988 MUL = genFusedMultiplyIdxNeg(MF, 
MRI, TII, Root, InsInstrs, 4989 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32, 4990 RC); 4991 break; 4992 case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: 4993 Opc = AArch64::MLSv4i32_indexed; 4994 RC = &AArch64::FPR128RegClass; 4995 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4996 break; 4997 4998 // Floating Point Support 4999 case MachineCombinerPattern::FMULADDH_OP1: 5000 Opc = AArch64::FMADDHrrr; 5001 RC = &AArch64::FPR16RegClass; 5002 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5003 break; 5004 case MachineCombinerPattern::FMULADDS_OP1: 5005 Opc = AArch64::FMADDSrrr; 5006 RC = &AArch64::FPR32RegClass; 5007 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5008 break; 5009 case MachineCombinerPattern::FMULADDD_OP1: 5010 Opc = AArch64::FMADDDrrr; 5011 RC = &AArch64::FPR64RegClass; 5012 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5013 break; 5014 5015 case MachineCombinerPattern::FMULADDH_OP2: 5016 Opc = AArch64::FMADDHrrr; 5017 RC = &AArch64::FPR16RegClass; 5018 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5019 break; 5020 case MachineCombinerPattern::FMULADDS_OP2: 5021 Opc = AArch64::FMADDSrrr; 5022 RC = &AArch64::FPR32RegClass; 5023 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5024 break; 5025 case MachineCombinerPattern::FMULADDD_OP2: 5026 Opc = AArch64::FMADDDrrr; 5027 RC = &AArch64::FPR64RegClass; 5028 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5029 break; 5030 5031 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 5032 Opc = AArch64::FMLAv1i32_indexed; 5033 RC = &AArch64::FPR32RegClass; 5034 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5035 FMAInstKind::Indexed); 5036 break; 5037 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 5038 Opc = AArch64::FMLAv1i32_indexed; 5039 RC = &AArch64::FPR32RegClass; 5040 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5041 FMAInstKind::Indexed); 5042 break; 5043 5044 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 5045 Opc = AArch64::FMLAv1i64_indexed; 5046 RC = &AArch64::FPR64RegClass; 5047 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5048 FMAInstKind::Indexed); 5049 break; 5050 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 5051 Opc = AArch64::FMLAv1i64_indexed; 5052 RC = &AArch64::FPR64RegClass; 5053 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5054 FMAInstKind::Indexed); 5055 break; 5056 5057 case MachineCombinerPattern::FMLAv4i16_indexed_OP1: 5058 RC = &AArch64::FPR64RegClass; 5059 Opc = AArch64::FMLAv4i16_indexed; 5060 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5061 FMAInstKind::Indexed); 5062 break; 5063 case MachineCombinerPattern::FMLAv4f16_OP1: 5064 RC = &AArch64::FPR64RegClass; 5065 Opc = AArch64::FMLAv4f16; 5066 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5067 FMAInstKind::Accumulator); 5068 break; 5069 case MachineCombinerPattern::FMLAv4i16_indexed_OP2: 5070 RC = &AArch64::FPR64RegClass; 5071 Opc = AArch64::FMLAv4i16_indexed; 5072 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5073 FMAInstKind::Indexed); 5074 break; 5075 case MachineCombinerPattern::FMLAv4f16_OP2: 5076 RC = &AArch64::FPR64RegClass; 5077 Opc = AArch64::FMLAv4f16; 5078 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5079 FMAInstKind::Accumulator); 5080 break; 5081 5082 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 5083 case 
MachineCombinerPattern::FMLAv2f32_OP1: 5084 RC = &AArch64::FPR64RegClass; 5085 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) { 5086 Opc = AArch64::FMLAv2i32_indexed; 5087 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5088 FMAInstKind::Indexed); 5089 } else { 5090 Opc = AArch64::FMLAv2f32; 5091 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5092 FMAInstKind::Accumulator); 5093 } 5094 break; 5095 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 5096 case MachineCombinerPattern::FMLAv2f32_OP2: 5097 RC = &AArch64::FPR64RegClass; 5098 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) { 5099 Opc = AArch64::FMLAv2i32_indexed; 5100 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5101 FMAInstKind::Indexed); 5102 } else { 5103 Opc = AArch64::FMLAv2f32; 5104 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5105 FMAInstKind::Accumulator); 5106 } 5107 break; 5108 5109 case MachineCombinerPattern::FMLAv8i16_indexed_OP1: 5110 RC = &AArch64::FPR128RegClass; 5111 Opc = AArch64::FMLAv8i16_indexed; 5112 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5113 FMAInstKind::Indexed); 5114 break; 5115 case MachineCombinerPattern::FMLAv8f16_OP1: 5116 RC = &AArch64::FPR128RegClass; 5117 Opc = AArch64::FMLAv8f16; 5118 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5119 FMAInstKind::Accumulator); 5120 break; 5121 case MachineCombinerPattern::FMLAv8i16_indexed_OP2: 5122 RC = &AArch64::FPR128RegClass; 5123 Opc = AArch64::FMLAv8i16_indexed; 5124 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5125 FMAInstKind::Indexed); 5126 break; 5127 case MachineCombinerPattern::FMLAv8f16_OP2: 5128 RC = &AArch64::FPR128RegClass; 5129 Opc = AArch64::FMLAv8f16; 5130 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5131 FMAInstKind::Accumulator); 5132 break; 5133 5134 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 5135 case MachineCombinerPattern::FMLAv2f64_OP1: 5136 RC = &AArch64::FPR128RegClass; 5137 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) { 5138 Opc = AArch64::FMLAv2i64_indexed; 5139 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5140 FMAInstKind::Indexed); 5141 } else { 5142 Opc = AArch64::FMLAv2f64; 5143 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5144 FMAInstKind::Accumulator); 5145 } 5146 break; 5147 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 5148 case MachineCombinerPattern::FMLAv2f64_OP2: 5149 RC = &AArch64::FPR128RegClass; 5150 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) { 5151 Opc = AArch64::FMLAv2i64_indexed; 5152 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5153 FMAInstKind::Indexed); 5154 } else { 5155 Opc = AArch64::FMLAv2f64; 5156 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5157 FMAInstKind::Accumulator); 5158 } 5159 break; 5160 5161 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 5162 case MachineCombinerPattern::FMLAv4f32_OP1: 5163 RC = &AArch64::FPR128RegClass; 5164 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) { 5165 Opc = AArch64::FMLAv4i32_indexed; 5166 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5167 FMAInstKind::Indexed); 5168 } else { 5169 Opc = AArch64::FMLAv4f32; 5170 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5171 FMAInstKind::Accumulator); 5172 } 5173 break; 5174 5175 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 5176 case 
MachineCombinerPattern::FMLAv4f32_OP2: 5177 RC = &AArch64::FPR128RegClass; 5178 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) { 5179 Opc = AArch64::FMLAv4i32_indexed; 5180 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5181 FMAInstKind::Indexed); 5182 } else { 5183 Opc = AArch64::FMLAv4f32; 5184 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5185 FMAInstKind::Accumulator); 5186 } 5187 break; 5188 5189 case MachineCombinerPattern::FMULSUBH_OP1: 5190 Opc = AArch64::FNMSUBHrrr; 5191 RC = &AArch64::FPR16RegClass; 5192 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5193 break; 5194 case MachineCombinerPattern::FMULSUBS_OP1: 5195 Opc = AArch64::FNMSUBSrrr; 5196 RC = &AArch64::FPR32RegClass; 5197 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5198 break; 5199 case MachineCombinerPattern::FMULSUBD_OP1: 5200 Opc = AArch64::FNMSUBDrrr; 5201 RC = &AArch64::FPR64RegClass; 5202 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5203 break; 5204 5205 case MachineCombinerPattern::FNMULSUBH_OP1: 5206 Opc = AArch64::FNMADDHrrr; 5207 RC = &AArch64::FPR16RegClass; 5208 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5209 break; 5210 case MachineCombinerPattern::FNMULSUBS_OP1: 5211 Opc = AArch64::FNMADDSrrr; 5212 RC = &AArch64::FPR32RegClass; 5213 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5214 break; 5215 case MachineCombinerPattern::FNMULSUBD_OP1: 5216 Opc = AArch64::FNMADDDrrr; 5217 RC = &AArch64::FPR64RegClass; 5218 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5219 break; 5220 5221 case MachineCombinerPattern::FMULSUBH_OP2: 5222 Opc = AArch64::FMSUBHrrr; 5223 RC = &AArch64::FPR16RegClass; 5224 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5225 break; 5226 case MachineCombinerPattern::FMULSUBS_OP2: 5227 Opc = AArch64::FMSUBSrrr; 5228 RC = &AArch64::FPR32RegClass; 5229 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5230 break; 5231 case MachineCombinerPattern::FMULSUBD_OP2: 5232 Opc = AArch64::FMSUBDrrr; 5233 RC = &AArch64::FPR64RegClass; 5234 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5235 break; 5236 5237 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 5238 Opc = AArch64::FMLSv1i32_indexed; 5239 RC = &AArch64::FPR32RegClass; 5240 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5241 FMAInstKind::Indexed); 5242 break; 5243 5244 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 5245 Opc = AArch64::FMLSv1i64_indexed; 5246 RC = &AArch64::FPR64RegClass; 5247 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5248 FMAInstKind::Indexed); 5249 break; 5250 5251 case MachineCombinerPattern::FMLSv4f16_OP1: 5252 case MachineCombinerPattern::FMLSv4i16_indexed_OP1: { 5253 RC = &AArch64::FPR64RegClass; 5254 Register NewVR = MRI.createVirtualRegister(RC); 5255 MachineInstrBuilder MIB1 = 5256 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR) 5257 .add(Root.getOperand(2)); 5258 InsInstrs.push_back(MIB1); 5259 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5260 if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) { 5261 Opc = AArch64::FMLAv4f16; 5262 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5263 FMAInstKind::Accumulator, &NewVR); 5264 } else { 5265 Opc = AArch64::FMLAv4i16_indexed; 5266 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5267 FMAInstKind::Indexed, &NewVR); 5268 } 5269 
break; 5270 } 5271 case MachineCombinerPattern::FMLSv4f16_OP2: 5272 RC = &AArch64::FPR64RegClass; 5273 Opc = AArch64::FMLSv4f16; 5274 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5275 FMAInstKind::Accumulator); 5276 break; 5277 case MachineCombinerPattern::FMLSv4i16_indexed_OP2: 5278 RC = &AArch64::FPR64RegClass; 5279 Opc = AArch64::FMLSv4i16_indexed; 5280 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5281 FMAInstKind::Indexed); 5282 break; 5283 5284 case MachineCombinerPattern::FMLSv2f32_OP2: 5285 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 5286 RC = &AArch64::FPR64RegClass; 5287 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) { 5288 Opc = AArch64::FMLSv2i32_indexed; 5289 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5290 FMAInstKind::Indexed); 5291 } else { 5292 Opc = AArch64::FMLSv2f32; 5293 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5294 FMAInstKind::Accumulator); 5295 } 5296 break; 5297 5298 case MachineCombinerPattern::FMLSv8f16_OP1: 5299 case MachineCombinerPattern::FMLSv8i16_indexed_OP1: { 5300 RC = &AArch64::FPR128RegClass; 5301 Register NewVR = MRI.createVirtualRegister(RC); 5302 MachineInstrBuilder MIB1 = 5303 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR) 5304 .add(Root.getOperand(2)); 5305 InsInstrs.push_back(MIB1); 5306 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5307 if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) { 5308 Opc = AArch64::FMLAv8f16; 5309 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5310 FMAInstKind::Accumulator, &NewVR); 5311 } else { 5312 Opc = AArch64::FMLAv8i16_indexed; 5313 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5314 FMAInstKind::Indexed, &NewVR); 5315 } 5316 break; 5317 } 5318 case MachineCombinerPattern::FMLSv8f16_OP2: 5319 RC = &AArch64::FPR128RegClass; 5320 Opc = AArch64::FMLSv8f16; 5321 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5322 FMAInstKind::Accumulator); 5323 break; 5324 case MachineCombinerPattern::FMLSv8i16_indexed_OP2: 5325 RC = &AArch64::FPR128RegClass; 5326 Opc = AArch64::FMLSv8i16_indexed; 5327 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5328 FMAInstKind::Indexed); 5329 break; 5330 5331 case MachineCombinerPattern::FMLSv2f64_OP2: 5332 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 5333 RC = &AArch64::FPR128RegClass; 5334 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) { 5335 Opc = AArch64::FMLSv2i64_indexed; 5336 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5337 FMAInstKind::Indexed); 5338 } else { 5339 Opc = AArch64::FMLSv2f64; 5340 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5341 FMAInstKind::Accumulator); 5342 } 5343 break; 5344 5345 case MachineCombinerPattern::FMLSv4f32_OP2: 5346 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 5347 RC = &AArch64::FPR128RegClass; 5348 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) { 5349 Opc = AArch64::FMLSv4i32_indexed; 5350 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5351 FMAInstKind::Indexed); 5352 } else { 5353 Opc = AArch64::FMLSv4f32; 5354 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5355 FMAInstKind::Accumulator); 5356 } 5357 break; 5358 case MachineCombinerPattern::FMLSv2f32_OP1: 5359 case MachineCombinerPattern::FMLSv2i32_indexed_OP1: { 5360 RC = &AArch64::FPR64RegClass; 5361 Register NewVR = MRI.createVirtualRegister(RC); 5362 
MachineInstrBuilder MIB1 = 5363 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR) 5364 .add(Root.getOperand(2)); 5365 InsInstrs.push_back(MIB1); 5366 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5367 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) { 5368 Opc = AArch64::FMLAv2i32_indexed; 5369 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5370 FMAInstKind::Indexed, &NewVR); 5371 } else { 5372 Opc = AArch64::FMLAv2f32; 5373 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5374 FMAInstKind::Accumulator, &NewVR); 5375 } 5376 break; 5377 } 5378 case MachineCombinerPattern::FMLSv4f32_OP1: 5379 case MachineCombinerPattern::FMLSv4i32_indexed_OP1: { 5380 RC = &AArch64::FPR128RegClass; 5381 Register NewVR = MRI.createVirtualRegister(RC); 5382 MachineInstrBuilder MIB1 = 5383 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR) 5384 .add(Root.getOperand(2)); 5385 InsInstrs.push_back(MIB1); 5386 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5387 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) { 5388 Opc = AArch64::FMLAv4i32_indexed; 5389 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5390 FMAInstKind::Indexed, &NewVR); 5391 } else { 5392 Opc = AArch64::FMLAv4f32; 5393 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5394 FMAInstKind::Accumulator, &NewVR); 5395 } 5396 break; 5397 } 5398 case MachineCombinerPattern::FMLSv2f64_OP1: 5399 case MachineCombinerPattern::FMLSv2i64_indexed_OP1: { 5400 RC = &AArch64::FPR128RegClass; 5401 Register NewVR = MRI.createVirtualRegister(RC); 5402 MachineInstrBuilder MIB1 = 5403 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR) 5404 .add(Root.getOperand(2)); 5405 InsInstrs.push_back(MIB1); 5406 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5407 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) { 5408 Opc = AArch64::FMLAv2i64_indexed; 5409 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5410 FMAInstKind::Indexed, &NewVR); 5411 } else { 5412 Opc = AArch64::FMLAv2f64; 5413 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5414 FMAInstKind::Accumulator, &NewVR); 5415 } 5416 break; 5417 } 5418 } // end switch (Pattern) 5419 // Record MUL and ADD/SUB for deletion 5420 DelInstrs.push_back(MUL); 5421 DelInstrs.push_back(&Root); 5422 } 5423 5424 /// Replace csincr-branch sequence by simple conditional branch 5425 /// 5426 /// Examples: 5427 /// 1. \code 5428 /// csinc w9, wzr, wzr, <condition code> 5429 /// tbnz w9, #0, 0x44 5430 /// \endcode 5431 /// to 5432 /// \code 5433 /// b.<inverted condition code> 5434 /// \endcode 5435 /// 5436 /// 2. \code 5437 /// csinc w9, wzr, wzr, <condition code> 5438 /// tbz w9, #0, 0x44 5439 /// \endcode 5440 /// to 5441 /// \code 5442 /// b.<condition code> 5443 /// \endcode 5444 /// 5445 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the 5446 /// compare's constant operand is power of 2. 
5447 /// 5448 /// Examples: 5449 /// \code 5450 /// and w8, w8, #0x400 5451 /// cbnz w8, L1 5452 /// \endcode 5453 /// to 5454 /// \code 5455 /// tbnz w8, #10, L1 5456 /// \endcode 5457 /// 5458 /// \param MI Conditional Branch 5459 /// \return True when the simple conditional branch is generated 5460 /// 5461 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { 5462 bool IsNegativeBranch = false; 5463 bool IsTestAndBranch = false; 5464 unsigned TargetBBInMI = 0; 5465 switch (MI.getOpcode()) { 5466 default: 5467 llvm_unreachable("Unknown branch instruction?"); 5468 case AArch64::Bcc: 5469 return false; 5470 case AArch64::CBZW: 5471 case AArch64::CBZX: 5472 TargetBBInMI = 1; 5473 break; 5474 case AArch64::CBNZW: 5475 case AArch64::CBNZX: 5476 TargetBBInMI = 1; 5477 IsNegativeBranch = true; 5478 break; 5479 case AArch64::TBZW: 5480 case AArch64::TBZX: 5481 TargetBBInMI = 2; 5482 IsTestAndBranch = true; 5483 break; 5484 case AArch64::TBNZW: 5485 case AArch64::TBNZX: 5486 TargetBBInMI = 2; 5487 IsNegativeBranch = true; 5488 IsTestAndBranch = true; 5489 break; 5490 } 5491 // So we increment a zero register and test for bits other 5492 // than bit 0? Conservatively bail out in case the verifier 5493 // missed this case. 5494 if (IsTestAndBranch && MI.getOperand(1).getImm()) 5495 return false; 5496 5497 // Find Definition. 5498 assert(MI.getParent() && "Incomplete machine instruciton\n"); 5499 MachineBasicBlock *MBB = MI.getParent(); 5500 MachineFunction *MF = MBB->getParent(); 5501 MachineRegisterInfo *MRI = &MF->getRegInfo(); 5502 Register VReg = MI.getOperand(0).getReg(); 5503 if (!Register::isVirtualRegister(VReg)) 5504 return false; 5505 5506 MachineInstr *DefMI = MRI->getVRegDef(VReg); 5507 5508 // Look through COPY instructions to find definition. 5509 while (DefMI->isCopy()) { 5510 Register CopyVReg = DefMI->getOperand(1).getReg(); 5511 if (!MRI->hasOneNonDBGUse(CopyVReg)) 5512 return false; 5513 if (!MRI->hasOneDef(CopyVReg)) 5514 return false; 5515 DefMI = MRI->getVRegDef(CopyVReg); 5516 } 5517 5518 switch (DefMI->getOpcode()) { 5519 default: 5520 return false; 5521 // Fold AND into a TBZ/TBNZ if constant operand is power of 2. 5522 case AArch64::ANDWri: 5523 case AArch64::ANDXri: { 5524 if (IsTestAndBranch) 5525 return false; 5526 if (DefMI->getParent() != MBB) 5527 return false; 5528 if (!MRI->hasOneNonDBGUse(VReg)) 5529 return false; 5530 5531 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri); 5532 uint64_t Mask = AArch64_AM::decodeLogicalImmediate( 5533 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64); 5534 if (!isPowerOf2_64(Mask)) 5535 return false; 5536 5537 MachineOperand &MO = DefMI->getOperand(1); 5538 Register NewReg = MO.getReg(); 5539 if (!Register::isVirtualRegister(NewReg)) 5540 return false; 5541 5542 assert(!MRI->def_empty(NewReg) && "Register must be defined."); 5543 5544 MachineBasicBlock &RefToMBB = *MBB; 5545 MachineBasicBlock *TBB = MI.getOperand(1).getMBB(); 5546 DebugLoc DL = MI.getDebugLoc(); 5547 unsigned Imm = Log2_64(Mask); 5548 unsigned Opc = (Imm < 32) 5549 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW) 5550 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX); 5551 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc)) 5552 .addReg(NewReg) 5553 .addImm(Imm) 5554 .addMBB(TBB); 5555 // Register lives on to the CBZ now. 5556 MO.setIsKill(false); 5557 5558 // For immediate smaller than 32, we need to use the 32-bit 5559 // variant (W) in all cases. Indeed the 64-bit variant does not 5560 // allow to encode them. 
5561 // Therefore, if the input register is 64-bit, we need to take the 5562 // 32-bit sub-part. 5563 if (!Is32Bit && Imm < 32) 5564 NewMI->getOperand(0).setSubReg(AArch64::sub_32); 5565 MI.eraseFromParent(); 5566 return true; 5567 } 5568 // Look for CSINC 5569 case AArch64::CSINCWr: 5570 case AArch64::CSINCXr: { 5571 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR && 5572 DefMI->getOperand(2).getReg() == AArch64::WZR) && 5573 !(DefMI->getOperand(1).getReg() == AArch64::XZR && 5574 DefMI->getOperand(2).getReg() == AArch64::XZR)) 5575 return false; 5576 5577 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1) 5578 return false; 5579 5580 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm(); 5581 // Convert only when the condition code is not modified between 5582 // the CSINC and the branch. The CC may be used by other 5583 // instructions in between. 5584 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write)) 5585 return false; 5586 MachineBasicBlock &RefToMBB = *MBB; 5587 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB(); 5588 DebugLoc DL = MI.getDebugLoc(); 5589 if (IsNegativeBranch) 5590 CC = AArch64CC::getInvertedCondCode(CC); 5591 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB); 5592 MI.eraseFromParent(); 5593 return true; 5594 } 5595 } 5596 } 5597 5598 std::pair<unsigned, unsigned> 5599 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 5600 const unsigned Mask = AArch64II::MO_FRAGMENT; 5601 return std::make_pair(TF & Mask, TF & ~Mask); 5602 } 5603 5604 ArrayRef<std::pair<unsigned, const char *>> 5605 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 5606 using namespace AArch64II; 5607 5608 static const std::pair<unsigned, const char *> TargetFlags[] = { 5609 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"}, 5610 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"}, 5611 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"}, 5612 {MO_HI12, "aarch64-hi12"}}; 5613 return makeArrayRef(TargetFlags); 5614 } 5615 5616 ArrayRef<std::pair<unsigned, const char *>> 5617 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { 5618 using namespace AArch64II; 5619 5620 static const std::pair<unsigned, const char *> TargetFlags[] = { 5621 {MO_COFFSTUB, "aarch64-coffstub"}, 5622 {MO_GOT, "aarch64-got"}, 5623 {MO_NC, "aarch64-nc"}, 5624 {MO_S, "aarch64-s"}, 5625 {MO_TLS, "aarch64-tls"}, 5626 {MO_DLLIMPORT, "aarch64-dllimport"}, 5627 {MO_PREL, "aarch64-prel"}, 5628 {MO_TAGGED, "aarch64-tagged"}}; 5629 return makeArrayRef(TargetFlags); 5630 } 5631 5632 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 5633 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { 5634 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 5635 {{MOSuppressPair, "aarch64-suppress-pair"}, 5636 {MOStridedAccess, "aarch64-strided-access"}}; 5637 return makeArrayRef(TargetFlags); 5638 } 5639 5640 /// Constants defining how certain sequences should be outlined. 5641 /// This encompasses how an outlined function should be called, and what kind of 5642 /// frame should be emitted for that outlined function. 5643 /// 5644 /// \p MachineOutlinerDefault implies that the function should be called with 5645 /// a save and restore of LR to the stack. 
5646 /// 5647 /// That is, 5648 /// 5649 /// I1 Save LR OUTLINED_FUNCTION: 5650 /// I2 --> BL OUTLINED_FUNCTION I1 5651 /// I3 Restore LR I2 5652 /// I3 5653 /// RET 5654 /// 5655 /// * Call construction overhead: 3 (save + BL + restore) 5656 /// * Frame construction overhead: 1 (ret) 5657 /// * Requires stack fixups? Yes 5658 /// 5659 /// \p MachineOutlinerTailCall implies that the function is being created from 5660 /// a sequence of instructions ending in a return. 5661 /// 5662 /// That is, 5663 /// 5664 /// I1 OUTLINED_FUNCTION: 5665 /// I2 --> B OUTLINED_FUNCTION I1 5666 /// RET I2 5667 /// RET 5668 /// 5669 /// * Call construction overhead: 1 (B) 5670 /// * Frame construction overhead: 0 (Return included in sequence) 5671 /// * Requires stack fixups? No 5672 /// 5673 /// \p MachineOutlinerNoLRSave implies that the function should be called using 5674 /// a BL instruction, but doesn't require LR to be saved and restored. This 5675 /// happens when LR is known to be dead. 5676 /// 5677 /// That is, 5678 /// 5679 /// I1 OUTLINED_FUNCTION: 5680 /// I2 --> BL OUTLINED_FUNCTION I1 5681 /// I3 I2 5682 /// I3 5683 /// RET 5684 /// 5685 /// * Call construction overhead: 1 (BL) 5686 /// * Frame construction overhead: 1 (RET) 5687 /// * Requires stack fixups? No 5688 /// 5689 /// \p MachineOutlinerThunk implies that the function is being created from 5690 /// a sequence of instructions ending in a call. The outlined function is 5691 /// called with a BL instruction, and the outlined function tail-calls the 5692 /// original call destination. 5693 /// 5694 /// That is, 5695 /// 5696 /// I1 OUTLINED_FUNCTION: 5697 /// I2 --> BL OUTLINED_FUNCTION I1 5698 /// BL f I2 5699 /// B f 5700 /// * Call construction overhead: 1 (BL) 5701 /// * Frame construction overhead: 0 5702 /// * Requires stack fixups? No 5703 /// 5704 /// \p MachineOutlinerRegSave implies that the function should be called with a 5705 /// save and restore of LR to an available register. This allows us to avoid 5706 /// stack fixups. Note that this outlining variant is compatible with the 5707 /// NoLRSave case. 5708 /// 5709 /// That is, 5710 /// 5711 /// I1 Save LR OUTLINED_FUNCTION: 5712 /// I2 --> BL OUTLINED_FUNCTION I1 5713 /// I3 Restore LR I2 5714 /// I3 5715 /// RET 5716 /// 5717 /// * Call construction overhead: 3 (save + BL + restore) 5718 /// * Frame construction overhead: 1 (ret) 5719 /// * Requires stack fixups? No 5720 enum MachineOutlinerClass { 5721 MachineOutlinerDefault, /// Emit a save, restore, call, and return. 5722 MachineOutlinerTailCall, /// Only emit a branch. 5723 MachineOutlinerNoLRSave, /// Emit a call and return. 5724 MachineOutlinerThunk, /// Emit a call and tail-call. 5725 MachineOutlinerRegSave /// Same as default, but save to a register. 5726 }; 5727 5728 enum MachineOutlinerMBBFlags { 5729 LRUnavailableSomewhere = 0x2, 5730 HasCalls = 0x4, 5731 UnsafeRegsDead = 0x8 5732 }; 5733 5734 unsigned 5735 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { 5736 assert(C.LRUWasSet && "LRU wasn't set?"); 5737 MachineFunction *MF = C.getMF(); 5738 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>( 5739 MF->getSubtarget().getRegisterInfo()); 5740 5741 // Check if there is an available register across the sequence that we can 5742 // use. 5743 for (unsigned Reg : AArch64::GPR64RegClass) { 5744 if (!ARI->isReservedReg(*MF, Reg) && 5745 Reg != AArch64::LR && // LR is not reserved, but don't use it. 
        Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
        Reg != AArch64::X17 && // Ditto for X17.
        C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
      return Reg;
  }

  // No suitable register. Return 0.
  return 0u;
}

static bool
outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
                                         const outliner::Candidate &b) {
  const Function &Fa = a.getMF()->getFunction();
  const Function &Fb = b.getMF()->getFunction();

  // If neither of the functions has the "sign-return-address" attribute,
  // their signing behaviour is equal.
  if (!Fa.hasFnAttribute("sign-return-address") &&
      !Fb.hasFnAttribute("sign-return-address")) {
    return true;
  }

  // If both functions have the "sign-return-address" attribute, their signing
  // behaviour is equal if the values of the attributes are equal.
  if (Fa.hasFnAttribute("sign-return-address") &&
      Fb.hasFnAttribute("sign-return-address")) {
    StringRef ScopeA =
        Fa.getFnAttribute("sign-return-address").getValueAsString();
    StringRef ScopeB =
        Fb.getFnAttribute("sign-return-address").getValueAsString();
    return ScopeA.equals(ScopeB);
  }

  // If function B doesn't have the "sign-return-address" attribute but A does,
  // the functions' signing behaviour is equal if A's value for
  // "sign-return-address" is "none" and vice versa.
  if (Fa.hasFnAttribute("sign-return-address")) {
    StringRef ScopeA =
        Fa.getFnAttribute("sign-return-address").getValueAsString();
    return ScopeA.equals("none");
  }

  if (Fb.hasFnAttribute("sign-return-address")) {
    StringRef ScopeB =
        Fb.getFnAttribute("sign-return-address").getValueAsString();
    return ScopeB.equals("none");
  }

  llvm_unreachable("Unknown combination of sign-return-address attributes");
}

static bool
outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
                                       const outliner::Candidate &b) {
  const Function &Fa = a.getMF()->getFunction();
  const Function &Fb = b.getMF()->getFunction();

  // If neither of the functions has the "sign-return-address-key" attribute,
  // their keys are equal.
  if (!Fa.hasFnAttribute("sign-return-address-key") &&
      !Fb.hasFnAttribute("sign-return-address-key")) {
    return true;
  }

  // If both functions have the "sign-return-address-key" attribute, their
  // keys are equal if the values of "sign-return-address-key" are equal.
  if (Fa.hasFnAttribute("sign-return-address-key") &&
      Fb.hasFnAttribute("sign-return-address-key")) {
    StringRef KeyA =
        Fa.getFnAttribute("sign-return-address-key").getValueAsString();
    StringRef KeyB =
        Fb.getFnAttribute("sign-return-address-key").getValueAsString();
    return KeyA.equals(KeyB);
  }

  // If B doesn't have the "sign-return-address-key" attribute, both keys are
  // equal if function A has the default key (a_key).
  if (Fa.hasFnAttribute("sign-return-address-key")) {
    StringRef KeyA =
        Fa.getFnAttribute("sign-return-address-key").getValueAsString();
    return KeyA.equals_lower("a_key");
  }

  if (Fb.hasFnAttribute("sign-return-address-key")) {
    StringRef KeyB =
        Fb.getFnAttribute("sign-return-address-key").getValueAsString();
    return KeyB.equals_lower("a_key");
  }

  llvm_unreachable("Unknown combination of sign-return-address-key attributes");
}

static bool
outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
                                    const outliner::Candidate &b) {
  const AArch64Subtarget &SubtargetA =
      a.getMF()->getSubtarget<AArch64Subtarget>();
  const AArch64Subtarget &SubtargetB =
      b.getMF()->getSubtarget<AArch64Subtarget>();
  return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
}

outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
  unsigned SequenceSize =
      std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
                      [this](unsigned Sum, const MachineInstr &MI) {
                        return Sum + getInstSizeInBytes(MI);
                      });
  unsigned NumBytesToCreateFrame = 0;

  // We only allow outlining for functions having exactly matching return
  // address signing attributes, i.e., all share the same value for the
  // attribute "sign-return-address" and all share the same type of key they
  // are signed with.
  // Additionally, we require all functions to simultaneously either support
  // v8.3a features or not. Otherwise an outlined function could get signed
  // using dedicated v8.3 instructions and a call from a function that doesn't
  // support v8.3 instructions would therefore be invalid.
  if (std::adjacent_find(
          RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
          [](const outliner::Candidate &a, const outliner::Candidate &b) {
            // Return true if a and b are non-equal w.r.t. return address
            // signing or support of v8.3a features.
            if (outliningCandidatesSigningScopeConsensus(a, b) &&
                outliningCandidatesSigningKeyConsensus(a, b) &&
                outliningCandidatesV8_3OpsConsensus(a, b)) {
              return false;
            }
            return true;
          }) != RepeatedSequenceLocs.end()) {
    return outliner::OutlinedFunction();
  }

  // Since at this point all candidates agree on their return address signing,
  // picking just one is fine. If the candidate functions potentially sign
  // their return addresses, the outlined function should do the same. Note
  // that in the case of "sign-return-address"="non-leaf" this is an
  // assumption: it is not necessarily true that the outlined function will
  // have to sign its return address, but this decision is made later, when
  // the decision to outline has already been made.
  // The same holds for the number of additional instructions we need: on
  // v8.3a, RET can be replaced by RETAA/RETAB and no AUT instruction is
  // necessary. However, at this point we don't know if the outlined function
  // will have a RET instruction, so we assume the worst.
  const Function &FCF = FirstCand.getMF()->getFunction();
  const TargetRegisterInfo &TRI = getRegisterInfo();
  if (FCF.hasFnAttribute("sign-return-address")) {
    // One PAC and one AUT instruction.
    NumBytesToCreateFrame += 8;

    // We have to check if sp modifying instructions would get outlined.
    // If so, we only allow outlining if sp is unchanged overall, so matching
    // sub and add instructions are okay to outline; all other sp
    // modifications are not.
    auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
      int SPValue = 0;
      MachineBasicBlock::iterator MBBI = C.front();
      for (;;) {
        if (MBBI->modifiesRegister(AArch64::SP, &TRI)) {
          switch (MBBI->getOpcode()) {
          case AArch64::ADDXri:
          case AArch64::ADDWri:
            assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
            assert(MBBI->getOperand(2).isImm() &&
                   "Expected operand to be immediate");
            assert(MBBI->getOperand(1).isReg() &&
                   "Expected operand to be a register");
            // Check if the add just increments sp. If so, we search for
            // matching sub instructions that decrement sp. If not, the
            // modification is illegal.
            if (MBBI->getOperand(1).getReg() == AArch64::SP)
              SPValue += MBBI->getOperand(2).getImm();
            else
              return true;
            break;
          case AArch64::SUBXri:
          case AArch64::SUBWri:
            assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
            assert(MBBI->getOperand(2).isImm() &&
                   "Expected operand to be immediate");
            assert(MBBI->getOperand(1).isReg() &&
                   "Expected operand to be a register");
            // Check if the sub just decrements sp. If so, we search for
            // matching add instructions that increment sp. If not, the
            // modification is illegal.
            if (MBBI->getOperand(1).getReg() == AArch64::SP)
              SPValue -= MBBI->getOperand(2).getImm();
            else
              return true;
            break;
          default:
            return true;
          }
        }
        if (MBBI == C.back())
          break;
        ++MBBI;
      }
      if (SPValue)
        return true;
      return false;
    };
    // Remove candidates with illegal stack modifying instructions.
    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
                                              RepeatedSequenceLocs.end(),
                                              hasIllegalSPModification),
                               RepeatedSequenceLocs.end());

    // If the sequence doesn't have enough candidates left, then we're done.
    if (RepeatedSequenceLocs.size() < 2)
      return outliner::OutlinedFunction();
  }

  // Properties about candidate MBBs that hold for all of them.
  unsigned FlagsSetInAll = 0xF;

  // Compute liveness information for each candidate, and set FlagsSetInAll.
  std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
                [&FlagsSetInAll](outliner::Candidate &C) {
                  FlagsSetInAll &= C.Flags;
                });

  // According to the AArch64 Procedure Call Standard, the following are
  // undefined on entry/exit from a function call:
  //
  // * Registers x16, x17, (and thus w16, w17)
  // * Condition codes (and thus the NZCV register)
  //
  // Because of this, we can't outline any sequence of instructions where one
  // of these registers is live into/across it. Thus, we need to delete those
  // candidates.
  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
    // If the unsafe registers in this block are all dead, then we don't need
    // to compute liveness here.
    if (C.Flags & UnsafeRegsDead)
      return false;
    C.initLRU(TRI);
    LiveRegUnits LRU = C.LRU;
    return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
            !LRU.available(AArch64::NZCV));
  };

  // Are there any candidates where those registers are live?
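  // (Illustrative example, not from the original source: if a candidate sits
  // between a CMP that sets the flags and the B.cc that consumes them, NZCV
  // is live across the sequence, and the candidate has to be dropped here,
  // because a call to the outlined function gives no guarantee about NZCV.)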
5993 if (!(FlagsSetInAll & UnsafeRegsDead)) { 5994 // Erase every candidate that violates the restrictions above. (It could be 5995 // true that we have viable candidates, so it's not worth bailing out in 5996 // the case that, say, 1 out of 20 candidates violate the restructions.) 5997 RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(), 5998 RepeatedSequenceLocs.end(), 5999 CantGuaranteeValueAcrossCall), 6000 RepeatedSequenceLocs.end()); 6001 6002 // If the sequence doesn't have enough candidates left, then we're done. 6003 if (RepeatedSequenceLocs.size() < 2) 6004 return outliner::OutlinedFunction(); 6005 } 6006 6007 // At this point, we have only "safe" candidates to outline. Figure out 6008 // frame + call instruction information. 6009 6010 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode(); 6011 6012 // Helper lambda which sets call information for every candidate. 6013 auto SetCandidateCallInfo = 6014 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { 6015 for (outliner::Candidate &C : RepeatedSequenceLocs) 6016 C.setCallInfo(CallID, NumBytesForCall); 6017 }; 6018 6019 unsigned FrameID = MachineOutlinerDefault; 6020 NumBytesToCreateFrame += 4; 6021 6022 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) { 6023 return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement"); 6024 }); 6025 6026 // We check to see if CFI Instructions are present, and if they are 6027 // we find the number of CFI Instructions in the candidates. 6028 unsigned CFICount = 0; 6029 MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front(); 6030 for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx(); 6031 Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) { 6032 const std::vector<MCCFIInstruction> &CFIInstructions = 6033 RepeatedSequenceLocs[0].getMF()->getFrameInstructions(); 6034 if (MBBI->isCFIInstruction()) { 6035 unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex(); 6036 MCCFIInstruction CFI = CFIInstructions[CFIIndex]; 6037 CFICount++; 6038 } 6039 MBBI++; 6040 } 6041 6042 // We compare the number of found CFI Instructions to the number of CFI 6043 // instructions in the parent function for each candidate. We must check this 6044 // since if we outline one of the CFI instructions in a function, we have to 6045 // outline them all for correctness. If we do not, the address offsets will be 6046 // incorrect between the two sections of the program. 6047 for (outliner::Candidate &C : RepeatedSequenceLocs) { 6048 std::vector<MCCFIInstruction> CFIInstructions = 6049 C.getMF()->getFrameInstructions(); 6050 6051 if (CFICount > 0 && CFICount != CFIInstructions.size()) 6052 return outliner::OutlinedFunction(); 6053 } 6054 6055 // Returns true if an instructions is safe to fix up, false otherwise. 6056 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) { 6057 if (MI.isCall()) 6058 return true; 6059 6060 if (!MI.modifiesRegister(AArch64::SP, &TRI) && 6061 !MI.readsRegister(AArch64::SP, &TRI)) 6062 return true; 6063 6064 // Any modification of SP will break our code to save/restore LR. 6065 // FIXME: We could handle some instructions which add a constant 6066 // offset to SP, with a bit more work. 6067 if (MI.modifiesRegister(AArch64::SP, &TRI)) 6068 return false; 6069 6070 // At this point, we have a stack instruction that we might need to 6071 // fix up. We'll handle it if it's a load or store. 6072 if (MI.mayLoadOrStore()) { 6073 const MachineOperand *Base; // Filled with the base operand of MI. 
6074 int64_t Offset; // Filled with the offset of MI. 6075 bool OffsetIsScalable; 6076 6077 // Does it allow us to offset the base operand and is the base the 6078 // register SP? 6079 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) || 6080 !Base->isReg() || Base->getReg() != AArch64::SP) 6081 return false; 6082 6083 // Fixe-up code below assumes bytes. 6084 if (OffsetIsScalable) 6085 return false; 6086 6087 // Find the minimum/maximum offset for this instruction and check 6088 // if fixing it up would be in range. 6089 int64_t MinOffset, 6090 MaxOffset; // Unscaled offsets for the instruction. 6091 TypeSize Scale(0U, false); // The scale to multiply the offsets by. 6092 unsigned DummyWidth; 6093 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset); 6094 6095 Offset += 16; // Update the offset to what it would be if we outlined. 6096 if (Offset < MinOffset * (int64_t)Scale.getFixedSize() || 6097 Offset > MaxOffset * (int64_t)Scale.getFixedSize()) 6098 return false; 6099 6100 // It's in range, so we can outline it. 6101 return true; 6102 } 6103 6104 // FIXME: Add handling for instructions like "add x0, sp, #8". 6105 6106 // We can't fix it up, so don't outline it. 6107 return false; 6108 }; 6109 6110 // True if it's possible to fix up each stack instruction in this sequence. 6111 // Important for frames/call variants that modify the stack. 6112 bool AllStackInstrsSafe = std::all_of( 6113 FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup); 6114 6115 // If the last instruction in any candidate is a terminator, then we should 6116 // tail call all of the candidates. 6117 if (RepeatedSequenceLocs[0].back()->isTerminator()) { 6118 FrameID = MachineOutlinerTailCall; 6119 NumBytesToCreateFrame = 0; 6120 SetCandidateCallInfo(MachineOutlinerTailCall, 4); 6121 } 6122 6123 else if (LastInstrOpcode == AArch64::BL || 6124 ((LastInstrOpcode == AArch64::BLR || 6125 LastInstrOpcode == AArch64::BLRNoIP) && 6126 !HasBTI)) { 6127 // FIXME: Do we need to check if the code after this uses the value of LR? 6128 FrameID = MachineOutlinerThunk; 6129 NumBytesToCreateFrame = 0; 6130 SetCandidateCallInfo(MachineOutlinerThunk, 4); 6131 } 6132 6133 else { 6134 // We need to decide how to emit calls + frames. We can always emit the same 6135 // frame if we don't need to save to the stack. If we have to save to the 6136 // stack, then we need a different frame. 6137 unsigned NumBytesNoStackCalls = 0; 6138 std::vector<outliner::Candidate> CandidatesWithoutStackFixups; 6139 6140 // Check if we have to save LR. 6141 for (outliner::Candidate &C : RepeatedSequenceLocs) { 6142 C.initLRU(TRI); 6143 6144 // If we have a noreturn caller, then we're going to be conservative and 6145 // say that we have to save LR. If we don't have a ret at the end of the 6146 // block, then we can't reason about liveness accurately. 6147 // 6148 // FIXME: We can probably do better than always disabling this in 6149 // noreturn functions by fixing up the liveness info. 6150 bool IsNoReturn = 6151 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn); 6152 6153 // Is LR available? If so, we don't need a save. 6154 if (C.LRU.available(AArch64::LR) && !IsNoReturn) { 6155 NumBytesNoStackCalls += 4; 6156 C.setCallInfo(MachineOutlinerNoLRSave, 4); 6157 CandidatesWithoutStackFixups.push_back(C); 6158 } 6159 6160 // Is an unused register available? If so, we won't modify the stack, so 6161 // we can outline with the same frame type as those that don't save LR. 
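      // (The 12 bytes assumed below correspond to a three-instruction call
      // sequence, as described for MachineOutlinerRegSave above: copy LR into
      // the spare register, BL to the outlined function, and copy it back;
      // each AArch64 instruction is 4 bytes.)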
6162 else if (findRegisterToSaveLRTo(C)) { 6163 NumBytesNoStackCalls += 12; 6164 C.setCallInfo(MachineOutlinerRegSave, 12); 6165 CandidatesWithoutStackFixups.push_back(C); 6166 } 6167 6168 // Is SP used in the sequence at all? If not, we don't have to modify 6169 // the stack, so we are guaranteed to get the same frame. 6170 else if (C.UsedInSequence.available(AArch64::SP)) { 6171 NumBytesNoStackCalls += 12; 6172 C.setCallInfo(MachineOutlinerDefault, 12); 6173 CandidatesWithoutStackFixups.push_back(C); 6174 } 6175 6176 // If we outline this, we need to modify the stack. Pretend we don't 6177 // outline this by saving all of its bytes. 6178 else { 6179 NumBytesNoStackCalls += SequenceSize; 6180 } 6181 } 6182 6183 // If there are no places where we have to save LR, then note that we 6184 // don't have to update the stack. Otherwise, give every candidate the 6185 // default call type, as long as it's safe to do so. 6186 if (!AllStackInstrsSafe || 6187 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) { 6188 RepeatedSequenceLocs = CandidatesWithoutStackFixups; 6189 FrameID = MachineOutlinerNoLRSave; 6190 } else { 6191 SetCandidateCallInfo(MachineOutlinerDefault, 12); 6192 } 6193 6194 // If we dropped all of the candidates, bail out here. 6195 if (RepeatedSequenceLocs.size() < 2) { 6196 RepeatedSequenceLocs.clear(); 6197 return outliner::OutlinedFunction(); 6198 } 6199 } 6200 6201 // Does every candidate's MBB contain a call? If so, then we might have a call 6202 // in the range. 6203 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { 6204 // Check if the range contains a call. These require a save + restore of the 6205 // link register. 6206 bool ModStackToSaveLR = false; 6207 if (std::any_of(FirstCand.front(), FirstCand.back(), 6208 [](const MachineInstr &MI) { return MI.isCall(); })) 6209 ModStackToSaveLR = true; 6210 6211 // Handle the last instruction separately. If this is a tail call, then the 6212 // last instruction is a call. We don't want to save + restore in this case. 6213 // However, it could be possible that the last instruction is a call without 6214 // it being valid to tail call this sequence. We should consider this as 6215 // well. 6216 else if (FrameID != MachineOutlinerThunk && 6217 FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall()) 6218 ModStackToSaveLR = true; 6219 6220 if (ModStackToSaveLR) { 6221 // We can't fix up the stack. Bail out. 6222 if (!AllStackInstrsSafe) { 6223 RepeatedSequenceLocs.clear(); 6224 return outliner::OutlinedFunction(); 6225 } 6226 6227 // Save + restore LR. 6228 NumBytesToCreateFrame += 8; 6229 } 6230 } 6231 6232 // If we have CFI instructions, we can only outline if the outlined section 6233 // can be a tail call 6234 if (FrameID != MachineOutlinerTailCall && CFICount > 0) 6235 return outliner::OutlinedFunction(); 6236 6237 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 6238 NumBytesToCreateFrame, FrameID); 6239 } 6240 6241 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom( 6242 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { 6243 const Function &F = MF.getFunction(); 6244 6245 // Can F be deduplicated by the linker? If it can, don't outline from it. 6246 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 6247 return false; 6248 6249 // Don't outline from functions with section markings; the program could 6250 // expect that all the code is in the named section. 6251 // FIXME: Allow outlining from multiple functions with the same section 6252 // marking. 
6253 if (F.hasSection()) 6254 return false; 6255 6256 // Outlining from functions with redzones is unsafe since the outliner may 6257 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't 6258 // outline from it. 6259 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 6260 if (!AFI || AFI->hasRedZone().getValueOr(true)) 6261 return false; 6262 6263 // FIXME: Teach the outliner to generate/handle Windows unwind info. 6264 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) 6265 return false; 6266 6267 // It's safe to outline from MF. 6268 return true; 6269 } 6270 6271 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, 6272 unsigned &Flags) const { 6273 // Check if LR is available through all of the MBB. If it's not, then set 6274 // a flag. 6275 assert(MBB.getParent()->getRegInfo().tracksLiveness() && 6276 "Suitable Machine Function for outlining must track liveness"); 6277 LiveRegUnits LRU(getRegisterInfo()); 6278 6279 std::for_each(MBB.rbegin(), MBB.rend(), 6280 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); 6281 6282 // Check if each of the unsafe registers are available... 6283 bool W16AvailableInBlock = LRU.available(AArch64::W16); 6284 bool W17AvailableInBlock = LRU.available(AArch64::W17); 6285 bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV); 6286 6287 // If all of these are dead (and not live out), we know we don't have to check 6288 // them later. 6289 if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock) 6290 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; 6291 6292 // Now, add the live outs to the set. 6293 LRU.addLiveOuts(MBB); 6294 6295 // If any of these registers is available in the MBB, but also a live out of 6296 // the block, then we know outlining is unsafe. 6297 if (W16AvailableInBlock && !LRU.available(AArch64::W16)) 6298 return false; 6299 if (W17AvailableInBlock && !LRU.available(AArch64::W17)) 6300 return false; 6301 if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV)) 6302 return false; 6303 6304 // Check if there's a call inside this MachineBasicBlock. If there is, then 6305 // set a flag. 6306 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) 6307 Flags |= MachineOutlinerMBBFlags::HasCalls; 6308 6309 MachineFunction *MF = MBB.getParent(); 6310 6311 // In the event that we outline, we may have to save LR. If there is an 6312 // available register in the MBB, then we'll always save LR there. Check if 6313 // this is true. 6314 bool CanSaveLR = false; 6315 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>( 6316 MF->getSubtarget().getRegisterInfo()); 6317 6318 // Check if there is an available register across the sequence that we can 6319 // use. 6320 for (unsigned Reg : AArch64::GPR64RegClass) { 6321 if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR && 6322 Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) { 6323 CanSaveLR = true; 6324 break; 6325 } 6326 } 6327 6328 // Check if we have a register we can save LR to, and if LR was used 6329 // somewhere. If both of those things are true, then we need to evaluate the 6330 // safety of outlining stack instructions later. 
6331 if (!CanSaveLR && !LRU.available(AArch64::LR)) 6332 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; 6333 6334 return true; 6335 } 6336 6337 outliner::InstrType 6338 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, 6339 unsigned Flags) const { 6340 MachineInstr &MI = *MIT; 6341 MachineBasicBlock *MBB = MI.getParent(); 6342 MachineFunction *MF = MBB->getParent(); 6343 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>(); 6344 6345 // Don't outline anything used for return address signing. The outlined 6346 // function will get signed later if needed 6347 switch (MI.getOpcode()) { 6348 case AArch64::PACIASP: 6349 case AArch64::PACIBSP: 6350 case AArch64::AUTIASP: 6351 case AArch64::AUTIBSP: 6352 case AArch64::RETAA: 6353 case AArch64::RETAB: 6354 case AArch64::EMITBKEY: 6355 return outliner::InstrType::Illegal; 6356 } 6357 6358 // Don't outline LOHs. 6359 if (FuncInfo->getLOHRelated().count(&MI)) 6360 return outliner::InstrType::Illegal; 6361 6362 // We can only outline these if we will tail call the outlined function, or 6363 // fix up the CFI offsets. Currently, CFI instructions are outlined only if 6364 // in a tail call. 6365 // 6366 // FIXME: If the proper fixups for the offset are implemented, this should be 6367 // possible. 6368 if (MI.isCFIInstruction()) 6369 return outliner::InstrType::Legal; 6370 6371 // Don't allow debug values to impact outlining type. 6372 if (MI.isDebugInstr() || MI.isIndirectDebugValue()) 6373 return outliner::InstrType::Invisible; 6374 6375 // At this point, KILL instructions don't really tell us much so we can go 6376 // ahead and skip over them. 6377 if (MI.isKill()) 6378 return outliner::InstrType::Invisible; 6379 6380 // Is this a terminator for a basic block? 6381 if (MI.isTerminator()) { 6382 6383 // Is this the end of a function? 6384 if (MI.getParent()->succ_empty()) 6385 return outliner::InstrType::Legal; 6386 6387 // It's not, so don't outline it. 6388 return outliner::InstrType::Illegal; 6389 } 6390 6391 // Make sure none of the operands are un-outlinable. 6392 for (const MachineOperand &MOP : MI.operands()) { 6393 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || 6394 MOP.isTargetIndex()) 6395 return outliner::InstrType::Illegal; 6396 6397 // If it uses LR or W30 explicitly, then don't touch it. 6398 if (MOP.isReg() && !MOP.isImplicit() && 6399 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30)) 6400 return outliner::InstrType::Illegal; 6401 } 6402 6403 // Special cases for instructions that can always be outlined, but will fail 6404 // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always 6405 // be outlined because they don't require a *specific* value to be in LR. 6406 if (MI.getOpcode() == AArch64::ADRP) 6407 return outliner::InstrType::Legal; 6408 6409 // If MI is a call we might be able to outline it. We don't want to outline 6410 // any calls that rely on the position of items on the stack. When we outline 6411 // something containing a call, we have to emit a save and restore of LR in 6412 // the outlined function. Currently, this always happens by saving LR to the 6413 // stack. Thus, if we outline, say, half the parameters for a function call 6414 // plus the call, then we'll break the callee's expectations for the layout 6415 // of the stack. 6416 // 6417 // FIXME: Allow calls to functions which construct a stack frame, as long 6418 // as they don't access arguments on the stack. 
  // FIXME: Figure out some way to analyze functions defined in other modules.
  // We should be able to compute the memory usage based on the IR calling
  // convention, even if we can't see the definition.
  if (MI.isCall()) {
    // Get the function associated with the call. Look at each operand and
    // find the one that represents the callee and get its name.
    const Function *Callee = nullptr;
    for (const MachineOperand &MOP : MI.operands()) {
      if (MOP.isGlobal()) {
        Callee = dyn_cast<Function>(MOP.getGlobal());
        break;
      }
    }

    // Never outline calls to mcount. There isn't any rule that would require
    // this, but the Linux kernel's "ftrace" feature depends on it.
    if (Callee && Callee->getName() == "\01_mcount")
      return outliner::InstrType::Illegal;

    // If we don't know anything about the callee, assume it depends on the
    // stack layout of the caller. In that case, it's only legal to outline
    // it as a tail-call. Explicitly list the call instructions we know about
    // so we don't get unexpected results with call pseudo-instructions.
    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    if (MI.getOpcode() == AArch64::BLR ||
        MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;

    if (!Callee)
      return UnknownCallOutlineType;

    // We have a function we have information about. Check if it's something
    // we can safely outline.
    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);

    // We don't know what's going on with the callee at all. Don't touch it.
    if (!CalleeMF)
      return UnknownCallOutlineType;

    // Check if we know anything about the callee saves on the function. If we
    // don't, then don't touch it, since that implies that we haven't
    // computed anything about its stack frame yet.
    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
        MFI.getNumObjects() > 0)
      return UnknownCallOutlineType;

    // At this point, we can say that CalleeMF ought not to pass anything on
    // the stack. Therefore, we can outline it.
    return outliner::InstrType::Legal;
  }

  // Don't outline positions.
  if (MI.isPosition())
    return outliner::InstrType::Illegal;

  // Don't touch the link register or W30.
  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
      MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    return outliner::InstrType::Illegal;

  // Don't outline BTI instructions, because that will prevent the outlining
  // site from being indirectly callable.
  if (MI.getOpcode() == AArch64::HINT) {
    int64_t Imm = MI.getOperand(0).getImm();
    if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
      return outliner::InstrType::Illegal;
  }

  return outliner::InstrType::Legal;
}

void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
  for (MachineInstr &MI : MBB) {
    const MachineOperand *Base;
    unsigned Width;
    int64_t Offset;
    bool OffsetIsScalable;

    // Is this a load or store with an immediate offset with SP as the base?
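    // (Illustrative example, not from the original source: the outlined frame
    // spills LR with "str x30, [sp, #-16]!", so an access such as
    //   ldr x0, [sp, #8]
    // in the outlined body has to become
    //   ldr x0, [sp, #24]
    // which is what the rescaling below computes.)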
6499 if (!MI.mayLoadOrStore() || 6500 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width, 6501 &RI) || 6502 (Base->isReg() && Base->getReg() != AArch64::SP)) 6503 continue; 6504 6505 // It is, so we have to fix it up. 6506 TypeSize Scale(0U, false); 6507 int64_t Dummy1, Dummy2; 6508 6509 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI); 6510 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!"); 6511 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2); 6512 assert(Scale != 0 && "Unexpected opcode!"); 6513 assert(!OffsetIsScalable && "Expected offset to be a byte offset"); 6514 6515 // We've pushed the return address to the stack, so add 16 to the offset. 6516 // This is safe, since we already checked if it would overflow when we 6517 // checked if this instruction was legal to outline. 6518 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize(); 6519 StackOffsetOperand.setImm(NewImm); 6520 } 6521 } 6522 6523 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, 6524 bool ShouldSignReturnAddr, 6525 bool ShouldSignReturnAddrWithAKey) { 6526 if (ShouldSignReturnAddr) { 6527 MachineBasicBlock::iterator MBBPAC = MBB.begin(); 6528 MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator(); 6529 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 6530 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 6531 DebugLoc DL; 6532 6533 if (MBBAUT != MBB.end()) 6534 DL = MBBAUT->getDebugLoc(); 6535 6536 // At the very beginning of the basic block we insert the following 6537 // depending on the key type 6538 // 6539 // a_key: b_key: 6540 // PACIASP EMITBKEY 6541 // CFI_INSTRUCTION PACIBSP 6542 // CFI_INSTRUCTION 6543 if (ShouldSignReturnAddrWithAKey) { 6544 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIASP)) 6545 .setMIFlag(MachineInstr::FrameSetup); 6546 } else { 6547 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY)) 6548 .setMIFlag(MachineInstr::FrameSetup); 6549 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIBSP)) 6550 .setMIFlag(MachineInstr::FrameSetup); 6551 } 6552 unsigned CFIIndex = 6553 MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); 6554 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION)) 6555 .addCFIIndex(CFIIndex) 6556 .setMIFlags(MachineInstr::FrameSetup); 6557 6558 // If v8.3a features are available we can replace a RET instruction by 6559 // RETAA or RETAB and omit the AUT instructions 6560 if (Subtarget.hasV8_3aOps() && MBBAUT != MBB.end() && 6561 MBBAUT->getOpcode() == AArch64::RET) { 6562 BuildMI(MBB, MBBAUT, DL, 6563 TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA 6564 : AArch64::RETAB)) 6565 .copyImplicitOps(*MBBAUT); 6566 MBB.erase(MBBAUT); 6567 } else { 6568 BuildMI(MBB, MBBAUT, DL, 6569 TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP 6570 : AArch64::AUTIBSP)) 6571 .setMIFlag(MachineInstr::FrameDestroy); 6572 } 6573 } 6574 } 6575 6576 void AArch64InstrInfo::buildOutlinedFrame( 6577 MachineBasicBlock &MBB, MachineFunction &MF, 6578 const outliner::OutlinedFunction &OF) const { 6579 6580 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>(); 6581 6582 if (OF.FrameConstructionID == MachineOutlinerTailCall) 6583 FI->setOutliningStyle("Tail Call"); 6584 else if (OF.FrameConstructionID == MachineOutlinerThunk) { 6585 // For thunk outlining, rewrite the last instruction from a call to a 6586 // tail-call. 
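    // (The trailing `bl`/`blr` is replaced by the TCRETURNdi or TCRETURNriALL
    //  pseudo built below, so the thunk itself never has to save and restore LR
    //  around the call it forwards to.)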
6587 MachineInstr *Call = &*--MBB.instr_end(); 6588 unsigned TailOpcode; 6589 if (Call->getOpcode() == AArch64::BL) { 6590 TailOpcode = AArch64::TCRETURNdi; 6591 } else { 6592 assert(Call->getOpcode() == AArch64::BLR || 6593 Call->getOpcode() == AArch64::BLRNoIP); 6594 TailOpcode = AArch64::TCRETURNriALL; 6595 } 6596 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode)) 6597 .add(Call->getOperand(0)) 6598 .addImm(0); 6599 MBB.insert(MBB.end(), TC); 6600 Call->eraseFromParent(); 6601 6602 FI->setOutliningStyle("Thunk"); 6603 } 6604 6605 bool IsLeafFunction = true; 6606 6607 // Is there a call in the outlined range? 6608 auto IsNonTailCall = [](const MachineInstr &MI) { 6609 return MI.isCall() && !MI.isReturn(); 6610 }; 6611 6612 if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) { 6613 // Fix up the instructions in the range, since we're going to modify the 6614 // stack. 6615 assert(OF.FrameConstructionID != MachineOutlinerDefault && 6616 "Can only fix up stack references once"); 6617 fixupPostOutline(MBB); 6618 6619 IsLeafFunction = false; 6620 6621 // LR has to be a live in so that we can save it. 6622 if (!MBB.isLiveIn(AArch64::LR)) 6623 MBB.addLiveIn(AArch64::LR); 6624 6625 MachineBasicBlock::iterator It = MBB.begin(); 6626 MachineBasicBlock::iterator Et = MBB.end(); 6627 6628 if (OF.FrameConstructionID == MachineOutlinerTailCall || 6629 OF.FrameConstructionID == MachineOutlinerThunk) 6630 Et = std::prev(MBB.end()); 6631 6632 // Insert a save before the outlined region 6633 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) 6634 .addReg(AArch64::SP, RegState::Define) 6635 .addReg(AArch64::LR) 6636 .addReg(AArch64::SP) 6637 .addImm(-16); 6638 It = MBB.insert(It, STRXpre); 6639 6640 const TargetSubtargetInfo &STI = MF.getSubtarget(); 6641 const MCRegisterInfo *MRI = STI.getRegisterInfo(); 6642 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true); 6643 6644 // Add a CFI saying the stack was moved 16 B down. 6645 int64_t StackPosEntry = 6646 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16)); 6647 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) 6648 .addCFIIndex(StackPosEntry) 6649 .setMIFlags(MachineInstr::FrameSetup); 6650 6651 // Add a CFI saying that the LR that we want to find is now 16 B higher than 6652 // before. 6653 int64_t LRPosEntry = 6654 MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16)); 6655 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) 6656 .addCFIIndex(LRPosEntry) 6657 .setMIFlags(MachineInstr::FrameSetup); 6658 6659 // Insert a restore before the terminator for the function. 6660 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) 6661 .addReg(AArch64::SP, RegState::Define) 6662 .addReg(AArch64::LR, RegState::Define) 6663 .addReg(AArch64::SP) 6664 .addImm(16); 6665 Et = MBB.insert(Et, LDRXpost); 6666 } 6667 6668 // If a bunch of candidates reach this point they must agree on their return 6669 // address signing. 
It is therefore enough to just consider the signing 6670 // behaviour of one of them 6671 const Function &CF = OF.Candidates.front().getMF()->getFunction(); 6672 bool ShouldSignReturnAddr = false; 6673 if (CF.hasFnAttribute("sign-return-address")) { 6674 StringRef Scope = 6675 CF.getFnAttribute("sign-return-address").getValueAsString(); 6676 if (Scope.equals("all")) 6677 ShouldSignReturnAddr = true; 6678 else if (Scope.equals("non-leaf") && !IsLeafFunction) 6679 ShouldSignReturnAddr = true; 6680 } 6681 6682 // a_key is the default 6683 bool ShouldSignReturnAddrWithAKey = true; 6684 if (CF.hasFnAttribute("sign-return-address-key")) { 6685 const StringRef Key = 6686 CF.getFnAttribute("sign-return-address-key").getValueAsString(); 6687 // Key can either be a_key or b_key 6688 assert((Key.equals_lower("a_key") || Key.equals_lower("b_key")) && 6689 "Return address signing key must be either a_key or b_key"); 6690 ShouldSignReturnAddrWithAKey = Key.equals_lower("a_key"); 6691 } 6692 6693 // If this is a tail call outlined function, then there's already a return. 6694 if (OF.FrameConstructionID == MachineOutlinerTailCall || 6695 OF.FrameConstructionID == MachineOutlinerThunk) { 6696 signOutlinedFunction(MF, MBB, ShouldSignReturnAddr, 6697 ShouldSignReturnAddrWithAKey); 6698 return; 6699 } 6700 6701 // It's not a tail call, so we have to insert the return ourselves. 6702 6703 // LR has to be a live in so that we can return to it. 6704 if (!MBB.isLiveIn(AArch64::LR)) 6705 MBB.addLiveIn(AArch64::LR); 6706 6707 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET)) 6708 .addReg(AArch64::LR); 6709 MBB.insert(MBB.end(), ret); 6710 6711 signOutlinedFunction(MF, MBB, ShouldSignReturnAddr, 6712 ShouldSignReturnAddrWithAKey); 6713 6714 FI->setOutliningStyle("Function"); 6715 6716 // Did we have to modify the stack by saving the link register? 6717 if (OF.FrameConstructionID != MachineOutlinerDefault) 6718 return; 6719 6720 // We modified the stack. 6721 // Walk over the basic block and fix up all the stack accesses. 6722 fixupPostOutline(MBB); 6723 } 6724 6725 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( 6726 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, 6727 MachineFunction &MF, const outliner::Candidate &C) const { 6728 6729 // Are we tail calling? 6730 if (C.CallConstructionID == MachineOutlinerTailCall) { 6731 // If yes, then we can just branch to the label. 6732 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi)) 6733 .addGlobalAddress(M.getNamedValue(MF.getName())) 6734 .addImm(0)); 6735 return It; 6736 } 6737 6738 // Are we saving the link register? 6739 if (C.CallConstructionID == MachineOutlinerNoLRSave || 6740 C.CallConstructionID == MachineOutlinerThunk) { 6741 // No, so just insert the call. 6742 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) 6743 .addGlobalAddress(M.getNamedValue(MF.getName()))); 6744 return It; 6745 } 6746 6747 // We want to return the spot where we inserted the call. 6748 MachineBasicBlock::iterator CallPt; 6749 6750 // Instructions for saving and restoring LR around the call instruction we're 6751 // going to insert. 6752 MachineInstr *Save; 6753 MachineInstr *Restore; 6754 // Can we save to a register? 6755 if (C.CallConstructionID == MachineOutlinerRegSave) { 6756 // FIXME: This logic should be sunk into a target-specific interface so that 6757 // we don't have to recompute the register. 
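    // (The two ORRXrs instructions built below are just the `mov xN, x30` and
    //  `mov x30, xN` aliases: LR is parked in the scratch register returned by
    //  findRegisterToSaveLRTo before the call and moved back afterwards.)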
6758 unsigned Reg = findRegisterToSaveLRTo(C); 6759 assert(Reg != 0 && "No callee-saved register available?"); 6760 6761 // Save and restore LR from that register. 6762 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg) 6763 .addReg(AArch64::XZR) 6764 .addReg(AArch64::LR) 6765 .addImm(0); 6766 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR) 6767 .addReg(AArch64::XZR) 6768 .addReg(Reg) 6769 .addImm(0); 6770 } else { 6771 // We have the default case. Save and restore from SP. 6772 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) 6773 .addReg(AArch64::SP, RegState::Define) 6774 .addReg(AArch64::LR) 6775 .addReg(AArch64::SP) 6776 .addImm(-16); 6777 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) 6778 .addReg(AArch64::SP, RegState::Define) 6779 .addReg(AArch64::LR, RegState::Define) 6780 .addReg(AArch64::SP) 6781 .addImm(16); 6782 } 6783 6784 It = MBB.insert(It, Save); 6785 It++; 6786 6787 // Insert the call. 6788 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) 6789 .addGlobalAddress(M.getNamedValue(MF.getName()))); 6790 CallPt = It; 6791 It++; 6792 6793 It = MBB.insert(It, Restore); 6794 return CallPt; 6795 } 6796 6797 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault( 6798 MachineFunction &MF) const { 6799 return MF.getFunction().hasMinSize(); 6800 } 6801 6802 Optional<DestSourcePair> 6803 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { 6804 6805 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg 6806 // and zero immediate operands used as an alias for mov instruction. 6807 if (MI.getOpcode() == AArch64::ORRWrs && 6808 MI.getOperand(1).getReg() == AArch64::WZR && 6809 MI.getOperand(3).getImm() == 0x0) { 6810 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)}; 6811 } 6812 6813 if (MI.getOpcode() == AArch64::ORRXrs && 6814 MI.getOperand(1).getReg() == AArch64::XZR && 6815 MI.getOperand(3).getImm() == 0x0) { 6816 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)}; 6817 } 6818 6819 return None; 6820 } 6821 6822 Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, 6823 Register Reg) const { 6824 int Sign = 1; 6825 int64_t Offset = 0; 6826 6827 // TODO: Handle cases where Reg is a super- or sub-register of the 6828 // destination register. 6829 const MachineOperand &Op0 = MI.getOperand(0); 6830 if (!Op0.isReg() || Reg != Op0.getReg()) 6831 return None; 6832 6833 switch (MI.getOpcode()) { 6834 default: 6835 return None; 6836 case AArch64::SUBWri: 6837 case AArch64::SUBXri: 6838 case AArch64::SUBSWri: 6839 case AArch64::SUBSXri: 6840 Sign *= -1; 6841 LLVM_FALLTHROUGH; 6842 case AArch64::ADDSWri: 6843 case AArch64::ADDSXri: 6844 case AArch64::ADDWri: 6845 case AArch64::ADDXri: { 6846 // TODO: Third operand can be global address (usually some string). 6847 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() || 6848 !MI.getOperand(2).isImm()) 6849 return None; 6850 Offset = MI.getOperand(2).getImm() * Sign; 6851 int Shift = MI.getOperand(3).getImm(); 6852 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12"); 6853 Offset = Offset << Shift; 6854 } 6855 } 6856 return RegImmPair{MI.getOperand(1).getReg(), Offset}; 6857 } 6858 6859 /// If the given ORR instruction is a copy, and \p DescribedReg overlaps with 6860 /// the destination register then, if possible, describe the value in terms of 6861 /// the source register. 
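/// For example, given the copy `$w1 = ORRWrs $wzr, $w8, 0` (the `mov w1, w8`
/// alias), a request to describe either W1 or its super-register X1 can be
/// answered with W8, since the 32-bit write zero-extends into X1.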
6862 static Optional<ParamLoadedValue> 6863 describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, 6864 const TargetInstrInfo *TII, 6865 const TargetRegisterInfo *TRI) { 6866 auto DestSrc = TII->isCopyInstr(MI); 6867 if (!DestSrc) 6868 return None; 6869 6870 Register DestReg = DestSrc->Destination->getReg(); 6871 Register SrcReg = DestSrc->Source->getReg(); 6872 6873 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {}); 6874 6875 // If the described register is the destination, just return the source. 6876 if (DestReg == DescribedReg) 6877 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); 6878 6879 // ORRWrs zero-extends to 64-bits, so we need to consider such cases. 6880 if (MI.getOpcode() == AArch64::ORRWrs && 6881 TRI->isSuperRegister(DestReg, DescribedReg)) 6882 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); 6883 6884 // We may need to describe the lower part of a ORRXrs move. 6885 if (MI.getOpcode() == AArch64::ORRXrs && 6886 TRI->isSubRegister(DestReg, DescribedReg)) { 6887 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32); 6888 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr); 6889 } 6890 6891 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) && 6892 "Unhandled ORR[XW]rs copy case"); 6893 6894 return None; 6895 } 6896 6897 Optional<ParamLoadedValue> 6898 AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI, 6899 Register Reg) const { 6900 const MachineFunction *MF = MI.getMF(); 6901 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 6902 switch (MI.getOpcode()) { 6903 case AArch64::MOVZWi: 6904 case AArch64::MOVZXi: { 6905 // MOVZWi may be used for producing zero-extended 32-bit immediates in 6906 // 64-bit parameters, so we need to consider super-registers. 6907 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg)) 6908 return None; 6909 6910 if (!MI.getOperand(1).isImm()) 6911 return None; 6912 int64_t Immediate = MI.getOperand(1).getImm(); 6913 int Shift = MI.getOperand(2).getImm(); 6914 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift), 6915 nullptr); 6916 } 6917 case AArch64::ORRWrs: 6918 case AArch64::ORRXrs: 6919 return describeORRLoadedValue(MI, Reg, this, TRI); 6920 } 6921 6922 return TargetInstrInfo::describeLoadedValue(MI, Reg); 6923 } 6924 6925 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const { 6926 return get(Opc).TSFlags & AArch64::ElementSizeMask; 6927 } 6928 6929 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) { 6930 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr()) 6931 return AArch64::BLRNoIP; 6932 else 6933 return AArch64::BLR; 6934 } 6935 6936 #define GET_INSTRINFO_HELPERS 6937 #define GET_INSTRMAP_INFO 6938 #include "AArch64GenInstrInfo.inc" 6939