1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the AArch64 implementation of the TargetInstrInfo class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64InstrInfo.h" 14 #include "AArch64MachineFunctionInfo.h" 15 #include "AArch64Subtarget.h" 16 #include "MCTargetDesc/AArch64AddressingModes.h" 17 #include "Utils/AArch64BaseInfo.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/CodeGen/MachineBasicBlock.h" 22 #include "llvm/CodeGen/MachineFrameInfo.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineInstr.h" 25 #include "llvm/CodeGen/MachineInstrBuilder.h" 26 #include "llvm/CodeGen/MachineMemOperand.h" 27 #include "llvm/CodeGen/MachineOperand.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/MachineModuleInfo.h" 30 #include "llvm/CodeGen/StackMaps.h" 31 #include "llvm/CodeGen/TargetRegisterInfo.h" 32 #include "llvm/CodeGen/TargetSubtargetInfo.h" 33 #include "llvm/IR/DebugLoc.h" 34 #include "llvm/IR/GlobalValue.h" 35 #include "llvm/MC/MCAsmInfo.h" 36 #include "llvm/MC/MCInst.h" 37 #include "llvm/MC/MCInstrDesc.h" 38 #include "llvm/Support/Casting.h" 39 #include "llvm/Support/CodeGen.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/Compiler.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/MathExtras.h" 44 #include "llvm/Target/TargetMachine.h" 45 #include "llvm/Target/TargetOptions.h" 46 #include <cassert> 47 #include <cstdint> 48 #include <iterator> 49 #include <utility> 50 51 using namespace llvm; 52 53 #define GET_INSTRINFO_CTOR_DTOR 54 #include "AArch64GenInstrInfo.inc" 55 56 static cl::opt<unsigned> TBZDisplacementBits( 57 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), 58 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); 59 60 static cl::opt<unsigned> CBZDisplacementBits( 61 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), 62 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); 63 64 static cl::opt<unsigned> 65 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), 66 cl::desc("Restrict range of Bcc instructions (DEBUG)")); 67 68 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) 69 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP, 70 AArch64::CATCHRET), 71 RI(STI.getTargetTriple()), Subtarget(STI) {} 72 73 /// GetInstSize - Return the number of bytes of code the specified 74 /// instruction may be. This returns the maximum number of bytes. 75 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { 76 const MachineBasicBlock &MBB = *MI.getParent(); 77 const MachineFunction *MF = MBB.getParent(); 78 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); 79 80 { 81 auto Op = MI.getOpcode(); 82 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR) 83 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); 84 } 85 86 // FIXME: We currently only handle pseudoinstructions that don't get expanded 87 // before the assembly printer. 
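  // Everything below is either a meta-instruction that emits no code or a
  // pseudo whose expansion has a known upper bound; all other AArch64
  // instructions are a fixed 4 bytes wide.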
88 unsigned NumBytes = 0; 89 const MCInstrDesc &Desc = MI.getDesc(); 90 switch (Desc.getOpcode()) { 91 default: 92 // Anything not explicitly designated otherwise is a normal 4-byte insn. 93 NumBytes = 4; 94 break; 95 case TargetOpcode::DBG_VALUE: 96 case TargetOpcode::EH_LABEL: 97 case TargetOpcode::IMPLICIT_DEF: 98 case TargetOpcode::KILL: 99 NumBytes = 0; 100 break; 101 case TargetOpcode::STACKMAP: 102 // The upper bound for a stackmap intrinsic is the full length of its shadow 103 NumBytes = StackMapOpers(&MI).getNumPatchBytes(); 104 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); 105 break; 106 case TargetOpcode::PATCHPOINT: 107 // The size of the patchpoint intrinsic is the number of bytes requested 108 NumBytes = PatchPointOpers(&MI).getNumPatchBytes(); 109 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); 110 break; 111 case AArch64::TLSDESC_CALLSEQ: 112 // This gets lowered to an instruction sequence which takes 16 bytes 113 NumBytes = 16; 114 break; 115 case AArch64::JumpTableDest32: 116 case AArch64::JumpTableDest16: 117 case AArch64::JumpTableDest8: 118 NumBytes = 12; 119 break; 120 case AArch64::SPACE: 121 NumBytes = MI.getOperand(1).getImm(); 122 break; 123 } 124 125 return NumBytes; 126 } 127 128 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, 129 SmallVectorImpl<MachineOperand> &Cond) { 130 // Block ends with fall-through condbranch. 131 switch (LastInst->getOpcode()) { 132 default: 133 llvm_unreachable("Unknown branch instruction?"); 134 case AArch64::Bcc: 135 Target = LastInst->getOperand(1).getMBB(); 136 Cond.push_back(LastInst->getOperand(0)); 137 break; 138 case AArch64::CBZW: 139 case AArch64::CBZX: 140 case AArch64::CBNZW: 141 case AArch64::CBNZX: 142 Target = LastInst->getOperand(1).getMBB(); 143 Cond.push_back(MachineOperand::CreateImm(-1)); 144 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); 145 Cond.push_back(LastInst->getOperand(0)); 146 break; 147 case AArch64::TBZW: 148 case AArch64::TBZX: 149 case AArch64::TBNZW: 150 case AArch64::TBNZX: 151 Target = LastInst->getOperand(2).getMBB(); 152 Cond.push_back(MachineOperand::CreateImm(-1)); 153 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); 154 Cond.push_back(LastInst->getOperand(0)); 155 Cond.push_back(LastInst->getOperand(1)); 156 } 157 } 158 159 static unsigned getBranchDisplacementBits(unsigned Opc) { 160 switch (Opc) { 161 default: 162 llvm_unreachable("unexpected opcode!"); 163 case AArch64::B: 164 return 64; 165 case AArch64::TBNZW: 166 case AArch64::TBZW: 167 case AArch64::TBNZX: 168 case AArch64::TBZX: 169 return TBZDisplacementBits; 170 case AArch64::CBNZW: 171 case AArch64::CBZW: 172 case AArch64::CBNZX: 173 case AArch64::CBZX: 174 return CBZDisplacementBits; 175 case AArch64::Bcc: 176 return BCCDisplacementBits; 177 } 178 } 179 180 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp, 181 int64_t BrOffset) const { 182 unsigned Bits = getBranchDisplacementBits(BranchOp); 183 assert(Bits >= 3 && "max branch displacement must be enough to jump" 184 "over conditional branch expansion"); 185 return isIntN(Bits, BrOffset / 4); 186 } 187 188 MachineBasicBlock * 189 AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const { 190 switch (MI.getOpcode()) { 191 default: 192 llvm_unreachable("unexpected opcode!"); 193 case AArch64::B: 194 return MI.getOperand(0).getMBB(); 195 case AArch64::TBZW: 196 case AArch64::TBNZW: 197 case AArch64::TBZX: 198 case AArch64::TBNZX: 199 return 
MI.getOperand(2).getMBB(); 200 case AArch64::CBZW: 201 case AArch64::CBNZW: 202 case AArch64::CBZX: 203 case AArch64::CBNZX: 204 case AArch64::Bcc: 205 return MI.getOperand(1).getMBB(); 206 } 207 } 208 209 // Branch analysis. 210 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, 211 MachineBasicBlock *&TBB, 212 MachineBasicBlock *&FBB, 213 SmallVectorImpl<MachineOperand> &Cond, 214 bool AllowModify) const { 215 // If the block has no terminators, it just falls into the block after it. 216 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 217 if (I == MBB.end()) 218 return false; 219 220 if (!isUnpredicatedTerminator(*I)) 221 return false; 222 223 // Get the last instruction in the block. 224 MachineInstr *LastInst = &*I; 225 226 // If there is only one terminator instruction, process it. 227 unsigned LastOpc = LastInst->getOpcode(); 228 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { 229 if (isUncondBranchOpcode(LastOpc)) { 230 TBB = LastInst->getOperand(0).getMBB(); 231 return false; 232 } 233 if (isCondBranchOpcode(LastOpc)) { 234 // Block ends with fall-through condbranch. 235 parseCondBranch(LastInst, TBB, Cond); 236 return false; 237 } 238 return true; // Can't handle indirect branch. 239 } 240 241 // Get the instruction before it if it is a terminator. 242 MachineInstr *SecondLastInst = &*I; 243 unsigned SecondLastOpc = SecondLastInst->getOpcode(); 244 245 // If AllowModify is true and the block ends with two or more unconditional 246 // branches, delete all but the first unconditional branch. 247 if (AllowModify && isUncondBranchOpcode(LastOpc)) { 248 while (isUncondBranchOpcode(SecondLastOpc)) { 249 LastInst->eraseFromParent(); 250 LastInst = SecondLastInst; 251 LastOpc = LastInst->getOpcode(); 252 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { 253 // Return now the only terminator is an unconditional branch. 254 TBB = LastInst->getOperand(0).getMBB(); 255 return false; 256 } else { 257 SecondLastInst = &*I; 258 SecondLastOpc = SecondLastInst->getOpcode(); 259 } 260 } 261 } 262 263 // If there are three terminators, we don't know what sort of block this is. 264 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) 265 return true; 266 267 // If the block ends with a B and a Bcc, handle it. 268 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 269 parseCondBranch(SecondLastInst, TBB, Cond); 270 FBB = LastInst->getOperand(0).getMBB(); 271 return false; 272 } 273 274 // If the block ends with two unconditional branches, handle it. The second 275 // one is not executed, so remove it. 276 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 277 TBB = SecondLastInst->getOperand(0).getMBB(); 278 I = LastInst; 279 if (AllowModify) 280 I->eraseFromParent(); 281 return false; 282 } 283 284 // ...likewise if it ends with an indirect branch followed by an unconditional 285 // branch. 286 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 287 I = LastInst; 288 if (AllowModify) 289 I->eraseFromParent(); 290 return true; 291 } 292 293 // Otherwise, can't handle this. 
294 return true; 295 } 296 297 bool AArch64InstrInfo::reverseBranchCondition( 298 SmallVectorImpl<MachineOperand> &Cond) const { 299 if (Cond[0].getImm() != -1) { 300 // Regular Bcc 301 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); 302 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC)); 303 } else { 304 // Folded compare-and-branch 305 switch (Cond[1].getImm()) { 306 default: 307 llvm_unreachable("Unknown conditional branch!"); 308 case AArch64::CBZW: 309 Cond[1].setImm(AArch64::CBNZW); 310 break; 311 case AArch64::CBNZW: 312 Cond[1].setImm(AArch64::CBZW); 313 break; 314 case AArch64::CBZX: 315 Cond[1].setImm(AArch64::CBNZX); 316 break; 317 case AArch64::CBNZX: 318 Cond[1].setImm(AArch64::CBZX); 319 break; 320 case AArch64::TBZW: 321 Cond[1].setImm(AArch64::TBNZW); 322 break; 323 case AArch64::TBNZW: 324 Cond[1].setImm(AArch64::TBZW); 325 break; 326 case AArch64::TBZX: 327 Cond[1].setImm(AArch64::TBNZX); 328 break; 329 case AArch64::TBNZX: 330 Cond[1].setImm(AArch64::TBZX); 331 break; 332 } 333 } 334 335 return false; 336 } 337 338 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB, 339 int *BytesRemoved) const { 340 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 341 if (I == MBB.end()) 342 return 0; 343 344 if (!isUncondBranchOpcode(I->getOpcode()) && 345 !isCondBranchOpcode(I->getOpcode())) 346 return 0; 347 348 // Remove the branch. 349 I->eraseFromParent(); 350 351 I = MBB.end(); 352 353 if (I == MBB.begin()) { 354 if (BytesRemoved) 355 *BytesRemoved = 4; 356 return 1; 357 } 358 --I; 359 if (!isCondBranchOpcode(I->getOpcode())) { 360 if (BytesRemoved) 361 *BytesRemoved = 4; 362 return 1; 363 } 364 365 // Remove the branch. 366 I->eraseFromParent(); 367 if (BytesRemoved) 368 *BytesRemoved = 8; 369 370 return 2; 371 } 372 373 void AArch64InstrInfo::instantiateCondBranch( 374 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB, 375 ArrayRef<MachineOperand> Cond) const { 376 if (Cond[0].getImm() != -1) { 377 // Regular Bcc 378 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); 379 } else { 380 // Folded compare-and-branch 381 // Note that we use addOperand instead of addReg to keep the flags. 382 const MachineInstrBuilder MIB = 383 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]); 384 if (Cond.size() > 3) 385 MIB.addImm(Cond[3].getImm()); 386 MIB.addMBB(TBB); 387 } 388 } 389 390 unsigned AArch64InstrInfo::insertBranch( 391 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, 392 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const { 393 // Shouldn't be a fall through. 394 assert(TBB && "insertBranch must not be told to insert a fallthrough"); 395 396 if (!FBB) { 397 if (Cond.empty()) // Unconditional branch? 398 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB); 399 else 400 instantiateCondBranch(MBB, DL, TBB, Cond); 401 402 if (BytesAdded) 403 *BytesAdded = 4; 404 405 return 1; 406 } 407 408 // Two-way conditional branch. 409 instantiateCondBranch(MBB, DL, TBB, Cond); 410 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB); 411 412 if (BytesAdded) 413 *BytesAdded = 8; 414 415 return 2; 416 } 417 418 // Find the original register that VReg is copied from. 
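// Full copies are followed transitively; the walk stops at the first def that
// is not a full copy, or as soon as a physical register is reached.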
419 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { 420 while (TargetRegisterInfo::isVirtualRegister(VReg)) { 421 const MachineInstr *DefMI = MRI.getVRegDef(VReg); 422 if (!DefMI->isFullCopy()) 423 return VReg; 424 VReg = DefMI->getOperand(1).getReg(); 425 } 426 return VReg; 427 } 428 429 // Determine if VReg is defined by an instruction that can be folded into a 430 // csel instruction. If so, return the folded opcode, and the replacement 431 // register. 432 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, 433 unsigned *NewVReg = nullptr) { 434 VReg = removeCopies(MRI, VReg); 435 if (!TargetRegisterInfo::isVirtualRegister(VReg)) 436 return 0; 437 438 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); 439 const MachineInstr *DefMI = MRI.getVRegDef(VReg); 440 unsigned Opc = 0; 441 unsigned SrcOpNum = 0; 442 switch (DefMI->getOpcode()) { 443 case AArch64::ADDSXri: 444 case AArch64::ADDSWri: 445 // if NZCV is used, do not fold. 446 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) 447 return 0; 448 // fall-through to ADDXri and ADDWri. 449 LLVM_FALLTHROUGH; 450 case AArch64::ADDXri: 451 case AArch64::ADDWri: 452 // add x, 1 -> csinc. 453 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 || 454 DefMI->getOperand(3).getImm() != 0) 455 return 0; 456 SrcOpNum = 1; 457 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr; 458 break; 459 460 case AArch64::ORNXrr: 461 case AArch64::ORNWrr: { 462 // not x -> csinv, represented as orn dst, xzr, src. 463 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); 464 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) 465 return 0; 466 SrcOpNum = 2; 467 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr; 468 break; 469 } 470 471 case AArch64::SUBSXrr: 472 case AArch64::SUBSWrr: 473 // if NZCV is used, do not fold. 474 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) 475 return 0; 476 // fall-through to SUBXrr and SUBWrr. 477 LLVM_FALLTHROUGH; 478 case AArch64::SUBXrr: 479 case AArch64::SUBWrr: { 480 // neg x -> csneg, represented as sub dst, xzr, src. 481 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); 482 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) 483 return 0; 484 SrcOpNum = 2; 485 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr; 486 break; 487 } 488 default: 489 return 0; 490 } 491 assert(Opc && SrcOpNum && "Missing parameters"); 492 493 if (NewVReg) 494 *NewVReg = DefMI->getOperand(SrcOpNum).getReg(); 495 return Opc; 496 } 497 498 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB, 499 ArrayRef<MachineOperand> Cond, 500 unsigned TrueReg, unsigned FalseReg, 501 int &CondCycles, int &TrueCycles, 502 int &FalseCycles) const { 503 // Check register classes. 504 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 505 const TargetRegisterClass *RC = 506 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); 507 if (!RC) 508 return false; 509 510 // Expanding cbz/tbz requires an extra cycle of latency on the condition. 511 unsigned ExtraCondLat = Cond.size() != 1; 512 513 // GPRs are handled by csel. 514 // FIXME: Fold in x+1, -x, and ~x when applicable. 515 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) || 516 AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 517 // Single-cycle csel, csinc, csinv, and csneg. 
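    // ExtraCondLat accounts for the SUBS/ANDS against zero that insertSelect
    // has to materialize when the condition came from a cbz/tbz-style branch
    // (i.e. when Cond.size() != 1).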
518 CondCycles = 1 + ExtraCondLat; 519 TrueCycles = FalseCycles = 1; 520 if (canFoldIntoCSel(MRI, TrueReg)) 521 TrueCycles = 0; 522 else if (canFoldIntoCSel(MRI, FalseReg)) 523 FalseCycles = 0; 524 return true; 525 } 526 527 // Scalar floating point is handled by fcsel. 528 // FIXME: Form fabs, fmin, and fmax when applicable. 529 if (AArch64::FPR64RegClass.hasSubClassEq(RC) || 530 AArch64::FPR32RegClass.hasSubClassEq(RC)) { 531 CondCycles = 5 + ExtraCondLat; 532 TrueCycles = FalseCycles = 2; 533 return true; 534 } 535 536 // Can't do vectors. 537 return false; 538 } 539 540 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, 541 MachineBasicBlock::iterator I, 542 const DebugLoc &DL, unsigned DstReg, 543 ArrayRef<MachineOperand> Cond, 544 unsigned TrueReg, unsigned FalseReg) const { 545 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 546 547 // Parse the condition code, see parseCondBranch() above. 548 AArch64CC::CondCode CC; 549 switch (Cond.size()) { 550 default: 551 llvm_unreachable("Unknown condition opcode in Cond"); 552 case 1: // b.cc 553 CC = AArch64CC::CondCode(Cond[0].getImm()); 554 break; 555 case 3: { // cbz/cbnz 556 // We must insert a compare against 0. 557 bool Is64Bit; 558 switch (Cond[1].getImm()) { 559 default: 560 llvm_unreachable("Unknown branch opcode in Cond"); 561 case AArch64::CBZW: 562 Is64Bit = false; 563 CC = AArch64CC::EQ; 564 break; 565 case AArch64::CBZX: 566 Is64Bit = true; 567 CC = AArch64CC::EQ; 568 break; 569 case AArch64::CBNZW: 570 Is64Bit = false; 571 CC = AArch64CC::NE; 572 break; 573 case AArch64::CBNZX: 574 Is64Bit = true; 575 CC = AArch64CC::NE; 576 break; 577 } 578 unsigned SrcReg = Cond[2].getReg(); 579 if (Is64Bit) { 580 // cmp reg, #0 is actually subs xzr, reg, #0. 581 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass); 582 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR) 583 .addReg(SrcReg) 584 .addImm(0) 585 .addImm(0); 586 } else { 587 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass); 588 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR) 589 .addReg(SrcReg) 590 .addImm(0) 591 .addImm(0); 592 } 593 break; 594 } 595 case 4: { // tbz/tbnz 596 // We must insert a tst instruction. 597 switch (Cond[1].getImm()) { 598 default: 599 llvm_unreachable("Unknown branch opcode in Cond"); 600 case AArch64::TBZW: 601 case AArch64::TBZX: 602 CC = AArch64CC::EQ; 603 break; 604 case AArch64::TBNZW: 605 case AArch64::TBNZX: 606 CC = AArch64CC::NE; 607 break; 608 } 609 // cmp reg, #foo is actually ands xzr, reg, #1<<foo. 
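    // For example, a TBNZW on bit 3 becomes "ands wzr, reg, #0x8", i.e. the
    // logical-immediate encoding of (1 << Cond[3]) in 32 bits.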
610 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW) 611 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR) 612 .addReg(Cond[2].getReg()) 613 .addImm( 614 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32)); 615 else 616 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR) 617 .addReg(Cond[2].getReg()) 618 .addImm( 619 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64)); 620 break; 621 } 622 } 623 624 unsigned Opc = 0; 625 const TargetRegisterClass *RC = nullptr; 626 bool TryFold = false; 627 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) { 628 RC = &AArch64::GPR64RegClass; 629 Opc = AArch64::CSELXr; 630 TryFold = true; 631 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) { 632 RC = &AArch64::GPR32RegClass; 633 Opc = AArch64::CSELWr; 634 TryFold = true; 635 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) { 636 RC = &AArch64::FPR64RegClass; 637 Opc = AArch64::FCSELDrrr; 638 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) { 639 RC = &AArch64::FPR32RegClass; 640 Opc = AArch64::FCSELSrrr; 641 } 642 assert(RC && "Unsupported regclass"); 643 644 // Try folding simple instructions into the csel. 645 if (TryFold) { 646 unsigned NewVReg = 0; 647 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg); 648 if (FoldedOpc) { 649 // The folded opcodes csinc, csinv and csneg apply the operation to 650 // FalseReg, so we need to invert the condition. 651 CC = AArch64CC::getInvertedCondCode(CC); 652 TrueReg = FalseReg; 653 } else 654 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg); 655 656 // Fold the operation. Leave any dead instructions for DCE to clean up. 657 if (FoldedOpc) { 658 FalseReg = NewVReg; 659 Opc = FoldedOpc; 660 // This extends the live range of NewVReg. 661 MRI.clearKillFlags(NewVReg); 662 } 663 } 664 665 // Pull all virtual registers into the appropriate class. 666 MRI.constrainRegClass(TrueReg, RC); 667 MRI.constrainRegClass(FalseReg, RC); 668 669 // Insert the csel. 670 BuildMI(MBB, I, DL, get(Opc), DstReg) 671 .addReg(TrueReg) 672 .addReg(FalseReg) 673 .addImm(CC); 674 } 675 676 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx. 677 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) { 678 uint64_t Imm = MI.getOperand(1).getImm(); 679 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); 680 uint64_t Encoding; 681 return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding); 682 } 683 684 // FIXME: this implementation should be micro-architecture dependent, so a 685 // micro-architecture target hook should be introduced here in future. 686 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { 687 if (!Subtarget.hasCustomCheapAsMoveHandling()) 688 return MI.isAsCheapAsAMove(); 689 690 const unsigned Opcode = MI.getOpcode(); 691 692 // Firstly, check cases gated by features. 693 694 if (Subtarget.hasZeroCycleZeroingFP()) { 695 if (Opcode == AArch64::FMOVH0 || 696 Opcode == AArch64::FMOVS0 || 697 Opcode == AArch64::FMOVD0) 698 return true; 699 } 700 701 if (Subtarget.hasZeroCycleZeroingGP()) { 702 if (Opcode == TargetOpcode::COPY && 703 (MI.getOperand(1).getReg() == AArch64::WZR || 704 MI.getOperand(1).getReg() == AArch64::XZR)) 705 return true; 706 } 707 708 // Secondly, check cases specific to sub-targets.
709 710 if (Subtarget.hasExynosCheapAsMoveHandling()) { 711 if (isExynosCheapAsMove(MI)) 712 return true; 713 714 return MI.isAsCheapAsAMove(); 715 } 716 717 // Finally, check generic cases. 718 719 switch (Opcode) { 720 default: 721 return false; 722 723 // add/sub on register without shift 724 case AArch64::ADDWri: 725 case AArch64::ADDXri: 726 case AArch64::SUBWri: 727 case AArch64::SUBXri: 728 return (MI.getOperand(3).getImm() == 0); 729 730 // logical ops on immediate 731 case AArch64::ANDWri: 732 case AArch64::ANDXri: 733 case AArch64::EORWri: 734 case AArch64::EORXri: 735 case AArch64::ORRWri: 736 case AArch64::ORRXri: 737 return true; 738 739 // logical ops on register without shift 740 case AArch64::ANDWrr: 741 case AArch64::ANDXrr: 742 case AArch64::BICWrr: 743 case AArch64::BICXrr: 744 case AArch64::EONWrr: 745 case AArch64::EONXrr: 746 case AArch64::EORWrr: 747 case AArch64::EORXrr: 748 case AArch64::ORNWrr: 749 case AArch64::ORNXrr: 750 case AArch64::ORRWrr: 751 case AArch64::ORRXrr: 752 return true; 753 754 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or 755 // ORRXri, it is as cheap as MOV 756 case AArch64::MOVi32imm: 757 return canBeExpandedToORR(MI, 32); 758 case AArch64::MOVi64imm: 759 return canBeExpandedToORR(MI, 64); 760 } 761 762 llvm_unreachable("Unknown opcode to check as cheap as a move!"); 763 } 764 765 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) { 766 switch (MI.getOpcode()) { 767 default: 768 return false; 769 770 case AArch64::ADDWrs: 771 case AArch64::ADDXrs: 772 case AArch64::ADDSWrs: 773 case AArch64::ADDSXrs: { 774 unsigned Imm = MI.getOperand(3).getImm(); 775 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); 776 if (ShiftVal == 0) 777 return true; 778 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5; 779 } 780 781 case AArch64::ADDWrx: 782 case AArch64::ADDXrx: 783 case AArch64::ADDXrx64: 784 case AArch64::ADDSWrx: 785 case AArch64::ADDSXrx: 786 case AArch64::ADDSXrx64: { 787 unsigned Imm = MI.getOperand(3).getImm(); 788 switch (AArch64_AM::getArithExtendType(Imm)) { 789 default: 790 return false; 791 case AArch64_AM::UXTB: 792 case AArch64_AM::UXTH: 793 case AArch64_AM::UXTW: 794 case AArch64_AM::UXTX: 795 return AArch64_AM::getArithShiftValue(Imm) <= 4; 796 } 797 } 798 799 case AArch64::SUBWrs: 800 case AArch64::SUBSWrs: { 801 unsigned Imm = MI.getOperand(3).getImm(); 802 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); 803 return ShiftVal == 0 || 804 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31); 805 } 806 807 case AArch64::SUBXrs: 808 case AArch64::SUBSXrs: { 809 unsigned Imm = MI.getOperand(3).getImm(); 810 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); 811 return ShiftVal == 0 || 812 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63); 813 } 814 815 case AArch64::SUBWrx: 816 case AArch64::SUBXrx: 817 case AArch64::SUBXrx64: 818 case AArch64::SUBSWrx: 819 case AArch64::SUBSXrx: 820 case AArch64::SUBSXrx64: { 821 unsigned Imm = MI.getOperand(3).getImm(); 822 switch (AArch64_AM::getArithExtendType(Imm)) { 823 default: 824 return false; 825 case AArch64_AM::UXTB: 826 case AArch64_AM::UXTH: 827 case AArch64_AM::UXTW: 828 case AArch64_AM::UXTX: 829 return AArch64_AM::getArithShiftValue(Imm) == 0; 830 } 831 } 832 833 case AArch64::LDRBBroW: 834 case AArch64::LDRBBroX: 835 case AArch64::LDRBroW: 836 case AArch64::LDRBroX: 837 case AArch64::LDRDroW: 838 case AArch64::LDRDroX: 839 case AArch64::LDRHHroW: 840 case AArch64::LDRHHroX: 841 case 
AArch64::LDRHroW: 842 case AArch64::LDRHroX: 843 case AArch64::LDRQroW: 844 case AArch64::LDRQroX: 845 case AArch64::LDRSBWroW: 846 case AArch64::LDRSBWroX: 847 case AArch64::LDRSBXroW: 848 case AArch64::LDRSBXroX: 849 case AArch64::LDRSHWroW: 850 case AArch64::LDRSHWroX: 851 case AArch64::LDRSHXroW: 852 case AArch64::LDRSHXroX: 853 case AArch64::LDRSWroW: 854 case AArch64::LDRSWroX: 855 case AArch64::LDRSroW: 856 case AArch64::LDRSroX: 857 case AArch64::LDRWroW: 858 case AArch64::LDRWroX: 859 case AArch64::LDRXroW: 860 case AArch64::LDRXroX: 861 case AArch64::PRFMroW: 862 case AArch64::PRFMroX: 863 case AArch64::STRBBroW: 864 case AArch64::STRBBroX: 865 case AArch64::STRBroW: 866 case AArch64::STRBroX: 867 case AArch64::STRDroW: 868 case AArch64::STRDroX: 869 case AArch64::STRHHroW: 870 case AArch64::STRHHroX: 871 case AArch64::STRHroW: 872 case AArch64::STRHroX: 873 case AArch64::STRQroW: 874 case AArch64::STRQroX: 875 case AArch64::STRSroW: 876 case AArch64::STRSroX: 877 case AArch64::STRWroW: 878 case AArch64::STRWroX: 879 case AArch64::STRXroW: 880 case AArch64::STRXroX: { 881 unsigned IsSigned = MI.getOperand(3).getImm(); 882 return !IsSigned; 883 } 884 } 885 } 886 887 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) { 888 unsigned Opc = MI.getOpcode(); 889 switch (Opc) { 890 default: 891 return false; 892 case AArch64::SEH_StackAlloc: 893 case AArch64::SEH_SaveFPLR: 894 case AArch64::SEH_SaveFPLR_X: 895 case AArch64::SEH_SaveReg: 896 case AArch64::SEH_SaveReg_X: 897 case AArch64::SEH_SaveRegP: 898 case AArch64::SEH_SaveRegP_X: 899 case AArch64::SEH_SaveFReg: 900 case AArch64::SEH_SaveFReg_X: 901 case AArch64::SEH_SaveFRegP: 902 case AArch64::SEH_SaveFRegP_X: 903 case AArch64::SEH_SetFP: 904 case AArch64::SEH_AddFP: 905 case AArch64::SEH_Nop: 906 case AArch64::SEH_PrologEnd: 907 case AArch64::SEH_EpilogStart: 908 case AArch64::SEH_EpilogEnd: 909 return true; 910 } 911 } 912 913 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, 914 unsigned &SrcReg, unsigned &DstReg, 915 unsigned &SubIdx) const { 916 switch (MI.getOpcode()) { 917 default: 918 return false; 919 case AArch64::SBFMXri: // aka sxtw 920 case AArch64::UBFMXri: // aka uxtw 921 // Check for the 32 -> 64 bit extension case, these instructions can do 922 // much more. 923 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31) 924 return false; 925 // This is a signed or unsigned 32 -> 64 bit extension. 926 SrcReg = MI.getOperand(1).getReg(); 927 DstReg = MI.getOperand(0).getReg(); 928 SubIdx = AArch64::sub_32; 929 return true; 930 } 931 } 932 933 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint( 934 const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA) const { 935 const TargetRegisterInfo *TRI = &getRegisterInfo(); 936 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; 937 int64_t OffsetA = 0, OffsetB = 0; 938 unsigned WidthA = 0, WidthB = 0; 939 940 assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); 941 assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); 942 943 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || 944 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 945 return false; 946 947 // Retrieve the base, offset from the base and width. Width 948 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). 
If 949 // the base operands are identical, and the offset of a lower memory access + 950 // the width doesn't overlap the offset of a higher memory access, 951 // then the memory accesses are different. 952 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && 953 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { 954 if (BaseOpA->isIdenticalTo(*BaseOpB)) { 955 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; 956 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; 957 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 958 if (LowOffset + LowWidth <= HighOffset) 959 return true; 960 } 961 } 962 return false; 963 } 964 965 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI, 966 const MachineBasicBlock *MBB, 967 const MachineFunction &MF) const { 968 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) 969 return true; 970 switch (MI.getOpcode()) { 971 case AArch64::HINT: 972 // CSDB hints are scheduling barriers. 973 if (MI.getOperand(0).getImm() == 0x14) 974 return true; 975 break; 976 case AArch64::DSB: 977 case AArch64::ISB: 978 // DSB and ISB also are scheduling barriers. 979 return true; 980 default:; 981 } 982 return isSEHInstruction(MI); 983 } 984 985 /// analyzeCompare - For a comparison instruction, return the source registers 986 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. 987 /// Return true if the comparison instruction can be analyzed. 988 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, 989 unsigned &SrcReg2, int &CmpMask, 990 int &CmpValue) const { 991 // The first operand can be a frame index where we'd normally expect a 992 // register. 993 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands"); 994 if (!MI.getOperand(1).isReg()) 995 return false; 996 997 switch (MI.getOpcode()) { 998 default: 999 break; 1000 case AArch64::SUBSWrr: 1001 case AArch64::SUBSWrs: 1002 case AArch64::SUBSWrx: 1003 case AArch64::SUBSXrr: 1004 case AArch64::SUBSXrs: 1005 case AArch64::SUBSXrx: 1006 case AArch64::ADDSWrr: 1007 case AArch64::ADDSWrs: 1008 case AArch64::ADDSWrx: 1009 case AArch64::ADDSXrr: 1010 case AArch64::ADDSXrs: 1011 case AArch64::ADDSXrx: 1012 // Replace SUBSWrr with SUBWrr if NZCV is not used. 1013 SrcReg = MI.getOperand(1).getReg(); 1014 SrcReg2 = MI.getOperand(2).getReg(); 1015 CmpMask = ~0; 1016 CmpValue = 0; 1017 return true; 1018 case AArch64::SUBSWri: 1019 case AArch64::ADDSWri: 1020 case AArch64::SUBSXri: 1021 case AArch64::ADDSXri: 1022 SrcReg = MI.getOperand(1).getReg(); 1023 SrcReg2 = 0; 1024 CmpMask = ~0; 1025 // FIXME: CmpValue is reduced to 0 or 1 here, since optimizeCompareInstr only handles comparisons with zero. 1026 CmpValue = MI.getOperand(2).getImm() != 0; 1027 return true; 1028 case AArch64::ANDSWri: 1029 case AArch64::ANDSXri: 1030 // ANDS does not use the same encoding scheme as the other xxxS 1031 // instructions. 1032 SrcReg = MI.getOperand(1).getReg(); 1033 SrcReg2 = 0; 1034 CmpMask = ~0; 1035 // FIXME: The return value type of decodeLogicalImmediate is uint64_t, 1036 // while the type of CmpValue is int. When converting uint64_t to int, 1037 // the high 32 bits of uint64_t will be lost. 1038 // In fact it causes a bug in spec2006-483.xalancbmk. 1039 // CmpValue is only used to compare with zero in optimizeCompareInstr. 1040 CmpValue = AArch64_AM::decodeLogicalImmediate( 1041 MI.getOperand(2).getImm(), 1042 MI.getOpcode() == AArch64::ANDSWri ? 
32 : 64) != 0; 1043 return true; 1044 } 1045 1046 return false; 1047 } 1048 1049 static bool UpdateOperandRegClass(MachineInstr &Instr) { 1050 MachineBasicBlock *MBB = Instr.getParent(); 1051 assert(MBB && "Can't get MachineBasicBlock here"); 1052 MachineFunction *MF = MBB->getParent(); 1053 assert(MF && "Can't get MachineFunction here"); 1054 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 1055 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 1056 MachineRegisterInfo *MRI = &MF->getRegInfo(); 1057 1058 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx; 1059 ++OpIdx) { 1060 MachineOperand &MO = Instr.getOperand(OpIdx); 1061 const TargetRegisterClass *OpRegCstraints = 1062 Instr.getRegClassConstraint(OpIdx, TII, TRI); 1063 1064 // If there's no constraint, there's nothing to do. 1065 if (!OpRegCstraints) 1066 continue; 1067 // If the operand is a frame index, there's nothing to do here. 1068 // A frame index operand will resolve correctly during PEI. 1069 if (MO.isFI()) 1070 continue; 1071 1072 assert(MO.isReg() && 1073 "Operand has register constraints without being a register!"); 1074 1075 unsigned Reg = MO.getReg(); 1076 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 1077 if (!OpRegCstraints->contains(Reg)) 1078 return false; 1079 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && 1080 !MRI->constrainRegClass(Reg, OpRegCstraints)) 1081 return false; 1082 } 1083 1084 return true; 1085 } 1086 1087 /// Return the opcode that does not set flags when possible - otherwise 1088 /// return the original opcode. The caller is responsible to do the actual 1089 /// substitution and legality checking. 1090 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) { 1091 // Don't convert all compare instructions, because for some the zero register 1092 // encoding becomes the sp register. 1093 bool MIDefinesZeroReg = false; 1094 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR)) 1095 MIDefinesZeroReg = true; 1096 1097 switch (MI.getOpcode()) { 1098 default: 1099 return MI.getOpcode(); 1100 case AArch64::ADDSWrr: 1101 return AArch64::ADDWrr; 1102 case AArch64::ADDSWri: 1103 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri; 1104 case AArch64::ADDSWrs: 1105 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs; 1106 case AArch64::ADDSWrx: 1107 return AArch64::ADDWrx; 1108 case AArch64::ADDSXrr: 1109 return AArch64::ADDXrr; 1110 case AArch64::ADDSXri: 1111 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri; 1112 case AArch64::ADDSXrs: 1113 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs; 1114 case AArch64::ADDSXrx: 1115 return AArch64::ADDXrx; 1116 case AArch64::SUBSWrr: 1117 return AArch64::SUBWrr; 1118 case AArch64::SUBSWri: 1119 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri; 1120 case AArch64::SUBSWrs: 1121 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs; 1122 case AArch64::SUBSWrx: 1123 return AArch64::SUBWrx; 1124 case AArch64::SUBSXrr: 1125 return AArch64::SUBXrr; 1126 case AArch64::SUBSXri: 1127 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri; 1128 case AArch64::SUBSXrs: 1129 return MIDefinesZeroReg ? 
AArch64::SUBSXrs : AArch64::SUBXrs; 1130 case AArch64::SUBSXrx: 1131 return AArch64::SUBXrx; 1132 } 1133 } 1134 1135 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 }; 1136 1137 /// True when condition flags are accessed (either by writing or reading) 1138 /// on the instruction trace starting at From and ending at To. 1139 /// 1140 /// Note: If From and To are from different blocks it's assumed the condition 1141 /// flags are accessed on the path. 1142 static bool areCFlagsAccessedBetweenInstrs( 1143 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, 1144 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) { 1145 // Early exit if To is at the beginning of the BB. 1146 if (To == To->getParent()->begin()) 1147 return true; 1148 1149 // Check whether the instructions are in the same basic block. 1150 // If not, assume the condition flags might get modified somewhere. 1151 if (To->getParent() != From->getParent()) 1152 return true; 1153 1154 // From must be above To. 1155 assert(std::find_if(++To.getReverse(), To->getParent()->rend(), 1156 [From](MachineInstr &MI) { 1157 return MI.getIterator() == From; 1158 }) != To->getParent()->rend()); 1159 1160 // We iterate backward starting at \p To until we hit \p From. 1161 for (--To; To != From; --To) { 1162 const MachineInstr &Instr = *To; 1163 1164 if (((AccessToCheck & AK_Write) && 1165 Instr.modifiesRegister(AArch64::NZCV, TRI)) || 1166 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI))) 1167 return true; 1168 } 1169 return false; 1170 } 1171 1172 /// Try to optimize a compare instruction. A compare instruction is an 1173 /// instruction which produces AArch64::NZCV. It is treated as a pure compare 1174 /// instruction 1175 /// when there are no uses of its destination register. 1176 /// 1177 /// The following steps are tried in order: 1178 /// 1. Convert CmpInstr into a non-flag-setting version. 1179 /// 2. Remove CmpInstr if above there is an instruction producing a needed 1180 /// condition code or an instruction which can be converted into such an 1181 /// instruction. 1182 /// Only comparison with zero is supported. 1183 bool AArch64InstrInfo::optimizeCompareInstr( 1184 MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, 1185 int CmpValue, const MachineRegisterInfo *MRI) const { 1186 assert(CmpInstr.getParent()); 1187 assert(MRI); 1188 1189 // Replace SUBSWrr with SUBWrr if NZCV is not used. 1190 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true); 1191 if (DeadNZCVIdx != -1) { 1192 if (CmpInstr.definesRegister(AArch64::WZR) || 1193 CmpInstr.definesRegister(AArch64::XZR)) { 1194 CmpInstr.eraseFromParent(); 1195 return true; 1196 } 1197 unsigned Opc = CmpInstr.getOpcode(); 1198 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr); 1199 if (NewOpc == Opc) 1200 return false; 1201 const MCInstrDesc &MCID = get(NewOpc); 1202 CmpInstr.setDesc(MCID); 1203 CmpInstr.RemoveOperand(DeadNZCVIdx); 1204 bool succeeded = UpdateOperandRegClass(CmpInstr); 1205 (void)succeeded; 1206 assert(succeeded && "Some operands reg class are incompatible!"); 1207 return true; 1208 } 1209 1210 // Continue only if we have a "ri" where immediate is zero. 1211 // FIXME: CmpValue has already been converted to 0 or 1 in the analyzeCompare 1212 // function. 1213 assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!"); 1214 if (CmpValue != 0 || SrcReg2 != 0) 1215 return false; 1216 1217 // CmpInstr is a Compare instruction if destination register is not used.
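  // Debug uses are ignored here: any non-debug use of the result register
  // means the ADDS/SUBS value itself is needed and the compare must stay.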
1218 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) 1219 return false; 1220 1221 return substituteCmpToZero(CmpInstr, SrcReg, MRI); 1222 } 1223 1224 /// Get opcode of S version of Instr. 1225 /// If Instr is S version its opcode is returned. 1226 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version 1227 /// or we are not interested in it. 1228 static unsigned sForm(MachineInstr &Instr) { 1229 switch (Instr.getOpcode()) { 1230 default: 1231 return AArch64::INSTRUCTION_LIST_END; 1232 1233 case AArch64::ADDSWrr: 1234 case AArch64::ADDSWri: 1235 case AArch64::ADDSXrr: 1236 case AArch64::ADDSXri: 1237 case AArch64::SUBSWrr: 1238 case AArch64::SUBSWri: 1239 case AArch64::SUBSXrr: 1240 case AArch64::SUBSXri: 1241 return Instr.getOpcode(); 1242 1243 case AArch64::ADDWrr: 1244 return AArch64::ADDSWrr; 1245 case AArch64::ADDWri: 1246 return AArch64::ADDSWri; 1247 case AArch64::ADDXrr: 1248 return AArch64::ADDSXrr; 1249 case AArch64::ADDXri: 1250 return AArch64::ADDSXri; 1251 case AArch64::ADCWr: 1252 return AArch64::ADCSWr; 1253 case AArch64::ADCXr: 1254 return AArch64::ADCSXr; 1255 case AArch64::SUBWrr: 1256 return AArch64::SUBSWrr; 1257 case AArch64::SUBWri: 1258 return AArch64::SUBSWri; 1259 case AArch64::SUBXrr: 1260 return AArch64::SUBSXrr; 1261 case AArch64::SUBXri: 1262 return AArch64::SUBSXri; 1263 case AArch64::SBCWr: 1264 return AArch64::SBCSWr; 1265 case AArch64::SBCXr: 1266 return AArch64::SBCSXr; 1267 case AArch64::ANDWri: 1268 return AArch64::ANDSWri; 1269 case AArch64::ANDXri: 1270 return AArch64::ANDSXri; 1271 } 1272 } 1273 1274 /// Check if AArch64::NZCV should be alive in successors of MBB. 1275 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) { 1276 for (auto *BB : MBB->successors()) 1277 if (BB->isLiveIn(AArch64::NZCV)) 1278 return true; 1279 return false; 1280 } 1281 1282 namespace { 1283 1284 struct UsedNZCV { 1285 bool N = false; 1286 bool Z = false; 1287 bool C = false; 1288 bool V = false; 1289 1290 UsedNZCV() = default; 1291 1292 UsedNZCV &operator|=(const UsedNZCV &UsedFlags) { 1293 this->N |= UsedFlags.N; 1294 this->Z |= UsedFlags.Z; 1295 this->C |= UsedFlags.C; 1296 this->V |= UsedFlags.V; 1297 return *this; 1298 } 1299 }; 1300 1301 } // end anonymous namespace 1302 1303 /// Find a condition code used by the instruction. 1304 /// Returns AArch64CC::Invalid if either the instruction does not use condition 1305 /// codes or we don't optimize CmpInstr in the presence of such instructions. 
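/// For Bcc the condition code immediate sits two operands before the NZCV use;
/// for the conditional-select family (CSEL/CSINC/CSINV/CSNEG/FCSEL) it is the
/// operand immediately before it.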
1306 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) { 1307 switch (Instr.getOpcode()) { 1308 default: 1309 return AArch64CC::Invalid; 1310 1311 case AArch64::Bcc: { 1312 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); 1313 assert(Idx >= 2); 1314 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm()); 1315 } 1316 1317 case AArch64::CSINVWr: 1318 case AArch64::CSINVXr: 1319 case AArch64::CSINCWr: 1320 case AArch64::CSINCXr: 1321 case AArch64::CSELWr: 1322 case AArch64::CSELXr: 1323 case AArch64::CSNEGWr: 1324 case AArch64::CSNEGXr: 1325 case AArch64::FCSELSrrr: 1326 case AArch64::FCSELDrrr: { 1327 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); 1328 assert(Idx >= 1); 1329 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm()); 1330 } 1331 } 1332 } 1333 1334 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { 1335 assert(CC != AArch64CC::Invalid); 1336 UsedNZCV UsedFlags; 1337 switch (CC) { 1338 default: 1339 break; 1340 1341 case AArch64CC::EQ: // Z set 1342 case AArch64CC::NE: // Z clear 1343 UsedFlags.Z = true; 1344 break; 1345 1346 case AArch64CC::HI: // Z clear and C set 1347 case AArch64CC::LS: // Z set or C clear 1348 UsedFlags.Z = true; 1349 LLVM_FALLTHROUGH; 1350 case AArch64CC::HS: // C set 1351 case AArch64CC::LO: // C clear 1352 UsedFlags.C = true; 1353 break; 1354 1355 case AArch64CC::MI: // N set 1356 case AArch64CC::PL: // N clear 1357 UsedFlags.N = true; 1358 break; 1359 1360 case AArch64CC::VS: // V set 1361 case AArch64CC::VC: // V clear 1362 UsedFlags.V = true; 1363 break; 1364 1365 case AArch64CC::GT: // Z clear, N and V the same 1366 case AArch64CC::LE: // Z set, N and V differ 1367 UsedFlags.Z = true; 1368 LLVM_FALLTHROUGH; 1369 case AArch64CC::GE: // N and V the same 1370 case AArch64CC::LT: // N and V differ 1371 UsedFlags.N = true; 1372 UsedFlags.V = true; 1373 break; 1374 } 1375 return UsedFlags; 1376 } 1377 1378 static bool isADDSRegImm(unsigned Opcode) { 1379 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri; 1380 } 1381 1382 static bool isSUBSRegImm(unsigned Opcode) { 1383 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri; 1384 } 1385 1386 /// Check if CmpInstr can be substituted by MI. 1387 /// 1388 /// CmpInstr can be substituted: 1389 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0' 1390 /// - and, MI and CmpInstr are from the same MachineBB 1391 /// - and, condition flags are not alive in successors of the CmpInstr parent 1392 /// - and, if MI opcode is the S form there must be no defs of flags between 1393 /// MI and CmpInstr 1394 /// or if MI opcode is not the S form there must be neither defs of flags 1395 /// nor uses of flags between MI and CmpInstr. 
1396 /// - and C/V flags are not used after CmpInstr 1397 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr, 1398 const TargetRegisterInfo *TRI) { 1399 assert(MI); 1400 assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END); 1401 assert(CmpInstr); 1402 1403 const unsigned CmpOpcode = CmpInstr->getOpcode(); 1404 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode)) 1405 return false; 1406 1407 if (MI->getParent() != CmpInstr->getParent()) 1408 return false; 1409 1410 if (areCFlagsAliveInSuccessors(CmpInstr->getParent())) 1411 return false; 1412 1413 AccessKind AccessToCheck = AK_Write; 1414 if (sForm(*MI) != MI->getOpcode()) 1415 AccessToCheck = AK_All; 1416 if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck)) 1417 return false; 1418 1419 UsedNZCV NZCVUsedAfterCmp; 1420 for (auto I = std::next(CmpInstr->getIterator()), 1421 E = CmpInstr->getParent()->instr_end(); 1422 I != E; ++I) { 1423 const MachineInstr &Instr = *I; 1424 if (Instr.readsRegister(AArch64::NZCV, TRI)) { 1425 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr); 1426 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction 1427 return false; 1428 NZCVUsedAfterCmp |= getUsedNZCV(CC); 1429 } 1430 1431 if (Instr.modifiesRegister(AArch64::NZCV, TRI)) 1432 break; 1433 } 1434 1435 return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V; 1436 } 1437 1438 /// Substitute an instruction comparing to zero with another instruction 1439 /// which produces needed condition flags. 1440 /// 1441 /// Return true on success. 1442 bool AArch64InstrInfo::substituteCmpToZero( 1443 MachineInstr &CmpInstr, unsigned SrcReg, 1444 const MachineRegisterInfo *MRI) const { 1445 assert(MRI); 1446 // Get the unique definition of SrcReg. 1447 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); 1448 if (!MI) 1449 return false; 1450 1451 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1452 1453 unsigned NewOpc = sForm(*MI); 1454 if (NewOpc == AArch64::INSTRUCTION_LIST_END) 1455 return false; 1456 1457 if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI)) 1458 return false; 1459 1460 // Update the instruction to set NZCV. 1461 MI->setDesc(get(NewOpc)); 1462 CmpInstr.eraseFromParent(); 1463 bool succeeded = UpdateOperandRegClass(*MI); 1464 (void)succeeded; 1465 assert(succeeded && "Some operands reg class are incompatible!"); 1466 MI->addRegisterDefined(AArch64::NZCV, TRI); 1467 return true; 1468 } 1469 1470 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { 1471 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD && 1472 MI.getOpcode() != AArch64::CATCHRET) 1473 return false; 1474 1475 MachineBasicBlock &MBB = *MI.getParent(); 1476 DebugLoc DL = MI.getDebugLoc(); 1477 1478 if (MI.getOpcode() == AArch64::CATCHRET) { 1479 // Skip to the first instruction before the epilog. 
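    // Walk backwards over the FrameDestroy-flagged (SEH epilogue) instructions
    // so that the ADRP/ADDXri pair materializing the target block's address in
    // X0 is inserted before the epilogue markers.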
1480 const TargetInstrInfo *TII = 1481 MBB.getParent()->getSubtarget().getInstrInfo(); 1482 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB(); 1483 auto MBBI = MachineBasicBlock::iterator(MI); 1484 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI); 1485 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) && 1486 FirstEpilogSEH != MBB.begin()) 1487 FirstEpilogSEH = std::prev(FirstEpilogSEH); 1488 if (FirstEpilogSEH != MBB.begin()) 1489 FirstEpilogSEH = std::next(FirstEpilogSEH); 1490 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP)) 1491 .addReg(AArch64::X0, RegState::Define) 1492 .addMBB(TargetMBB); 1493 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri)) 1494 .addReg(AArch64::X0, RegState::Define) 1495 .addReg(AArch64::X0) 1496 .addMBB(TargetMBB) 1497 .addImm(0); 1498 return true; 1499 } 1500 1501 unsigned Reg = MI.getOperand(0).getReg(); 1502 const GlobalValue *GV = 1503 cast<GlobalValue>((*MI.memoperands_begin())->getValue()); 1504 const TargetMachine &TM = MBB.getParent()->getTarget(); 1505 unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM); 1506 const unsigned char MO_NC = AArch64II::MO_NC; 1507 1508 if ((OpFlags & AArch64II::MO_GOT) != 0) { 1509 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg) 1510 .addGlobalAddress(GV, 0, OpFlags); 1511 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 1512 .addReg(Reg, RegState::Kill) 1513 .addImm(0) 1514 .addMemOperand(*MI.memoperands_begin()); 1515 } else if (TM.getCodeModel() == CodeModel::Large) { 1516 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg) 1517 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC) 1518 .addImm(0); 1519 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 1520 .addReg(Reg, RegState::Kill) 1521 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC) 1522 .addImm(16); 1523 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 1524 .addReg(Reg, RegState::Kill) 1525 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC) 1526 .addImm(32); 1527 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 1528 .addReg(Reg, RegState::Kill) 1529 .addGlobalAddress(GV, 0, AArch64II::MO_G3) 1530 .addImm(48); 1531 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 1532 .addReg(Reg, RegState::Kill) 1533 .addImm(0) 1534 .addMemOperand(*MI.memoperands_begin()); 1535 } else if (TM.getCodeModel() == CodeModel::Tiny) { 1536 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg) 1537 .addGlobalAddress(GV, 0, OpFlags); 1538 } else { 1539 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg) 1540 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE); 1541 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC; 1542 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 1543 .addReg(Reg, RegState::Kill) 1544 .addGlobalAddress(GV, 0, LoFlags) 1545 .addMemOperand(*MI.memoperands_begin()); 1546 } 1547 1548 MBB.erase(MI); 1549 1550 return true; 1551 } 1552 1553 // Return true if this instruction simply sets its single destination register 1554 // to zero. This is equivalent to a register rename of the zero-register. 
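// Recognized forms: "movz Rd, #0", "and Rd, wzr/xzr, #imm", and a plain COPY
// from WZR.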
1555 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) { 1556 switch (MI.getOpcode()) { 1557 default: 1558 break; 1559 case AArch64::MOVZWi: 1560 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0) 1561 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) { 1562 assert(MI.getDesc().getNumOperands() == 3 && 1563 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands"); 1564 return true; 1565 } 1566 break; 1567 case AArch64::ANDWri: // and Rd, Rzr, #imm 1568 return MI.getOperand(1).getReg() == AArch64::WZR; 1569 case AArch64::ANDXri: 1570 return MI.getOperand(1).getReg() == AArch64::XZR; 1571 case TargetOpcode::COPY: 1572 return MI.getOperand(1).getReg() == AArch64::WZR; 1573 } 1574 return false; 1575 } 1576 1577 // Return true if this instruction simply renames a general register without 1578 // modifying bits. 1579 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) { 1580 switch (MI.getOpcode()) { 1581 default: 1582 break; 1583 case TargetOpcode::COPY: { 1584 // GPR32 copies will be lowered to ORRXrs 1585 unsigned DstReg = MI.getOperand(0).getReg(); 1586 return (AArch64::GPR32RegClass.contains(DstReg) || 1587 AArch64::GPR64RegClass.contains(DstReg)); 1588 } 1589 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0) 1590 if (MI.getOperand(1).getReg() == AArch64::XZR) { 1591 assert(MI.getDesc().getNumOperands() == 4 && 1592 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands"); 1593 return true; 1594 } 1595 break; 1596 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0) 1597 if (MI.getOperand(2).getImm() == 0) { 1598 assert(MI.getDesc().getNumOperands() == 4 && 1599 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands"); 1600 return true; 1601 } 1602 break; 1603 } 1604 return false; 1605 } 1606 1607 // Return true if this instruction simply renames a floating-point register 1608 // without modifying bits.
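// Recognized forms: a COPY whose destination is an FPR64/FPR128 register, and
// a self-ORR such as "orr vD.16b, vN.16b, vN.16b".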
1609 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) { 1610 switch (MI.getOpcode()) { 1611 default: 1612 break; 1613 case TargetOpcode::COPY: { 1614 // FPR64 copies will be lowered to ORR.16b 1615 unsigned DstReg = MI.getOperand(0).getReg(); 1616 return (AArch64::FPR64RegClass.contains(DstReg) || 1617 AArch64::FPR128RegClass.contains(DstReg)); 1618 } 1619 case AArch64::ORRv16i8: 1620 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { 1621 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() && 1622 "invalid ORRv16i8 operands"); 1623 return true; 1624 } 1625 break; 1626 } 1627 return false; 1628 } 1629 1630 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 1631 int &FrameIndex) const { 1632 switch (MI.getOpcode()) { 1633 default: 1634 break; 1635 case AArch64::LDRWui: 1636 case AArch64::LDRXui: 1637 case AArch64::LDRBui: 1638 case AArch64::LDRHui: 1639 case AArch64::LDRSui: 1640 case AArch64::LDRDui: 1641 case AArch64::LDRQui: 1642 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && 1643 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { 1644 FrameIndex = MI.getOperand(1).getIndex(); 1645 return MI.getOperand(0).getReg(); 1646 } 1647 break; 1648 } 1649 1650 return 0; 1651 } 1652 1653 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI, 1654 int &FrameIndex) const { 1655 switch (MI.getOpcode()) { 1656 default: 1657 break; 1658 case AArch64::STRWui: 1659 case AArch64::STRXui: 1660 case AArch64::STRBui: 1661 case AArch64::STRHui: 1662 case AArch64::STRSui: 1663 case AArch64::STRDui: 1664 case AArch64::STRQui: 1665 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && 1666 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { 1667 FrameIndex = MI.getOperand(1).getIndex(); 1668 return MI.getOperand(0).getReg(); 1669 } 1670 break; 1671 } 1672 return 0; 1673 } 1674 1675 /// Check all MachineMemOperands for a hint to suppress pairing. 1676 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) { 1677 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 1678 return MMO->getFlags() & MOSuppressPair; 1679 }); 1680 } 1681 1682 /// Set a flag on the first MachineMemOperand to suppress pairing. 1683 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) { 1684 if (MI.memoperands_empty()) 1685 return; 1686 (*MI.memoperands_begin())->setFlags(MOSuppressPair); 1687 } 1688 1689 /// Check all MachineMemOperands for a hint that the load/store is strided.
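/// Like MOSuppressPair above, MOStridedAccess is a target-specific
/// MachineMemOperand flag; this helper only queries it and never sets it.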
1690 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) { 1691 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 1692 return MMO->getFlags() & MOStridedAccess; 1693 }); 1694 } 1695 1696 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) { 1697 switch (Opc) { 1698 default: 1699 return false; 1700 case AArch64::STURSi: 1701 case AArch64::STURDi: 1702 case AArch64::STURQi: 1703 case AArch64::STURBBi: 1704 case AArch64::STURHHi: 1705 case AArch64::STURWi: 1706 case AArch64::STURXi: 1707 case AArch64::LDURSi: 1708 case AArch64::LDURDi: 1709 case AArch64::LDURQi: 1710 case AArch64::LDURWi: 1711 case AArch64::LDURXi: 1712 case AArch64::LDURSWi: 1713 case AArch64::LDURHHi: 1714 case AArch64::LDURBBi: 1715 case AArch64::LDURSBWi: 1716 case AArch64::LDURSHWi: 1717 return true; 1718 } 1719 } 1720 1721 Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) { 1722 switch (Opc) { 1723 default: return {}; 1724 case AArch64::PRFMui: return AArch64::PRFUMi; 1725 case AArch64::LDRXui: return AArch64::LDURXi; 1726 case AArch64::LDRWui: return AArch64::LDURWi; 1727 case AArch64::LDRBui: return AArch64::LDURBi; 1728 case AArch64::LDRHui: return AArch64::LDURHi; 1729 case AArch64::LDRSui: return AArch64::LDURSi; 1730 case AArch64::LDRDui: return AArch64::LDURDi; 1731 case AArch64::LDRQui: return AArch64::LDURQi; 1732 case AArch64::LDRBBui: return AArch64::LDURBBi; 1733 case AArch64::LDRHHui: return AArch64::LDURHHi; 1734 case AArch64::LDRSBXui: return AArch64::LDURSBXi; 1735 case AArch64::LDRSBWui: return AArch64::LDURSBWi; 1736 case AArch64::LDRSHXui: return AArch64::LDURSHXi; 1737 case AArch64::LDRSHWui: return AArch64::LDURSHWi; 1738 case AArch64::LDRSWui: return AArch64::LDURSWi; 1739 case AArch64::STRXui: return AArch64::STURXi; 1740 case AArch64::STRWui: return AArch64::STURWi; 1741 case AArch64::STRBui: return AArch64::STURBi; 1742 case AArch64::STRHui: return AArch64::STURHi; 1743 case AArch64::STRSui: return AArch64::STURSi; 1744 case AArch64::STRDui: return AArch64::STURDi; 1745 case AArch64::STRQui: return AArch64::STURQi; 1746 case AArch64::STRBBui: return AArch64::STURBBi; 1747 case AArch64::STRHHui: return AArch64::STURHHi; 1748 } 1749 } 1750 1751 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { 1752 switch (Opc) { 1753 default: 1754 return 2; 1755 case AArch64::LDPXi: 1756 case AArch64::LDPDi: 1757 case AArch64::STPXi: 1758 case AArch64::STPDi: 1759 case AArch64::LDNPXi: 1760 case AArch64::LDNPDi: 1761 case AArch64::STNPXi: 1762 case AArch64::STNPDi: 1763 case AArch64::LDPQi: 1764 case AArch64::STPQi: 1765 case AArch64::LDNPQi: 1766 case AArch64::STNPQi: 1767 case AArch64::LDPWi: 1768 case AArch64::LDPSi: 1769 case AArch64::STPWi: 1770 case AArch64::STPSi: 1771 case AArch64::LDNPWi: 1772 case AArch64::LDNPSi: 1773 case AArch64::STNPWi: 1774 case AArch64::STNPSi: 1775 case AArch64::LDG: 1776 case AArch64::STGPi: 1777 return 3; 1778 case AArch64::ADDG: 1779 case AArch64::STGOffset: 1780 return 2; 1781 } 1782 } 1783 1784 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) { 1785 switch (MI.getOpcode()) { 1786 default: 1787 return false; 1788 // Scaled instructions. 1789 case AArch64::STRSui: 1790 case AArch64::STRDui: 1791 case AArch64::STRQui: 1792 case AArch64::STRXui: 1793 case AArch64::STRWui: 1794 case AArch64::LDRSui: 1795 case AArch64::LDRDui: 1796 case AArch64::LDRQui: 1797 case AArch64::LDRXui: 1798 case AArch64::LDRWui: 1799 case AArch64::LDRSWui: 1800 // Unscaled instructions. 
1801 case AArch64::STURSi: 1802 case AArch64::STURDi: 1803 case AArch64::STURQi: 1804 case AArch64::STURWi: 1805 case AArch64::STURXi: 1806 case AArch64::LDURSi: 1807 case AArch64::LDURDi: 1808 case AArch64::LDURQi: 1809 case AArch64::LDURWi: 1810 case AArch64::LDURXi: 1811 case AArch64::LDURSWi: 1812 return true; 1813 } 1814 } 1815 1816 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc, 1817 bool &Is64Bit) { 1818 switch (Opc) { 1819 default: 1820 llvm_unreachable("Opcode has no flag setting equivalent!"); 1821 // 32-bit cases: 1822 case AArch64::ADDWri: 1823 Is64Bit = false; 1824 return AArch64::ADDSWri; 1825 case AArch64::ADDWrr: 1826 Is64Bit = false; 1827 return AArch64::ADDSWrr; 1828 case AArch64::ADDWrs: 1829 Is64Bit = false; 1830 return AArch64::ADDSWrs; 1831 case AArch64::ADDWrx: 1832 Is64Bit = false; 1833 return AArch64::ADDSWrx; 1834 case AArch64::ANDWri: 1835 Is64Bit = false; 1836 return AArch64::ANDSWri; 1837 case AArch64::ANDWrr: 1838 Is64Bit = false; 1839 return AArch64::ANDSWrr; 1840 case AArch64::ANDWrs: 1841 Is64Bit = false; 1842 return AArch64::ANDSWrs; 1843 case AArch64::BICWrr: 1844 Is64Bit = false; 1845 return AArch64::BICSWrr; 1846 case AArch64::BICWrs: 1847 Is64Bit = false; 1848 return AArch64::BICSWrs; 1849 case AArch64::SUBWri: 1850 Is64Bit = false; 1851 return AArch64::SUBSWri; 1852 case AArch64::SUBWrr: 1853 Is64Bit = false; 1854 return AArch64::SUBSWrr; 1855 case AArch64::SUBWrs: 1856 Is64Bit = false; 1857 return AArch64::SUBSWrs; 1858 case AArch64::SUBWrx: 1859 Is64Bit = false; 1860 return AArch64::SUBSWrx; 1861 // 64-bit cases: 1862 case AArch64::ADDXri: 1863 Is64Bit = true; 1864 return AArch64::ADDSXri; 1865 case AArch64::ADDXrr: 1866 Is64Bit = true; 1867 return AArch64::ADDSXrr; 1868 case AArch64::ADDXrs: 1869 Is64Bit = true; 1870 return AArch64::ADDSXrs; 1871 case AArch64::ADDXrx: 1872 Is64Bit = true; 1873 return AArch64::ADDSXrx; 1874 case AArch64::ANDXri: 1875 Is64Bit = true; 1876 return AArch64::ANDSXri; 1877 case AArch64::ANDXrr: 1878 Is64Bit = true; 1879 return AArch64::ANDSXrr; 1880 case AArch64::ANDXrs: 1881 Is64Bit = true; 1882 return AArch64::ANDSXrs; 1883 case AArch64::BICXrr: 1884 Is64Bit = true; 1885 return AArch64::BICSXrr; 1886 case AArch64::BICXrs: 1887 Is64Bit = true; 1888 return AArch64::BICSXrs; 1889 case AArch64::SUBXri: 1890 Is64Bit = true; 1891 return AArch64::SUBSXri; 1892 case AArch64::SUBXrr: 1893 Is64Bit = true; 1894 return AArch64::SUBSXrr; 1895 case AArch64::SUBXrs: 1896 Is64Bit = true; 1897 return AArch64::SUBSXrs; 1898 case AArch64::SUBXrx: 1899 Is64Bit = true; 1900 return AArch64::SUBSXrx; 1901 } 1902 } 1903 1904 // Is this a candidate for ld/st merging or pairing? For example, we don't 1905 // touch volatiles or load/stores that have a hint to avoid pair formation. 1906 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { 1907 // If this is a volatile load/store, don't mess with it. 1908 if (MI.hasOrderedMemoryRef()) 1909 return false; 1910 1911 // Make sure this is a reg/fi+imm (as opposed to an address reloc). 1912 assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) && 1913 "Expected a reg or frame index operand."); 1914 if (!MI.getOperand(2).isImm()) 1915 return false; 1916 1917 // Can't merge/pair if the instruction modifies the base register. 1918 // e.g., ldr x0, [x0] 1919 // This case will never occur with an FI base. 
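  // Illustrative note (added; a sketch of why the check below matters): after
  // "ldr x0, [x0]" any later access that uses x0 as its base is computed from the
  // loaded value rather than the original pointer, so it cannot be combined with
  // this instruction into a single paired access off one base.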
1920 if (MI.getOperand(1).isReg()) { 1921 unsigned BaseReg = MI.getOperand(1).getReg(); 1922 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1923 if (MI.modifiesRegister(BaseReg, TRI)) 1924 return false; 1925 } 1926 1927 // Check if this load/store has a hint to avoid pair formation. 1928 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. 1929 if (isLdStPairSuppressed(MI)) 1930 return false; 1931 1932 // Do not pair any callee-save store/reload instructions in the 1933 // prologue/epilogue if the CFI information encoded the operations as separate 1934 // instructions, as that will cause the size of the actual prologue to mismatch 1935 // with the prologue size recorded in the Windows CFI. 1936 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo(); 1937 bool NeedsWinCFI = MAI->usesWindowsCFI() && 1938 MI.getMF()->getFunction().needsUnwindTableEntry(); 1939 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) || 1940 MI.getFlag(MachineInstr::FrameDestroy))) 1941 return false; 1942 1943 // On some CPUs quad load/store pairs are slower than two single load/stores. 1944 if (Subtarget.isPaired128Slow()) { 1945 switch (MI.getOpcode()) { 1946 default: 1947 break; 1948 case AArch64::LDURQi: 1949 case AArch64::STURQi: 1950 case AArch64::LDRQui: 1951 case AArch64::STRQui: 1952 return false; 1953 } 1954 } 1955 1956 return true; 1957 } 1958 1959 bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, 1960 const MachineOperand *&BaseOp, 1961 int64_t &Offset, 1962 const TargetRegisterInfo *TRI) const { 1963 unsigned Width; 1964 return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI); 1965 } 1966 1967 bool AArch64InstrInfo::getMemOperandWithOffsetWidth( 1968 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, 1969 unsigned &Width, const TargetRegisterInfo *TRI) const { 1970 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 1971 // Handle only loads/stores with base register followed by immediate offset. 1972 if (LdSt.getNumExplicitOperands() == 3) { 1973 // Non-paired instruction (e.g., ldr x1, [x0, #8]). 1974 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || 1975 !LdSt.getOperand(2).isImm()) 1976 return false; 1977 } else if (LdSt.getNumExplicitOperands() == 4) { 1978 // Paired instruction (e.g., ldp x1, x2, [x0, #8]). 1979 if (!LdSt.getOperand(1).isReg() || 1980 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) || 1981 !LdSt.getOperand(3).isImm()) 1982 return false; 1983 } else 1984 return false; 1985 1986 // Get the scaling factor for the instruction and set the width for the 1987 // instruction. 1988 unsigned Scale = 0; 1989 int64_t Dummy1, Dummy2; 1990 1991 // If this returns false, then it's an instruction we don't want to handle. 1992 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2)) 1993 return false; 1994 1995 // Compute the offset. Offset is calculated as the immediate operand 1996 // multiplied by the scaling factor. Unscaled instructions have scaling factor 1997 // set to 1. 
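  // Illustrative example (added, not from the original comment): for
  // "ldr x1, [x0, #16]" (LDRXui) getMemOpInfo reports Scale == 8, so the encoded
  // immediate 2 yields Offset == 16; for the unscaled form "ldur x1, [x0, #16]"
  // (LDURXi) Scale == 1 and the immediate already is the byte offset.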
1998 if (LdSt.getNumExplicitOperands() == 3) { 1999 BaseOp = &LdSt.getOperand(1); 2000 Offset = LdSt.getOperand(2).getImm() * Scale; 2001 } else { 2002 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); 2003 BaseOp = &LdSt.getOperand(2); 2004 Offset = LdSt.getOperand(3).getImm() * Scale; 2005 } 2006 2007 assert((BaseOp->isReg() || BaseOp->isFI()) && 2008 "getMemOperandWithOffset only supports base " 2009 "operands of type register or frame index."); 2010 2011 return true; 2012 } 2013 2014 MachineOperand & 2015 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const { 2016 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 2017 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1); 2018 assert(OfsOp.isImm() && "Offset operand wasn't immediate."); 2019 return OfsOp; 2020 } 2021 2022 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale, 2023 unsigned &Width, int64_t &MinOffset, 2024 int64_t &MaxOffset) { 2025 switch (Opcode) { 2026 // Not a memory operation or something we want to handle. 2027 default: 2028 Scale = Width = 0; 2029 MinOffset = MaxOffset = 0; 2030 return false; 2031 case AArch64::STRWpost: 2032 case AArch64::LDRWpost: 2033 Width = 32; 2034 Scale = 4; 2035 MinOffset = -256; 2036 MaxOffset = 255; 2037 break; 2038 case AArch64::LDURQi: 2039 case AArch64::STURQi: 2040 Width = 16; 2041 Scale = 1; 2042 MinOffset = -256; 2043 MaxOffset = 255; 2044 break; 2045 case AArch64::PRFUMi: 2046 case AArch64::LDURXi: 2047 case AArch64::LDURDi: 2048 case AArch64::STURXi: 2049 case AArch64::STURDi: 2050 Width = 8; 2051 Scale = 1; 2052 MinOffset = -256; 2053 MaxOffset = 255; 2054 break; 2055 case AArch64::LDURWi: 2056 case AArch64::LDURSi: 2057 case AArch64::LDURSWi: 2058 case AArch64::STURWi: 2059 case AArch64::STURSi: 2060 Width = 4; 2061 Scale = 1; 2062 MinOffset = -256; 2063 MaxOffset = 255; 2064 break; 2065 case AArch64::LDURHi: 2066 case AArch64::LDURHHi: 2067 case AArch64::LDURSHXi: 2068 case AArch64::LDURSHWi: 2069 case AArch64::STURHi: 2070 case AArch64::STURHHi: 2071 Width = 2; 2072 Scale = 1; 2073 MinOffset = -256; 2074 MaxOffset = 255; 2075 break; 2076 case AArch64::LDURBi: 2077 case AArch64::LDURBBi: 2078 case AArch64::LDURSBXi: 2079 case AArch64::LDURSBWi: 2080 case AArch64::STURBi: 2081 case AArch64::STURBBi: 2082 Width = 1; 2083 Scale = 1; 2084 MinOffset = -256; 2085 MaxOffset = 255; 2086 break; 2087 case AArch64::LDPQi: 2088 case AArch64::LDNPQi: 2089 case AArch64::STPQi: 2090 case AArch64::STNPQi: 2091 Scale = 16; 2092 Width = 32; 2093 MinOffset = -64; 2094 MaxOffset = 63; 2095 break; 2096 case AArch64::LDRQui: 2097 case AArch64::STRQui: 2098 Scale = Width = 16; 2099 MinOffset = 0; 2100 MaxOffset = 4095; 2101 break; 2102 case AArch64::LDPXi: 2103 case AArch64::LDPDi: 2104 case AArch64::LDNPXi: 2105 case AArch64::LDNPDi: 2106 case AArch64::STPXi: 2107 case AArch64::STPDi: 2108 case AArch64::STNPXi: 2109 case AArch64::STNPDi: 2110 Scale = 8; 2111 Width = 16; 2112 MinOffset = -64; 2113 MaxOffset = 63; 2114 break; 2115 case AArch64::PRFMui: 2116 case AArch64::LDRXui: 2117 case AArch64::LDRDui: 2118 case AArch64::STRXui: 2119 case AArch64::STRDui: 2120 Scale = Width = 8; 2121 MinOffset = 0; 2122 MaxOffset = 4095; 2123 break; 2124 case AArch64::LDPWi: 2125 case AArch64::LDPSi: 2126 case AArch64::LDNPWi: 2127 case AArch64::LDNPSi: 2128 case AArch64::STPWi: 2129 case AArch64::STPSi: 2130 case AArch64::STNPWi: 2131 case AArch64::STNPSi: 2132 Scale = 4; 2133 Width = 8; 2134 MinOffset = -64; 
2135 MaxOffset = 63; 2136 break; 2137 case AArch64::LDRWui: 2138 case AArch64::LDRSui: 2139 case AArch64::LDRSWui: 2140 case AArch64::STRWui: 2141 case AArch64::STRSui: 2142 Scale = Width = 4; 2143 MinOffset = 0; 2144 MaxOffset = 4095; 2145 break; 2146 case AArch64::LDRHui: 2147 case AArch64::LDRHHui: 2148 case AArch64::LDRSHWui: 2149 case AArch64::LDRSHXui: 2150 case AArch64::STRHui: 2151 case AArch64::STRHHui: 2152 Scale = Width = 2; 2153 MinOffset = 0; 2154 MaxOffset = 4095; 2155 break; 2156 case AArch64::LDRBui: 2157 case AArch64::LDRBBui: 2158 case AArch64::LDRSBWui: 2159 case AArch64::LDRSBXui: 2160 case AArch64::STRBui: 2161 case AArch64::STRBBui: 2162 Scale = Width = 1; 2163 MinOffset = 0; 2164 MaxOffset = 4095; 2165 break; 2166 case AArch64::ADDG: 2167 case AArch64::TAGPstack: 2168 Scale = 16; 2169 Width = 0; 2170 MinOffset = 0; 2171 MaxOffset = 63; 2172 break; 2173 case AArch64::LDG: 2174 case AArch64::STGOffset: 2175 case AArch64::STZGOffset: 2176 Scale = Width = 16; 2177 MinOffset = -256; 2178 MaxOffset = 255; 2179 break; 2180 case AArch64::ST2GOffset: 2181 case AArch64::STZ2GOffset: 2182 Scale = 16; 2183 Width = 32; 2184 MinOffset = -256; 2185 MaxOffset = 255; 2186 break; 2187 case AArch64::STGPi: 2188 Scale = Width = 16; 2189 MinOffset = -64; 2190 MaxOffset = 63; 2191 break; 2192 } 2193 2194 return true; 2195 } 2196 2197 static unsigned getOffsetStride(unsigned Opc) { 2198 switch (Opc) { 2199 default: 2200 return 0; 2201 case AArch64::LDURQi: 2202 case AArch64::STURQi: 2203 return 16; 2204 case AArch64::LDURXi: 2205 case AArch64::LDURDi: 2206 case AArch64::STURXi: 2207 case AArch64::STURDi: 2208 return 8; 2209 case AArch64::LDURWi: 2210 case AArch64::LDURSi: 2211 case AArch64::LDURSWi: 2212 case AArch64::STURWi: 2213 case AArch64::STURSi: 2214 return 4; 2215 } 2216 } 2217 2218 // Scale the unscaled offsets. Returns false if the unscaled offset can't be 2219 // scaled. 2220 static bool scaleOffset(unsigned Opc, int64_t &Offset) { 2221 unsigned OffsetStride = getOffsetStride(Opc); 2222 if (OffsetStride == 0) 2223 return false; 2224 // If the byte-offset isn't a multiple of the stride, we can't scale this 2225 // offset. 2226 if (Offset % OffsetStride != 0) 2227 return false; 2228 2229 // Convert the byte-offset used by unscaled into an "element" offset used 2230 // by the scaled pair load/store instructions. 2231 Offset /= OffsetStride; 2232 return true; 2233 } 2234 2235 // Unscale the scaled offsets. Returns false if the scaled offset can't be 2236 // unscaled. 2237 static bool unscaleOffset(unsigned Opc, int64_t &Offset) { 2238 unsigned OffsetStride = getOffsetStride(Opc); 2239 if (OffsetStride == 0) 2240 return false; 2241 2242 // Convert the "element" offset used by scaled pair load/store instructions 2243 // into the byte-offset used by unscaled. 2244 Offset *= OffsetStride; 2245 return true; 2246 } 2247 2248 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { 2249 if (FirstOpc == SecondOpc) 2250 return true; 2251 // We can also pair sign-ext and zero-ext instructions. 2252 switch (FirstOpc) { 2253 default: 2254 return false; 2255 case AArch64::LDRWui: 2256 case AArch64::LDURWi: 2257 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; 2258 case AArch64::LDRSWui: 2259 case AArch64::LDURSWi: 2260 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi; 2261 } 2262 // These instructions can't be paired based on their opcodes. 
2263 return false; 2264 } 2265 2266 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, 2267 int64_t Offset1, unsigned Opcode1, int FI2, 2268 int64_t Offset2, unsigned Opcode2) { 2269 // Accesses through fixed stack object frame indices may access a different 2270 // fixed stack slot. Check that the object offsets + offsets match. 2271 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) { 2272 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1); 2273 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2); 2274 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered."); 2275 // Get the byte-offset from the object offset. 2276 if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2)) 2277 return false; 2278 ObjectOffset1 += Offset1; 2279 ObjectOffset2 += Offset2; 2280 // Get the "element" index in the object. 2281 if (!scaleOffset(Opcode1, ObjectOffset1) || 2282 !scaleOffset(Opcode2, ObjectOffset2)) 2283 return false; 2284 return ObjectOffset1 + 1 == ObjectOffset2; 2285 } 2286 2287 return FI1 == FI2; 2288 } 2289 2290 /// Detect opportunities for ldp/stp formation. 2291 /// 2292 /// Only called for LdSt for which getMemOperandWithOffset returns true. 2293 bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1, 2294 const MachineOperand &BaseOp2, 2295 unsigned NumLoads) const { 2296 const MachineInstr &FirstLdSt = *BaseOp1.getParent(); 2297 const MachineInstr &SecondLdSt = *BaseOp2.getParent(); 2298 if (BaseOp1.getType() != BaseOp2.getType()) 2299 return false; 2300 2301 assert((BaseOp1.isReg() || BaseOp1.isFI()) && 2302 "Only base registers and frame indices are supported."); 2303 2304 // Check for both base regs and base FI. 2305 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) 2306 return false; 2307 2308 // Only cluster up to a single pair. 2309 if (NumLoads > 1) 2310 return false; 2311 2312 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt)) 2313 return false; 2314 2315 // Can we pair these instructions based on their opcodes? 2316 unsigned FirstOpc = FirstLdSt.getOpcode(); 2317 unsigned SecondOpc = SecondLdSt.getOpcode(); 2318 if (!canPairLdStOpc(FirstOpc, SecondOpc)) 2319 return false; 2320 2321 // Can't merge volatiles or load/stores that have a hint to avoid pair 2322 // formation, for example. 2323 if (!isCandidateToMergeOrPair(FirstLdSt) || 2324 !isCandidateToMergeOrPair(SecondLdSt)) 2325 return false; 2326 2327 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate. 2328 int64_t Offset1 = FirstLdSt.getOperand(2).getImm(); 2329 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1)) 2330 return false; 2331 2332 int64_t Offset2 = SecondLdSt.getOperand(2).getImm(); 2333 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2)) 2334 return false; 2335 2336 // Pairwise instructions have a 7-bit signed offset field. 2337 if (Offset1 > 63 || Offset1 < -64) 2338 return false; 2339 2340 // The caller should already have ordered First/SecondLdSt by offset. 
2341 // Note: except for non-equal frame index bases 2342 if (BaseOp1.isFI()) { 2343 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) && 2344 "Caller should have ordered offsets."); 2345 2346 const MachineFrameInfo &MFI = 2347 FirstLdSt.getParent()->getParent()->getFrameInfo(); 2348 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc, 2349 BaseOp2.getIndex(), Offset2, SecondOpc); 2350 } 2351 2352 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) && 2353 "Caller should have ordered offsets."); 2354 2355 return Offset1 + 1 == Offset2; 2356 } 2357 2358 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, 2359 unsigned Reg, unsigned SubIdx, 2360 unsigned State, 2361 const TargetRegisterInfo *TRI) { 2362 if (!SubIdx) 2363 return MIB.addReg(Reg, State); 2364 2365 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 2366 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 2367 return MIB.addReg(Reg, State, SubIdx); 2368 } 2369 2370 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, 2371 unsigned NumRegs) { 2372 // We really want the positive remainder mod 32 here, that happens to be 2373 // easily obtainable with a mask. 2374 return ((DestReg - SrcReg) & 0x1f) < NumRegs; 2375 } 2376 2377 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, 2378 MachineBasicBlock::iterator I, 2379 const DebugLoc &DL, unsigned DestReg, 2380 unsigned SrcReg, bool KillSrc, 2381 unsigned Opcode, 2382 ArrayRef<unsigned> Indices) const { 2383 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); 2384 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2385 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 2386 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 2387 unsigned NumRegs = Indices.size(); 2388 2389 int SubReg = 0, End = NumRegs, Incr = 1; 2390 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { 2391 SubReg = NumRegs - 1; 2392 End = -1; 2393 Incr = -1; 2394 } 2395 2396 for (; SubReg != End; SubReg += Incr) { 2397 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 2398 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 2399 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); 2400 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 2401 } 2402 } 2403 2404 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB, 2405 MachineBasicBlock::iterator I, 2406 DebugLoc DL, unsigned DestReg, 2407 unsigned SrcReg, bool KillSrc, 2408 unsigned Opcode, unsigned ZeroReg, 2409 llvm::ArrayRef<unsigned> Indices) const { 2410 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2411 unsigned NumRegs = Indices.size(); 2412 2413 #ifndef NDEBUG 2414 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 2415 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 2416 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 && 2417 "GPR reg sequences should not be able to overlap"); 2418 #endif 2419 2420 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) { 2421 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 2422 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 2423 MIB.addReg(ZeroReg); 2424 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 2425 MIB.addImm(0); 2426 } 2427 } 2428 2429 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 2430 MachineBasicBlock::iterator I, 2431 const DebugLoc &DL, unsigned DestReg, 2432 unsigned SrcReg, bool KillSrc) const { 2433 if 
(AArch64::GPR32spRegClass.contains(DestReg) && 2434 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { 2435 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2436 2437 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { 2438 // If either operand is WSP, expand to ADD #0. 2439 if (Subtarget.hasZeroCycleRegMove()) { 2440 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 2441 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, 2442 &AArch64::GPR64spRegClass); 2443 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, 2444 &AArch64::GPR64spRegClass); 2445 // This instruction is reading and writing X registers. This may upset 2446 // the register scavenger and machine verifier, so we need to indicate 2447 // that we are reading an undefined value from SrcRegX, but a proper 2448 // value from SrcReg. 2449 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) 2450 .addReg(SrcRegX, RegState::Undef) 2451 .addImm(0) 2452 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 2453 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 2454 } else { 2455 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) 2456 .addReg(SrcReg, getKillRegState(KillSrc)) 2457 .addImm(0) 2458 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2459 } 2460 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) { 2461 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) 2462 .addImm(0) 2463 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2464 } else { 2465 if (Subtarget.hasZeroCycleRegMove()) { 2466 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. 2467 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, 2468 &AArch64::GPR64spRegClass); 2469 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, 2470 &AArch64::GPR64spRegClass); 2471 // This instruction is reading and writing X registers. This may upset 2472 // the register scavenger and machine verifier, so we need to indicate 2473 // that we are reading an undefined value from SrcRegX, but a proper 2474 // value from SrcReg. 2475 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) 2476 .addReg(AArch64::XZR) 2477 .addReg(SrcRegX, RegState::Undef) 2478 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 2479 } else { 2480 // Otherwise, expand to ORR WZR. 2481 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) 2482 .addReg(AArch64::WZR) 2483 .addReg(SrcReg, getKillRegState(KillSrc)); 2484 } 2485 } 2486 return; 2487 } 2488 2489 if (AArch64::GPR64spRegClass.contains(DestReg) && 2490 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { 2491 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { 2492 // If either operand is SP, expand to ADD #0. 2493 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) 2494 .addReg(SrcReg, getKillRegState(KillSrc)) 2495 .addImm(0) 2496 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2497 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) { 2498 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg) 2499 .addImm(0) 2500 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2501 } else { 2502 // Otherwise, expand to ORR XZR. 2503 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) 2504 .addReg(AArch64::XZR) 2505 .addReg(SrcReg, getKillRegState(KillSrc)); 2506 } 2507 return; 2508 } 2509 2510 // Copy a DDDD register quad by copying the individual sub-registers. 
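  // Added note (illustrative): copyPhysRegTuple uses forwardCopyWillClobberTuple to
  // pick the copy direction so an overlapping source lane is never clobbered before
  // it is read; e.g. copying D1_D2_D3_D4 into D2_D3_D4_D5 emits the D4->D5 copy
  // first and the D1->D2 copy last.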
2511 if (AArch64::DDDDRegClass.contains(DestReg) && 2512 AArch64::DDDDRegClass.contains(SrcReg)) { 2513 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 2514 AArch64::dsub2, AArch64::dsub3}; 2515 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2516 Indices); 2517 return; 2518 } 2519 2520 // Copy a DDD register triple by copying the individual sub-registers. 2521 if (AArch64::DDDRegClass.contains(DestReg) && 2522 AArch64::DDDRegClass.contains(SrcReg)) { 2523 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 2524 AArch64::dsub2}; 2525 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2526 Indices); 2527 return; 2528 } 2529 2530 // Copy a DD register pair by copying the individual sub-registers. 2531 if (AArch64::DDRegClass.contains(DestReg) && 2532 AArch64::DDRegClass.contains(SrcReg)) { 2533 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1}; 2534 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2535 Indices); 2536 return; 2537 } 2538 2539 // Copy a QQQQ register quad by copying the individual sub-registers. 2540 if (AArch64::QQQQRegClass.contains(DestReg) && 2541 AArch64::QQQQRegClass.contains(SrcReg)) { 2542 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 2543 AArch64::qsub2, AArch64::qsub3}; 2544 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2545 Indices); 2546 return; 2547 } 2548 2549 // Copy a QQQ register triple by copying the individual sub-registers. 2550 if (AArch64::QQQRegClass.contains(DestReg) && 2551 AArch64::QQQRegClass.contains(SrcReg)) { 2552 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 2553 AArch64::qsub2}; 2554 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2555 Indices); 2556 return; 2557 } 2558 2559 // Copy a QQ register pair by copying the individual sub-registers. 
2560 if (AArch64::QQRegClass.contains(DestReg) && 2561 AArch64::QQRegClass.contains(SrcReg)) { 2562 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1}; 2563 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2564 Indices); 2565 return; 2566 } 2567 2568 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) && 2569 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) { 2570 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64}; 2571 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs, 2572 AArch64::XZR, Indices); 2573 return; 2574 } 2575 2576 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) && 2577 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) { 2578 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32}; 2579 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs, 2580 AArch64::WZR, Indices); 2581 return; 2582 } 2583 2584 if (AArch64::FPR128RegClass.contains(DestReg) && 2585 AArch64::FPR128RegClass.contains(SrcReg)) { 2586 if (Subtarget.hasNEON()) { 2587 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2588 .addReg(SrcReg) 2589 .addReg(SrcReg, getKillRegState(KillSrc)); 2590 } else { 2591 BuildMI(MBB, I, DL, get(AArch64::STRQpre)) 2592 .addReg(AArch64::SP, RegState::Define) 2593 .addReg(SrcReg, getKillRegState(KillSrc)) 2594 .addReg(AArch64::SP) 2595 .addImm(-16); 2596 BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) 2597 .addReg(AArch64::SP, RegState::Define) 2598 .addReg(DestReg, RegState::Define) 2599 .addReg(AArch64::SP) 2600 .addImm(16); 2601 } 2602 return; 2603 } 2604 2605 if (AArch64::FPR64RegClass.contains(DestReg) && 2606 AArch64::FPR64RegClass.contains(SrcReg)) { 2607 if (Subtarget.hasNEON()) { 2608 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, 2609 &AArch64::FPR128RegClass); 2610 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, 2611 &AArch64::FPR128RegClass); 2612 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2613 .addReg(SrcReg) 2614 .addReg(SrcReg, getKillRegState(KillSrc)); 2615 } else { 2616 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) 2617 .addReg(SrcReg, getKillRegState(KillSrc)); 2618 } 2619 return; 2620 } 2621 2622 if (AArch64::FPR32RegClass.contains(DestReg) && 2623 AArch64::FPR32RegClass.contains(SrcReg)) { 2624 if (Subtarget.hasNEON()) { 2625 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, 2626 &AArch64::FPR128RegClass); 2627 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, 2628 &AArch64::FPR128RegClass); 2629 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2630 .addReg(SrcReg) 2631 .addReg(SrcReg, getKillRegState(KillSrc)); 2632 } else { 2633 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2634 .addReg(SrcReg, getKillRegState(KillSrc)); 2635 } 2636 return; 2637 } 2638 2639 if (AArch64::FPR16RegClass.contains(DestReg) && 2640 AArch64::FPR16RegClass.contains(SrcReg)) { 2641 if (Subtarget.hasNEON()) { 2642 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2643 &AArch64::FPR128RegClass); 2644 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2645 &AArch64::FPR128RegClass); 2646 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2647 .addReg(SrcReg) 2648 .addReg(SrcReg, getKillRegState(KillSrc)); 2649 } else { 2650 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2651 &AArch64::FPR32RegClass); 2652 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2653 &AArch64::FPR32RegClass); 2654 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2655 .addReg(SrcReg, getKillRegState(KillSrc)); 2656 } 2657 
return; 2658 } 2659 2660 if (AArch64::FPR8RegClass.contains(DestReg) && 2661 AArch64::FPR8RegClass.contains(SrcReg)) { 2662 if (Subtarget.hasNEON()) { 2663 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2664 &AArch64::FPR128RegClass); 2665 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2666 &AArch64::FPR128RegClass); 2667 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2668 .addReg(SrcReg) 2669 .addReg(SrcReg, getKillRegState(KillSrc)); 2670 } else { 2671 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2672 &AArch64::FPR32RegClass); 2673 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2674 &AArch64::FPR32RegClass); 2675 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2676 .addReg(SrcReg, getKillRegState(KillSrc)); 2677 } 2678 return; 2679 } 2680 2681 // Copies between GPR64 and FPR64. 2682 if (AArch64::FPR64RegClass.contains(DestReg) && 2683 AArch64::GPR64RegClass.contains(SrcReg)) { 2684 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) 2685 .addReg(SrcReg, getKillRegState(KillSrc)); 2686 return; 2687 } 2688 if (AArch64::GPR64RegClass.contains(DestReg) && 2689 AArch64::FPR64RegClass.contains(SrcReg)) { 2690 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) 2691 .addReg(SrcReg, getKillRegState(KillSrc)); 2692 return; 2693 } 2694 // Copies between GPR32 and FPR32. 2695 if (AArch64::FPR32RegClass.contains(DestReg) && 2696 AArch64::GPR32RegClass.contains(SrcReg)) { 2697 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) 2698 .addReg(SrcReg, getKillRegState(KillSrc)); 2699 return; 2700 } 2701 if (AArch64::GPR32RegClass.contains(DestReg) && 2702 AArch64::FPR32RegClass.contains(SrcReg)) { 2703 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) 2704 .addReg(SrcReg, getKillRegState(KillSrc)); 2705 return; 2706 } 2707 2708 if (DestReg == AArch64::NZCV) { 2709 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); 2710 BuildMI(MBB, I, DL, get(AArch64::MSR)) 2711 .addImm(AArch64SysReg::NZCV) 2712 .addReg(SrcReg, getKillRegState(KillSrc)) 2713 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); 2714 return; 2715 } 2716 2717 if (SrcReg == AArch64::NZCV) { 2718 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); 2719 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg) 2720 .addImm(AArch64SysReg::NZCV) 2721 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); 2722 return; 2723 } 2724 2725 llvm_unreachable("unimplemented reg-to-reg copy"); 2726 } 2727 2728 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, 2729 MachineBasicBlock &MBB, 2730 MachineBasicBlock::iterator InsertBefore, 2731 const MCInstrDesc &MCID, 2732 unsigned SrcReg, bool IsKill, 2733 unsigned SubIdx0, unsigned SubIdx1, int FI, 2734 MachineMemOperand *MMO) { 2735 unsigned SrcReg0 = SrcReg; 2736 unsigned SrcReg1 = SrcReg; 2737 if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) { 2738 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0); 2739 SubIdx0 = 0; 2740 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1); 2741 SubIdx1 = 0; 2742 } 2743 BuildMI(MBB, InsertBefore, DebugLoc(), MCID) 2744 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0) 2745 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1) 2746 .addFrameIndex(FI) 2747 .addImm(0) 2748 .addMemOperand(MMO); 2749 } 2750 2751 void AArch64InstrInfo::storeRegToStackSlot( 2752 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, 2753 bool isKill, int FI, const TargetRegisterClass *RC, 2754 const TargetRegisterInfo *TRI) const { 2755 MachineFunction &MF = 
*MBB.getParent(); 2756 MachineFrameInfo &MFI = MF.getFrameInfo(); 2757 unsigned Align = MFI.getObjectAlignment(FI); 2758 2759 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 2760 MachineMemOperand *MMO = MF.getMachineMemOperand( 2761 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); 2762 unsigned Opc = 0; 2763 bool Offset = true; 2764 switch (TRI->getSpillSize(*RC)) { 2765 case 1: 2766 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 2767 Opc = AArch64::STRBui; 2768 break; 2769 case 2: 2770 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 2771 Opc = AArch64::STRHui; 2772 break; 2773 case 4: 2774 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 2775 Opc = AArch64::STRWui; 2776 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 2777 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); 2778 else 2779 assert(SrcReg != AArch64::WSP); 2780 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 2781 Opc = AArch64::STRSui; 2782 break; 2783 case 8: 2784 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 2785 Opc = AArch64::STRXui; 2786 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 2787 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 2788 else 2789 assert(SrcReg != AArch64::SP); 2790 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) { 2791 Opc = AArch64::STRDui; 2792 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) { 2793 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI, 2794 get(AArch64::STPWi), SrcReg, isKill, 2795 AArch64::sube32, AArch64::subo32, FI, MMO); 2796 return; 2797 } 2798 break; 2799 case 16: 2800 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 2801 Opc = AArch64::STRQui; 2802 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 2803 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2804 Opc = AArch64::ST1Twov1d; 2805 Offset = false; 2806 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 2807 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI, 2808 get(AArch64::STPXi), SrcReg, isKill, 2809 AArch64::sube64, AArch64::subo64, FI, MMO); 2810 return; 2811 } 2812 break; 2813 case 24: 2814 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 2815 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2816 Opc = AArch64::ST1Threev1d; 2817 Offset = false; 2818 } 2819 break; 2820 case 32: 2821 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 2822 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2823 Opc = AArch64::ST1Fourv1d; 2824 Offset = false; 2825 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 2826 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2827 Opc = AArch64::ST1Twov2d; 2828 Offset = false; 2829 } 2830 break; 2831 case 48: 2832 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 2833 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2834 Opc = AArch64::ST1Threev2d; 2835 Offset = false; 2836 } 2837 break; 2838 case 64: 2839 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 2840 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2841 Opc = AArch64::ST1Fourv2d; 2842 Offset = false; 2843 } 2844 break; 2845 } 2846 assert(Opc && "Unknown register class"); 2847 2848 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc)) 2849 .addReg(SrcReg, getKillRegState(isKill)) 2850 .addFrameIndex(FI); 2851 2852 if (Offset) 2853 MI.addImm(0); 2854 MI.addMemOperand(MMO); 2855 } 2856 2857 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, 2858 
MachineBasicBlock &MBB, 2859 MachineBasicBlock::iterator InsertBefore, 2860 const MCInstrDesc &MCID, 2861 unsigned DestReg, unsigned SubIdx0, 2862 unsigned SubIdx1, int FI, 2863 MachineMemOperand *MMO) { 2864 unsigned DestReg0 = DestReg; 2865 unsigned DestReg1 = DestReg; 2866 bool IsUndef = true; 2867 if (TargetRegisterInfo::isPhysicalRegister(DestReg)) { 2868 DestReg0 = TRI.getSubReg(DestReg, SubIdx0); 2869 SubIdx0 = 0; 2870 DestReg1 = TRI.getSubReg(DestReg, SubIdx1); 2871 SubIdx1 = 0; 2872 IsUndef = false; 2873 } 2874 BuildMI(MBB, InsertBefore, DebugLoc(), MCID) 2875 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0) 2876 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1) 2877 .addFrameIndex(FI) 2878 .addImm(0) 2879 .addMemOperand(MMO); 2880 } 2881 2882 void AArch64InstrInfo::loadRegFromStackSlot( 2883 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, 2884 int FI, const TargetRegisterClass *RC, 2885 const TargetRegisterInfo *TRI) const { 2886 MachineFunction &MF = *MBB.getParent(); 2887 MachineFrameInfo &MFI = MF.getFrameInfo(); 2888 unsigned Align = MFI.getObjectAlignment(FI); 2889 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 2890 MachineMemOperand *MMO = MF.getMachineMemOperand( 2891 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); 2892 2893 unsigned Opc = 0; 2894 bool Offset = true; 2895 switch (TRI->getSpillSize(*RC)) { 2896 case 1: 2897 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 2898 Opc = AArch64::LDRBui; 2899 break; 2900 case 2: 2901 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 2902 Opc = AArch64::LDRHui; 2903 break; 2904 case 4: 2905 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 2906 Opc = AArch64::LDRWui; 2907 if (TargetRegisterInfo::isVirtualRegister(DestReg)) 2908 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); 2909 else 2910 assert(DestReg != AArch64::WSP); 2911 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 2912 Opc = AArch64::LDRSui; 2913 break; 2914 case 8: 2915 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 2916 Opc = AArch64::LDRXui; 2917 if (TargetRegisterInfo::isVirtualRegister(DestReg)) 2918 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); 2919 else 2920 assert(DestReg != AArch64::SP); 2921 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) { 2922 Opc = AArch64::LDRDui; 2923 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) { 2924 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI, 2925 get(AArch64::LDPWi), DestReg, AArch64::sube32, 2926 AArch64::subo32, FI, MMO); 2927 return; 2928 } 2929 break; 2930 case 16: 2931 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 2932 Opc = AArch64::LDRQui; 2933 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 2934 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2935 Opc = AArch64::LD1Twov1d; 2936 Offset = false; 2937 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 2938 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI, 2939 get(AArch64::LDPXi), DestReg, AArch64::sube64, 2940 AArch64::subo64, FI, MMO); 2941 return; 2942 } 2943 break; 2944 case 24: 2945 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 2946 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2947 Opc = AArch64::LD1Threev1d; 2948 Offset = false; 2949 } 2950 break; 2951 case 32: 2952 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 2953 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2954 Opc = 
AArch64::LD1Fourv1d; 2955 Offset = false; 2956 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 2957 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2958 Opc = AArch64::LD1Twov2d; 2959 Offset = false; 2960 } 2961 break; 2962 case 48: 2963 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 2964 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2965 Opc = AArch64::LD1Threev2d; 2966 Offset = false; 2967 } 2968 break; 2969 case 64: 2970 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 2971 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2972 Opc = AArch64::LD1Fourv2d; 2973 Offset = false; 2974 } 2975 break; 2976 } 2977 assert(Opc && "Unknown register class"); 2978 2979 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc)) 2980 .addReg(DestReg, getDefRegState(true)) 2981 .addFrameIndex(FI); 2982 if (Offset) 2983 MI.addImm(0); 2984 MI.addMemOperand(MMO); 2985 } 2986 2987 void llvm::emitFrameOffset(MachineBasicBlock &MBB, 2988 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 2989 unsigned DestReg, unsigned SrcReg, int Offset, 2990 const TargetInstrInfo *TII, 2991 MachineInstr::MIFlag Flag, bool SetNZCV, 2992 bool NeedsWinCFI, bool *HasWinCFI) { 2993 if (DestReg == SrcReg && Offset == 0) 2994 return; 2995 2996 assert((DestReg != AArch64::SP || Offset % 16 == 0) && 2997 "SP increment/decrement not 16-byte aligned"); 2998 2999 bool isSub = Offset < 0; 3000 if (isSub) 3001 Offset = -Offset; 3002 3003 // FIXME: If the offset won't fit in 24-bits, compute the offset into a 3004 // scratch register. If DestReg is a virtual register, use it as the 3005 // scratch register; otherwise, create a new virtual register (to be 3006 // replaced by the scavenger at the end of PEI). That case can be optimized 3007 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch 3008 // register can be loaded with offset%8 and the add/sub can use an extending 3009 // instruction with LSL#3. 3010 // Currently the function handles any offsets but generates a poor sequence 3011 // of code. 3012 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); 3013 3014 unsigned Opc; 3015 if (SetNZCV) 3016 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri; 3017 else 3018 Opc = isSub ? 
AArch64::SUBXri : AArch64::ADDXri; 3019 const unsigned MaxEncoding = 0xfff; 3020 const unsigned ShiftSize = 12; 3021 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; 3022 while (((unsigned)Offset) >= (1 << ShiftSize)) { 3023 unsigned ThisVal; 3024 if (((unsigned)Offset) > MaxEncodableValue) { 3025 ThisVal = MaxEncodableValue; 3026 } else { 3027 ThisVal = Offset & MaxEncodableValue; 3028 } 3029 assert((ThisVal >> ShiftSize) <= MaxEncoding && 3030 "Encoding cannot handle value that big"); 3031 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 3032 .addReg(SrcReg) 3033 .addImm(ThisVal >> ShiftSize) 3034 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize)) 3035 .setMIFlag(Flag); 3036 3037 if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP) { 3038 if (HasWinCFI) 3039 *HasWinCFI = true; 3040 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) 3041 .addImm(ThisVal) 3042 .setMIFlag(Flag); 3043 } 3044 3045 SrcReg = DestReg; 3046 Offset -= ThisVal; 3047 if (Offset == 0) 3048 return; 3049 } 3050 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 3051 .addReg(SrcReg) 3052 .addImm(Offset) 3053 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 3054 .setMIFlag(Flag); 3055 3056 if (NeedsWinCFI) { 3057 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || 3058 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { 3059 if (HasWinCFI) 3060 *HasWinCFI = true; 3061 if (Offset == 0) 3062 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)). 3063 setMIFlag(Flag); 3064 else 3065 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)). 3066 addImm(Offset).setMIFlag(Flag); 3067 } else if (DestReg == AArch64::SP) { 3068 if (HasWinCFI) 3069 *HasWinCFI = true; 3070 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)). 3071 addImm(Offset).setMIFlag(Flag); 3072 } 3073 } 3074 } 3075 3076 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( 3077 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 3078 MachineBasicBlock::iterator InsertPt, int FrameIndex, 3079 LiveIntervals *LIS, VirtRegMap *VRM) const { 3080 // This is a bit of a hack. Consider this instruction: 3081 // 3082 // %0 = COPY %sp; GPR64all:%0 3083 // 3084 // We explicitly chose GPR64all for the virtual register so such a copy might 3085 // be eliminated by RegisterCoalescer. However, that may not be possible, and 3086 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all 3087 // register class, TargetInstrInfo::foldMemoryOperand() is going to try. 3088 // 3089 // To prevent that, we are going to constrain the %0 register class here. 3090 // 3091 // <rdar://problem/11522048> 3092 // 3093 if (MI.isFullCopy()) { 3094 unsigned DstReg = MI.getOperand(0).getReg(); 3095 unsigned SrcReg = MI.getOperand(1).getReg(); 3096 if (SrcReg == AArch64::SP && 3097 TargetRegisterInfo::isVirtualRegister(DstReg)) { 3098 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); 3099 return nullptr; 3100 } 3101 if (DstReg == AArch64::SP && 3102 TargetRegisterInfo::isVirtualRegister(SrcReg)) { 3103 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 3104 return nullptr; 3105 } 3106 } 3107 3108 // Handle the case where a copy is being spilled or filled but the source 3109 // and destination register class don't match. 
For example: 3110 // 3111 // %0 = COPY %xzr; GPR64common:%0 3112 // 3113 // In this case we can still safely fold away the COPY and generate the 3114 // following spill code: 3115 // 3116 // STRXui %xzr, %stack.0 3117 // 3118 // This also eliminates spilled cross register class COPYs (e.g. between x and 3119 // d regs) of the same size. For example: 3120 // 3121 // %0 = COPY %1; GPR64:%0, FPR64:%1 3122 // 3123 // will be filled as 3124 // 3125 // LDRDui %0, fi<#0> 3126 // 3127 // instead of 3128 // 3129 // LDRXui %Temp, fi<#0> 3130 // %0 = FMOV %Temp 3131 // 3132 if (MI.isCopy() && Ops.size() == 1 && 3133 // Make sure we're only folding the explicit COPY defs/uses. 3134 (Ops[0] == 0 || Ops[0] == 1)) { 3135 bool IsSpill = Ops[0] == 0; 3136 bool IsFill = !IsSpill; 3137 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 3138 const MachineRegisterInfo &MRI = MF.getRegInfo(); 3139 MachineBasicBlock &MBB = *MI.getParent(); 3140 const MachineOperand &DstMO = MI.getOperand(0); 3141 const MachineOperand &SrcMO = MI.getOperand(1); 3142 unsigned DstReg = DstMO.getReg(); 3143 unsigned SrcReg = SrcMO.getReg(); 3144 // This is slightly expensive to compute for physical regs since 3145 // getMinimalPhysRegClass is slow. 3146 auto getRegClass = [&](unsigned Reg) { 3147 return TargetRegisterInfo::isVirtualRegister(Reg) 3148 ? MRI.getRegClass(Reg) 3149 : TRI.getMinimalPhysRegClass(Reg); 3150 }; 3151 3152 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { 3153 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == 3154 TRI.getRegSizeInBits(*getRegClass(SrcReg)) && 3155 "Mismatched register size in non subreg COPY"); 3156 if (IsSpill) 3157 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, 3158 getRegClass(SrcReg), &TRI); 3159 else 3160 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, 3161 getRegClass(DstReg), &TRI); 3162 return &*--InsertPt; 3163 } 3164 3165 // Handle cases like spilling def of: 3166 // 3167 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0 3168 // 3169 // where the physical register source can be widened and stored to the full 3170 // virtual reg destination stack slot, in this case producing: 3171 // 3172 // STRXui %xzr, %stack.0 3173 // 3174 if (IsSpill && DstMO.isUndef() && 3175 TargetRegisterInfo::isPhysicalRegister(SrcReg)) { 3176 assert(SrcMO.getSubReg() == 0 && 3177 "Unexpected subreg on physical register"); 3178 const TargetRegisterClass *SpillRC; 3179 unsigned SpillSubreg; 3180 switch (DstMO.getSubReg()) { 3181 default: 3182 SpillRC = nullptr; 3183 break; 3184 case AArch64::sub_32: 3185 case AArch64::ssub: 3186 if (AArch64::GPR32RegClass.contains(SrcReg)) { 3187 SpillRC = &AArch64::GPR64RegClass; 3188 SpillSubreg = AArch64::sub_32; 3189 } else if (AArch64::FPR32RegClass.contains(SrcReg)) { 3190 SpillRC = &AArch64::FPR64RegClass; 3191 SpillSubreg = AArch64::ssub; 3192 } else 3193 SpillRC = nullptr; 3194 break; 3195 case AArch64::dsub: 3196 if (AArch64::FPR64RegClass.contains(SrcReg)) { 3197 SpillRC = &AArch64::FPR128RegClass; 3198 SpillSubreg = AArch64::dsub; 3199 } else 3200 SpillRC = nullptr; 3201 break; 3202 } 3203 3204 if (SpillRC) 3205 if (unsigned WidenedSrcReg = 3206 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) { 3207 storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(), 3208 FrameIndex, SpillRC, &TRI); 3209 return &*--InsertPt; 3210 } 3211 } 3212 3213 // Handle cases like filling use of: 3214 // 3215 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1 3216 // 3217 // where we can load the 
full virtual reg source stack slot, into the subreg 3218 // destination, in this case producing: 3219 // 3220 // LDRWui %0:sub_32<def,read-undef>, %stack.0 3221 // 3222 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) { 3223 const TargetRegisterClass *FillRC; 3224 switch (DstMO.getSubReg()) { 3225 default: 3226 FillRC = nullptr; 3227 break; 3228 case AArch64::sub_32: 3229 FillRC = &AArch64::GPR32RegClass; 3230 break; 3231 case AArch64::ssub: 3232 FillRC = &AArch64::FPR32RegClass; 3233 break; 3234 case AArch64::dsub: 3235 FillRC = &AArch64::FPR64RegClass; 3236 break; 3237 } 3238 3239 if (FillRC) { 3240 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == 3241 TRI.getRegSizeInBits(*FillRC) && 3242 "Mismatched regclass size on folded subreg COPY"); 3243 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI); 3244 MachineInstr &LoadMI = *--InsertPt; 3245 MachineOperand &LoadDst = LoadMI.getOperand(0); 3246 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load"); 3247 LoadDst.setSubReg(DstMO.getSubReg()); 3248 LoadDst.setIsUndef(); 3249 return &LoadMI; 3250 } 3251 } 3252 } 3253 3254 // Cannot fold. 3255 return nullptr; 3256 } 3257 3258 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, 3259 bool *OutUseUnscaledOp, 3260 unsigned *OutUnscaledOp, 3261 int *EmittableOffset) { 3262 // Set output values in case of early exit. 3263 if (EmittableOffset) 3264 *EmittableOffset = 0; 3265 if (OutUseUnscaledOp) 3266 *OutUseUnscaledOp = false; 3267 if (OutUnscaledOp) 3268 *OutUnscaledOp = 0; 3269 3270 // Exit early for structured vector spills/fills as they can't take an 3271 // immediate offset. 3272 switch (MI.getOpcode()) { 3273 default: 3274 break; 3275 case AArch64::LD1Twov2d: 3276 case AArch64::LD1Threev2d: 3277 case AArch64::LD1Fourv2d: 3278 case AArch64::LD1Twov1d: 3279 case AArch64::LD1Threev1d: 3280 case AArch64::LD1Fourv1d: 3281 case AArch64::ST1Twov2d: 3282 case AArch64::ST1Threev2d: 3283 case AArch64::ST1Fourv2d: 3284 case AArch64::ST1Twov1d: 3285 case AArch64::ST1Threev1d: 3286 case AArch64::ST1Fourv1d: 3287 case AArch64::IRG: 3288 case AArch64::IRGstack: 3289 return AArch64FrameOffsetCannotUpdate; 3290 } 3291 3292 // Get the min/max offset and the scale. 3293 unsigned Scale, Width; 3294 int64_t MinOff, MaxOff; 3295 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff, 3296 MaxOff)) 3297 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); 3298 3299 // Construct the complete offset. 3300 const MachineOperand &ImmOpnd = 3301 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); 3302 Offset += ImmOpnd.getImm() * Scale; 3303 3304 // If the offset doesn't match the scale, we rewrite the instruction to 3305 // use the unscaled instruction instead. Likewise, if we have a negative 3306 // offset and there is an unscaled op to use. 
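  // Illustrative example (added): an STRXui whose accumulated byte offset is 12
  // cannot stay in the scaled form (12 % 8 != 0), so it is rewritten to STURXi with
  // an immediate of 12; negative offsets likewise fall back to the unscaled form,
  // since the scaled encodings only accept unsigned immediates.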
3307 Optional<unsigned> UnscaledOp = 3308 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode()); 3309 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0); 3310 if (useUnscaledOp && 3311 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff)) 3312 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); 3313 3314 int64_t Remainder = Offset % Scale; 3315 assert(!(Remainder && useUnscaledOp) && 3316 "Cannot have remainder when using unscaled op"); 3317 3318 assert(MinOff < MaxOff && "Unexpected Min/Max offsets"); 3319 int NewOffset = Offset / Scale; 3320 if (MinOff <= NewOffset && NewOffset <= MaxOff) 3321 Offset = Remainder; 3322 else { 3323 NewOffset = NewOffset < 0 ? MinOff : MaxOff; 3324 Offset = Offset - NewOffset * Scale + Remainder; 3325 } 3326 3327 if (EmittableOffset) 3328 *EmittableOffset = NewOffset; 3329 if (OutUseUnscaledOp) 3330 *OutUseUnscaledOp = useUnscaledOp; 3331 if (OutUnscaledOp && UnscaledOp) 3332 *OutUnscaledOp = *UnscaledOp; 3333 3334 return AArch64FrameOffsetCanUpdate | 3335 (Offset == 0 ? AArch64FrameOffsetIsLegal : 0); 3336 } 3337 3338 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 3339 unsigned FrameReg, int &Offset, 3340 const AArch64InstrInfo *TII) { 3341 unsigned Opcode = MI.getOpcode(); 3342 unsigned ImmIdx = FrameRegIdx + 1; 3343 3344 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { 3345 Offset += MI.getOperand(ImmIdx).getImm(); 3346 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), 3347 MI.getOperand(0).getReg(), FrameReg, Offset, TII, 3348 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); 3349 MI.eraseFromParent(); 3350 Offset = 0; 3351 return true; 3352 } 3353 3354 int NewOffset; 3355 unsigned UnscaledOp; 3356 bool UseUnscaledOp; 3357 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, 3358 &UnscaledOp, &NewOffset); 3359 if (Status & AArch64FrameOffsetCanUpdate) { 3360 if (Status & AArch64FrameOffsetIsLegal) 3361 // Replace the FrameIndex with FrameReg. 3362 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 3363 if (UseUnscaledOp) 3364 MI.setDesc(TII->get(UnscaledOp)); 3365 3366 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); 3367 return Offset == 0; 3368 } 3369 3370 return false; 3371 } 3372 3373 void AArch64InstrInfo::getNoop(MCInst &NopInst) const { 3374 NopInst.setOpcode(AArch64::HINT); 3375 NopInst.addOperand(MCOperand::createImm(0)); 3376 } 3377 3378 // AArch64 supports MachineCombiner. 3379 bool AArch64InstrInfo::useMachineCombiner() const { return true; } 3380 3381 // True when Opc sets flag 3382 static bool isCombineInstrSettingFlag(unsigned Opc) { 3383 switch (Opc) { 3384 case AArch64::ADDSWrr: 3385 case AArch64::ADDSWri: 3386 case AArch64::ADDSXrr: 3387 case AArch64::ADDSXri: 3388 case AArch64::SUBSWrr: 3389 case AArch64::SUBSXrr: 3390 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 3391 case AArch64::SUBSWri: 3392 case AArch64::SUBSXri: 3393 return true; 3394 default: 3395 break; 3396 } 3397 return false; 3398 } 3399 3400 // 32b Opcodes that can be combined with a MUL 3401 static bool isCombineInstrCandidate32(unsigned Opc) { 3402 switch (Opc) { 3403 case AArch64::ADDWrr: 3404 case AArch64::ADDWri: 3405 case AArch64::SUBWrr: 3406 case AArch64::ADDSWrr: 3407 case AArch64::ADDSWri: 3408 case AArch64::SUBSWrr: 3409 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 
3410 case AArch64::SUBWri:
3411 case AArch64::SUBSWri:
3412 return true;
3413 default:
3414 break;
3415 }
3416 return false;
3417 }
3418
3419 // 64b Opcodes that can be combined with a MUL
3420 static bool isCombineInstrCandidate64(unsigned Opc) {
3421 switch (Opc) {
3422 case AArch64::ADDXrr:
3423 case AArch64::ADDXri:
3424 case AArch64::SUBXrr:
3425 case AArch64::ADDSXrr:
3426 case AArch64::ADDSXri:
3427 case AArch64::SUBSXrr:
3428 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3429 case AArch64::SUBXri:
3430 case AArch64::SUBSXri:
3431 return true;
3432 default:
3433 break;
3434 }
3435 return false;
3436 }
3437
3438 // FP Opcodes that can be combined with a FMUL
3439 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3440 switch (Inst.getOpcode()) {
3441 default:
3442 break;
3443 case AArch64::FADDSrr:
3444 case AArch64::FADDDrr:
3445 case AArch64::FADDv2f32:
3446 case AArch64::FADDv2f64:
3447 case AArch64::FADDv4f32:
3448 case AArch64::FSUBSrr:
3449 case AArch64::FSUBDrr:
3450 case AArch64::FSUBv2f32:
3451 case AArch64::FSUBv2f64:
3452 case AArch64::FSUBv4f32:
3453 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3454 return (Options.UnsafeFPMath ||
3455 Options.AllowFPOpFusion == FPOpFusion::Fast);
3456 }
3457 return false;
3458 }
3459
3460 // Opcodes that can be combined with a MUL
3461 static bool isCombineInstrCandidate(unsigned Opc) {
3462 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3463 }
3464
3465 //
3466 // Utility routine that checks if \param MO is defined by an
3467 // \param CombineOpc instruction in the basic block \param MBB
3468 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3469 unsigned CombineOpc, unsigned ZeroReg = 0,
3470 bool CheckZeroReg = false) {
3471 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3472 MachineInstr *MI = nullptr;
3473
3474 if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3475 MI = MRI.getUniqueVRegDef(MO.getReg());
3476 // And it needs to be in the trace (otherwise, it won't have a depth).
3477 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3478 return false;
3479 // Must only be used by the user we combine with.
3480 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3481 return false;
3482
3483 if (CheckZeroReg) {
3484 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3485 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3486 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
3487 // The third input reg must be zero.
3488 if (MI->getOperand(3).getReg() != ZeroReg)
3489 return false;
3490 }
3491
3492 return true;
3493 }
3494
3495 //
3496 // Is \param MO defined by an integer multiply and can be combined?
3497 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3498 unsigned MulOpc, unsigned ZeroReg) {
3499 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3500 }
3501
3502 //
3503 // Is \param MO defined by a floating-point multiply and can be combined?
3504 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3505 unsigned MulOpc) {
3506 return canCombine(MBB, MO, MulOpc);
3507 }
3508
3509 // TODO: There are many more machine instruction opcodes to match:
3510 // 1. Other data types (integer, vectors)
3511 // 2. Other math / logic operations (xor, or)
3512 // 3.
Other forms of the same operation (intrinsics and other variants) 3513 bool AArch64InstrInfo::isAssociativeAndCommutative( 3514 const MachineInstr &Inst) const { 3515 switch (Inst.getOpcode()) { 3516 case AArch64::FADDDrr: 3517 case AArch64::FADDSrr: 3518 case AArch64::FADDv2f32: 3519 case AArch64::FADDv2f64: 3520 case AArch64::FADDv4f32: 3521 case AArch64::FMULDrr: 3522 case AArch64::FMULSrr: 3523 case AArch64::FMULX32: 3524 case AArch64::FMULX64: 3525 case AArch64::FMULXv2f32: 3526 case AArch64::FMULXv2f64: 3527 case AArch64::FMULXv4f32: 3528 case AArch64::FMULv2f32: 3529 case AArch64::FMULv2f64: 3530 case AArch64::FMULv4f32: 3531 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; 3532 default: 3533 return false; 3534 } 3535 } 3536 3537 /// Find instructions that can be turned into madd. 3538 static bool getMaddPatterns(MachineInstr &Root, 3539 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 3540 unsigned Opc = Root.getOpcode(); 3541 MachineBasicBlock &MBB = *Root.getParent(); 3542 bool Found = false; 3543 3544 if (!isCombineInstrCandidate(Opc)) 3545 return false; 3546 if (isCombineInstrSettingFlag(Opc)) { 3547 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true); 3548 // When NZCV is live bail out. 3549 if (Cmp_NZCV == -1) 3550 return false; 3551 unsigned NewOpc = convertToNonFlagSettingOpc(Root); 3552 // When opcode can't change bail out. 3553 // CHECKME: do we miss any cases for opcode conversion? 3554 if (NewOpc == Opc) 3555 return false; 3556 Opc = NewOpc; 3557 } 3558 3559 switch (Opc) { 3560 default: 3561 break; 3562 case AArch64::ADDWrr: 3563 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 3564 "ADDWrr does not have register operands"); 3565 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3566 AArch64::WZR)) { 3567 Patterns.push_back(MachineCombinerPattern::MULADDW_OP1); 3568 Found = true; 3569 } 3570 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, 3571 AArch64::WZR)) { 3572 Patterns.push_back(MachineCombinerPattern::MULADDW_OP2); 3573 Found = true; 3574 } 3575 break; 3576 case AArch64::ADDXrr: 3577 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3578 AArch64::XZR)) { 3579 Patterns.push_back(MachineCombinerPattern::MULADDX_OP1); 3580 Found = true; 3581 } 3582 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, 3583 AArch64::XZR)) { 3584 Patterns.push_back(MachineCombinerPattern::MULADDX_OP2); 3585 Found = true; 3586 } 3587 break; 3588 case AArch64::SUBWrr: 3589 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3590 AArch64::WZR)) { 3591 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1); 3592 Found = true; 3593 } 3594 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, 3595 AArch64::WZR)) { 3596 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2); 3597 Found = true; 3598 } 3599 break; 3600 case AArch64::SUBXrr: 3601 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3602 AArch64::XZR)) { 3603 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1); 3604 Found = true; 3605 } 3606 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, 3607 AArch64::XZR)) { 3608 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2); 3609 Found = true; 3610 } 3611 break; 3612 case AArch64::ADDWri: 3613 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3614 AArch64::WZR)) { 3615 Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1); 3616 Found = true; 3617 } 3618 break; 3619 case 
AArch64::ADDXri: 3620 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3621 AArch64::XZR)) { 3622 Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1); 3623 Found = true; 3624 } 3625 break; 3626 case AArch64::SUBWri: 3627 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3628 AArch64::WZR)) { 3629 Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1); 3630 Found = true; 3631 } 3632 break; 3633 case AArch64::SUBXri: 3634 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3635 AArch64::XZR)) { 3636 Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1); 3637 Found = true; 3638 } 3639 break; 3640 } 3641 return Found; 3642 } 3643 /// Floating-Point Support 3644 3645 /// Find instructions that can be turned into madd. 3646 static bool getFMAPatterns(MachineInstr &Root, 3647 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 3648 3649 if (!isCombineInstrCandidateFP(Root)) 3650 return false; 3651 3652 MachineBasicBlock &MBB = *Root.getParent(); 3653 bool Found = false; 3654 3655 switch (Root.getOpcode()) { 3656 default: 3657 assert(false && "Unsupported FP instruction in combiner\n"); 3658 break; 3659 case AArch64::FADDSrr: 3660 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 3661 "FADDWrr does not have register operands"); 3662 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { 3663 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1); 3664 Found = true; 3665 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3666 AArch64::FMULv1i32_indexed)) { 3667 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1); 3668 Found = true; 3669 } 3670 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { 3671 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2); 3672 Found = true; 3673 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3674 AArch64::FMULv1i32_indexed)) { 3675 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2); 3676 Found = true; 3677 } 3678 break; 3679 case AArch64::FADDDrr: 3680 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { 3681 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1); 3682 Found = true; 3683 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3684 AArch64::FMULv1i64_indexed)) { 3685 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1); 3686 Found = true; 3687 } 3688 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { 3689 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2); 3690 Found = true; 3691 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3692 AArch64::FMULv1i64_indexed)) { 3693 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2); 3694 Found = true; 3695 } 3696 break; 3697 case AArch64::FADDv2f32: 3698 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3699 AArch64::FMULv2i32_indexed)) { 3700 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1); 3701 Found = true; 3702 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3703 AArch64::FMULv2f32)) { 3704 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1); 3705 Found = true; 3706 } 3707 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3708 AArch64::FMULv2i32_indexed)) { 3709 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2); 3710 Found = true; 3711 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3712 AArch64::FMULv2f32)) { 3713 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2); 3714 Found = true; 3715 } 3716 break; 3717 case AArch64::FADDv2f64: 
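    // As with the scalar and v2f32 cases above, look for a vector FMUL
    // feeding either operand of this FADD so the pair can later be fused.
    // Rough MIR sketch (virtual register names are made up):
    //   %i:fpr128 = FMULv2f64 %a, %b
    //   %r:fpr128 = FADDv2f64 %i, %c
    //   ==> %r:fpr128 = FMLAv2f64 %c, %a, %b
    // i.e. the untouched addend becomes the accumulator operand of the FMLA.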
3718 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3719 AArch64::FMULv2i64_indexed)) { 3720 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1); 3721 Found = true; 3722 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3723 AArch64::FMULv2f64)) { 3724 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1); 3725 Found = true; 3726 } 3727 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3728 AArch64::FMULv2i64_indexed)) { 3729 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2); 3730 Found = true; 3731 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3732 AArch64::FMULv2f64)) { 3733 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2); 3734 Found = true; 3735 } 3736 break; 3737 case AArch64::FADDv4f32: 3738 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3739 AArch64::FMULv4i32_indexed)) { 3740 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1); 3741 Found = true; 3742 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3743 AArch64::FMULv4f32)) { 3744 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1); 3745 Found = true; 3746 } 3747 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3748 AArch64::FMULv4i32_indexed)) { 3749 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2); 3750 Found = true; 3751 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3752 AArch64::FMULv4f32)) { 3753 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2); 3754 Found = true; 3755 } 3756 break; 3757 3758 case AArch64::FSUBSrr: 3759 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { 3760 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1); 3761 Found = true; 3762 } 3763 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { 3764 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2); 3765 Found = true; 3766 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3767 AArch64::FMULv1i32_indexed)) { 3768 Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2); 3769 Found = true; 3770 } 3771 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) { 3772 Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1); 3773 Found = true; 3774 } 3775 break; 3776 case AArch64::FSUBDrr: 3777 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { 3778 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1); 3779 Found = true; 3780 } 3781 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { 3782 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2); 3783 Found = true; 3784 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3785 AArch64::FMULv1i64_indexed)) { 3786 Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2); 3787 Found = true; 3788 } 3789 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) { 3790 Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1); 3791 Found = true; 3792 } 3793 break; 3794 case AArch64::FSUBv2f32: 3795 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3796 AArch64::FMULv2i32_indexed)) { 3797 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2); 3798 Found = true; 3799 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3800 AArch64::FMULv2f32)) { 3801 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2); 3802 Found = true; 3803 } 3804 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3805 AArch64::FMULv2i32_indexed)) { 3806 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1); 3807 Found = true; 3808 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 
3809 AArch64::FMULv2f32)) { 3810 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1); 3811 Found = true; 3812 } 3813 break; 3814 case AArch64::FSUBv2f64: 3815 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3816 AArch64::FMULv2i64_indexed)) { 3817 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2); 3818 Found = true; 3819 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3820 AArch64::FMULv2f64)) { 3821 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2); 3822 Found = true; 3823 } 3824 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3825 AArch64::FMULv2i64_indexed)) { 3826 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1); 3827 Found = true; 3828 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3829 AArch64::FMULv2f64)) { 3830 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1); 3831 Found = true; 3832 } 3833 break; 3834 case AArch64::FSUBv4f32: 3835 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3836 AArch64::FMULv4i32_indexed)) { 3837 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2); 3838 Found = true; 3839 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3840 AArch64::FMULv4f32)) { 3841 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2); 3842 Found = true; 3843 } 3844 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3845 AArch64::FMULv4i32_indexed)) { 3846 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1); 3847 Found = true; 3848 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3849 AArch64::FMULv4f32)) { 3850 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1); 3851 Found = true; 3852 } 3853 break; 3854 } 3855 return Found; 3856 } 3857 3858 /// Return true when a code sequence can improve throughput. It 3859 /// should be called only for instructions in loops. 
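/// For example, if a loop body computes
///   %i = FMULv2f32 %a, %b
///   %r = FADDv2f32 %i, %c
/// then replacing the pair with a single FMLAv2f32 issues fewer operations
/// per iteration even when the critical-path latency does not improve,
/// which is why the FMA/FMLA patterns below are reported as throughput
/// patterns. (The register names above are only illustrative.)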
3860 /// \param Pattern - combiner pattern 3861 bool AArch64InstrInfo::isThroughputPattern( 3862 MachineCombinerPattern Pattern) const { 3863 switch (Pattern) { 3864 default: 3865 break; 3866 case MachineCombinerPattern::FMULADDS_OP1: 3867 case MachineCombinerPattern::FMULADDS_OP2: 3868 case MachineCombinerPattern::FMULSUBS_OP1: 3869 case MachineCombinerPattern::FMULSUBS_OP2: 3870 case MachineCombinerPattern::FMULADDD_OP1: 3871 case MachineCombinerPattern::FMULADDD_OP2: 3872 case MachineCombinerPattern::FMULSUBD_OP1: 3873 case MachineCombinerPattern::FMULSUBD_OP2: 3874 case MachineCombinerPattern::FNMULSUBS_OP1: 3875 case MachineCombinerPattern::FNMULSUBD_OP1: 3876 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 3877 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 3878 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 3879 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 3880 case MachineCombinerPattern::FMLAv2f32_OP2: 3881 case MachineCombinerPattern::FMLAv2f32_OP1: 3882 case MachineCombinerPattern::FMLAv2f64_OP1: 3883 case MachineCombinerPattern::FMLAv2f64_OP2: 3884 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 3885 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 3886 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 3887 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 3888 case MachineCombinerPattern::FMLAv4f32_OP1: 3889 case MachineCombinerPattern::FMLAv4f32_OP2: 3890 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 3891 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 3892 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 3893 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 3894 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 3895 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 3896 case MachineCombinerPattern::FMLSv2f32_OP2: 3897 case MachineCombinerPattern::FMLSv2f64_OP2: 3898 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 3899 case MachineCombinerPattern::FMLSv4f32_OP2: 3900 return true; 3901 } // end switch (Pattern) 3902 return false; 3903 } 3904 /// Return true when there is potentially a faster code sequence for an 3905 /// instruction chain ending in \p Root. All potential patterns are listed in 3906 /// the \p Pattern vector. Pattern should be sorted in priority order since the 3907 /// pattern evaluator stops checking as soon as it finds a faster sequence. 3908 3909 bool AArch64InstrInfo::getMachineCombinerPatterns( 3910 MachineInstr &Root, 3911 SmallVectorImpl<MachineCombinerPattern> &Patterns) const { 3912 // Integer patterns 3913 if (getMaddPatterns(Root, Patterns)) 3914 return true; 3915 // Floating point patterns 3916 if (getFMAPatterns(Root, Patterns)) 3917 return true; 3918 3919 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); 3920 } 3921 3922 enum class FMAInstKind { Default, Indexed, Accumulator }; 3923 /// genFusedMultiply - Generate fused multiply instructions. 3924 /// This function supports both integer and floating point instructions. 3925 /// A typical example: 3926 /// F|MUL I=A,B,0 3927 /// F|ADD R,I,C 3928 /// ==> F|MADD R,A,B,C 3929 /// \param MF Containing MachineFunction 3930 /// \param MRI Register information 3931 /// \param TII Target information 3932 /// \param Root is the F|ADD instruction 3933 /// \param [out] InsInstrs is a vector of machine instructions and will 3934 /// contain the generated madd instruction 3935 /// \param IdxMulOpd is index of operand in Root that is the result of 3936 /// the F|MUL. In the example above IdxMulOpd is 1. 
3937 /// \param MaddOpc the opcode of the f|madd instruction
3938 /// \param RC Register class of operands
3939 /// \param kind The kind of FMA instruction (addressing mode) to be generated
3940 /// \param ReplacedAddend is the result register from the instruction
3941 /// replacing the non-combined operand, if any.
3942 static MachineInstr *
3943 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3944                  const TargetInstrInfo *TII, MachineInstr &Root,
3945                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3946                  unsigned MaddOpc, const TargetRegisterClass *RC,
3947                  FMAInstKind kind = FMAInstKind::Default,
3948                  const unsigned *ReplacedAddend = nullptr) {
3949   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3950 
3951   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3952   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3953   unsigned ResultReg = Root.getOperand(0).getReg();
3954   unsigned SrcReg0 = MUL->getOperand(1).getReg();
3955   bool Src0IsKill = MUL->getOperand(1).isKill();
3956   unsigned SrcReg1 = MUL->getOperand(2).getReg();
3957   bool Src1IsKill = MUL->getOperand(2).isKill();
3958 
3959   unsigned SrcReg2;
3960   bool Src2IsKill;
3961   if (ReplacedAddend) {
3962     // If we just generated a new addend, we must be its only use.
3963     SrcReg2 = *ReplacedAddend;
3964     Src2IsKill = true;
3965   } else {
3966     SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3967     Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3968   }
3969 
3970   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3971     MRI.constrainRegClass(ResultReg, RC);
3972   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3973     MRI.constrainRegClass(SrcReg0, RC);
3974   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3975     MRI.constrainRegClass(SrcReg1, RC);
3976   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
3977     MRI.constrainRegClass(SrcReg2, RC);
3978 
3979   MachineInstrBuilder MIB;
3980   if (kind == FMAInstKind::Default)
3981     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3982               .addReg(SrcReg0, getKillRegState(Src0IsKill))
3983               .addReg(SrcReg1, getKillRegState(Src1IsKill))
3984               .addReg(SrcReg2, getKillRegState(Src2IsKill));
3985   else if (kind == FMAInstKind::Indexed)
3986     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3987               .addReg(SrcReg2, getKillRegState(Src2IsKill))
3988               .addReg(SrcReg0, getKillRegState(Src0IsKill))
3989               .addReg(SrcReg1, getKillRegState(Src1IsKill))
3990               .addImm(MUL->getOperand(3).getImm());
3991   else if (kind == FMAInstKind::Accumulator)
3992     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3993               .addReg(SrcReg2, getKillRegState(Src2IsKill))
3994               .addReg(SrcReg0, getKillRegState(Src0IsKill))
3995               .addReg(SrcReg1, getKillRegState(Src1IsKill));
3996   else
3997     assert(false && "Invalid FMA instruction kind \n");
3998   // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
3999   InsInstrs.push_back(MIB);
4000   return MUL;
4001 }
4002 
4003 /// genMaddR - Generate madd instruction and combine mul and add using
4004 /// an extra virtual register.
4005 /// Example - an ADD intermediate needs to be stored in a register:
4006 ///   MUL I=A,B,0
4007 ///   ADD R,I,Imm
4008 ///   ==> ORR V, ZR, Imm
4009 ///   ==> MADD R,A,B,V
4010 /// \param MF Containing MachineFunction
4011 /// \param MRI Register information
4012 /// \param TII Target information
4013 /// \param Root is the ADD instruction
4014 /// \param [out] InsInstrs is a vector of machine instructions and will
4015 /// contain the generated madd instruction
4016 /// \param IdxMulOpd is index of operand in Root that is the result of
4017 /// the MUL. In the example above IdxMulOpd is 1.
4018 /// \param MaddOpc the opcode of the madd instruction
4019 /// \param VR is a virtual register that holds the value of an ADD operand
4020 /// (V in the example above).
4021 /// \param RC Register class of operands
4022 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4023                               const TargetInstrInfo *TII, MachineInstr &Root,
4024                               SmallVectorImpl<MachineInstr *> &InsInstrs,
4025                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4026                               const TargetRegisterClass *RC) {
4027   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4028 
4029   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4030   unsigned ResultReg = Root.getOperand(0).getReg();
4031   unsigned SrcReg0 = MUL->getOperand(1).getReg();
4032   bool Src0IsKill = MUL->getOperand(1).isKill();
4033   unsigned SrcReg1 = MUL->getOperand(2).getReg();
4034   bool Src1IsKill = MUL->getOperand(2).isKill();
4035 
4036   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4037     MRI.constrainRegClass(ResultReg, RC);
4038   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4039     MRI.constrainRegClass(SrcReg0, RC);
4040   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4041     MRI.constrainRegClass(SrcReg1, RC);
4042   if (TargetRegisterInfo::isVirtualRegister(VR))
4043     MRI.constrainRegClass(VR, RC);
4044 
4045   MachineInstrBuilder MIB =
4046       BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4047           .addReg(SrcReg0, getKillRegState(Src0IsKill))
4048           .addReg(SrcReg1, getKillRegState(Src1IsKill))
4049           .addReg(VR);
4050   // Insert the MADD
4051   InsInstrs.push_back(MIB);
4052   return MUL;
4053 }
4054 
4055 /// When getMachineCombinerPatterns() finds potential patterns,
4056 /// this function generates the instructions that could replace the
4057 /// original code sequence.
4058 void AArch64InstrInfo::genAlternativeCodeSequence(
4059     MachineInstr &Root, MachineCombinerPattern Pattern,
4060     SmallVectorImpl<MachineInstr *> &InsInstrs,
4061     SmallVectorImpl<MachineInstr *> &DelInstrs,
4062     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4063   MachineBasicBlock &MBB = *Root.getParent();
4064   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4065   MachineFunction &MF = *MBB.getParent();
4066   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4067 
4068   MachineInstr *MUL;
4069   const TargetRegisterClass *RC;
4070   unsigned Opc;
4071   switch (Pattern) {
4072   default:
4073     // Reassociate instructions.
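    // Any pattern not matched below is assumed to be one of the generic
    // reassociation patterns. The target-independent code invoked here
    // rewrites, roughly speaking, a serial chain such as
    //   ((a + b) + c) + d
    // into the shallower
    //   (a + b) + (c + d)
    // so the two inner operations can execute in parallel. (Sketch only;
    // the exact rewrite is decided by the generic combiner.)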
4074 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 4075 DelInstrs, InstrIdxForVirtReg); 4076 return; 4077 case MachineCombinerPattern::MULADDW_OP1: 4078 case MachineCombinerPattern::MULADDX_OP1: 4079 // MUL I=A,B,0 4080 // ADD R,I,C 4081 // ==> MADD R,A,B,C 4082 // --- Create(MADD); 4083 if (Pattern == MachineCombinerPattern::MULADDW_OP1) { 4084 Opc = AArch64::MADDWrrr; 4085 RC = &AArch64::GPR32RegClass; 4086 } else { 4087 Opc = AArch64::MADDXrrr; 4088 RC = &AArch64::GPR64RegClass; 4089 } 4090 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4091 break; 4092 case MachineCombinerPattern::MULADDW_OP2: 4093 case MachineCombinerPattern::MULADDX_OP2: 4094 // MUL I=A,B,0 4095 // ADD R,C,I 4096 // ==> MADD R,A,B,C 4097 // --- Create(MADD); 4098 if (Pattern == MachineCombinerPattern::MULADDW_OP2) { 4099 Opc = AArch64::MADDWrrr; 4100 RC = &AArch64::GPR32RegClass; 4101 } else { 4102 Opc = AArch64::MADDXrrr; 4103 RC = &AArch64::GPR64RegClass; 4104 } 4105 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4106 break; 4107 case MachineCombinerPattern::MULADDWI_OP1: 4108 case MachineCombinerPattern::MULADDXI_OP1: { 4109 // MUL I=A,B,0 4110 // ADD R,I,Imm 4111 // ==> ORR V, ZR, Imm 4112 // ==> MADD R,A,B,V 4113 // --- Create(MADD); 4114 const TargetRegisterClass *OrrRC; 4115 unsigned BitSize, OrrOpc, ZeroReg; 4116 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) { 4117 OrrOpc = AArch64::ORRWri; 4118 OrrRC = &AArch64::GPR32spRegClass; 4119 BitSize = 32; 4120 ZeroReg = AArch64::WZR; 4121 Opc = AArch64::MADDWrrr; 4122 RC = &AArch64::GPR32RegClass; 4123 } else { 4124 OrrOpc = AArch64::ORRXri; 4125 OrrRC = &AArch64::GPR64spRegClass; 4126 BitSize = 64; 4127 ZeroReg = AArch64::XZR; 4128 Opc = AArch64::MADDXrrr; 4129 RC = &AArch64::GPR64RegClass; 4130 } 4131 unsigned NewVR = MRI.createVirtualRegister(OrrRC); 4132 uint64_t Imm = Root.getOperand(2).getImm(); 4133 4134 if (Root.getOperand(3).isImm()) { 4135 unsigned Val = Root.getOperand(3).getImm(); 4136 Imm = Imm << Val; 4137 } 4138 uint64_t UImm = SignExtend64(Imm, BitSize); 4139 uint64_t Encoding; 4140 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { 4141 MachineInstrBuilder MIB1 = 4142 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 4143 .addReg(ZeroReg) 4144 .addImm(Encoding); 4145 InsInstrs.push_back(MIB1); 4146 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4147 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4148 } 4149 break; 4150 } 4151 case MachineCombinerPattern::MULSUBW_OP1: 4152 case MachineCombinerPattern::MULSUBX_OP1: { 4153 // MUL I=A,B,0 4154 // SUB R,I, C 4155 // ==> SUB V, 0, C 4156 // ==> MADD R,A,B,V // = -C + A*B 4157 // --- Create(MADD); 4158 const TargetRegisterClass *SubRC; 4159 unsigned SubOpc, ZeroReg; 4160 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) { 4161 SubOpc = AArch64::SUBWrr; 4162 SubRC = &AArch64::GPR32spRegClass; 4163 ZeroReg = AArch64::WZR; 4164 Opc = AArch64::MADDWrrr; 4165 RC = &AArch64::GPR32RegClass; 4166 } else { 4167 SubOpc = AArch64::SUBXrr; 4168 SubRC = &AArch64::GPR64spRegClass; 4169 ZeroReg = AArch64::XZR; 4170 Opc = AArch64::MADDXrrr; 4171 RC = &AArch64::GPR64RegClass; 4172 } 4173 unsigned NewVR = MRI.createVirtualRegister(SubRC); 4174 // SUB NewVR, 0, C 4175 MachineInstrBuilder MIB1 = 4176 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) 4177 .addReg(ZeroReg) 4178 .add(Root.getOperand(2)); 4179 InsInstrs.push_back(MIB1); 4180 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4181 MUL 
= genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4182 break; 4183 } 4184 case MachineCombinerPattern::MULSUBW_OP2: 4185 case MachineCombinerPattern::MULSUBX_OP2: 4186 // MUL I=A,B,0 4187 // SUB R,C,I 4188 // ==> MSUB R,A,B,C (computes C - A*B) 4189 // --- Create(MSUB); 4190 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) { 4191 Opc = AArch64::MSUBWrrr; 4192 RC = &AArch64::GPR32RegClass; 4193 } else { 4194 Opc = AArch64::MSUBXrrr; 4195 RC = &AArch64::GPR64RegClass; 4196 } 4197 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4198 break; 4199 case MachineCombinerPattern::MULSUBWI_OP1: 4200 case MachineCombinerPattern::MULSUBXI_OP1: { 4201 // MUL I=A,B,0 4202 // SUB R,I, Imm 4203 // ==> ORR V, ZR, -Imm 4204 // ==> MADD R,A,B,V // = -Imm + A*B 4205 // --- Create(MADD); 4206 const TargetRegisterClass *OrrRC; 4207 unsigned BitSize, OrrOpc, ZeroReg; 4208 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) { 4209 OrrOpc = AArch64::ORRWri; 4210 OrrRC = &AArch64::GPR32spRegClass; 4211 BitSize = 32; 4212 ZeroReg = AArch64::WZR; 4213 Opc = AArch64::MADDWrrr; 4214 RC = &AArch64::GPR32RegClass; 4215 } else { 4216 OrrOpc = AArch64::ORRXri; 4217 OrrRC = &AArch64::GPR64spRegClass; 4218 BitSize = 64; 4219 ZeroReg = AArch64::XZR; 4220 Opc = AArch64::MADDXrrr; 4221 RC = &AArch64::GPR64RegClass; 4222 } 4223 unsigned NewVR = MRI.createVirtualRegister(OrrRC); 4224 uint64_t Imm = Root.getOperand(2).getImm(); 4225 if (Root.getOperand(3).isImm()) { 4226 unsigned Val = Root.getOperand(3).getImm(); 4227 Imm = Imm << Val; 4228 } 4229 uint64_t UImm = SignExtend64(-Imm, BitSize); 4230 uint64_t Encoding; 4231 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { 4232 MachineInstrBuilder MIB1 = 4233 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 4234 .addReg(ZeroReg) 4235 .addImm(Encoding); 4236 InsInstrs.push_back(MIB1); 4237 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4238 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4239 } 4240 break; 4241 } 4242 // Floating Point Support 4243 case MachineCombinerPattern::FMULADDS_OP1: 4244 case MachineCombinerPattern::FMULADDD_OP1: 4245 // MUL I=A,B,0 4246 // ADD R,I,C 4247 // ==> MADD R,A,B,C 4248 // --- Create(MADD); 4249 if (Pattern == MachineCombinerPattern::FMULADDS_OP1) { 4250 Opc = AArch64::FMADDSrrr; 4251 RC = &AArch64::FPR32RegClass; 4252 } else { 4253 Opc = AArch64::FMADDDrrr; 4254 RC = &AArch64::FPR64RegClass; 4255 } 4256 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4257 break; 4258 case MachineCombinerPattern::FMULADDS_OP2: 4259 case MachineCombinerPattern::FMULADDD_OP2: 4260 // FMUL I=A,B,0 4261 // FADD R,C,I 4262 // ==> FMADD R,A,B,C 4263 // --- Create(FMADD); 4264 if (Pattern == MachineCombinerPattern::FMULADDS_OP2) { 4265 Opc = AArch64::FMADDSrrr; 4266 RC = &AArch64::FPR32RegClass; 4267 } else { 4268 Opc = AArch64::FMADDDrrr; 4269 RC = &AArch64::FPR64RegClass; 4270 } 4271 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4272 break; 4273 4274 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 4275 Opc = AArch64::FMLAv1i32_indexed; 4276 RC = &AArch64::FPR32RegClass; 4277 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4278 FMAInstKind::Indexed); 4279 break; 4280 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 4281 Opc = AArch64::FMLAv1i32_indexed; 4282 RC = &AArch64::FPR32RegClass; 4283 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4284 FMAInstKind::Indexed); 4285 break; 4286 4287 case 
MachineCombinerPattern::FMLAv1i64_indexed_OP1: 4288 Opc = AArch64::FMLAv1i64_indexed; 4289 RC = &AArch64::FPR64RegClass; 4290 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4291 FMAInstKind::Indexed); 4292 break; 4293 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 4294 Opc = AArch64::FMLAv1i64_indexed; 4295 RC = &AArch64::FPR64RegClass; 4296 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4297 FMAInstKind::Indexed); 4298 break; 4299 4300 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 4301 case MachineCombinerPattern::FMLAv2f32_OP1: 4302 RC = &AArch64::FPR64RegClass; 4303 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) { 4304 Opc = AArch64::FMLAv2i32_indexed; 4305 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4306 FMAInstKind::Indexed); 4307 } else { 4308 Opc = AArch64::FMLAv2f32; 4309 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4310 FMAInstKind::Accumulator); 4311 } 4312 break; 4313 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 4314 case MachineCombinerPattern::FMLAv2f32_OP2: 4315 RC = &AArch64::FPR64RegClass; 4316 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) { 4317 Opc = AArch64::FMLAv2i32_indexed; 4318 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4319 FMAInstKind::Indexed); 4320 } else { 4321 Opc = AArch64::FMLAv2f32; 4322 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4323 FMAInstKind::Accumulator); 4324 } 4325 break; 4326 4327 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 4328 case MachineCombinerPattern::FMLAv2f64_OP1: 4329 RC = &AArch64::FPR128RegClass; 4330 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) { 4331 Opc = AArch64::FMLAv2i64_indexed; 4332 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4333 FMAInstKind::Indexed); 4334 } else { 4335 Opc = AArch64::FMLAv2f64; 4336 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4337 FMAInstKind::Accumulator); 4338 } 4339 break; 4340 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 4341 case MachineCombinerPattern::FMLAv2f64_OP2: 4342 RC = &AArch64::FPR128RegClass; 4343 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) { 4344 Opc = AArch64::FMLAv2i64_indexed; 4345 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4346 FMAInstKind::Indexed); 4347 } else { 4348 Opc = AArch64::FMLAv2f64; 4349 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4350 FMAInstKind::Accumulator); 4351 } 4352 break; 4353 4354 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 4355 case MachineCombinerPattern::FMLAv4f32_OP1: 4356 RC = &AArch64::FPR128RegClass; 4357 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) { 4358 Opc = AArch64::FMLAv4i32_indexed; 4359 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4360 FMAInstKind::Indexed); 4361 } else { 4362 Opc = AArch64::FMLAv4f32; 4363 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4364 FMAInstKind::Accumulator); 4365 } 4366 break; 4367 4368 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 4369 case MachineCombinerPattern::FMLAv4f32_OP2: 4370 RC = &AArch64::FPR128RegClass; 4371 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) { 4372 Opc = AArch64::FMLAv4i32_indexed; 4373 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4374 FMAInstKind::Indexed); 4375 } else { 4376 Opc = AArch64::FMLAv4f32; 4377 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4378 
FMAInstKind::Accumulator); 4379 } 4380 break; 4381 4382 case MachineCombinerPattern::FMULSUBS_OP1: 4383 case MachineCombinerPattern::FMULSUBD_OP1: { 4384 // FMUL I=A,B,0 4385 // FSUB R,I,C 4386 // ==> FNMSUB R,A,B,C // = -C + A*B 4387 // --- Create(FNMSUB); 4388 if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) { 4389 Opc = AArch64::FNMSUBSrrr; 4390 RC = &AArch64::FPR32RegClass; 4391 } else { 4392 Opc = AArch64::FNMSUBDrrr; 4393 RC = &AArch64::FPR64RegClass; 4394 } 4395 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4396 break; 4397 } 4398 4399 case MachineCombinerPattern::FNMULSUBS_OP1: 4400 case MachineCombinerPattern::FNMULSUBD_OP1: { 4401 // FNMUL I=A,B,0 4402 // FSUB R,I,C 4403 // ==> FNMADD R,A,B,C // = -A*B - C 4404 // --- Create(FNMADD); 4405 if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) { 4406 Opc = AArch64::FNMADDSrrr; 4407 RC = &AArch64::FPR32RegClass; 4408 } else { 4409 Opc = AArch64::FNMADDDrrr; 4410 RC = &AArch64::FPR64RegClass; 4411 } 4412 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4413 break; 4414 } 4415 4416 case MachineCombinerPattern::FMULSUBS_OP2: 4417 case MachineCombinerPattern::FMULSUBD_OP2: { 4418 // FMUL I=A,B,0 4419 // FSUB R,C,I 4420 // ==> FMSUB R,A,B,C (computes C - A*B) 4421 // --- Create(FMSUB); 4422 if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) { 4423 Opc = AArch64::FMSUBSrrr; 4424 RC = &AArch64::FPR32RegClass; 4425 } else { 4426 Opc = AArch64::FMSUBDrrr; 4427 RC = &AArch64::FPR64RegClass; 4428 } 4429 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4430 break; 4431 } 4432 4433 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 4434 Opc = AArch64::FMLSv1i32_indexed; 4435 RC = &AArch64::FPR32RegClass; 4436 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4437 FMAInstKind::Indexed); 4438 break; 4439 4440 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 4441 Opc = AArch64::FMLSv1i64_indexed; 4442 RC = &AArch64::FPR64RegClass; 4443 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4444 FMAInstKind::Indexed); 4445 break; 4446 4447 case MachineCombinerPattern::FMLSv2f32_OP2: 4448 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 4449 RC = &AArch64::FPR64RegClass; 4450 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) { 4451 Opc = AArch64::FMLSv2i32_indexed; 4452 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4453 FMAInstKind::Indexed); 4454 } else { 4455 Opc = AArch64::FMLSv2f32; 4456 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4457 FMAInstKind::Accumulator); 4458 } 4459 break; 4460 4461 case MachineCombinerPattern::FMLSv2f64_OP2: 4462 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 4463 RC = &AArch64::FPR128RegClass; 4464 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) { 4465 Opc = AArch64::FMLSv2i64_indexed; 4466 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4467 FMAInstKind::Indexed); 4468 } else { 4469 Opc = AArch64::FMLSv2f64; 4470 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4471 FMAInstKind::Accumulator); 4472 } 4473 break; 4474 4475 case MachineCombinerPattern::FMLSv4f32_OP2: 4476 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 4477 RC = &AArch64::FPR128RegClass; 4478 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) { 4479 Opc = AArch64::FMLSv4i32_indexed; 4480 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4481 FMAInstKind::Indexed); 4482 } else { 4483 Opc = AArch64::FMLSv4f32; 4484 MUL = 
genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4485 FMAInstKind::Accumulator); 4486 } 4487 break; 4488 case MachineCombinerPattern::FMLSv2f32_OP1: 4489 case MachineCombinerPattern::FMLSv2i32_indexed_OP1: { 4490 RC = &AArch64::FPR64RegClass; 4491 unsigned NewVR = MRI.createVirtualRegister(RC); 4492 MachineInstrBuilder MIB1 = 4493 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR) 4494 .add(Root.getOperand(2)); 4495 InsInstrs.push_back(MIB1); 4496 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4497 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) { 4498 Opc = AArch64::FMLAv2i32_indexed; 4499 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4500 FMAInstKind::Indexed, &NewVR); 4501 } else { 4502 Opc = AArch64::FMLAv2f32; 4503 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4504 FMAInstKind::Accumulator, &NewVR); 4505 } 4506 break; 4507 } 4508 case MachineCombinerPattern::FMLSv4f32_OP1: 4509 case MachineCombinerPattern::FMLSv4i32_indexed_OP1: { 4510 RC = &AArch64::FPR128RegClass; 4511 unsigned NewVR = MRI.createVirtualRegister(RC); 4512 MachineInstrBuilder MIB1 = 4513 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR) 4514 .add(Root.getOperand(2)); 4515 InsInstrs.push_back(MIB1); 4516 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4517 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) { 4518 Opc = AArch64::FMLAv4i32_indexed; 4519 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4520 FMAInstKind::Indexed, &NewVR); 4521 } else { 4522 Opc = AArch64::FMLAv4f32; 4523 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4524 FMAInstKind::Accumulator, &NewVR); 4525 } 4526 break; 4527 } 4528 case MachineCombinerPattern::FMLSv2f64_OP1: 4529 case MachineCombinerPattern::FMLSv2i64_indexed_OP1: { 4530 RC = &AArch64::FPR128RegClass; 4531 unsigned NewVR = MRI.createVirtualRegister(RC); 4532 MachineInstrBuilder MIB1 = 4533 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR) 4534 .add(Root.getOperand(2)); 4535 InsInstrs.push_back(MIB1); 4536 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4537 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) { 4538 Opc = AArch64::FMLAv2i64_indexed; 4539 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4540 FMAInstKind::Indexed, &NewVR); 4541 } else { 4542 Opc = AArch64::FMLAv2f64; 4543 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4544 FMAInstKind::Accumulator, &NewVR); 4545 } 4546 break; 4547 } 4548 } // end switch (Pattern) 4549 // Record MUL and ADD/SUB for deletion 4550 DelInstrs.push_back(MUL); 4551 DelInstrs.push_back(&Root); 4552 } 4553 4554 /// Replace csincr-branch sequence by simple conditional branch 4555 /// 4556 /// Examples: 4557 /// 1. \code 4558 /// csinc w9, wzr, wzr, <condition code> 4559 /// tbnz w9, #0, 0x44 4560 /// \endcode 4561 /// to 4562 /// \code 4563 /// b.<inverted condition code> 4564 /// \endcode 4565 /// 4566 /// 2. \code 4567 /// csinc w9, wzr, wzr, <condition code> 4568 /// tbz w9, #0, 0x44 4569 /// \endcode 4570 /// to 4571 /// \code 4572 /// b.<condition code> 4573 /// \endcode 4574 /// 4575 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the 4576 /// compare's constant operand is power of 2. 
4577 /// 4578 /// Examples: 4579 /// \code 4580 /// and w8, w8, #0x400 4581 /// cbnz w8, L1 4582 /// \endcode 4583 /// to 4584 /// \code 4585 /// tbnz w8, #10, L1 4586 /// \endcode 4587 /// 4588 /// \param MI Conditional Branch 4589 /// \return True when the simple conditional branch is generated 4590 /// 4591 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { 4592 bool IsNegativeBranch = false; 4593 bool IsTestAndBranch = false; 4594 unsigned TargetBBInMI = 0; 4595 switch (MI.getOpcode()) { 4596 default: 4597 llvm_unreachable("Unknown branch instruction?"); 4598 case AArch64::Bcc: 4599 return false; 4600 case AArch64::CBZW: 4601 case AArch64::CBZX: 4602 TargetBBInMI = 1; 4603 break; 4604 case AArch64::CBNZW: 4605 case AArch64::CBNZX: 4606 TargetBBInMI = 1; 4607 IsNegativeBranch = true; 4608 break; 4609 case AArch64::TBZW: 4610 case AArch64::TBZX: 4611 TargetBBInMI = 2; 4612 IsTestAndBranch = true; 4613 break; 4614 case AArch64::TBNZW: 4615 case AArch64::TBNZX: 4616 TargetBBInMI = 2; 4617 IsNegativeBranch = true; 4618 IsTestAndBranch = true; 4619 break; 4620 } 4621 // So we increment a zero register and test for bits other 4622 // than bit 0? Conservatively bail out in case the verifier 4623 // missed this case. 4624 if (IsTestAndBranch && MI.getOperand(1).getImm()) 4625 return false; 4626 4627 // Find Definition. 4628 assert(MI.getParent() && "Incomplete machine instruciton\n"); 4629 MachineBasicBlock *MBB = MI.getParent(); 4630 MachineFunction *MF = MBB->getParent(); 4631 MachineRegisterInfo *MRI = &MF->getRegInfo(); 4632 unsigned VReg = MI.getOperand(0).getReg(); 4633 if (!TargetRegisterInfo::isVirtualRegister(VReg)) 4634 return false; 4635 4636 MachineInstr *DefMI = MRI->getVRegDef(VReg); 4637 4638 // Look through COPY instructions to find definition. 4639 while (DefMI->isCopy()) { 4640 unsigned CopyVReg = DefMI->getOperand(1).getReg(); 4641 if (!MRI->hasOneNonDBGUse(CopyVReg)) 4642 return false; 4643 if (!MRI->hasOneDef(CopyVReg)) 4644 return false; 4645 DefMI = MRI->getVRegDef(CopyVReg); 4646 } 4647 4648 switch (DefMI->getOpcode()) { 4649 default: 4650 return false; 4651 // Fold AND into a TBZ/TBNZ if constant operand is power of 2. 4652 case AArch64::ANDWri: 4653 case AArch64::ANDXri: { 4654 if (IsTestAndBranch) 4655 return false; 4656 if (DefMI->getParent() != MBB) 4657 return false; 4658 if (!MRI->hasOneNonDBGUse(VReg)) 4659 return false; 4660 4661 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri); 4662 uint64_t Mask = AArch64_AM::decodeLogicalImmediate( 4663 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64); 4664 if (!isPowerOf2_64(Mask)) 4665 return false; 4666 4667 MachineOperand &MO = DefMI->getOperand(1); 4668 unsigned NewReg = MO.getReg(); 4669 if (!TargetRegisterInfo::isVirtualRegister(NewReg)) 4670 return false; 4671 4672 assert(!MRI->def_empty(NewReg) && "Register must be defined."); 4673 4674 MachineBasicBlock &RefToMBB = *MBB; 4675 MachineBasicBlock *TBB = MI.getOperand(1).getMBB(); 4676 DebugLoc DL = MI.getDebugLoc(); 4677 unsigned Imm = Log2_64(Mask); 4678 unsigned Opc = (Imm < 32) 4679 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW) 4680 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX); 4681 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc)) 4682 .addReg(NewReg) 4683 .addImm(Imm) 4684 .addMBB(TBB); 4685 // Register lives on to the CBZ now. 4686 MO.setIsKill(false); 4687 4688 // For immediate smaller than 32, we need to use the 32-bit 4689 // variant (W) in all cases. 
Indeed the 64-bit variant does not 4690 // allow to encode them. 4691 // Therefore, if the input register is 64-bit, we need to take the 4692 // 32-bit sub-part. 4693 if (!Is32Bit && Imm < 32) 4694 NewMI->getOperand(0).setSubReg(AArch64::sub_32); 4695 MI.eraseFromParent(); 4696 return true; 4697 } 4698 // Look for CSINC 4699 case AArch64::CSINCWr: 4700 case AArch64::CSINCXr: { 4701 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR && 4702 DefMI->getOperand(2).getReg() == AArch64::WZR) && 4703 !(DefMI->getOperand(1).getReg() == AArch64::XZR && 4704 DefMI->getOperand(2).getReg() == AArch64::XZR)) 4705 return false; 4706 4707 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1) 4708 return false; 4709 4710 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm(); 4711 // Convert only when the condition code is not modified between 4712 // the CSINC and the branch. The CC may be used by other 4713 // instructions in between. 4714 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write)) 4715 return false; 4716 MachineBasicBlock &RefToMBB = *MBB; 4717 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB(); 4718 DebugLoc DL = MI.getDebugLoc(); 4719 if (IsNegativeBranch) 4720 CC = AArch64CC::getInvertedCondCode(CC); 4721 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB); 4722 MI.eraseFromParent(); 4723 return true; 4724 } 4725 } 4726 } 4727 4728 std::pair<unsigned, unsigned> 4729 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 4730 const unsigned Mask = AArch64II::MO_FRAGMENT; 4731 return std::make_pair(TF & Mask, TF & ~Mask); 4732 } 4733 4734 ArrayRef<std::pair<unsigned, const char *>> 4735 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 4736 using namespace AArch64II; 4737 4738 static const std::pair<unsigned, const char *> TargetFlags[] = { 4739 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"}, 4740 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"}, 4741 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"}, 4742 {MO_HI12, "aarch64-hi12"}}; 4743 return makeArrayRef(TargetFlags); 4744 } 4745 4746 ArrayRef<std::pair<unsigned, const char *>> 4747 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { 4748 using namespace AArch64II; 4749 4750 static const std::pair<unsigned, const char *> TargetFlags[] = { 4751 {MO_COFFSTUB, "aarch64-coffstub"}, 4752 {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, 4753 {MO_S, "aarch64-s"}, {MO_TLS, "aarch64-tls"}, 4754 {MO_DLLIMPORT, "aarch64-dllimport"}}; 4755 return makeArrayRef(TargetFlags); 4756 } 4757 4758 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 4759 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { 4760 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 4761 {{MOSuppressPair, "aarch64-suppress-pair"}, 4762 {MOStridedAccess, "aarch64-strided-access"}}; 4763 return makeArrayRef(TargetFlags); 4764 } 4765 4766 /// Constants defining how certain sequences should be outlined. 4767 /// This encompasses how an outlined function should be called, and what kind of 4768 /// frame should be emitted for that outlined function. 4769 /// 4770 /// \p MachineOutlinerDefault implies that the function should be called with 4771 /// a save and restore of LR to the stack. 
4772 /// 4773 /// That is, 4774 /// 4775 /// I1 Save LR OUTLINED_FUNCTION: 4776 /// I2 --> BL OUTLINED_FUNCTION I1 4777 /// I3 Restore LR I2 4778 /// I3 4779 /// RET 4780 /// 4781 /// * Call construction overhead: 3 (save + BL + restore) 4782 /// * Frame construction overhead: 1 (ret) 4783 /// * Requires stack fixups? Yes 4784 /// 4785 /// \p MachineOutlinerTailCall implies that the function is being created from 4786 /// a sequence of instructions ending in a return. 4787 /// 4788 /// That is, 4789 /// 4790 /// I1 OUTLINED_FUNCTION: 4791 /// I2 --> B OUTLINED_FUNCTION I1 4792 /// RET I2 4793 /// RET 4794 /// 4795 /// * Call construction overhead: 1 (B) 4796 /// * Frame construction overhead: 0 (Return included in sequence) 4797 /// * Requires stack fixups? No 4798 /// 4799 /// \p MachineOutlinerNoLRSave implies that the function should be called using 4800 /// a BL instruction, but doesn't require LR to be saved and restored. This 4801 /// happens when LR is known to be dead. 4802 /// 4803 /// That is, 4804 /// 4805 /// I1 OUTLINED_FUNCTION: 4806 /// I2 --> BL OUTLINED_FUNCTION I1 4807 /// I3 I2 4808 /// I3 4809 /// RET 4810 /// 4811 /// * Call construction overhead: 1 (BL) 4812 /// * Frame construction overhead: 1 (RET) 4813 /// * Requires stack fixups? No 4814 /// 4815 /// \p MachineOutlinerThunk implies that the function is being created from 4816 /// a sequence of instructions ending in a call. The outlined function is 4817 /// called with a BL instruction, and the outlined function tail-calls the 4818 /// original call destination. 4819 /// 4820 /// That is, 4821 /// 4822 /// I1 OUTLINED_FUNCTION: 4823 /// I2 --> BL OUTLINED_FUNCTION I1 4824 /// BL f I2 4825 /// B f 4826 /// * Call construction overhead: 1 (BL) 4827 /// * Frame construction overhead: 0 4828 /// * Requires stack fixups? No 4829 /// 4830 /// \p MachineOutlinerRegSave implies that the function should be called with a 4831 /// save and restore of LR to an available register. This allows us to avoid 4832 /// stack fixups. Note that this outlining variant is compatible with the 4833 /// NoLRSave case. 4834 /// 4835 /// That is, 4836 /// 4837 /// I1 Save LR OUTLINED_FUNCTION: 4838 /// I2 --> BL OUTLINED_FUNCTION I1 4839 /// I3 Restore LR I2 4840 /// I3 4841 /// RET 4842 /// 4843 /// * Call construction overhead: 3 (save + BL + restore) 4844 /// * Frame construction overhead: 1 (ret) 4845 /// * Requires stack fixups? No 4846 enum MachineOutlinerClass { 4847 MachineOutlinerDefault, /// Emit a save, restore, call, and return. 4848 MachineOutlinerTailCall, /// Only emit a branch. 4849 MachineOutlinerNoLRSave, /// Emit a call and return. 4850 MachineOutlinerThunk, /// Emit a call and tail-call. 4851 MachineOutlinerRegSave /// Same as default, but save to a register. 4852 }; 4853 4854 enum MachineOutlinerMBBFlags { 4855 LRUnavailableSomewhere = 0x2, 4856 HasCalls = 0x4, 4857 UnsafeRegsDead = 0x8 4858 }; 4859 4860 unsigned 4861 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { 4862 assert(C.LRUWasSet && "LRU wasn't set?"); 4863 MachineFunction *MF = C.getMF(); 4864 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>( 4865 MF->getSubtarget().getRegisterInfo()); 4866 4867 // Check if there is an available register across the sequence that we can 4868 // use. 4869 for (unsigned Reg : AArch64::GPR64RegClass) { 4870 if (!ARI->isReservedReg(*MF, Reg) && 4871 Reg != AArch64::LR && // LR is not reserved, but don't use it. 
4872         Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
4873         Reg != AArch64::X17 && // Ditto for X17.
4874         C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
4875       return Reg;
4876   }
4877 
4878   // No suitable register. Return 0.
4879   return 0u;
4880 }
4881 
4882 outliner::OutlinedFunction
4883 AArch64InstrInfo::getOutliningCandidateInfo(
4884     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
4885   outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
4886   unsigned SequenceSize =
4887       std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
4888                       [this](unsigned Sum, const MachineInstr &MI) {
4889                         return Sum + getInstSizeInBytes(MI);
4890                       });
4891 
4892   // Properties about candidate MBBs that hold for all of them.
4893   unsigned FlagsSetInAll = 0xF;
4894 
4895   // Compute liveness information for each candidate, and set FlagsSetInAll.
4896   const TargetRegisterInfo &TRI = getRegisterInfo();
4897   std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4898                 [&FlagsSetInAll](outliner::Candidate &C) {
4899                   FlagsSetInAll &= C.Flags;
4900                 });
4901 
4902   // According to the AArch64 Procedure Call Standard, the following are
4903   // undefined on entry/exit from a function call:
4904   //
4905   // * Registers x16, x17 (and thus w16, w17)
4906   // * Condition codes (and thus the NZCV register)
4907   //
4908   // Because of this, we can't outline any sequence of instructions where
4909   // one
4910   // of these registers is live into/across it. Thus, we need to delete
4911   // those
4912   // candidates.
4913   auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
4914     // If the unsafe registers in this block are all dead, then we don't need
4915     // to compute liveness here.
4916     if (C.Flags & UnsafeRegsDead)
4917       return false;
4918     C.initLRU(TRI);
4919     LiveRegUnits LRU = C.LRU;
4920     return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
4921             !LRU.available(AArch64::NZCV));
4922   };
4923 
4924   // Are there any candidates where those registers are live?
4925   if (!(FlagsSetInAll & UnsafeRegsDead)) {
4926     // Erase every candidate that violates the restrictions above. (It could be
4927     // true that we have viable candidates, so it's not worth bailing out in
4928     // the case that, say, 1 out of 20 candidates violates the restrictions.)
4929     RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
4930                                               RepeatedSequenceLocs.end(),
4931                                               CantGuaranteeValueAcrossCall),
4932                                RepeatedSequenceLocs.end());
4933 
4934     // If the sequence doesn't have enough candidates left, then we're done.
4935     if (RepeatedSequenceLocs.size() < 2)
4936       return outliner::OutlinedFunction();
4937   }
4938 
4939   // At this point, we have only "safe" candidates to outline. Figure out
4940   // frame + call instruction information.
4941 
4942   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
4943 
4944   // Helper lambda which sets call information for every candidate.
4945   auto SetCandidateCallInfo =
4946       [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
4947         for (outliner::Candidate &C : RepeatedSequenceLocs)
4948           C.setCallInfo(CallID, NumBytesForCall);
4949       };
4950 
4951   unsigned FrameID = MachineOutlinerDefault;
4952   unsigned NumBytesToCreateFrame = 4;
4953 
4954   bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
4955     return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
4956   });
4957 
4958   // Returns true if an instruction is safe to fix up, false otherwise.
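  // "Fix up" means compensating for the extra 16 bytes that saving LR on
  // the stack adds to the outlined frame in the default case. For example
  // (illustrative offsets only), an access such as
  //   ldr x9, [sp, #8]
  // in the candidate would need to become
  //   ldr x9, [sp, #24]
  // inside the outlined function, so it is only safe if the adjusted
  // offset is still encodable for that instruction.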
4959 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) { 4960 if (MI.isCall()) 4961 return true; 4962 4963 if (!MI.modifiesRegister(AArch64::SP, &TRI) && 4964 !MI.readsRegister(AArch64::SP, &TRI)) 4965 return true; 4966 4967 // Any modification of SP will break our code to save/restore LR. 4968 // FIXME: We could handle some instructions which add a constant 4969 // offset to SP, with a bit more work. 4970 if (MI.modifiesRegister(AArch64::SP, &TRI)) 4971 return false; 4972 4973 // At this point, we have a stack instruction that we might need to 4974 // fix up. We'll handle it if it's a load or store. 4975 if (MI.mayLoadOrStore()) { 4976 const MachineOperand *Base; // Filled with the base operand of MI. 4977 int64_t Offset; // Filled with the offset of MI. 4978 4979 // Does it allow us to offset the base operand and is the base the 4980 // register SP? 4981 if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() || 4982 Base->getReg() != AArch64::SP) 4983 return false; 4984 4985 // Find the minimum/maximum offset for this instruction and check 4986 // if fixing it up would be in range. 4987 int64_t MinOffset, 4988 MaxOffset; // Unscaled offsets for the instruction. 4989 unsigned Scale; // The scale to multiply the offsets by. 4990 unsigned DummyWidth; 4991 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset); 4992 4993 Offset += 16; // Update the offset to what it would be if we outlined. 4994 if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale) 4995 return false; 4996 4997 // It's in range, so we can outline it. 4998 return true; 4999 } 5000 5001 // FIXME: Add handling for instructions like "add x0, sp, #8". 5002 5003 // We can't fix it up, so don't outline it. 5004 return false; 5005 }; 5006 5007 // True if it's possible to fix up each stack instruction in this sequence. 5008 // Important for frames/call variants that modify the stack. 5009 bool AllStackInstrsSafe = std::all_of( 5010 FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup); 5011 5012 // If the last instruction in any candidate is a terminator, then we should 5013 // tail call all of the candidates. 5014 if (RepeatedSequenceLocs[0].back()->isTerminator()) { 5015 FrameID = MachineOutlinerTailCall; 5016 NumBytesToCreateFrame = 0; 5017 SetCandidateCallInfo(MachineOutlinerTailCall, 4); 5018 } 5019 5020 else if (LastInstrOpcode == AArch64::BL || 5021 (LastInstrOpcode == AArch64::BLR && !HasBTI)) { 5022 // FIXME: Do we need to check if the code after this uses the value of LR? 5023 FrameID = MachineOutlinerThunk; 5024 NumBytesToCreateFrame = 0; 5025 SetCandidateCallInfo(MachineOutlinerThunk, 4); 5026 } 5027 5028 else { 5029 // We need to decide how to emit calls + frames. We can always emit the same 5030 // frame if we don't need to save to the stack. If we have to save to the 5031 // stack, then we need a different frame. 5032 unsigned NumBytesNoStackCalls = 0; 5033 std::vector<outliner::Candidate> CandidatesWithoutStackFixups; 5034 5035 for (outliner::Candidate &C : RepeatedSequenceLocs) { 5036 C.initLRU(TRI); 5037 5038 // Is LR available? If so, we don't need a save. 5039 if (C.LRU.available(AArch64::LR)) { 5040 NumBytesNoStackCalls += 4; 5041 C.setCallInfo(MachineOutlinerNoLRSave, 4); 5042 CandidatesWithoutStackFixups.push_back(C); 5043 } 5044 5045 // Is an unused register available? If so, we won't modify the stack, so 5046 // we can outline with the same frame type as those that don't save LR. 
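      // Saving LR to a scratch register costs three instructions at each
      // call site, roughly (the register and function name below are only
      // placeholders):
      //   mov x20, lr
      //   bl  OUTLINED_FUNCTION_N
      //   mov lr, x20
      // which is where the 12 bytes accounted for here come from.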
      else if (findRegisterToSaveLRTo(C)) {
        NumBytesNoStackCalls += 12;
        C.setCallInfo(MachineOutlinerRegSave, 12);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is SP used in the sequence at all? If not, we don't have to modify
      // the stack, so we are guaranteed to get the same frame.
      else if (C.UsedInSequence.available(AArch64::SP)) {
        NumBytesNoStackCalls += 12;
        C.setCallInfo(MachineOutlinerDefault, 12);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // If we outline this, we need to modify the stack. Pretend we don't
      // outline this by saving all of its bytes.
      else {
        NumBytesNoStackCalls += SequenceSize;
      }
    }

    // If there are no places where we have to save LR, then note that we
    // don't have to update the stack. Otherwise, give every candidate the
    // default call type, as long as it's safe to do so.
    if (!AllStackInstrsSafe ||
        NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
      FrameID = MachineOutlinerNoLRSave;
    } else {
      SetCandidateCallInfo(MachineOutlinerDefault, 12);
    }

    // If we dropped all of the candidates, bail out here.
    if (RepeatedSequenceLocs.size() < 2) {
      RepeatedSequenceLocs.clear();
      return outliner::OutlinedFunction();
    }
  }

  // Does every candidate's MBB contain a call? If so, then we might have a
  // call in the range.
  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
    // Check if the range contains a call. These require a save + restore of
    // the link register.
    bool ModStackToSaveLR = false;
    if (std::any_of(FirstCand.front(), FirstCand.back(),
                    [](const MachineInstr &MI) { return MI.isCall(); }))
      ModStackToSaveLR = true;

    // Handle the last instruction separately. If this is a tail call, then the
    // last instruction is a call. We don't want to save + restore in this
    // case. However, the last instruction could be a call even when it is not
    // valid to tail call this sequence. We should consider this as well.
    else if (FrameID != MachineOutlinerThunk &&
             FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
      ModStackToSaveLR = true;

    if (ModStackToSaveLR) {
      // We can't fix up the stack. Bail out.
      if (!AllStackInstrsSafe) {
        RepeatedSequenceLocs.clear();
        return outliner::OutlinedFunction();
      }

      // Save + restore LR.
      NumBytesToCreateFrame += 8;
    }
  }

  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
                                    NumBytesToCreateFrame, FrameID);
}

bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
  const Function &F = MF.getFunction();

  // Can F be deduplicated by the linker? If it can, don't outline from it.
  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
    return false;

  // Don't outline from functions with section markings; the program could
  // expect that all the code is in the named section.
  // FIXME: Allow outlining from multiple functions with the same section
  // marking.
  if (F.hasSection())
    return false;

  // Outlining from functions with redzones is unsafe since the outliner may
  // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
  // outline from it.
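  // Note: hasRedZone() returns an Optional<bool>; getValueOr(true) below
  // deliberately treats "unknown" the same as "has a red zone", so we only
  // outline when we positively know there is no red zone.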
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  if (!AFI || AFI->hasRedZone().getValueOr(true))
    return false;

  // It's safe to outline from MF.
  return true;
}

bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                              unsigned &Flags) const {
  // Check if LR is available through all of the MBB. If it's not, then set
  // a flag.
  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
         "Suitable Machine Function for outlining must track liveness");
  LiveRegUnits LRU(getRegisterInfo());

  std::for_each(MBB.rbegin(), MBB.rend(),
                [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });

  // Check if each of the unsafe registers is available...
  bool W16AvailableInBlock = LRU.available(AArch64::W16);
  bool W17AvailableInBlock = LRU.available(AArch64::W17);
  bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);

  // If all of these are dead (and not live out), we know we don't have to
  // check them later.
  if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;

  // Now, add the live outs to the set.
  LRU.addLiveOuts(MBB);

  // If any of these registers is available in the MBB, but also a live out of
  // the block, then we know outlining is unsafe.
  if (W16AvailableInBlock && !LRU.available(AArch64::W16))
    return false;
  if (W17AvailableInBlock && !LRU.available(AArch64::W17))
    return false;
  if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
    return false;

  // Check if there's a call inside this MachineBasicBlock. If there is, then
  // set a flag.
  if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
    Flags |= MachineOutlinerMBBFlags::HasCalls;

  MachineFunction *MF = MBB.getParent();

  // In the event that we outline, we may have to save LR. If there is an
  // available register in the MBB, then we'll always save LR there. Check if
  // this is true.
  bool CanSaveLR = false;
  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
      MF->getSubtarget().getRegisterInfo());

  // Check if there is an available register across the sequence that we can
  // use.
  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
        Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
      CanSaveLR = true;
      break;
    }
  }

  // Check if we have a register we can save LR to, and if LR was used
  // somewhere. If both of those things are true, then we need to evaluate the
  // safety of outlining stack instructions later.
  if (!CanSaveLR && !LRU.available(AArch64::LR))
    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;

  return true;
}

outliner::InstrType
AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
                                   unsigned Flags) const {
  MachineInstr &MI = *MIT;
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();

  // Don't outline LOHs.
  if (FuncInfo->getLOHRelated().count(&MI))
    return outliner::InstrType::Illegal;

  // Don't allow debug values to impact outlining type.
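  // They are reported as Invisible: they neither make a sequence illegal nor
  // take part in the repeated-sequence matching.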
  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
    return outliner::InstrType::Invisible;

  // At this point, KILL instructions don't really tell us much so we can go
  // ahead and skip over them.
  if (MI.isKill())
    return outliner::InstrType::Invisible;

  // Is this a terminator for a basic block?
  if (MI.isTerminator()) {

    // Is this the end of a function?
    if (MI.getParent()->succ_empty())
      return outliner::InstrType::Legal;

    // It's not, so don't outline it.
    return outliner::InstrType::Illegal;
  }

  // Make sure none of the operands are un-outlinable.
  for (const MachineOperand &MOP : MI.operands()) {
    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
        MOP.isTargetIndex())
      return outliner::InstrType::Illegal;

    // If it uses LR or W30 explicitly, then don't touch it.
    if (MOP.isReg() && !MOP.isImplicit() &&
        (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
      return outliner::InstrType::Illegal;
  }

  // Special cases for instructions that can always be outlined, but will fail
  // the later tests. E.g., ADRPs, which are PC-relative, use LR but can always
  // be outlined because they don't require a *specific* value to be in LR.
  if (MI.getOpcode() == AArch64::ADRP)
    return outliner::InstrType::Legal;

  // If MI is a call we might be able to outline it. We don't want to outline
  // any calls that rely on the position of items on the stack. When we outline
  // something containing a call, we have to emit a save and restore of LR in
  // the outlined function. Currently, this always happens by saving LR to the
  // stack. Thus, if we outline, say, half the parameters for a function call
  // plus the call, then we'll break the callee's expectations for the layout
  // of the stack.
  //
  // FIXME: Allow calls to functions which construct a stack frame, as long
  // as they don't access arguments on the stack.
  // FIXME: Figure out some way to analyze functions defined in other modules.
  // We should be able to compute the memory usage based on the IR calling
  // convention, even if we can't see the definition.
  if (MI.isCall()) {
    // Get the function associated with the call. Look at each operand and find
    // the one that represents the callee and get its name.
    const Function *Callee = nullptr;
    for (const MachineOperand &MOP : MI.operands()) {
      if (MOP.isGlobal()) {
        Callee = dyn_cast<Function>(MOP.getGlobal());
        break;
      }
    }

    // Never outline calls to mcount. There isn't any rule that would require
    // this, but the Linux kernel's "ftrace" feature depends on it.
    if (Callee && Callee->getName() == "\01_mcount")
      return outliner::InstrType::Illegal;

    // If we don't know anything about the callee, assume it depends on the
    // stack layout of the caller. In that case, it's only legal to outline
    // as a tail-call. Whitelist the call instructions we know about so we
    // don't get unexpected results with call pseudo-instructions.
    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;

    if (!Callee)
      return UnknownCallOutlineType;

    // We have a function we have information about. Check if it's something
    // we can safely outline.
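    // ("Safe" here is conservative: the checks below only accept callees that
    // provably use no stack at all, so they cannot depend on the caller's
    // stack layout.)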
    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);

    // We don't know what's going on with the callee at all. Don't touch it.
    if (!CalleeMF)
      return UnknownCallOutlineType;

    // Check if we know anything about the callee saves on the function. If we
    // don't, then don't touch it, since that implies that we haven't
    // computed anything about its stack frame yet.
    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
        MFI.getNumObjects() > 0)
      return UnknownCallOutlineType;

    // At this point, we can say that CalleeMF ought not to pass anything on
    // the stack. Therefore, we can outline it.
    return outliner::InstrType::Legal;
  }

  // Don't outline positions.
  if (MI.isPosition())
    return outliner::InstrType::Illegal;

  // Don't touch the link register or W30.
  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
      MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    return outliner::InstrType::Illegal;

  // Don't outline BTI instructions, because that will prevent the outlining
  // site from being indirectly callable.
  if (MI.getOpcode() == AArch64::HINT) {
    int64_t Imm = MI.getOperand(0).getImm();
    if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
      return outliner::InstrType::Illegal;
  }

  return outliner::InstrType::Legal;
}

void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
  for (MachineInstr &MI : MBB) {
    const MachineOperand *Base;
    unsigned Width;
    int64_t Offset;

    // Is this a load or store with an immediate offset with SP as the base?
    if (!MI.mayLoadOrStore() ||
        !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
        (Base->isReg() && Base->getReg() != AArch64::SP))
      continue;

    // It is, so we have to fix it up.
    unsigned Scale;
    int64_t Dummy1, Dummy2;

    MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
    assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
    getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
    assert(Scale != 0 && "Unexpected opcode!");

    // We've pushed the return address to the stack, so add 16 to the offset.
    // This is safe, since we already checked if it would overflow when we
    // checked if this instruction was legal to outline.
    int64_t NewImm = (Offset + 16) / Scale;
    StackOffsetOperand.setImm(NewImm);
  }
}

void AArch64InstrInfo::buildOutlinedFrame(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const outliner::OutlinedFunction &OF) const {
  // For thunk outlining, rewrite the last instruction from a call to a
  // tail-call.
  if (OF.FrameConstructionID == MachineOutlinerThunk) {
    MachineInstr *Call = &*--MBB.instr_end();
    unsigned TailOpcode;
    if (Call->getOpcode() == AArch64::BL) {
      TailOpcode = AArch64::TCRETURNdi;
    } else {
      assert(Call->getOpcode() == AArch64::BLR);
      TailOpcode = AArch64::TCRETURNriALL;
    }
    MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
                           .add(Call->getOperand(0))
                           .addImm(0);
    MBB.insert(MBB.end(), TC);
    Call->eraseFromParent();
  }

  // Is there a call in the outlined range?
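  // If so, LR is clobbered inside the outlined body, so the code below wraps
  // the body in an LR spill/reload, roughly:
  //   str x30, [sp, #-16]!   // plus CFI describing the save
  //   ... outlined instructions ...
  //   ldr x30, [sp], #16
  //   ret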
  auto IsNonTailCall = [](MachineInstr &MI) {
    return MI.isCall() && !MI.isReturn();
  };
  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
    // Fix up the instructions in the range, since we're going to modify the
    // stack.
    assert(OF.FrameConstructionID != MachineOutlinerDefault &&
           "Can only fix up stack references once");
    fixupPostOutline(MBB);

    // LR has to be a live in so that we can save it.
    MBB.addLiveIn(AArch64::LR);

    MachineBasicBlock::iterator It = MBB.begin();
    MachineBasicBlock::iterator Et = MBB.end();

    if (OF.FrameConstructionID == MachineOutlinerTailCall ||
        OF.FrameConstructionID == MachineOutlinerThunk)
      Et = std::prev(MBB.end());

    // Insert a save before the outlined region.
    MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
                                .addReg(AArch64::SP, RegState::Define)
                                .addReg(AArch64::LR)
                                .addReg(AArch64::SP)
                                .addImm(-16);
    It = MBB.insert(It, STRXpre);

    const TargetSubtargetInfo &STI = MF.getSubtarget();
    const MCRegisterInfo *MRI = STI.getRegisterInfo();
    unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);

    // Add a CFI saying the stack was moved 16 B down.
    int64_t StackPosEntry =
        MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(StackPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);

    // Add a CFI saying that the LR that we want to find is now 16 B higher
    // than before.
    int64_t LRPosEntry =
        MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(LRPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);

    // Insert a restore before the terminator for the function.
    MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                                 .addReg(AArch64::SP, RegState::Define)
                                 .addReg(AArch64::LR, RegState::Define)
                                 .addReg(AArch64::SP)
                                 .addImm(16);
    Et = MBB.insert(Et, LDRXpost);
  }

  // If this is a tail call outlined function, then there's already a return.
  if (OF.FrameConstructionID == MachineOutlinerTailCall ||
      OF.FrameConstructionID == MachineOutlinerThunk)
    return;

  // It's not a tail call, so we have to insert the return ourselves.
  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
                          .addReg(AArch64::LR, RegState::Undef);
  MBB.insert(MBB.end(), ret);

  // Did we have to modify the stack by saving the link register?
  if (OF.FrameConstructionID != MachineOutlinerDefault)
    return;

  // We modified the stack.
  // Walk over the basic block and fix up all the stack accesses.
  fixupPostOutline(MBB);
}

MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    MachineFunction &MF, const outliner::Candidate &C) const {

  // Are we tail calling?
  if (C.CallConstructionID == MachineOutlinerTailCall) {
    // If yes, then we can just branch to the label.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
                            .addGlobalAddress(M.getNamedValue(MF.getName()))
                            .addImm(0));
    return It;
  }

  // Are we saving the link register?
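  // The remaining call variants are handled below, roughly:
  //   MachineOutlinerNoLRSave / MachineOutlinerThunk: a bare
  //     "bl OUTLINED_FUNCTION".
  //   MachineOutlinerRegSave: copy LR into a free register around the call.
  //   MachineOutlinerDefault: spill LR to the stack around the call.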
  if (C.CallConstructionID == MachineOutlinerNoLRSave ||
      C.CallConstructionID == MachineOutlinerThunk) {
    // No, so just insert the call.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                            .addGlobalAddress(M.getNamedValue(MF.getName())));
    return It;
  }

  // We want to return the spot where we inserted the call.
  MachineBasicBlock::iterator CallPt;

  // Instructions for saving and restoring LR around the call instruction we're
  // going to insert.
  MachineInstr *Save;
  MachineInstr *Restore;
  // Can we save to a register?
  if (C.CallConstructionID == MachineOutlinerRegSave) {
    // FIXME: This logic should be sunk into a target-specific interface so
    // that we don't have to recompute the register.
    unsigned Reg = findRegisterToSaveLRTo(C);
    assert(Reg != 0 && "No callee-saved register available?");

    // Save and restore LR from that register.
    Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
               .addReg(AArch64::XZR)
               .addReg(AArch64::LR)
               .addImm(0);
    Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
                  .addReg(AArch64::XZR)
                  .addReg(Reg)
                  .addImm(0);
  } else {
    // We have the default case. Save and restore from SP.
    Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
               .addReg(AArch64::SP, RegState::Define)
               .addReg(AArch64::LR)
               .addReg(AArch64::SP)
               .addImm(-16);
    Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                  .addReg(AArch64::SP, RegState::Define)
                  .addReg(AArch64::LR, RegState::Define)
                  .addReg(AArch64::SP)
                  .addImm(16);
  }

  It = MBB.insert(It, Save);
  It++;

  // Insert the call.
  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                          .addGlobalAddress(M.getNamedValue(MF.getName())));
  CallPt = It;
  It++;

  It = MBB.insert(It, Restore);
  return CallPt;
}

bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
    MachineFunction &MF) const {
  return MF.getFunction().hasMinSize();
}

bool AArch64InstrInfo::isCopyInstrImpl(
    const MachineInstr &MI, const MachineOperand *&Source,
    const MachineOperand *&Destination) const {

  // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR as the first source
  // register and a zero shift immediate are used as aliases for the mov
  // instruction.
  if (MI.getOpcode() == AArch64::ORRWrs &&
      MI.getOperand(1).getReg() == AArch64::WZR &&
      MI.getOperand(3).getImm() == 0x0) {
    Destination = &MI.getOperand(0);
    Source = &MI.getOperand(2);
    return true;
  }

  if (MI.getOpcode() == AArch64::ORRXrs &&
      MI.getOperand(1).getReg() == AArch64::XZR &&
      MI.getOperand(3).getImm() == 0x0) {
    Destination = &MI.getOperand(0);
    Source = &MI.getOperand(2);
    return true;
  }

  return false;
}

#define GET_INSTRINFO_HELPERS
#include "AArch64GenInstrInfo.inc"