1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the AArch64 target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64TargetMachine.h" 14 #include "MCTargetDesc/AArch64AddressingModes.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/CodeGen/SelectionDAGISel.h" 17 #include "llvm/IR/Function.h" // To access function attributes. 18 #include "llvm/IR/GlobalValue.h" 19 #include "llvm/IR/Intrinsics.h" 20 #include "llvm/IR/IntrinsicsAArch64.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/ErrorHandling.h" 23 #include "llvm/Support/KnownBits.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "aarch64-isel" 30 31 //===--------------------------------------------------------------------===// 32 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine 33 /// instructions for SelectionDAG operations. 34 /// 35 namespace { 36 37 class AArch64DAGToDAGISel : public SelectionDAGISel { 38 39 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 40 /// make the right decision when generating code for different targets. 41 const AArch64Subtarget *Subtarget; 42 43 public: 44 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, 45 CodeGenOpt::Level OptLevel) 46 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {} 47 48 StringRef getPassName() const override { 49 return "AArch64 Instruction Selection"; 50 } 51 52 bool runOnMachineFunction(MachineFunction &MF) override { 53 Subtarget = &MF.getSubtarget<AArch64Subtarget>(); 54 return SelectionDAGISel::runOnMachineFunction(MF); 55 } 56 57 void Select(SDNode *Node) override; 58 59 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 60 /// inline asm expressions. 
61 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 62 unsigned ConstraintID, 63 std::vector<SDValue> &OutOps) override; 64 65 template <signed Low, signed High, signed Scale> 66 bool SelectRDVLImm(SDValue N, SDValue &Imm); 67 68 bool tryMLAV64LaneV128(SDNode *N); 69 bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N); 70 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); 71 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 72 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 73 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 74 return SelectShiftedRegister(N, false, Reg, Shift); 75 } 76 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 77 return SelectShiftedRegister(N, true, Reg, Shift); 78 } 79 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { 80 return SelectAddrModeIndexed7S(N, 1, Base, OffImm); 81 } 82 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { 83 return SelectAddrModeIndexed7S(N, 2, Base, OffImm); 84 } 85 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { 86 return SelectAddrModeIndexed7S(N, 4, Base, OffImm); 87 } 88 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { 89 return SelectAddrModeIndexed7S(N, 8, Base, OffImm); 90 } 91 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { 92 return SelectAddrModeIndexed7S(N, 16, Base, OffImm); 93 } 94 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) { 95 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm); 96 } 97 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) { 98 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm); 99 } 100 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { 101 return SelectAddrModeIndexed(N, 1, Base, OffImm); 102 } 103 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { 104 return SelectAddrModeIndexed(N, 2, Base, OffImm); 105 } 106 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { 107 return SelectAddrModeIndexed(N, 4, Base, OffImm); 108 } 109 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { 110 return SelectAddrModeIndexed(N, 8, Base, OffImm); 111 } 112 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { 113 return SelectAddrModeIndexed(N, 16, Base, OffImm); 114 } 115 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { 116 return SelectAddrModeUnscaled(N, 1, Base, OffImm); 117 } 118 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { 119 return SelectAddrModeUnscaled(N, 2, Base, OffImm); 120 } 121 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { 122 return SelectAddrModeUnscaled(N, 4, Base, OffImm); 123 } 124 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { 125 return SelectAddrModeUnscaled(N, 8, Base, OffImm); 126 } 127 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { 128 return SelectAddrModeUnscaled(N, 16, Base, OffImm); 129 } 130 131 template<int Width> 132 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, 133 SDValue &SignExtend, SDValue &DoShift) { 134 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 135 } 136 137 template<int Width> 138 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, 139 
SDValue &SignExtend, SDValue &DoShift) { 140 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 141 } 142 143 bool SelectDupZeroOrUndef(SDValue N) { 144 switch(N->getOpcode()) { 145 case ISD::UNDEF: 146 return true; 147 case AArch64ISD::DUP: 148 case ISD::SPLAT_VECTOR: { 149 auto Opnd0 = N->getOperand(0); 150 if (auto CN = dyn_cast<ConstantSDNode>(Opnd0)) 151 if (CN->isNullValue()) 152 return true; 153 if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0)) 154 if (CN->isZero()) 155 return true; 156 break; 157 } 158 default: 159 break; 160 } 161 162 return false; 163 } 164 165 bool SelectDupZero(SDValue N) { 166 switch(N->getOpcode()) { 167 case AArch64ISD::DUP: 168 case ISD::SPLAT_VECTOR: { 169 auto Opnd0 = N->getOperand(0); 170 if (auto CN = dyn_cast<ConstantSDNode>(Opnd0)) 171 if (CN->isNullValue()) 172 return true; 173 if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0)) 174 if (CN->isZero()) 175 return true; 176 break; 177 } 178 } 179 180 return false; 181 } 182 183 template<MVT::SimpleValueType VT> 184 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) { 185 return SelectSVEAddSubImm(N, VT, Imm, Shift); 186 } 187 188 template<MVT::SimpleValueType VT> 189 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) { 190 return SelectSVELogicalImm(N, VT, Imm); 191 } 192 193 template <unsigned Low, unsigned High> 194 bool SelectSVEShiftImm64(SDValue N, SDValue &Imm) { 195 return SelectSVEShiftImm64(N, Low, High, Imm); 196 } 197 198 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 199 template<signed Min, signed Max, signed Scale, bool Shift> 200 bool SelectCntImm(SDValue N, SDValue &Imm) { 201 if (!isa<ConstantSDNode>(N)) 202 return false; 203 204 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 205 if (Shift) 206 MulImm = 1LL << MulImm; 207 208 if ((MulImm % std::abs(Scale)) != 0) 209 return false; 210 211 MulImm /= Scale; 212 if ((MulImm >= Min) && (MulImm <= Max)) { 213 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); 214 return true; 215 } 216 217 return false; 218 } 219 220 /// Form sequences of consecutive 64/128-bit registers for use in NEON 221 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have 222 /// between 1 and 4 elements. If it contains a single element that is returned 223 /// unchanged; otherwise a REG_SEQUENCE value is returned. 224 SDValue createDTuple(ArrayRef<SDValue> Vecs); 225 SDValue createQTuple(ArrayRef<SDValue> Vecs); 226 // Form a sequence of SVE registers for instructions using list of vectors, 227 // e.g. structured loads and stores (ldN, stN). 228 SDValue createZTuple(ArrayRef<SDValue> Vecs); 229 230 /// Generic helper for the createDTuple/createQTuple 231 /// functions. Those should almost always be called instead. 
232 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[], 233 const unsigned SubRegs[]); 234 235 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); 236 237 bool tryIndexedLoad(SDNode *N); 238 239 bool trySelectStackSlotTagP(SDNode *N); 240 void SelectTagP(SDNode *N); 241 242 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 243 unsigned SubRegIdx); 244 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 245 unsigned SubRegIdx); 246 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 247 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 248 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, 249 unsigned Opc_rr, unsigned Opc_ri); 250 251 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); 252 /// SVE Reg+Imm addressing mode. 253 template <int64_t Min, int64_t Max> 254 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base, 255 SDValue &OffImm); 256 /// SVE Reg+Reg address mode. 257 template <unsigned Scale> 258 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) { 259 return SelectSVERegRegAddrMode(N, Scale, Base, Offset); 260 } 261 262 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); 263 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); 264 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 265 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 266 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale, 267 unsigned Opc_rr, unsigned Opc_ri); 268 std::tuple<unsigned, SDValue, SDValue> 269 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri, 270 const SDValue &OldBase, const SDValue &OldOffset, 271 unsigned Scale); 272 273 bool tryBitfieldExtractOp(SDNode *N); 274 bool tryBitfieldExtractOpFromSExt(SDNode *N); 275 bool tryBitfieldInsertOp(SDNode *N); 276 bool tryBitfieldInsertInZeroOp(SDNode *N); 277 bool tryShiftAmountMod(SDNode *N); 278 bool tryHighFPExt(SDNode *N); 279 280 bool tryReadRegister(SDNode *N); 281 bool tryWriteRegister(SDNode *N); 282 283 // Include the pieces autogenerated from the target description. 
284 #include "AArch64GenDAGISel.inc" 285 286 private: 287 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, 288 SDValue &Shift); 289 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, 290 SDValue &OffImm) { 291 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm); 292 } 293 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW, 294 unsigned Size, SDValue &Base, 295 SDValue &OffImm); 296 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, 297 SDValue &OffImm); 298 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, 299 SDValue &OffImm); 300 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, 301 SDValue &Offset, SDValue &SignExtend, 302 SDValue &DoShift); 303 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, 304 SDValue &Offset, SDValue &SignExtend, 305 SDValue &DoShift); 306 bool isWorthFolding(SDValue V) const; 307 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, 308 SDValue &Offset, SDValue &SignExtend); 309 310 template<unsigned RegWidth> 311 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { 312 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); 313 } 314 315 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); 316 317 bool SelectCMP_SWAP(SDNode *N); 318 319 bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift); 320 321 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); 322 323 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm); 324 325 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); 326 bool SelectSVEShiftImm64(SDValue N, uint64_t Low, uint64_t High, 327 SDValue &Imm); 328 329 bool SelectSVEArithImm(SDValue N, SDValue &Imm); 330 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, 331 SDValue &Offset); 332 }; 333 } // end anonymous namespace 334 335 /// isIntImmediate - This method tests to see if the node is a constant 336 /// operand. If so Imm will receive the 32-bit value. 337 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { 338 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { 339 Imm = C->getZExtValue(); 340 return true; 341 } 342 return false; 343 } 344 345 // isIntImmediate - This method tests to see if a constant operand. 346 // If so Imm will receive the value. 347 static bool isIntImmediate(SDValue N, uint64_t &Imm) { 348 return isIntImmediate(N.getNode(), Imm); 349 } 350 351 // isOpcWithIntImmediate - This method tests to see if the node is a specific 352 // opcode and that it has a immediate integer right operand. 353 // If so Imm will receive the 32 bit value. 354 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, 355 uint64_t &Imm) { 356 return N->getOpcode() == Opc && 357 isIntImmediate(N->getOperand(1).getNode(), Imm); 358 } 359 360 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( 361 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 362 switch(ConstraintID) { 363 default: 364 llvm_unreachable("Unexpected asm memory constraint"); 365 case InlineAsm::Constraint_m: 366 case InlineAsm::Constraint_Q: 367 // We need to make sure that this one operand does not end up in XZR, thus 368 // require the address to be in a PointerRegClass register. 
369 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); 370 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); 371 SDLoc dl(Op); 372 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); 373 SDValue NewOp = 374 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 375 dl, Op.getValueType(), 376 Op, RC), 0); 377 OutOps.push_back(NewOp); 378 return false; 379 } 380 return true; 381 } 382 383 /// SelectArithImmed - Select an immediate value that can be represented as 384 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 385 /// Val set to the 12-bit value and Shift set to the shifter operand. 386 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, 387 SDValue &Shift) { 388 // This function is called from the addsub_shifted_imm ComplexPattern, 389 // which lists [imm] as the list of opcode it's interested in, however 390 // we still need to check whether the operand is actually an immediate 391 // here because the ComplexPattern opcode list is only used in 392 // root-level opcode matching. 393 if (!isa<ConstantSDNode>(N.getNode())) 394 return false; 395 396 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 397 unsigned ShiftAmt; 398 399 if (Immed >> 12 == 0) { 400 ShiftAmt = 0; 401 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 402 ShiftAmt = 12; 403 Immed = Immed >> 12; 404 } else 405 return false; 406 407 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 408 SDLoc dl(N); 409 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); 410 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); 411 return true; 412 } 413 414 /// SelectNegArithImmed - As above, but negates the value before trying to 415 /// select it. 416 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, 417 SDValue &Shift) { 418 // This function is called from the addsub_shifted_imm ComplexPattern, 419 // which lists [imm] as the list of opcode it's interested in, however 420 // we still need to check whether the operand is actually an immediate 421 // here because the ComplexPattern opcode list is only used in 422 // root-level opcode matching. 423 if (!isa<ConstantSDNode>(N.getNode())) 424 return false; 425 426 // The immediate operand must be a 24-bit zero-extended immediate. 427 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 428 429 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 430 // have the opposite effect on the C flag, so this pattern mustn't match under 431 // those circumstances. 432 if (Immed == 0) 433 return false; 434 435 if (N.getValueType() == MVT::i32) 436 Immed = ~((uint32_t)Immed) + 1; 437 else 438 Immed = ~Immed + 1ULL; 439 if (Immed & 0xFFFFFFFFFF000000ULL) 440 return false; 441 442 Immed &= 0xFFFFFFULL; 443 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, 444 Shift); 445 } 446 447 /// getShiftTypeForNode - Translate a shift node to the corresponding 448 /// ShiftType value. 449 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { 450 switch (N.getOpcode()) { 451 default: 452 return AArch64_AM::InvalidShiftExtend; 453 case ISD::SHL: 454 return AArch64_AM::LSL; 455 case ISD::SRL: 456 return AArch64_AM::LSR; 457 case ISD::SRA: 458 return AArch64_AM::ASR; 459 case ISD::ROTR: 460 return AArch64_AM::ROR; 461 } 462 } 463 464 /// Determine whether it is worth it to fold SHL into the addressing 465 /// mode. 
466 static bool isWorthFoldingSHL(SDValue V) { 467 assert(V.getOpcode() == ISD::SHL && "invalid opcode"); 468 // It is worth folding logical shift of up to three places. 469 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1)); 470 if (!CSD) 471 return false; 472 unsigned ShiftVal = CSD->getZExtValue(); 473 if (ShiftVal > 3) 474 return false; 475 476 // Check if this particular node is reused in any non-memory related 477 // operation. If yes, do not try to fold this node into the address 478 // computation, since the computation will be kept. 479 const SDNode *Node = V.getNode(); 480 for (SDNode *UI : Node->uses()) 481 if (!isa<MemSDNode>(*UI)) 482 for (SDNode *UII : UI->uses()) 483 if (!isa<MemSDNode>(*UII)) 484 return false; 485 return true; 486 } 487 488 /// Determine whether it is worth to fold V into an extended register. 489 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { 490 // Trivial if we are optimizing for code size or if there is only 491 // one use of the value. 492 if (CurDAG->shouldOptForSize() || V.hasOneUse()) 493 return true; 494 // If a subtarget has a fastpath LSL we can fold a logical shift into 495 // the addressing mode and save a cycle. 496 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL && 497 isWorthFoldingSHL(V)) 498 return true; 499 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) { 500 const SDValue LHS = V.getOperand(0); 501 const SDValue RHS = V.getOperand(1); 502 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) 503 return true; 504 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) 505 return true; 506 } 507 508 // It hurts otherwise, since the value will be reused. 509 return false; 510 } 511 512 /// SelectShiftedRegister - Select a "shifted register" operand. If the value 513 /// is not shifted, set the Shift operand to default of "LSL 0". The logical 514 /// instructions allow the shifted register to be rotated, but the arithmetic 515 /// instructions do not. The AllowROR parameter specifies whether ROR is 516 /// supported. 517 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, 518 SDValue &Reg, SDValue &Shift) { 519 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); 520 if (ShType == AArch64_AM::InvalidShiftExtend) 521 return false; 522 if (!AllowROR && ShType == AArch64_AM::ROR) 523 return false; 524 525 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 526 unsigned BitSize = N.getValueSizeInBits(); 527 unsigned Val = RHS->getZExtValue() & (BitSize - 1); 528 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); 529 530 Reg = N.getOperand(0); 531 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); 532 return isWorthFolding(N); 533 } 534 535 return false; 536 } 537 538 /// getExtendTypeForNode - Translate an extend node to the corresponding 539 /// ExtendType value. 
540 static AArch64_AM::ShiftExtendType 541 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { 542 if (N.getOpcode() == ISD::SIGN_EXTEND || 543 N.getOpcode() == ISD::SIGN_EXTEND_INREG) { 544 EVT SrcVT; 545 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) 546 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); 547 else 548 SrcVT = N.getOperand(0).getValueType(); 549 550 if (!IsLoadStore && SrcVT == MVT::i8) 551 return AArch64_AM::SXTB; 552 else if (!IsLoadStore && SrcVT == MVT::i16) 553 return AArch64_AM::SXTH; 554 else if (SrcVT == MVT::i32) 555 return AArch64_AM::SXTW; 556 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 557 558 return AArch64_AM::InvalidShiftExtend; 559 } else if (N.getOpcode() == ISD::ZERO_EXTEND || 560 N.getOpcode() == ISD::ANY_EXTEND) { 561 EVT SrcVT = N.getOperand(0).getValueType(); 562 if (!IsLoadStore && SrcVT == MVT::i8) 563 return AArch64_AM::UXTB; 564 else if (!IsLoadStore && SrcVT == MVT::i16) 565 return AArch64_AM::UXTH; 566 else if (SrcVT == MVT::i32) 567 return AArch64_AM::UXTW; 568 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 569 570 return AArch64_AM::InvalidShiftExtend; 571 } else if (N.getOpcode() == ISD::AND) { 572 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 573 if (!CSD) 574 return AArch64_AM::InvalidShiftExtend; 575 uint64_t AndMask = CSD->getZExtValue(); 576 577 switch (AndMask) { 578 default: 579 return AArch64_AM::InvalidShiftExtend; 580 case 0xFF: 581 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 582 case 0xFFFF: 583 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 584 case 0xFFFFFFFF: 585 return AArch64_AM::UXTW; 586 } 587 } 588 589 return AArch64_AM::InvalidShiftExtend; 590 } 591 592 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. 593 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { 594 if (DL->getOpcode() != AArch64ISD::DUPLANE16 && 595 DL->getOpcode() != AArch64ISD::DUPLANE32) 596 return false; 597 598 SDValue SV = DL->getOperand(0); 599 if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) 600 return false; 601 602 SDValue EV = SV.getOperand(1); 603 if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) 604 return false; 605 606 ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); 607 ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); 608 LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); 609 LaneOp = EV.getOperand(0); 610 611 return true; 612 } 613 614 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a 615 // high lane extract. 616 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, 617 SDValue &LaneOp, int &LaneIdx) { 618 619 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { 620 std::swap(Op0, Op1); 621 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) 622 return false; 623 } 624 StdOp = Op1; 625 return true; 626 } 627 628 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand 629 /// is a lane in the upper half of a 128-bit vector. Recognize and select this 630 /// so that we don't emit unnecessary lane extracts. 631 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) { 632 SDLoc dl(N); 633 SDValue Op0 = N->getOperand(0); 634 SDValue Op1 = N->getOperand(1); 635 SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. 636 SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. 637 int LaneIdx = -1; // Will hold the lane index. 
638 639 if (Op1.getOpcode() != ISD::MUL || 640 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 641 LaneIdx)) { 642 std::swap(Op0, Op1); 643 if (Op1.getOpcode() != ISD::MUL || 644 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 645 LaneIdx)) 646 return false; 647 } 648 649 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); 650 651 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; 652 653 unsigned MLAOpc = ~0U; 654 655 switch (N->getSimpleValueType(0).SimpleTy) { 656 default: 657 llvm_unreachable("Unrecognized MLA."); 658 case MVT::v4i16: 659 MLAOpc = AArch64::MLAv4i16_indexed; 660 break; 661 case MVT::v8i16: 662 MLAOpc = AArch64::MLAv8i16_indexed; 663 break; 664 case MVT::v2i32: 665 MLAOpc = AArch64::MLAv2i32_indexed; 666 break; 667 case MVT::v4i32: 668 MLAOpc = AArch64::MLAv4i32_indexed; 669 break; 670 } 671 672 ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops)); 673 return true; 674 } 675 676 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) { 677 SDLoc dl(N); 678 SDValue SMULLOp0; 679 SDValue SMULLOp1; 680 int LaneIdx; 681 682 if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, 683 LaneIdx)) 684 return false; 685 686 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); 687 688 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; 689 690 unsigned SMULLOpc = ~0U; 691 692 if (IntNo == Intrinsic::aarch64_neon_smull) { 693 switch (N->getSimpleValueType(0).SimpleTy) { 694 default: 695 llvm_unreachable("Unrecognized SMULL."); 696 case MVT::v4i32: 697 SMULLOpc = AArch64::SMULLv4i16_indexed; 698 break; 699 case MVT::v2i64: 700 SMULLOpc = AArch64::SMULLv2i32_indexed; 701 break; 702 } 703 } else if (IntNo == Intrinsic::aarch64_neon_umull) { 704 switch (N->getSimpleValueType(0).SimpleTy) { 705 default: 706 llvm_unreachable("Unrecognized SMULL."); 707 case MVT::v4i32: 708 SMULLOpc = AArch64::UMULLv4i16_indexed; 709 break; 710 case MVT::v2i64: 711 SMULLOpc = AArch64::UMULLv2i32_indexed; 712 break; 713 } 714 } else 715 llvm_unreachable("Unrecognized intrinsic."); 716 717 ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops)); 718 return true; 719 } 720 721 /// Instructions that accept extend modifiers like UXTW expect the register 722 /// being extended to be a GPR32, but the incoming DAG might be acting on a 723 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if 724 /// this is the case. 725 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { 726 if (N.getValueType() == MVT::i32) 727 return N; 728 729 SDLoc dl(N); 730 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 731 MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 732 dl, MVT::i32, N, SubReg); 733 return SDValue(Node, 0); 734 } 735 736 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 737 template<signed Low, signed High, signed Scale> 738 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) { 739 if (!isa<ConstantSDNode>(N)) 740 return false; 741 742 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 743 if ((MulImm % std::abs(Scale)) == 0) { 744 int64_t RDVLImm = MulImm / Scale; 745 if ((RDVLImm >= Low) && (RDVLImm <= High)) { 746 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32); 747 return true; 748 } 749 } 750 751 return false; 752 } 753 754 /// SelectArithExtendedRegister - Select a "extended register" operand. 
This 755 /// operand folds in an extend followed by an optional left shift. 756 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, 757 SDValue &Shift) { 758 unsigned ShiftVal = 0; 759 AArch64_AM::ShiftExtendType Ext; 760 761 if (N.getOpcode() == ISD::SHL) { 762 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 763 if (!CSD) 764 return false; 765 ShiftVal = CSD->getZExtValue(); 766 if (ShiftVal > 4) 767 return false; 768 769 Ext = getExtendTypeForNode(N.getOperand(0)); 770 if (Ext == AArch64_AM::InvalidShiftExtend) 771 return false; 772 773 Reg = N.getOperand(0).getOperand(0); 774 } else { 775 Ext = getExtendTypeForNode(N); 776 if (Ext == AArch64_AM::InvalidShiftExtend) 777 return false; 778 779 Reg = N.getOperand(0); 780 781 // Don't match if free 32-bit -> 64-bit zext can be used instead. 782 if (Ext == AArch64_AM::UXTW && 783 Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode())) 784 return false; 785 } 786 787 // AArch64 mandates that the RHS of the operation must use the smallest 788 // register class that could contain the size being extended from. Thus, 789 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though 790 // there might not be an actual 32-bit value in the program. We can 791 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. 792 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); 793 Reg = narrowIfNeeded(CurDAG, Reg); 794 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), 795 MVT::i32); 796 return isWorthFolding(N); 797 } 798 799 /// If there's a use of this ADDlow that's not itself a load/store then we'll 800 /// need to create a real ADD instruction from it anyway and there's no point in 801 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's 802 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding 803 /// leads to duplicated ADRP instructions. 804 static bool isWorthFoldingADDlow(SDValue N) { 805 for (auto Use : N->uses()) { 806 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && 807 Use->getOpcode() != ISD::ATOMIC_LOAD && 808 Use->getOpcode() != ISD::ATOMIC_STORE) 809 return false; 810 811 // ldar and stlr have much more restrictive addressing modes (just a 812 // register). 813 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering())) 814 return false; 815 } 816 817 return true; 818 } 819 820 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit 821 /// immediate" address. The "Size" argument is the size in bytes of the memory 822 /// reference, which determines the scale. 823 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, 824 unsigned BW, unsigned Size, 825 SDValue &Base, 826 SDValue &OffImm) { 827 SDLoc dl(N); 828 const DataLayout &DL = CurDAG->getDataLayout(); 829 const TargetLowering *TLI = getTargetLowering(); 830 if (N.getOpcode() == ISD::FrameIndex) { 831 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 832 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 833 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 834 return true; 835 } 836 837 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed 838 // selected here doesn't support labels/immediates, only base+offset. 
839 if (CurDAG->isBaseWithConstantOffset(N)) { 840 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 841 if (IsSignedImm) { 842 int64_t RHSC = RHS->getSExtValue(); 843 unsigned Scale = Log2_32(Size); 844 int64_t Range = 0x1LL << (BW - 1); 845 846 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && 847 RHSC < (Range << Scale)) { 848 Base = N.getOperand(0); 849 if (Base.getOpcode() == ISD::FrameIndex) { 850 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 851 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 852 } 853 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 854 return true; 855 } 856 } else { 857 // unsigned Immediate 858 uint64_t RHSC = RHS->getZExtValue(); 859 unsigned Scale = Log2_32(Size); 860 uint64_t Range = 0x1ULL << BW; 861 862 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { 863 Base = N.getOperand(0); 864 if (Base.getOpcode() == ISD::FrameIndex) { 865 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 866 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 867 } 868 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 869 return true; 870 } 871 } 872 } 873 } 874 // Base only. The address will be materialized into a register before 875 // the memory is accessed. 876 // add x0, Xbase, #offset 877 // stp x1, x2, [x0] 878 Base = N; 879 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 880 return true; 881 } 882 883 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit 884 /// immediate" address. The "Size" argument is the size in bytes of the memory 885 /// reference, which determines the scale. 886 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, 887 SDValue &Base, SDValue &OffImm) { 888 SDLoc dl(N); 889 const DataLayout &DL = CurDAG->getDataLayout(); 890 const TargetLowering *TLI = getTargetLowering(); 891 if (N.getOpcode() == ISD::FrameIndex) { 892 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 893 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 894 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 895 return true; 896 } 897 898 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { 899 GlobalAddressSDNode *GAN = 900 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); 901 Base = N.getOperand(0); 902 OffImm = N.getOperand(1); 903 if (!GAN) 904 return true; 905 906 if (GAN->getOffset() % Size == 0 && 907 GAN->getGlobal()->getPointerAlignment(DL) >= Size) 908 return true; 909 } 910 911 if (CurDAG->isBaseWithConstantOffset(N)) { 912 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 913 int64_t RHSC = (int64_t)RHS->getZExtValue(); 914 unsigned Scale = Log2_32(Size); 915 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 916 Base = N.getOperand(0); 917 if (Base.getOpcode() == ISD::FrameIndex) { 918 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 919 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 920 } 921 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 922 return true; 923 } 924 } 925 } 926 927 // Before falling back to our general case, check if the unscaled 928 // instructions can handle this. If so, that's preferable. 929 if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) 930 return false; 931 932 // Base only. The address will be materialized into a register before 933 // the memory is accessed. 
934 // add x0, Xbase, #offset 935 // ldr x0, [x0] 936 Base = N; 937 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 938 return true; 939 } 940 941 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit 942 /// immediate" address. This should only match when there is an offset that 943 /// is not valid for a scaled immediate addressing mode. The "Size" argument 944 /// is the size in bytes of the memory reference, which is needed here to know 945 /// what is valid for a scaled immediate. 946 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, 947 SDValue &Base, 948 SDValue &OffImm) { 949 if (!CurDAG->isBaseWithConstantOffset(N)) 950 return false; 951 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 952 int64_t RHSC = RHS->getSExtValue(); 953 // If the offset is valid as a scaled immediate, don't match here. 954 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && 955 RHSC < (0x1000 << Log2_32(Size))) 956 return false; 957 if (RHSC >= -256 && RHSC < 256) { 958 Base = N.getOperand(0); 959 if (Base.getOpcode() == ISD::FrameIndex) { 960 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 961 const TargetLowering *TLI = getTargetLowering(); 962 Base = CurDAG->getTargetFrameIndex( 963 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 964 } 965 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); 966 return true; 967 } 968 } 969 return false; 970 } 971 972 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { 973 SDLoc dl(N); 974 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 975 SDValue ImpDef = SDValue( 976 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); 977 MachineSDNode *Node = CurDAG->getMachineNode( 978 TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg); 979 return SDValue(Node, 0); 980 } 981 982 /// Check if the given SHL node (\p N), can be used to form an 983 /// extended register for an addressing mode. 984 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, 985 bool WantExtend, SDValue &Offset, 986 SDValue &SignExtend) { 987 assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); 988 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 989 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) 990 return false; 991 992 SDLoc dl(N); 993 if (WantExtend) { 994 AArch64_AM::ShiftExtendType Ext = 995 getExtendTypeForNode(N.getOperand(0), true); 996 if (Ext == AArch64_AM::InvalidShiftExtend) 997 return false; 998 999 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); 1000 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1001 MVT::i32); 1002 } else { 1003 Offset = N.getOperand(0); 1004 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); 1005 } 1006 1007 unsigned LegalShiftVal = Log2_32(Size); 1008 unsigned ShiftVal = CSD->getZExtValue(); 1009 1010 if (ShiftVal != 0 && ShiftVal != LegalShiftVal) 1011 return false; 1012 1013 return isWorthFolding(N); 1014 } 1015 1016 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, 1017 SDValue &Base, SDValue &Offset, 1018 SDValue &SignExtend, 1019 SDValue &DoShift) { 1020 if (N.getOpcode() != ISD::ADD) 1021 return false; 1022 SDValue LHS = N.getOperand(0); 1023 SDValue RHS = N.getOperand(1); 1024 SDLoc dl(N); 1025 1026 // We don't want to match immediate adds here, because they are better lowered 1027 // to the register-immediate addressing modes. 
1028 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) 1029 return false; 1030 1031 // Check if this particular node is reused in any non-memory related 1032 // operation. If yes, do not try to fold this node into the address 1033 // computation, since the computation will be kept. 1034 const SDNode *Node = N.getNode(); 1035 for (SDNode *UI : Node->uses()) { 1036 if (!isa<MemSDNode>(*UI)) 1037 return false; 1038 } 1039 1040 // Remember if it is worth folding N when it produces extended register. 1041 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 1042 1043 // Try to match a shifted extend on the RHS. 1044 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1045 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { 1046 Base = LHS; 1047 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1048 return true; 1049 } 1050 1051 // Try to match a shifted extend on the LHS. 1052 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1053 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { 1054 Base = RHS; 1055 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1056 return true; 1057 } 1058 1059 // There was no shift, whatever else we find. 1060 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); 1061 1062 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; 1063 // Try to match an unshifted extend on the LHS. 1064 if (IsExtendedRegisterWorthFolding && 1065 (Ext = getExtendTypeForNode(LHS, true)) != 1066 AArch64_AM::InvalidShiftExtend) { 1067 Base = RHS; 1068 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); 1069 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1070 MVT::i32); 1071 if (isWorthFolding(LHS)) 1072 return true; 1073 } 1074 1075 // Try to match an unshifted extend on the RHS. 1076 if (IsExtendedRegisterWorthFolding && 1077 (Ext = getExtendTypeForNode(RHS, true)) != 1078 AArch64_AM::InvalidShiftExtend) { 1079 Base = LHS; 1080 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); 1081 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1082 MVT::i32); 1083 if (isWorthFolding(RHS)) 1084 return true; 1085 } 1086 1087 return false; 1088 } 1089 1090 // Check if the given immediate is preferred by ADD. If an immediate can be 1091 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be 1092 // encoded by one MOVZ, return true. 1093 static bool isPreferredADD(int64_t ImmOff) { 1094 // Constant in [0x0, 0xfff] can be encoded in ADD. 1095 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) 1096 return true; 1097 // Check if it can be encoded in an "ADD LSL #12". 1098 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) 1099 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant. 1100 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && 1101 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; 1102 return false; 1103 } 1104 1105 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, 1106 SDValue &Base, SDValue &Offset, 1107 SDValue &SignExtend, 1108 SDValue &DoShift) { 1109 if (N.getOpcode() != ISD::ADD) 1110 return false; 1111 SDValue LHS = N.getOperand(0); 1112 SDValue RHS = N.getOperand(1); 1113 SDLoc DL(N); 1114 1115 // Check if this particular node is reused in any non-memory related 1116 // operation. If yes, do not try to fold this node into the address 1117 // computation, since the computation will be kept. 
1118 const SDNode *Node = N.getNode(); 1119 for (SDNode *UI : Node->uses()) { 1120 if (!isa<MemSDNode>(*UI)) 1121 return false; 1122 } 1123 1124 // Watch out if RHS is a wide immediate, it can not be selected into 1125 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into 1126 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate 1127 // instructions like: 1128 // MOV X0, WideImmediate 1129 // ADD X1, BaseReg, X0 1130 // LDR X2, [X1, 0] 1131 // For such situation, using [BaseReg, XReg] addressing mode can save one 1132 // ADD/SUB: 1133 // MOV X0, WideImmediate 1134 // LDR X2, [BaseReg, X0] 1135 if (isa<ConstantSDNode>(RHS)) { 1136 int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); 1137 unsigned Scale = Log2_32(Size); 1138 // Skip the immediate can be selected by load/store addressing mode. 1139 // Also skip the immediate can be encoded by a single ADD (SUB is also 1140 // checked by using -ImmOff). 1141 if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || 1142 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) 1143 return false; 1144 1145 SDValue Ops[] = { RHS }; 1146 SDNode *MOVI = 1147 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); 1148 SDValue MOVIV = SDValue(MOVI, 0); 1149 // This ADD of two X register will be selected into [Reg+Reg] mode. 1150 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); 1151 } 1152 1153 // Remember if it is worth folding N when it produces extended register. 1154 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 1155 1156 // Try to match a shifted extend on the RHS. 1157 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1158 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { 1159 Base = LHS; 1160 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1161 return true; 1162 } 1163 1164 // Try to match a shifted extend on the LHS. 1165 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1166 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { 1167 Base = RHS; 1168 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1169 return true; 1170 } 1171 1172 // Match any non-shifted, non-extend, non-immediate add expression. 1173 Base = LHS; 1174 Offset = RHS; 1175 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); 1176 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); 1177 // Reg1 + Reg2 is free: no check needed. 
1178 return true; 1179 } 1180 1181 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { 1182 static const unsigned RegClassIDs[] = { 1183 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; 1184 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, 1185 AArch64::dsub2, AArch64::dsub3}; 1186 1187 return createTuple(Regs, RegClassIDs, SubRegs); 1188 } 1189 1190 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { 1191 static const unsigned RegClassIDs[] = { 1192 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; 1193 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, 1194 AArch64::qsub2, AArch64::qsub3}; 1195 1196 return createTuple(Regs, RegClassIDs, SubRegs); 1197 } 1198 1199 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) { 1200 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID, 1201 AArch64::ZPR3RegClassID, 1202 AArch64::ZPR4RegClassID}; 1203 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, 1204 AArch64::zsub2, AArch64::zsub3}; 1205 1206 return createTuple(Regs, RegClassIDs, SubRegs); 1207 } 1208 1209 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, 1210 const unsigned RegClassIDs[], 1211 const unsigned SubRegs[]) { 1212 // There's no special register-class for a vector-list of 1 element: it's just 1213 // a vector. 1214 if (Regs.size() == 1) 1215 return Regs[0]; 1216 1217 assert(Regs.size() >= 2 && Regs.size() <= 4); 1218 1219 SDLoc DL(Regs[0]); 1220 1221 SmallVector<SDValue, 4> Ops; 1222 1223 // First operand of REG_SEQUENCE is the desired RegClass. 1224 Ops.push_back( 1225 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); 1226 1227 // Then we get pairs of source & subregister-position for the components. 1228 for (unsigned i = 0; i < Regs.size(); ++i) { 1229 Ops.push_back(Regs[i]); 1230 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); 1231 } 1232 1233 SDNode *N = 1234 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 1235 return SDValue(N, 0); 1236 } 1237 1238 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, 1239 bool isExt) { 1240 SDLoc dl(N); 1241 EVT VT = N->getValueType(0); 1242 1243 unsigned ExtOff = isExt; 1244 1245 // Form a REG_SEQUENCE to force register allocation. 1246 unsigned Vec0Off = ExtOff + 1; 1247 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, 1248 N->op_begin() + Vec0Off + NumVecs); 1249 SDValue RegSeq = createQTuple(Regs); 1250 1251 SmallVector<SDValue, 6> Ops; 1252 if (isExt) 1253 Ops.push_back(N->getOperand(1)); 1254 Ops.push_back(RegSeq); 1255 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); 1256 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 1257 } 1258 1259 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { 1260 LoadSDNode *LD = cast<LoadSDNode>(N); 1261 if (LD->isUnindexed()) 1262 return false; 1263 EVT VT = LD->getMemoryVT(); 1264 EVT DstVT = N->getValueType(0); 1265 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1266 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; 1267 1268 // We're not doing validity checking here. That was done when checking 1269 // if we should mark the load as indexed or not. We're just selecting 1270 // the right instruction. 1271 unsigned Opcode = 0; 1272 1273 ISD::LoadExtType ExtType = LD->getExtensionType(); 1274 bool InsertTo64 = false; 1275 if (VT == MVT::i64) 1276 Opcode = IsPre ? 
AArch64::LDRXpre : AArch64::LDRXpost; 1277 else if (VT == MVT::i32) { 1278 if (ExtType == ISD::NON_EXTLOAD) 1279 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1280 else if (ExtType == ISD::SEXTLOAD) 1281 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; 1282 else { 1283 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1284 InsertTo64 = true; 1285 // The result of the load is only i32. It's the subreg_to_reg that makes 1286 // it into an i64. 1287 DstVT = MVT::i32; 1288 } 1289 } else if (VT == MVT::i16) { 1290 if (ExtType == ISD::SEXTLOAD) { 1291 if (DstVT == MVT::i64) 1292 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; 1293 else 1294 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; 1295 } else { 1296 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; 1297 InsertTo64 = DstVT == MVT::i64; 1298 // The result of the load is only i32. It's the subreg_to_reg that makes 1299 // it into an i64. 1300 DstVT = MVT::i32; 1301 } 1302 } else if (VT == MVT::i8) { 1303 if (ExtType == ISD::SEXTLOAD) { 1304 if (DstVT == MVT::i64) 1305 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; 1306 else 1307 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; 1308 } else { 1309 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; 1310 InsertTo64 = DstVT == MVT::i64; 1311 // The result of the load is only i32. It's the subreg_to_reg that makes 1312 // it into an i64. 1313 DstVT = MVT::i32; 1314 } 1315 } else if (VT == MVT::f16) { 1316 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1317 } else if (VT == MVT::bf16) { 1318 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1319 } else if (VT == MVT::f32) { 1320 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; 1321 } else if (VT == MVT::f64 || VT.is64BitVector()) { 1322 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; 1323 } else if (VT.is128BitVector()) { 1324 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; 1325 } else 1326 return false; 1327 SDValue Chain = LD->getChain(); 1328 SDValue Base = LD->getBasePtr(); 1329 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); 1330 int OffsetVal = (int)OffsetOp->getZExtValue(); 1331 SDLoc dl(N); 1332 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); 1333 SDValue Ops[] = { Base, Offset, Chain }; 1334 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, 1335 MVT::Other, Ops); 1336 // Either way, we're replacing the node, so tell the caller that. 
1337 SDValue LoadedVal = SDValue(Res, 1); 1338 if (InsertTo64) { 1339 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 1340 LoadedVal = 1341 SDValue(CurDAG->getMachineNode( 1342 AArch64::SUBREG_TO_REG, dl, MVT::i64, 1343 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, 1344 SubReg), 1345 0); 1346 } 1347 1348 ReplaceUses(SDValue(N, 0), LoadedVal); 1349 ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); 1350 ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); 1351 CurDAG->RemoveDeadNode(N); 1352 return true; 1353 } 1354 1355 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 1356 unsigned SubRegIdx) { 1357 SDLoc dl(N); 1358 EVT VT = N->getValueType(0); 1359 SDValue Chain = N->getOperand(0); 1360 1361 SDValue Ops[] = {N->getOperand(2), // Mem operand; 1362 Chain}; 1363 1364 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1365 1366 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1367 SDValue SuperReg = SDValue(Ld, 0); 1368 for (unsigned i = 0; i < NumVecs; ++i) 1369 ReplaceUses(SDValue(N, i), 1370 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1371 1372 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1373 1374 // Transfer memoperands. 1375 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1376 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 1377 1378 CurDAG->RemoveDeadNode(N); 1379 } 1380 1381 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, 1382 unsigned Opc, unsigned SubRegIdx) { 1383 SDLoc dl(N); 1384 EVT VT = N->getValueType(0); 1385 SDValue Chain = N->getOperand(0); 1386 1387 SDValue Ops[] = {N->getOperand(1), // Mem operand 1388 N->getOperand(2), // Incremental 1389 Chain}; 1390 1391 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1392 MVT::Untyped, MVT::Other}; 1393 1394 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1395 1396 // Update uses of write back register 1397 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1398 1399 // Update uses of vector list 1400 SDValue SuperReg = SDValue(Ld, 1); 1401 if (NumVecs == 1) 1402 ReplaceUses(SDValue(N, 0), SuperReg); 1403 else 1404 for (unsigned i = 0; i < NumVecs; ++i) 1405 ReplaceUses(SDValue(N, i), 1406 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1407 1408 // Update the chain 1409 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1410 CurDAG->RemoveDeadNode(N); 1411 } 1412 1413 /// Optimize \param OldBase and \param OldOffset selecting the best addressing 1414 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the 1415 /// new Base and an SDValue representing the new offset. 1416 std::tuple<unsigned, SDValue, SDValue> 1417 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, 1418 unsigned Opc_ri, 1419 const SDValue &OldBase, 1420 const SDValue &OldOffset, 1421 unsigned Scale) { 1422 SDValue NewBase = OldBase; 1423 SDValue NewOffset = OldOffset; 1424 // Detect a possible Reg+Imm addressing mode. 1425 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>( 1426 N, OldBase, NewBase, NewOffset); 1427 1428 // Detect a possible reg+reg addressing mode, but only if we haven't already 1429 // detected a Reg+Imm one. 1430 const bool IsRegReg = 1431 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); 1432 1433 // Select the instruction. 1434 return std::make_tuple(IsRegReg ? 
Opc_rr : Opc_ri, NewBase, NewOffset); 1435 } 1436 1437 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, 1438 unsigned Scale, unsigned Opc_ri, 1439 unsigned Opc_rr) { 1440 assert(Scale < 4 && "Invalid scaling value."); 1441 SDLoc DL(N); 1442 EVT VT = N->getValueType(0); 1443 SDValue Chain = N->getOperand(0); 1444 1445 // Optimize addressing mode. 1446 SDValue Base, Offset; 1447 unsigned Opc; 1448 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1449 N, Opc_rr, Opc_ri, N->getOperand(2), 1450 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); 1451 1452 SDValue Ops[] = {N->getOperand(1), // Predicate 1453 Base, // Memory operand 1454 Offset, Chain}; 1455 1456 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1457 1458 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1459 SDValue SuperReg = SDValue(Load, 0); 1460 for (unsigned i = 0; i < NumVecs; ++i) 1461 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1462 AArch64::zsub0 + i, DL, VT, SuperReg)); 1463 1464 // Copy chain 1465 unsigned ChainIdx = NumVecs; 1466 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1467 CurDAG->RemoveDeadNode(N); 1468 } 1469 1470 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, 1471 unsigned Opc) { 1472 SDLoc dl(N); 1473 EVT VT = N->getOperand(2)->getValueType(0); 1474 1475 // Form a REG_SEQUENCE to force register allocation. 1476 bool Is128Bit = VT.getSizeInBits() == 128; 1477 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1478 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1479 1480 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; 1481 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1482 1483 // Transfer memoperands. 1484 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1485 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1486 1487 ReplaceNode(N, St); 1488 } 1489 1490 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, 1491 unsigned Scale, unsigned Opc_rr, 1492 unsigned Opc_ri) { 1493 SDLoc dl(N); 1494 1495 // Form a REG_SEQUENCE to force register allocation. 1496 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1497 SDValue RegSeq = createZTuple(Regs); 1498 1499 // Optimize addressing mode. 
1500 unsigned Opc; 1501 SDValue Offset, Base; 1502 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1503 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), 1504 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); 1505 1506 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate 1507 Base, // address 1508 Offset, // offset 1509 N->getOperand(0)}; // chain 1510 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1511 1512 ReplaceNode(N, St); 1513 } 1514 1515 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, 1516 SDValue &OffImm) { 1517 SDLoc dl(N); 1518 const DataLayout &DL = CurDAG->getDataLayout(); 1519 const TargetLowering *TLI = getTargetLowering(); 1520 1521 // Try to match it for the frame address 1522 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) { 1523 int FI = FINode->getIndex(); 1524 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1525 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1526 return true; 1527 } 1528 1529 return false; 1530 } 1531 1532 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 1533 unsigned Opc) { 1534 SDLoc dl(N); 1535 EVT VT = N->getOperand(2)->getValueType(0); 1536 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1537 MVT::Other}; // Type for the Chain 1538 1539 // Form a REG_SEQUENCE to force register allocation. 1540 bool Is128Bit = VT.getSizeInBits() == 128; 1541 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1542 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1543 1544 SDValue Ops[] = {RegSeq, 1545 N->getOperand(NumVecs + 1), // base register 1546 N->getOperand(NumVecs + 2), // Incremental 1547 N->getOperand(0)}; // Chain 1548 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1549 1550 ReplaceNode(N, St); 1551 } 1552 1553 namespace { 1554 /// WidenVector - Given a value in the V64 register class, produce the 1555 /// equivalent value in the V128 register class. 1556 class WidenVector { 1557 SelectionDAG &DAG; 1558 1559 public: 1560 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 1561 1562 SDValue operator()(SDValue V64Reg) { 1563 EVT VT = V64Reg.getValueType(); 1564 unsigned NarrowSize = VT.getVectorNumElements(); 1565 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1566 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 1567 SDLoc DL(V64Reg); 1568 1569 SDValue Undef = 1570 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 1571 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 1572 } 1573 }; 1574 } // namespace 1575 1576 /// NarrowVector - Given a value in the V128 register class, produce the 1577 /// equivalent value in the V64 register class. 1578 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 1579 EVT VT = V128Reg.getValueType(); 1580 unsigned WideSize = VT.getVectorNumElements(); 1581 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1582 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 1583 1584 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 1585 V128Reg); 1586 } 1587 1588 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 1589 unsigned Opc) { 1590 SDLoc dl(N); 1591 EVT VT = N->getValueType(0); 1592 bool Narrow = VT.getSizeInBits() == 64; 1593 1594 // Form a REG_SEQUENCE to force register allocation. 
1595 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1596 1597 if (Narrow) 1598 transform(Regs, Regs.begin(), 1599 WidenVector(*CurDAG)); 1600 1601 SDValue RegSeq = createQTuple(Regs); 1602 1603 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1604 1605 unsigned LaneNo = 1606 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1607 1608 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1609 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1610 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1611 SDValue SuperReg = SDValue(Ld, 0); 1612 1613 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1614 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1615 AArch64::qsub2, AArch64::qsub3 }; 1616 for (unsigned i = 0; i < NumVecs; ++i) { 1617 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 1618 if (Narrow) 1619 NV = NarrowVector(NV, *CurDAG); 1620 ReplaceUses(SDValue(N, i), NV); 1621 } 1622 1623 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1624 CurDAG->RemoveDeadNode(N); 1625 } 1626 1627 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 1628 unsigned Opc) { 1629 SDLoc dl(N); 1630 EVT VT = N->getValueType(0); 1631 bool Narrow = VT.getSizeInBits() == 64; 1632 1633 // Form a REG_SEQUENCE to force register allocation. 1634 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1635 1636 if (Narrow) 1637 transform(Regs, Regs.begin(), 1638 WidenVector(*CurDAG)); 1639 1640 SDValue RegSeq = createQTuple(Regs); 1641 1642 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1643 RegSeq->getValueType(0), MVT::Other}; 1644 1645 unsigned LaneNo = 1646 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1647 1648 SDValue Ops[] = {RegSeq, 1649 CurDAG->getTargetConstant(LaneNo, dl, 1650 MVT::i64), // Lane Number 1651 N->getOperand(NumVecs + 2), // Base register 1652 N->getOperand(NumVecs + 3), // Incremental 1653 N->getOperand(0)}; 1654 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1655 1656 // Update uses of the write back register 1657 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1658 1659 // Update uses of the vector list 1660 SDValue SuperReg = SDValue(Ld, 1); 1661 if (NumVecs == 1) { 1662 ReplaceUses(SDValue(N, 0), 1663 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 1664 } else { 1665 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1666 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1667 AArch64::qsub2, AArch64::qsub3 }; 1668 for (unsigned i = 0; i < NumVecs; ++i) { 1669 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 1670 SuperReg); 1671 if (Narrow) 1672 NV = NarrowVector(NV, *CurDAG); 1673 ReplaceUses(SDValue(N, i), NV); 1674 } 1675 } 1676 1677 // Update the Chain 1678 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1679 CurDAG->RemoveDeadNode(N); 1680 } 1681 1682 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 1683 unsigned Opc) { 1684 SDLoc dl(N); 1685 EVT VT = N->getOperand(2)->getValueType(0); 1686 bool Narrow = VT.getSizeInBits() == 64; 1687 1688 // Form a REG_SEQUENCE to force register allocation. 
1689 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1690 1691 if (Narrow) 1692 transform(Regs, Regs.begin(), 1693 WidenVector(*CurDAG)); 1694 1695 SDValue RegSeq = createQTuple(Regs); 1696 1697 unsigned LaneNo = 1698 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1699 1700 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1701 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1702 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 1703 1704 // Transfer memoperands. 1705 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1706 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1707 1708 ReplaceNode(N, St); 1709 } 1710 1711 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 1712 unsigned Opc) { 1713 SDLoc dl(N); 1714 EVT VT = N->getOperand(2)->getValueType(0); 1715 bool Narrow = VT.getSizeInBits() == 64; 1716 1717 // Form a REG_SEQUENCE to force register allocation. 1718 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1719 1720 if (Narrow) 1721 transform(Regs, Regs.begin(), 1722 WidenVector(*CurDAG)); 1723 1724 SDValue RegSeq = createQTuple(Regs); 1725 1726 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1727 MVT::Other}; 1728 1729 unsigned LaneNo = 1730 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1731 1732 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1733 N->getOperand(NumVecs + 2), // Base Register 1734 N->getOperand(NumVecs + 3), // Incremental 1735 N->getOperand(0)}; 1736 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1737 1738 // Transfer memoperands. 1739 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1740 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1741 1742 ReplaceNode(N, St); 1743 } 1744 1745 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 1746 unsigned &Opc, SDValue &Opd0, 1747 unsigned &LSB, unsigned &MSB, 1748 unsigned NumberOfIgnoredLowBits, 1749 bool BiggerPattern) { 1750 assert(N->getOpcode() == ISD::AND && 1751 "N must be a AND operation to call this function"); 1752 1753 EVT VT = N->getValueType(0); 1754 1755 // Here we can test the type of VT and return false when the type does not 1756 // match, but since it is done prior to that call in the current context 1757 // we turned that into an assert to avoid redundant code. 1758 assert((VT == MVT::i32 || VT == MVT::i64) && 1759 "Type checking must have been done before calling this function"); 1760 1761 // FIXME: simplify-demanded-bits in DAGCombine will probably have 1762 // changed the AND node to a 32-bit mask operation. We'll have to 1763 // undo that as part of the transform here if we want to catch all 1764 // the opportunities. 1765 // Currently the NumberOfIgnoredLowBits argument helps to recover 1766 // form these situations when matching bigger pattern (bitfield insert). 1767 1768 // For unsigned extracts, check for a shift right and mask 1769 uint64_t AndImm = 0; 1770 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) 1771 return false; 1772 1773 const SDNode *Op0 = N->getOperand(0).getNode(); 1774 1775 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 1776 // simplified. 
Try to undo that 1777 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 1778 1779 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 1780 if (AndImm & (AndImm + 1)) 1781 return false; 1782 1783 bool ClampMSB = false; 1784 uint64_t SrlImm = 0; 1785 // Handle the SRL + ANY_EXTEND case. 1786 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 1787 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 1788 // Extend the incoming operand of the SRL to 64-bit. 1789 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 1790 // Make sure to clamp the MSB so that we preserve the semantics of the 1791 // original operations. 1792 ClampMSB = true; 1793 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 1794 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 1795 SrlImm)) { 1796 // If the shift result was truncated, we can still combine them. 1797 Opd0 = Op0->getOperand(0).getOperand(0); 1798 1799 // Use the type of SRL node. 1800 VT = Opd0->getValueType(0); 1801 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 1802 Opd0 = Op0->getOperand(0); 1803 } else if (BiggerPattern) { 1804 // Let's pretend a 0 shift right has been performed. 1805 // The resulting code will be at least as good as the original one 1806 // plus it may expose more opportunities for bitfield insert pattern. 1807 // FIXME: Currently we limit this to the bigger pattern, because 1808 // some optimizations expect AND and not UBFM. 1809 Opd0 = N->getOperand(0); 1810 } else 1811 return false; 1812 1813 // Bail out on large immediates. This happens when no proper 1814 // combining/constant folding was performed. 1815 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { 1816 LLVM_DEBUG( 1817 (dbgs() << N 1818 << ": Found large shift immediate, this should not happen\n")); 1819 return false; 1820 } 1821 1822 LSB = SrlImm; 1823 MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm) 1824 : countTrailingOnes<uint64_t>(AndImm)) - 1825 1; 1826 if (ClampMSB) 1827 // Since we're moving the extend before the right shift operation, we need 1828 // to clamp the MSB to make sure we don't shift in undefined bits instead of 1829 // the zeros which would get shifted in with the original right shift 1830 // operation. 1831 MSB = MSB > 31 ? 31 : MSB; 1832 1833 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 1834 return true; 1835 } 1836 1837 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, 1838 SDValue &Opd0, unsigned &Immr, 1839 unsigned &Imms) { 1840 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); 1841 1842 EVT VT = N->getValueType(0); 1843 unsigned BitWidth = VT.getSizeInBits(); 1844 assert((VT == MVT::i32 || VT == MVT::i64) && 1845 "Type checking must have been done before calling this function"); 1846 1847 SDValue Op = N->getOperand(0); 1848 if (Op->getOpcode() == ISD::TRUNCATE) { 1849 Op = Op->getOperand(0); 1850 VT = Op->getValueType(0); 1851 BitWidth = VT.getSizeInBits(); 1852 } 1853 1854 uint64_t ShiftImm; 1855 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && 1856 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 1857 return false; 1858 1859 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1860 if (ShiftImm + Width > BitWidth) 1861 return false; 1862 1863 Opc = (VT == MVT::i32) ? 
AArch64::SBFMWri : AArch64::SBFMXri; 1864 Opd0 = Op.getOperand(0); 1865 Immr = ShiftImm; 1866 Imms = ShiftImm + Width - 1; 1867 return true; 1868 } 1869 1870 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 1871 SDValue &Opd0, unsigned &LSB, 1872 unsigned &MSB) { 1873 // We are looking for the following pattern which basically extracts several 1874 // continuous bits from the source value and places it from the LSB of the 1875 // destination value, all other bits of the destination value or set to zero: 1876 // 1877 // Value2 = AND Value, MaskImm 1878 // SRL Value2, ShiftImm 1879 // 1880 // with MaskImm >> ShiftImm to search for the bit width. 1881 // 1882 // This gets selected into a single UBFM: 1883 // 1884 // UBFM Value, ShiftImm, BitWide + SrlImm -1 1885 // 1886 1887 if (N->getOpcode() != ISD::SRL) 1888 return false; 1889 1890 uint64_t AndMask = 0; 1891 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) 1892 return false; 1893 1894 Opd0 = N->getOperand(0).getOperand(0); 1895 1896 uint64_t SrlImm = 0; 1897 if (!isIntImmediate(N->getOperand(1), SrlImm)) 1898 return false; 1899 1900 // Check whether we really have several bits extract here. 1901 unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm)); 1902 if (BitWide && isMask_64(AndMask >> SrlImm)) { 1903 if (N->getValueType(0) == MVT::i32) 1904 Opc = AArch64::UBFMWri; 1905 else 1906 Opc = AArch64::UBFMXri; 1907 1908 LSB = SrlImm; 1909 MSB = BitWide + SrlImm - 1; 1910 return true; 1911 } 1912 1913 return false; 1914 } 1915 1916 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 1917 unsigned &Immr, unsigned &Imms, 1918 bool BiggerPattern) { 1919 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 1920 "N must be a SHR/SRA operation to call this function"); 1921 1922 EVT VT = N->getValueType(0); 1923 1924 // Here we can test the type of VT and return false when the type does not 1925 // match, but since it is done prior to that call in the current context 1926 // we turned that into an assert to avoid redundant code. 1927 assert((VT == MVT::i32 || VT == MVT::i64) && 1928 "Type checking must have been done before calling this function"); 1929 1930 // Check for AND + SRL doing several bits extract. 1931 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 1932 return true; 1933 1934 // We're looking for a shift of a shift. 1935 uint64_t ShlImm = 0; 1936 uint64_t TruncBits = 0; 1937 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 1938 Opd0 = N->getOperand(0).getOperand(0); 1939 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 1940 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 1941 // We are looking for a shift of truncate. Truncate from i64 to i32 could 1942 // be considered as setting high 32 bits as zero. Our strategy here is to 1943 // always generate 64bit UBFM. This consistency will help the CSE pass 1944 // later find more redundancy. 1945 Opd0 = N->getOperand(0).getOperand(0); 1946 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 1947 VT = Opd0.getValueType(); 1948 assert(VT == MVT::i64 && "the promoted type should be i64"); 1949 } else if (BiggerPattern) { 1950 // Let's pretend a 0 shift left has been performed. 
1951 // FIXME: Currently we limit this to the bigger pattern case, 1952 // because some optimizations expect AND and not UBFM 1953 Opd0 = N->getOperand(0); 1954 } else 1955 return false; 1956 1957 // Missing combines/constant folding may have left us with strange 1958 // constants. 1959 if (ShlImm >= VT.getSizeInBits()) { 1960 LLVM_DEBUG( 1961 (dbgs() << N 1962 << ": Found large shift immediate, this should not happen\n")); 1963 return false; 1964 } 1965 1966 uint64_t SrlImm = 0; 1967 if (!isIntImmediate(N->getOperand(1), SrlImm)) 1968 return false; 1969 1970 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 1971 "bad amount in shift node!"); 1972 int immr = SrlImm - ShlImm; 1973 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 1974 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 1975 // SRA requires a signed extraction 1976 if (VT == MVT::i32) 1977 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 1978 else 1979 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; 1980 return true; 1981 } 1982 1983 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 1984 assert(N->getOpcode() == ISD::SIGN_EXTEND); 1985 1986 EVT VT = N->getValueType(0); 1987 EVT NarrowVT = N->getOperand(0)->getValueType(0); 1988 if (VT != MVT::i64 || NarrowVT != MVT::i32) 1989 return false; 1990 1991 uint64_t ShiftImm; 1992 SDValue Op = N->getOperand(0); 1993 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 1994 return false; 1995 1996 SDLoc dl(N); 1997 // Extend the incoming operand of the shift to 64-bits. 1998 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 1999 unsigned Immr = ShiftImm; 2000 unsigned Imms = NarrowVT.getSizeInBits() - 1; 2001 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2002 CurDAG->getTargetConstant(Imms, dl, VT)}; 2003 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 2004 return true; 2005 } 2006 2007 /// Try to form fcvtl2 instructions from a floating-point extend of a high-half 2008 /// extract of a subvector. 2009 bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) { 2010 assert(N->getOpcode() == ISD::FP_EXTEND); 2011 2012 // There are 2 forms of fcvtl2 - extend to double or extend to float. 2013 SDValue Extract = N->getOperand(0); 2014 EVT VT = N->getValueType(0); 2015 EVT NarrowVT = Extract.getValueType(); 2016 if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) && 2017 (VT != MVT::v4f32 || NarrowVT != MVT::v4f16)) 2018 return false; 2019 2020 // Optionally look past a bitcast. 2021 Extract = peekThroughBitcasts(Extract); 2022 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) 2023 return false; 2024 2025 // Match extract from start of high half index. 2026 // Example: v8i16 -> v4i16 means the extract must begin at index 4. 2027 unsigned ExtractIndex = Extract.getConstantOperandVal(1); 2028 if (ExtractIndex != Extract.getValueType().getVectorNumElements()) 2029 return false; 2030 2031 auto Opcode = VT == MVT::v2f64 ? 
AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16; 2032 CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0)); 2033 return true; 2034 } 2035 2036 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 2037 SDValue &Opd0, unsigned &Immr, unsigned &Imms, 2038 unsigned NumberOfIgnoredLowBits = 0, 2039 bool BiggerPattern = false) { 2040 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 2041 return false; 2042 2043 switch (N->getOpcode()) { 2044 default: 2045 if (!N->isMachineOpcode()) 2046 return false; 2047 break; 2048 case ISD::AND: 2049 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, 2050 NumberOfIgnoredLowBits, BiggerPattern); 2051 case ISD::SRL: 2052 case ISD::SRA: 2053 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); 2054 2055 case ISD::SIGN_EXTEND_INREG: 2056 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); 2057 } 2058 2059 unsigned NOpc = N->getMachineOpcode(); 2060 switch (NOpc) { 2061 default: 2062 return false; 2063 case AArch64::SBFMWri: 2064 case AArch64::UBFMWri: 2065 case AArch64::SBFMXri: 2066 case AArch64::UBFMXri: 2067 Opc = NOpc; 2068 Opd0 = N->getOperand(0); 2069 Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 2070 Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 2071 return true; 2072 } 2073 // Unreachable 2074 return false; 2075 } 2076 2077 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { 2078 unsigned Opc, Immr, Imms; 2079 SDValue Opd0; 2080 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) 2081 return false; 2082 2083 EVT VT = N->getValueType(0); 2084 SDLoc dl(N); 2085 2086 // If the bit extract operation is 64bit but the original type is 32bit, we 2087 // need to add one EXTRACT_SUBREG. 2088 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { 2089 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), 2090 CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; 2091 2092 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); 2093 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 2094 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, 2095 MVT::i32, SDValue(BFM, 0), SubReg)); 2096 return true; 2097 } 2098 2099 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2100 CurDAG->getTargetConstant(Imms, dl, VT)}; 2101 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2102 return true; 2103 } 2104 2105 /// Does DstMask form a complementary pair with the mask provided by 2106 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, 2107 /// this asks whether DstMask zeroes precisely those bits that will be set by 2108 /// the other half. 2109 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, 2110 unsigned NumberOfIgnoredHighBits, EVT VT) { 2111 assert((VT == MVT::i32 || VT == MVT::i64) && 2112 "i32 or i64 mask type expected!"); 2113 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; 2114 2115 APInt SignificantDstMask = APInt(BitWidth, DstMask); 2116 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); 2117 2118 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && 2119 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue(); 2120 } 2121 2122 // Look for bits that will be useful for later uses. 2123 // A bit is consider useless as soon as it is dropped and never used 2124 // before it as been dropped. 
2125 // E.g., looking for useful bit of x 2126 // 1. y = x & 0x7 2127 // 2. z = y >> 2 2128 // After #1, x useful bits are 0x7, then the useful bits of x, live through 2129 // y. 2130 // After #2, the useful bits of x are 0x4. 2131 // However, if x is used on an unpredicatable instruction, then all its bits 2132 // are useful. 2133 // E.g. 2134 // 1. y = x & 0x7 2135 // 2. z = y >> 2 2136 // 3. str x, [@x] 2137 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); 2138 2139 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, 2140 unsigned Depth) { 2141 uint64_t Imm = 2142 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2143 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); 2144 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); 2145 getUsefulBits(Op, UsefulBits, Depth + 1); 2146 } 2147 2148 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, 2149 uint64_t Imm, uint64_t MSB, 2150 unsigned Depth) { 2151 // inherit the bitwidth value 2152 APInt OpUsefulBits(UsefulBits); 2153 OpUsefulBits = 1; 2154 2155 if (MSB >= Imm) { 2156 OpUsefulBits <<= MSB - Imm + 1; 2157 --OpUsefulBits; 2158 // The interesting part will be in the lower part of the result 2159 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2160 // The interesting part was starting at Imm in the argument 2161 OpUsefulBits <<= Imm; 2162 } else { 2163 OpUsefulBits <<= MSB + 1; 2164 --OpUsefulBits; 2165 // The interesting part will be shifted in the result 2166 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; 2167 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2168 // The interesting part was at zero in the argument 2169 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); 2170 } 2171 2172 UsefulBits &= OpUsefulBits; 2173 } 2174 2175 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, 2176 unsigned Depth) { 2177 uint64_t Imm = 2178 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2179 uint64_t MSB = 2180 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2181 2182 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 2183 } 2184 2185 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, 2186 unsigned Depth) { 2187 uint64_t ShiftTypeAndValue = 2188 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2189 APInt Mask(UsefulBits); 2190 Mask.clearAllBits(); 2191 Mask.flipAllBits(); 2192 2193 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { 2194 // Shift Left 2195 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2196 Mask <<= ShiftAmt; 2197 getUsefulBits(Op, Mask, Depth + 1); 2198 Mask.lshrInPlace(ShiftAmt); 2199 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 2200 // Shift Right 2201 // We do not handle AArch64_AM::ASR, because the sign will change the 2202 // number of useful bits 2203 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2204 Mask.lshrInPlace(ShiftAmt); 2205 getUsefulBits(Op, Mask, Depth + 1); 2206 Mask <<= ShiftAmt; 2207 } else 2208 return; 2209 2210 UsefulBits &= Mask; 2211 } 2212 2213 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 2214 unsigned Depth) { 2215 uint64_t Imm = 2216 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2217 uint64_t MSB = 2218 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 2219 2220 APInt 
OpUsefulBits(UsefulBits); 2221 OpUsefulBits = 1; 2222 2223 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 2224 ResultUsefulBits.flipAllBits(); 2225 APInt Mask(UsefulBits.getBitWidth(), 0); 2226 2227 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 2228 2229 if (MSB >= Imm) { 2230 // The instruction is a BFXIL. 2231 uint64_t Width = MSB - Imm + 1; 2232 uint64_t LSB = Imm; 2233 2234 OpUsefulBits <<= Width; 2235 --OpUsefulBits; 2236 2237 if (Op.getOperand(1) == Orig) { 2238 // Copy the low bits from the result to bits starting from LSB. 2239 Mask = ResultUsefulBits & OpUsefulBits; 2240 Mask <<= LSB; 2241 } 2242 2243 if (Op.getOperand(0) == Orig) 2244 // Bits starting from LSB in the input contribute to the result. 2245 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2246 } else { 2247 // The instruction is a BFI. 2248 uint64_t Width = MSB + 1; 2249 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 2250 2251 OpUsefulBits <<= Width; 2252 --OpUsefulBits; 2253 OpUsefulBits <<= LSB; 2254 2255 if (Op.getOperand(1) == Orig) { 2256 // Copy the bits from the result to the zero bits. 2257 Mask = ResultUsefulBits & OpUsefulBits; 2258 Mask.lshrInPlace(LSB); 2259 } 2260 2261 if (Op.getOperand(0) == Orig) 2262 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2263 } 2264 2265 UsefulBits &= Mask; 2266 } 2267 2268 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2269 SDValue Orig, unsigned Depth) { 2270 2271 // Users of this node should have already been instruction selected 2272 // FIXME: Can we turn that into an assert? 2273 if (!UserNode->isMachineOpcode()) 2274 return; 2275 2276 switch (UserNode->getMachineOpcode()) { 2277 default: 2278 return; 2279 case AArch64::ANDSWri: 2280 case AArch64::ANDSXri: 2281 case AArch64::ANDWri: 2282 case AArch64::ANDXri: 2283 // We increment Depth only when we call the getUsefulBits 2284 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2285 Depth); 2286 case AArch64::UBFMWri: 2287 case AArch64::UBFMXri: 2288 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2289 2290 case AArch64::ORRWrs: 2291 case AArch64::ORRXrs: 2292 if (UserNode->getOperand(1) != Orig) 2293 return; 2294 return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2295 Depth); 2296 case AArch64::BFMWri: 2297 case AArch64::BFMXri: 2298 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2299 2300 case AArch64::STRBBui: 2301 case AArch64::STURBBi: 2302 if (UserNode->getOperand(0) != Orig) 2303 return; 2304 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); 2305 return; 2306 2307 case AArch64::STRHHui: 2308 case AArch64::STURHHi: 2309 if (UserNode->getOperand(0) != Orig) 2310 return; 2311 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2312 return; 2313 } 2314 } 2315 2316 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2317 if (Depth >= SelectionDAG::MaxRecursionDepth) 2318 return; 2319 // Initialize UsefulBits 2320 if (!Depth) { 2321 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2322 // At the beginning, assume every produced bits is useful 2323 UsefulBits = APInt(Bitwidth, 0); 2324 UsefulBits.flipAllBits(); 2325 } 2326 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2327 2328 for (SDNode *Node : Op.getNode()->uses()) { 2329 // A use cannot produce useful bits 2330 APInt UsefulBitsForUse = APInt(UsefulBits); 2331 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 2332 UsersUsefulBits |= UsefulBitsForUse; 2333 } 2334 // UsefulBits contains the produced bits that 
are meaningful for the 2335 // current definition, thus a user cannot make a bit meaningful at 2336 // this point 2337 UsefulBits &= UsersUsefulBits; 2338 } 2339 2340 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 2341 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 2342 /// 0, return Op unchanged. 2343 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 2344 if (ShlAmount == 0) 2345 return Op; 2346 2347 EVT VT = Op.getValueType(); 2348 SDLoc dl(Op); 2349 unsigned BitWidth = VT.getSizeInBits(); 2350 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2351 2352 SDNode *ShiftNode; 2353 if (ShlAmount > 0) { 2354 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 2355 ShiftNode = CurDAG->getMachineNode( 2356 UBFMOpc, dl, VT, Op, 2357 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 2358 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 2359 } else { 2360 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 2361 assert(ShlAmount < 0 && "expected right shift"); 2362 int ShrAmount = -ShlAmount; 2363 ShiftNode = CurDAG->getMachineNode( 2364 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 2365 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 2366 } 2367 2368 return SDValue(ShiftNode, 0); 2369 } 2370 2371 /// Does this tree qualify as an attempt to move a bitfield into position, 2372 /// essentially "(and (shl VAL, N), Mask)". 2373 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 2374 bool BiggerPattern, 2375 SDValue &Src, int &ShiftAmount, 2376 int &MaskWidth) { 2377 EVT VT = Op.getValueType(); 2378 unsigned BitWidth = VT.getSizeInBits(); 2379 (void)BitWidth; 2380 assert(BitWidth == 32 || BitWidth == 64); 2381 2382 KnownBits Known = CurDAG->computeKnownBits(Op); 2383 2384 // Non-zero in the sense that they're not provably zero, which is the key 2385 // point if we want to use this value 2386 uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 2387 2388 // Discard a constant AND mask if present. It's safe because the node will 2389 // already have been factored into the computeKnownBits calculation above. 2390 uint64_t AndImm; 2391 if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { 2392 assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0); 2393 Op = Op.getOperand(0); 2394 } 2395 2396 // Don't match if the SHL has more than one use, since then we'll end up 2397 // generating SHL+UBFIZ instead of just keeping SHL+AND. 2398 if (!BiggerPattern && !Op.hasOneUse()) 2399 return false; 2400 2401 uint64_t ShlImm; 2402 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 2403 return false; 2404 Op = Op.getOperand(0); 2405 2406 if (!isShiftedMask_64(NonZeroBits)) 2407 return false; 2408 2409 ShiftAmount = countTrailingZeros(NonZeroBits); 2410 MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); 2411 2412 // BFI encompasses sufficiently many nodes that it's worth inserting an extra 2413 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL 2414 // amount. BiggerPattern is true when this pattern is being matched for BFI, 2415 // BiggerPattern is false when this pattern is being matched for UBFIZ, in 2416 // which case it is not profitable to insert an extra shift. 
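  // For example (illustrative): (and (shl x, 3), 0xf0) has NonZeroBits == 0xf0,
  // so ShiftAmount == 4 and MaskWidth == 4. The SHL amount (3) differs from
  // ShiftAmount, so the UBFIZ match (BiggerPattern == false) bails out below,
  // while the BFI match compensates by calling getLeftShift with
  // ShlImm - ShiftAmount == -1, i.e. an extra LSR #1 of x.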
2417 if (ShlImm - ShiftAmount != 0 && !BiggerPattern) 2418 return false; 2419 Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); 2420 2421 return true; 2422 } 2423 2424 static bool isShiftedMask(uint64_t Mask, EVT VT) { 2425 assert(VT == MVT::i32 || VT == MVT::i64); 2426 if (VT == MVT::i32) 2427 return isShiftedMask_32(Mask); 2428 return isShiftedMask_64(Mask); 2429 } 2430 2431 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 2432 // inserted only sets known zero bits. 2433 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 2434 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2435 2436 EVT VT = N->getValueType(0); 2437 if (VT != MVT::i32 && VT != MVT::i64) 2438 return false; 2439 2440 unsigned BitWidth = VT.getSizeInBits(); 2441 2442 uint64_t OrImm; 2443 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 2444 return false; 2445 2446 // Skip this transformation if the ORR immediate can be encoded in the ORR. 2447 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 2448 // performance neutral. 2449 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 2450 return false; 2451 2452 uint64_t MaskImm; 2453 SDValue And = N->getOperand(0); 2454 // Must be a single use AND with an immediate operand. 2455 if (!And.hasOneUse() || 2456 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 2457 return false; 2458 2459 // Compute the Known Zero for the AND as this allows us to catch more general 2460 // cases than just looking for AND with imm. 2461 KnownBits Known = CurDAG->computeKnownBits(And); 2462 2463 // Non-zero in the sense that they're not provably zero, which is the key 2464 // point if we want to use this value. 2465 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 2466 2467 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 2468 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 2469 return false; 2470 2471 // The bits being inserted must only set those bits that are known to be zero. 2472 if ((OrImm & NotKnownZero) != 0) { 2473 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 2474 // currently handle this case. 2475 return false; 2476 } 2477 2478 // BFI/BFXIL dst, src, #lsb, #width. 2479 int LSB = countTrailingOnes(NotKnownZero); 2480 int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation(); 2481 2482 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 2483 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2484 unsigned ImmS = Width - 1; 2485 2486 // If we're creating a BFI instruction avoid cases where we need more 2487 // instructions to materialize the BFI constant as compared to the original 2488 // ORR. A BFXIL will use the same constant as the original ORR, so the code 2489 // should be no worse in this case. 2490 bool IsBFI = LSB != 0; 2491 uint64_t BFIImm = OrImm >> LSB; 2492 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 2493 // We have a BFI instruction and we know the constant can't be materialized 2494 // with a ORR-immediate with the zero register. 2495 unsigned OrChunks = 0, BFIChunks = 0; 2496 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 2497 if (((OrImm >> Shift) & 0xFFFF) != 0) 2498 ++OrChunks; 2499 if (((BFIImm >> Shift) & 0xFFFF) != 0) 2500 ++BFIChunks; 2501 } 2502 if (BFIChunks > OrChunks) 2503 return false; 2504 } 2505 2506 // Materialize the constant to be inserted. 2507 SDLoc DL(N); 2508 unsigned MOVIOpc = VT == MVT::i32 ? 
      AArch64::MOVi32imm : AArch64::MOVi64imm;
2509   SDNode *MOVI = CurDAG->getMachineNode(
2510       MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2511 
2512   // Create the BFI/BFXIL instruction.
2513   SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2514                    CurDAG->getTargetConstant(ImmR, DL, VT),
2515                    CurDAG->getTargetConstant(ImmS, DL, VT)};
2516   unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2517   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2518   return true;
2519 }
2520 
2521 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2522                                       SelectionDAG *CurDAG) {
2523   assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2524 
2525   EVT VT = N->getValueType(0);
2526   if (VT != MVT::i32 && VT != MVT::i64)
2527     return false;
2528 
2529   unsigned BitWidth = VT.getSizeInBits();
2530 
2531   // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2532   // have the expected shape. Try to undo that.
2533 
2534   unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2535   unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2536 
2537   // Given an OR operation, check if we have the following pattern
2538   // ubfm c, b, imm, imm2 (or something that does the same job, see
2539   // isBitfieldExtractOp)
2540   // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2541   // countTrailingZeros(mask2) == imm2 - imm + 1
2542   // f = d | c
2543   // if yes, replace the OR instruction with:
2544   // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2545 
2546   // OR is commutative, check all combinations of operand order and values of
2547   // BiggerPattern, i.e.
2548   // Opd0, Opd1, BiggerPattern=false
2549   // Opd1, Opd0, BiggerPattern=false
2550   // Opd0, Opd1, BiggerPattern=true
2551   // Opd1, Opd0, BiggerPattern=true
2552   // Several of these combinations may match, so check with BiggerPattern=false
2553   // first since that will produce better results by matching more instructions
2554   // and/or inserting fewer extra instructions.
2555   for (int I = 0; I < 4; ++I) {
2556 
2557     SDValue Dst, Src;
2558     unsigned ImmR, ImmS;
2559     bool BiggerPattern = I / 2;
2560     SDValue OrOpd0Val = N->getOperand(I % 2);
2561     SDNode *OrOpd0 = OrOpd0Val.getNode();
2562     SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2563     SDNode *OrOpd1 = OrOpd1Val.getNode();
2564 
2565     unsigned BFXOpc;
2566     int DstLSB, Width;
2567     if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2568                             NumberOfIgnoredLowBits, BiggerPattern)) {
2569       // Check that the returned opcode is compatible with the pattern,
2570       // i.e., same type and zero extended (U and not S)
2571       if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2572           (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2573         continue;
2574 
2575       // Compute the width of the bitfield insertion
2576       DstLSB = 0;
2577       Width = ImmS - ImmR + 1;
2578       // FIXME: This constraint is to catch bitfield insertion; we may
2579       // want to widen the pattern if we want to grab the general bitfield
2580       // move case.
2581       if (Width <= 0)
2582         continue;
2583 
2584       // If the mask on the insertee is correct, we have a BFXIL operation. We
2585       // can share the ImmR and ImmS values from the already-computed UBFM.
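      // For example (illustrative): f = ((x >> 4) & 0xff) | (y & 0xffffff00)
      // takes this path and selects to a single BFM with ImmR == 4 and
      // ImmS == 11 (the BFXIL alias), copying x[11:4] into the low byte of y.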
2586 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, 2587 BiggerPattern, 2588 Src, DstLSB, Width)) { 2589 ImmR = (BitWidth - DstLSB) % BitWidth; 2590 ImmS = Width - 1; 2591 } else 2592 continue; 2593 2594 // Check the second part of the pattern 2595 EVT VT = OrOpd1Val.getValueType(); 2596 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); 2597 2598 // Compute the Known Zero for the candidate of the first operand. 2599 // This allows to catch more general case than just looking for 2600 // AND with imm. Indeed, simplify-demanded-bits may have removed 2601 // the AND instruction because it proves it was useless. 2602 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 2603 2604 // Check if there is enough room for the second operand to appear 2605 // in the first one 2606 APInt BitsToBeInserted = 2607 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 2608 2609 if ((BitsToBeInserted & ~Known.Zero) != 0) 2610 continue; 2611 2612 // Set the first operand 2613 uint64_t Imm; 2614 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 2615 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 2616 // In that case, we can eliminate the AND 2617 Dst = OrOpd1->getOperand(0); 2618 else 2619 // Maybe the AND has been removed by simplify-demanded-bits 2620 // or is useful because it discards more bits 2621 Dst = OrOpd1Val; 2622 2623 // both parts match 2624 SDLoc DL(N); 2625 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 2626 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2627 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2628 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2629 return true; 2630 } 2631 2632 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 2633 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 2634 // mask (e.g., 0x000ffff0). 2635 uint64_t Mask0Imm, Mask1Imm; 2636 SDValue And0 = N->getOperand(0); 2637 SDValue And1 = N->getOperand(1); 2638 if (And0.hasOneUse() && And1.hasOneUse() && 2639 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 2640 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 2641 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 2642 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 2643 2644 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 2645 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 2646 // bits to be inserted. 2647 if (isShiftedMask(Mask0Imm, VT)) { 2648 std::swap(And0, And1); 2649 std::swap(Mask0Imm, Mask1Imm); 2650 } 2651 2652 SDValue Src = And1->getOperand(0); 2653 SDValue Dst = And0->getOperand(0); 2654 unsigned LSB = countTrailingZeros(Mask1Imm); 2655 int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation(); 2656 2657 // The BFXIL inserts the low-order bits from a source register, so right 2658 // shift the needed bits into place. 2659 SDLoc DL(N); 2660 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2661 SDNode *LSR = CurDAG->getMachineNode( 2662 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT), 2663 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 2664 2665 // BFXIL is an alias of BFM, so translate to BFM operands. 2666 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2667 unsigned ImmS = Width - 1; 2668 2669 // Create the BFXIL instruction. 
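    // For example (illustrative): with VT == i32, Mask0Imm == 0xffff000f and
    // Mask1Imm == 0x0000fff0, we get LSB == 4 and Width == 12, so the inserted
    // value is shifted down with UBFM (LSR #4) and merged with BFM #28, #11,
    // i.e. a 12-bit insert at bit 4 of Dst.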
2670 SDValue Ops[] = {Dst, SDValue(LSR, 0), 2671 CurDAG->getTargetConstant(ImmR, DL, VT), 2672 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2673 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2674 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2675 return true; 2676 } 2677 2678 return false; 2679 } 2680 2681 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 2682 if (N->getOpcode() != ISD::OR) 2683 return false; 2684 2685 APInt NUsefulBits; 2686 getUsefulBits(SDValue(N, 0), NUsefulBits); 2687 2688 // If all bits are not useful, just return UNDEF. 2689 if (!NUsefulBits) { 2690 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 2691 return true; 2692 } 2693 2694 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 2695 return true; 2696 2697 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 2698 } 2699 2700 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 2701 /// equivalent of a left shift by a constant amount followed by an and masking 2702 /// out a contiguous set of bits. 2703 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 2704 if (N->getOpcode() != ISD::AND) 2705 return false; 2706 2707 EVT VT = N->getValueType(0); 2708 if (VT != MVT::i32 && VT != MVT::i64) 2709 return false; 2710 2711 SDValue Op0; 2712 int DstLSB, Width; 2713 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 2714 Op0, DstLSB, Width)) 2715 return false; 2716 2717 // ImmR is the rotate right amount. 2718 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 2719 // ImmS is the most significant bit of the source to be moved. 2720 unsigned ImmS = Width - 1; 2721 2722 SDLoc DL(N); 2723 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 2724 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2725 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2726 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2727 return true; 2728 } 2729 2730 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 2731 /// variable shift/rotate instructions. 2732 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 2733 EVT VT = N->getValueType(0); 2734 2735 unsigned Opc; 2736 switch (N->getOpcode()) { 2737 case ISD::ROTR: 2738 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 2739 break; 2740 case ISD::SHL: 2741 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 2742 break; 2743 case ISD::SRL: 2744 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 2745 break; 2746 case ISD::SRA: 2747 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 2748 break; 2749 default: 2750 return false; 2751 } 2752 2753 uint64_t Size; 2754 uint64_t Bits; 2755 if (VT == MVT::i32) { 2756 Bits = 5; 2757 Size = 32; 2758 } else if (VT == MVT::i64) { 2759 Bits = 6; 2760 Size = 64; 2761 } else 2762 return false; 2763 2764 SDValue ShiftAmt = N->getOperand(1); 2765 SDLoc DL(N); 2766 SDValue NewShiftAmt; 2767 2768 // Skip over an extend of the shift amount. 2769 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 2770 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 2771 ShiftAmt = ShiftAmt->getOperand(0); 2772 2773 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 2774 SDValue Add0 = ShiftAmt->getOperand(0); 2775 SDValue Add1 = ShiftAmt->getOperand(1); 2776 uint64_t Add0Imm; 2777 uint64_t Add1Imm; 2778 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 2779 // to avoid the ADD/SUB. 
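    // For example (illustrative): (srl i64 %x, (add i64 %y, 64)) selects to
    // LSRV %x, %y, since the variable shift only consumes the low 6 bits of
    // the shift amount; the AND-masked case below works on the same principle.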
2780 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) 2781 NewShiftAmt = Add0; 2782 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2783 // generate a NEG instead of a SUB of a constant. 2784 else if (ShiftAmt->getOpcode() == ISD::SUB && 2785 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 2786 (Add0Imm % Size == 0)) { 2787 unsigned NegOpc; 2788 unsigned ZeroReg; 2789 EVT SubVT = ShiftAmt->getValueType(0); 2790 if (SubVT == MVT::i32) { 2791 NegOpc = AArch64::SUBWrr; 2792 ZeroReg = AArch64::WZR; 2793 } else { 2794 assert(SubVT == MVT::i64); 2795 NegOpc = AArch64::SUBXrr; 2796 ZeroReg = AArch64::XZR; 2797 } 2798 SDValue Zero = 2799 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 2800 MachineSDNode *Neg = 2801 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 2802 NewShiftAmt = SDValue(Neg, 0); 2803 } else 2804 return false; 2805 } else { 2806 // If the shift amount is masked with an AND, check that the mask covers the 2807 // bits that are implicitly ANDed off by the above opcodes and if so, skip 2808 // the AND. 2809 uint64_t MaskImm; 2810 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && 2811 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) 2812 return false; 2813 2814 if (countTrailingOnes(MaskImm) < Bits) 2815 return false; 2816 2817 NewShiftAmt = ShiftAmt->getOperand(0); 2818 } 2819 2820 // Narrow/widen the shift amount to match the size of the shift operation. 2821 if (VT == MVT::i32) 2822 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 2823 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 2824 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 2825 MachineSDNode *Ext = CurDAG->getMachineNode( 2826 AArch64::SUBREG_TO_REG, DL, VT, 2827 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 2828 NewShiftAmt = SDValue(Ext, 0); 2829 } 2830 2831 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 2832 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2833 return true; 2834 } 2835 2836 bool 2837 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 2838 unsigned RegWidth) { 2839 APFloat FVal(0.0); 2840 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 2841 FVal = CN->getValueAPF(); 2842 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 2843 // Some otherwise illegal constants are allowed in this case. 2844 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || 2845 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) 2846 return false; 2847 2848 ConstantPoolSDNode *CN = 2849 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); 2850 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); 2851 } else 2852 return false; 2853 2854 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits 2855 // is between 1 and 32 for a destination w-register, or 1 and 64 for an 2856 // x-register. 2857 // 2858 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we 2859 // want THIS_NODE to be 2^fbits. This is much easier to deal with using 2860 // integers. 2861 bool IsExact; 2862 2863 // fbits is between 1 and 64 in the worst-case, which means the fmul 2864 // could have 2^64 as an actual operand. Need 65 bits of precision. 2865 APSInt IntVal(65, true); 2866 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); 2867 2868 // N.b. isPowerOf2 also checks for > 0. 
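  // For example (illustrative): (fp_to_sint (fmul %f, 65536.0)) with a 32-bit
  // result gives FVal == 2^16, so FBits == 16 and the multiply folds into a
  // fixed-point FCVTZS with #16 fractional bits.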
2869   if (!IsExact || !IntVal.isPowerOf2()) return false;
2870   unsigned FBits = IntVal.logBase2();
2871 
2872   // Checks above should have guaranteed that we haven't lost information in
2873   // finding FBits, but it must still be in range.
2874   if (FBits == 0 || FBits > RegWidth) return false;
2875 
2876   FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2877   return true;
2878 }
2879 
2880 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
2881 // integer values of its fields, and combines them into a single value to be
2882 // used in the MRS/MSR instruction.
2883 static int getIntOperandFromRegisterString(StringRef RegString) {
2884   SmallVector<StringRef, 5> Fields;
2885   RegString.split(Fields, ':');
2886 
2887   if (Fields.size() == 1)
2888     return -1;
2889 
2890   assert(Fields.size() == 5
2891             && "Invalid number of fields in read register string");
2892 
2893   SmallVector<int, 5> Ops;
2894   bool AllIntFields = true;
2895 
2896   for (StringRef Field : Fields) {
2897     unsigned IntField;
2898     AllIntFields &= !Field.getAsInteger(10, IntField);
2899     Ops.push_back(IntField);
2900   }
2901 
2902   assert(AllIntFields &&
2903          "Unexpected non-integer value in special register string.");
2904 
2905   // Need to combine the integer fields of the string into a single value
2906   // based on the bit encoding of the MRS/MSR instruction.
2907   return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2908          (Ops[3] << 3) | (Ops[4]);
2909 }
2910 
2911 // Lower the read_register intrinsic to an MRS instruction node if the special
2912 // register string argument is either of the form detailed in the ACLE (the
2913 // form described in getIntOperandFromRegisterString) or is a named register
2914 // known by the MRS SysReg mapper.
2915 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2916   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2917   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2918   SDLoc DL(N);
2919 
2920   int Reg = getIntOperandFromRegisterString(RegString->getString());
2921   if (Reg != -1) {
2922     ReplaceNode(N, CurDAG->getMachineNode(
2923                        AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2924                        CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2925                        N->getOperand(0)));
2926     return true;
2927   }
2928 
2929   // Use the sysreg mapper to map the remaining possible strings to the
2930   // value for the register to be used for the instruction operand.
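  // For example (illustrative): the named form "TPIDR_EL0" is resolved here by
  // the sysreg mapper, while the equivalent generic string "3:3:13:0:2" would
  // already have been handled by getIntOperandFromRegisterString above; both
  // yield the same MRS encoding.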
2931 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 2932 if (TheReg && TheReg->Readable && 2933 TheReg->haveFeatures(Subtarget->getFeatureBits())) 2934 Reg = TheReg->Encoding; 2935 else 2936 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 2937 2938 if (Reg != -1) { 2939 ReplaceNode(N, CurDAG->getMachineNode( 2940 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other, 2941 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2942 N->getOperand(0))); 2943 return true; 2944 } 2945 2946 if (RegString->getString() == "pc") { 2947 ReplaceNode(N, CurDAG->getMachineNode( 2948 AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other, 2949 CurDAG->getTargetConstant(0, DL, MVT::i32), 2950 N->getOperand(0))); 2951 return true; 2952 } 2953 2954 return false; 2955 } 2956 2957 // Lower the write_register intrinsic to an MSR instruction node if the special 2958 // register string argument is either of the form detailed in the ALCE (the 2959 // form described in getIntOperandsFromRegsterString) or is a named register 2960 // known by the MSR SysReg mapper. 2961 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { 2962 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 2963 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 2964 SDLoc DL(N); 2965 2966 int Reg = getIntOperandFromRegisterString(RegString->getString()); 2967 if (Reg != -1) { 2968 ReplaceNode( 2969 N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, 2970 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2971 N->getOperand(2), N->getOperand(0))); 2972 return true; 2973 } 2974 2975 // Check if the register was one of those allowed as the pstatefield value in 2976 // the MSR (immediate) instruction. To accept the values allowed in the 2977 // pstatefield for the MSR (immediate) instruction, we also require that an 2978 // immediate value has been provided as an argument, we know that this is 2979 // the case as it has been ensured by semantic checking. 2980 auto PMapper = AArch64PState::lookupPStateByName(RegString->getString()); 2981 if (PMapper) { 2982 assert (isa<ConstantSDNode>(N->getOperand(2)) 2983 && "Expected a constant integer expression."); 2984 unsigned Reg = PMapper->Encoding; 2985 uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 2986 unsigned State; 2987 if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) { 2988 assert(Immed < 2 && "Bad imm"); 2989 State = AArch64::MSRpstateImm1; 2990 } else { 2991 assert(Immed < 16 && "Bad imm"); 2992 State = AArch64::MSRpstateImm4; 2993 } 2994 ReplaceNode(N, CurDAG->getMachineNode( 2995 State, DL, MVT::Other, 2996 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2997 CurDAG->getTargetConstant(Immed, DL, MVT::i16), 2998 N->getOperand(0))); 2999 return true; 3000 } 3001 3002 // Use the sysreg mapper to attempt to map the remaining possible strings 3003 // to the value for the register to be used for the MSR (register) 3004 // instruction operand. 
3005 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 3006 if (TheReg && TheReg->Writeable && 3007 TheReg->haveFeatures(Subtarget->getFeatureBits())) 3008 Reg = TheReg->Encoding; 3009 else 3010 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 3011 if (Reg != -1) { 3012 ReplaceNode(N, CurDAG->getMachineNode( 3013 AArch64::MSR, DL, MVT::Other, 3014 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3015 N->getOperand(2), N->getOperand(0))); 3016 return true; 3017 } 3018 3019 return false; 3020 } 3021 3022 /// We've got special pseudo-instructions for these 3023 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3024 unsigned Opcode; 3025 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3026 3027 // Leave IR for LSE if subtarget supports it. 3028 if (Subtarget->hasLSE()) return false; 3029 3030 if (MemTy == MVT::i8) 3031 Opcode = AArch64::CMP_SWAP_8; 3032 else if (MemTy == MVT::i16) 3033 Opcode = AArch64::CMP_SWAP_16; 3034 else if (MemTy == MVT::i32) 3035 Opcode = AArch64::CMP_SWAP_32; 3036 else if (MemTy == MVT::i64) 3037 Opcode = AArch64::CMP_SWAP_64; 3038 else 3039 llvm_unreachable("Unknown AtomicCmpSwap type"); 3040 3041 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32; 3042 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3043 N->getOperand(0)}; 3044 SDNode *CmpSwap = CurDAG->getMachineNode( 3045 Opcode, SDLoc(N), 3046 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 3047 3048 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3049 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3050 3051 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3052 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3053 CurDAG->RemoveDeadNode(N); 3054 3055 return true; 3056 } 3057 3058 bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base, 3059 SDValue &Offset) { 3060 auto C = dyn_cast<ConstantSDNode>(N); 3061 if (!C) 3062 return false; 3063 3064 auto Ty = N->getValueType(0); 3065 3066 int64_t Imm = C->getSExtValue(); 3067 SDLoc DL(N); 3068 3069 if ((Imm >= -128) && (Imm <= 127)) { 3070 Base = CurDAG->getTargetConstant(Imm, DL, Ty); 3071 Offset = CurDAG->getTargetConstant(0, DL, Ty); 3072 return true; 3073 } 3074 3075 if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) { 3076 Base = CurDAG->getTargetConstant(Imm/256, DL, Ty); 3077 Offset = CurDAG->getTargetConstant(8, DL, Ty); 3078 return true; 3079 } 3080 3081 return false; 3082 } 3083 3084 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) { 3085 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3086 const int64_t ImmVal = CNode->getZExtValue(); 3087 SDLoc DL(N); 3088 3089 switch (VT.SimpleTy) { 3090 case MVT::i8: 3091 if ((ImmVal & 0xFF) == ImmVal) { 3092 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3093 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3094 return true; 3095 } 3096 break; 3097 case MVT::i16: 3098 case MVT::i32: 3099 case MVT::i64: 3100 if ((ImmVal & 0xFF) == ImmVal) { 3101 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3102 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3103 return true; 3104 } else if ((ImmVal & 0xFF00) == ImmVal) { 3105 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3106 Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32); 3107 return true; 3108 } 3109 break; 3110 default: 3111 break; 3112 } 3113 } 3114 3115 return false; 3116 } 3117 3118 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { 3119 if (auto 
CNode = dyn_cast<ConstantSDNode>(N)) { 3120 int64_t ImmVal = CNode->getSExtValue(); 3121 SDLoc DL(N); 3122 if (ImmVal >= -128 && ImmVal < 128) { 3123 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3124 return true; 3125 } 3126 } 3127 return false; 3128 } 3129 3130 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, SDValue &Imm) { 3131 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3132 uint64_t ImmVal = CNode->getSExtValue(); 3133 SDLoc DL(N); 3134 ImmVal = ImmVal & 0xFF; 3135 if (ImmVal < 256) { 3136 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3137 return true; 3138 } 3139 } 3140 return false; 3141 } 3142 3143 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) { 3144 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3145 uint64_t ImmVal = CNode->getZExtValue(); 3146 SDLoc DL(N); 3147 3148 // Shift mask depending on type size. 3149 switch (VT.SimpleTy) { 3150 case MVT::i8: 3151 ImmVal &= 0xFF; 3152 ImmVal |= ImmVal << 8; 3153 ImmVal |= ImmVal << 16; 3154 ImmVal |= ImmVal << 32; 3155 break; 3156 case MVT::i16: 3157 ImmVal &= 0xFFFF; 3158 ImmVal |= ImmVal << 16; 3159 ImmVal |= ImmVal << 32; 3160 break; 3161 case MVT::i32: 3162 ImmVal &= 0xFFFFFFFF; 3163 ImmVal |= ImmVal << 32; 3164 break; 3165 case MVT::i64: 3166 break; 3167 default: 3168 llvm_unreachable("Unexpected type"); 3169 } 3170 3171 uint64_t encoding; 3172 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { 3173 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); 3174 return true; 3175 } 3176 } 3177 return false; 3178 } 3179 3180 // This method is only needed to "cast" i64s into i32s when the value 3181 // is a valid shift which has been splatted into a vector with i64 elements. 3182 // Every other type is fine in tablegen. 3183 bool AArch64DAGToDAGISel::SelectSVEShiftImm64(SDValue N, uint64_t Low, 3184 uint64_t High, SDValue &Imm) { 3185 if (auto *CN = dyn_cast<ConstantSDNode>(N)) { 3186 uint64_t ImmVal = CN->getZExtValue(); 3187 SDLoc DL(N); 3188 3189 if (ImmVal >= Low && ImmVal <= High) { 3190 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3191 return true; 3192 } 3193 } 3194 3195 return false; 3196 } 3197 3198 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 3199 // tagp(FrameIndex, IRGstack, tag_offset): 3200 // since the offset between FrameIndex and IRGstack is a compile-time 3201 // constant, this can be lowered to a single ADDG instruction. 
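// (The TAGPstack pseudo emitted below carries the frame index and tag offset
// so that, once frame layout is known, they can be folded into that single
// ADDG.)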
3202 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 3203 return false; 3204 } 3205 3206 SDValue IRG_SP = N->getOperand(2); 3207 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 3208 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != 3209 Intrinsic::aarch64_irg_sp) { 3210 return false; 3211 } 3212 3213 const TargetLowering *TLI = getTargetLowering(); 3214 SDLoc DL(N); 3215 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 3216 SDValue FiOp = CurDAG->getTargetFrameIndex( 3217 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3218 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3219 3220 SDNode *Out = CurDAG->getMachineNode( 3221 AArch64::TAGPstack, DL, MVT::i64, 3222 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 3223 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3224 ReplaceNode(N, Out); 3225 return true; 3226 } 3227 3228 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 3229 assert(isa<ConstantSDNode>(N->getOperand(3)) && 3230 "llvm.aarch64.tagp third argument must be an immediate"); 3231 if (trySelectStackSlotTagP(N)) 3232 return; 3233 // FIXME: above applies in any case when offset between Op1 and Op2 is a 3234 // compile-time constant, not just for stack allocations. 3235 3236 // General case for unrelated pointers in Op1 and Op2. 3237 SDLoc DL(N); 3238 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3239 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 3240 {N->getOperand(1), N->getOperand(2)}); 3241 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 3242 {SDValue(N1, 0), N->getOperand(2)}); 3243 SDNode *N3 = CurDAG->getMachineNode( 3244 AArch64::ADDG, DL, MVT::i64, 3245 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 3246 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3247 ReplaceNode(N, N3); 3248 } 3249 3250 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length 3251 // vector types larger than NEON don't have a matching SubRegIndex. 3252 static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) { 3253 assert(V.getValueType().isScalableVector() && 3254 V.getValueType().getSizeInBits().getKnownMinSize() == 3255 AArch64::SVEBitsPerBlock && 3256 "Expected to extract from a packed scalable vector!"); 3257 assert(VT.isFixedLengthVector() && 3258 "Expected to extract a fixed length vector!"); 3259 3260 SDLoc DL(V); 3261 switch (VT.getSizeInBits()) { 3262 case 64: { 3263 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); 3264 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg); 3265 } 3266 case 128: { 3267 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32); 3268 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg); 3269 } 3270 default: { 3271 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 3272 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 3273 } 3274 } 3275 } 3276 3277 // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length 3278 // vector types larger than NEON don't have a matching SubRegIndex. 
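// In practice (explanatory note, based on the switch below): 64-bit and
// 128-bit fixed length values are inserted with INSERT_SUBREG into the dsub
// and zsub subregisters of an IMPLICIT_DEF container, while wider fixed
// length types fall back to COPY_TO_REGCLASS into the ZPR register class.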
3279 static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) { 3280 assert(VT.isScalableVector() && 3281 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock && 3282 "Expected to insert into a packed scalable vector!"); 3283 assert(V.getValueType().isFixedLengthVector() && 3284 "Expected to insert a fixed length vector!"); 3285 3286 SDLoc DL(V); 3287 switch (V.getValueType().getSizeInBits()) { 3288 case 64: { 3289 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); 3290 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 3291 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, 3292 SDValue(Container, 0), V, SubReg); 3293 } 3294 case 128: { 3295 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32); 3296 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 3297 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, 3298 SDValue(Container, 0), V, SubReg); 3299 } 3300 default: { 3301 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 3302 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 3303 } 3304 } 3305 } 3306 3307 void AArch64DAGToDAGISel::Select(SDNode *Node) { 3308 // If we have a custom node, we already have selected! 3309 if (Node->isMachineOpcode()) { 3310 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 3311 Node->setNodeId(-1); 3312 return; 3313 } 3314 3315 // Few custom selection stuff. 3316 EVT VT = Node->getValueType(0); 3317 3318 switch (Node->getOpcode()) { 3319 default: 3320 break; 3321 3322 case ISD::ATOMIC_CMP_SWAP: 3323 if (SelectCMP_SWAP(Node)) 3324 return; 3325 break; 3326 3327 case ISD::READ_REGISTER: 3328 if (tryReadRegister(Node)) 3329 return; 3330 break; 3331 3332 case ISD::WRITE_REGISTER: 3333 if (tryWriteRegister(Node)) 3334 return; 3335 break; 3336 3337 case ISD::ADD: 3338 if (tryMLAV64LaneV128(Node)) 3339 return; 3340 break; 3341 3342 case ISD::LOAD: { 3343 // Try to select as an indexed load. Fall through to normal processing 3344 // if we can't. 3345 if (tryIndexedLoad(Node)) 3346 return; 3347 break; 3348 } 3349 3350 case ISD::SRL: 3351 case ISD::AND: 3352 case ISD::SRA: 3353 case ISD::SIGN_EXTEND_INREG: 3354 if (tryBitfieldExtractOp(Node)) 3355 return; 3356 if (tryBitfieldInsertInZeroOp(Node)) 3357 return; 3358 LLVM_FALLTHROUGH; 3359 case ISD::ROTR: 3360 case ISD::SHL: 3361 if (tryShiftAmountMod(Node)) 3362 return; 3363 break; 3364 3365 case ISD::SIGN_EXTEND: 3366 if (tryBitfieldExtractOpFromSExt(Node)) 3367 return; 3368 break; 3369 3370 case ISD::FP_EXTEND: 3371 if (tryHighFPExt(Node)) 3372 return; 3373 break; 3374 3375 case ISD::OR: 3376 if (tryBitfieldInsertOp(Node)) 3377 return; 3378 break; 3379 3380 case ISD::EXTRACT_SUBVECTOR: { 3381 // Bail when not a "cast" like extract_subvector. 3382 if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0) 3383 break; 3384 3385 // Bail when normal isel can do the job. 3386 EVT InVT = Node->getOperand(0).getValueType(); 3387 if (VT.isScalableVector() || InVT.isFixedLengthVector()) 3388 break; 3389 3390 // NOTE: We can only get here when doing fixed length SVE code generation. 3391 // We do manual selection because the types involved are not linked to real 3392 // registers (despite being legal) and must be coerced into SVE registers. 
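    //
    // For example (explanatory note): an extract_subvector of a fixed length
    // v2i64 at index 0 from a scalable nxv2i64 value is emitted here as a
    // plain zsub subregister copy via extractSubReg above.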
3393 // 3394 // NOTE: If the above changes, be aware that selection will still not work 3395 // because the td definition of extract_vector does not support extracting 3396 // a fixed length vector from a scalable vector. 3397 3398 ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0))); 3399 return; 3400 } 3401 3402 case ISD::INSERT_SUBVECTOR: { 3403 // Bail when not a "cast" like insert_subvector. 3404 if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0) 3405 break; 3406 if (!Node->getOperand(0).isUndef()) 3407 break; 3408 3409 // Bail when normal isel should do the job. 3410 EVT InVT = Node->getOperand(1).getValueType(); 3411 if (VT.isFixedLengthVector() || InVT.isScalableVector()) 3412 break; 3413 3414 // NOTE: We can only get here when doing fixed length SVE code generation. 3415 // We do manual selection because the types involved are not linked to real 3416 // registers (despite being legal) and must be coerced into SVE registers. 3417 // 3418 // NOTE: If the above changes, be aware that selection will still not work 3419 // because the td definition of insert_vector does not support inserting a 3420 // fixed length vector into a scalable vector. 3421 3422 ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1))); 3423 return; 3424 } 3425 3426 case ISD::Constant: { 3427 // Materialize zero constants as copies from WZR/XZR. This allows 3428 // the coalescer to propagate these into other instructions. 3429 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 3430 if (ConstNode->isNullValue()) { 3431 if (VT == MVT::i32) { 3432 SDValue New = CurDAG->getCopyFromReg( 3433 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 3434 ReplaceNode(Node, New.getNode()); 3435 return; 3436 } else if (VT == MVT::i64) { 3437 SDValue New = CurDAG->getCopyFromReg( 3438 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 3439 ReplaceNode(Node, New.getNode()); 3440 return; 3441 } 3442 } 3443 break; 3444 } 3445 3446 case ISD::FrameIndex: { 3447 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 3448 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 3449 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 3450 const TargetLowering *TLI = getTargetLowering(); 3451 SDValue TFI = CurDAG->getTargetFrameIndex( 3452 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3453 SDLoc DL(Node); 3454 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 3455 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 3456 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 3457 return; 3458 } 3459 case ISD::INTRINSIC_W_CHAIN: { 3460 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 3461 switch (IntNo) { 3462 default: 3463 break; 3464 case Intrinsic::aarch64_ldaxp: 3465 case Intrinsic::aarch64_ldxp: { 3466 unsigned Op = 3467 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 3468 SDValue MemAddr = Node->getOperand(2); 3469 SDLoc DL(Node); 3470 SDValue Chain = Node->getOperand(0); 3471 3472 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 3473 MVT::Other, MemAddr, Chain); 3474 3475 // Transfer memoperands. 3476 MachineMemOperand *MemOp = 3477 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3478 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 3479 ReplaceNode(Node, Ld); 3480 return; 3481 } 3482 case Intrinsic::aarch64_stlxp: 3483 case Intrinsic::aarch64_stxp: { 3484 unsigned Op = 3485 IntNo == Intrinsic::aarch64_stlxp ? 
AArch64::STLXPX : AArch64::STXPX; 3486 SDLoc DL(Node); 3487 SDValue Chain = Node->getOperand(0); 3488 SDValue ValLo = Node->getOperand(2); 3489 SDValue ValHi = Node->getOperand(3); 3490 SDValue MemAddr = Node->getOperand(4); 3491 3492 // Place arguments in the right order. 3493 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 3494 3495 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 3496 // Transfer memoperands. 3497 MachineMemOperand *MemOp = 3498 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3499 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 3500 3501 ReplaceNode(Node, St); 3502 return; 3503 } 3504 case Intrinsic::aarch64_neon_ld1x2: 3505 if (VT == MVT::v8i8) { 3506 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 3507 return; 3508 } else if (VT == MVT::v16i8) { 3509 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 3510 return; 3511 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3512 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 3513 return; 3514 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3515 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 3516 return; 3517 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3518 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 3519 return; 3520 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3521 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 3522 return; 3523 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3524 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3525 return; 3526 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3527 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 3528 return; 3529 } 3530 break; 3531 case Intrinsic::aarch64_neon_ld1x3: 3532 if (VT == MVT::v8i8) { 3533 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 3534 return; 3535 } else if (VT == MVT::v16i8) { 3536 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 3537 return; 3538 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3539 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 3540 return; 3541 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3542 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 3543 return; 3544 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3545 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 3546 return; 3547 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3548 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 3549 return; 3550 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3551 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3552 return; 3553 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3554 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 3555 return; 3556 } 3557 break; 3558 case Intrinsic::aarch64_neon_ld1x4: 3559 if (VT == MVT::v8i8) { 3560 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 3561 return; 3562 } else if (VT == MVT::v16i8) { 3563 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 3564 return; 3565 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3566 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 3567 return; 3568 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3569 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 3570 return; 3571 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3572 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 3573 
return; 3574 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3575 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 3576 return; 3577 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3578 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3579 return; 3580 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3581 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 3582 return; 3583 } 3584 break; 3585 case Intrinsic::aarch64_neon_ld2: 3586 if (VT == MVT::v8i8) { 3587 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 3588 return; 3589 } else if (VT == MVT::v16i8) { 3590 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 3591 return; 3592 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3593 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 3594 return; 3595 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3596 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 3597 return; 3598 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3599 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 3600 return; 3601 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3602 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 3603 return; 3604 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3605 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3606 return; 3607 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3608 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 3609 return; 3610 } 3611 break; 3612 case Intrinsic::aarch64_neon_ld3: 3613 if (VT == MVT::v8i8) { 3614 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 3615 return; 3616 } else if (VT == MVT::v16i8) { 3617 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 3618 return; 3619 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3620 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 3621 return; 3622 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3623 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 3624 return; 3625 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3626 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 3627 return; 3628 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3629 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 3630 return; 3631 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3632 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3633 return; 3634 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3635 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 3636 return; 3637 } 3638 break; 3639 case Intrinsic::aarch64_neon_ld4: 3640 if (VT == MVT::v8i8) { 3641 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 3642 return; 3643 } else if (VT == MVT::v16i8) { 3644 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 3645 return; 3646 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3647 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 3648 return; 3649 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3650 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 3651 return; 3652 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3653 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 3654 return; 3655 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3656 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 3657 return; 3658 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3659 SelectLoad(Node, 4, 
AArch64::LD1Fourv1d, AArch64::dsub0); 3660 return; 3661 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3662 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 3663 return; 3664 } 3665 break; 3666 case Intrinsic::aarch64_neon_ld2r: 3667 if (VT == MVT::v8i8) { 3668 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 3669 return; 3670 } else if (VT == MVT::v16i8) { 3671 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 3672 return; 3673 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3674 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 3675 return; 3676 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3677 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 3678 return; 3679 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3680 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 3681 return; 3682 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3683 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 3684 return; 3685 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3686 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 3687 return; 3688 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3689 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 3690 return; 3691 } 3692 break; 3693 case Intrinsic::aarch64_neon_ld3r: 3694 if (VT == MVT::v8i8) { 3695 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 3696 return; 3697 } else if (VT == MVT::v16i8) { 3698 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 3699 return; 3700 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3701 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 3702 return; 3703 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3704 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 3705 return; 3706 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3707 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 3708 return; 3709 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3710 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 3711 return; 3712 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3713 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 3714 return; 3715 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3716 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 3717 return; 3718 } 3719 break; 3720 case Intrinsic::aarch64_neon_ld4r: 3721 if (VT == MVT::v8i8) { 3722 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 3723 return; 3724 } else if (VT == MVT::v16i8) { 3725 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 3726 return; 3727 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3728 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 3729 return; 3730 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3731 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 3732 return; 3733 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3734 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 3735 return; 3736 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3737 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 3738 return; 3739 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3740 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 3741 return; 3742 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3743 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 3744 return; 3745 } 3746 break; 3747 case Intrinsic::aarch64_neon_ld2lane: 3748 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 
3749 SelectLoadLane(Node, 2, AArch64::LD2i8); 3750 return; 3751 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3752 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3753 SelectLoadLane(Node, 2, AArch64::LD2i16); 3754 return; 3755 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3756 VT == MVT::v2f32) { 3757 SelectLoadLane(Node, 2, AArch64::LD2i32); 3758 return; 3759 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3760 VT == MVT::v1f64) { 3761 SelectLoadLane(Node, 2, AArch64::LD2i64); 3762 return; 3763 } 3764 break; 3765 case Intrinsic::aarch64_neon_ld3lane: 3766 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3767 SelectLoadLane(Node, 3, AArch64::LD3i8); 3768 return; 3769 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3770 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3771 SelectLoadLane(Node, 3, AArch64::LD3i16); 3772 return; 3773 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3774 VT == MVT::v2f32) { 3775 SelectLoadLane(Node, 3, AArch64::LD3i32); 3776 return; 3777 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3778 VT == MVT::v1f64) { 3779 SelectLoadLane(Node, 3, AArch64::LD3i64); 3780 return; 3781 } 3782 break; 3783 case Intrinsic::aarch64_neon_ld4lane: 3784 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3785 SelectLoadLane(Node, 4, AArch64::LD4i8); 3786 return; 3787 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3788 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3789 SelectLoadLane(Node, 4, AArch64::LD4i16); 3790 return; 3791 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3792 VT == MVT::v2f32) { 3793 SelectLoadLane(Node, 4, AArch64::LD4i32); 3794 return; 3795 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3796 VT == MVT::v1f64) { 3797 SelectLoadLane(Node, 4, AArch64::LD4i64); 3798 return; 3799 } 3800 break; 3801 } 3802 } break; 3803 case ISD::INTRINSIC_WO_CHAIN: { 3804 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 3805 switch (IntNo) { 3806 default: 3807 break; 3808 case Intrinsic::aarch64_tagp: 3809 SelectTagP(Node); 3810 return; 3811 case Intrinsic::aarch64_neon_tbl2: 3812 SelectTable(Node, 2, 3813 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, 3814 false); 3815 return; 3816 case Intrinsic::aarch64_neon_tbl3: 3817 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 3818 : AArch64::TBLv16i8Three, 3819 false); 3820 return; 3821 case Intrinsic::aarch64_neon_tbl4: 3822 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 3823 : AArch64::TBLv16i8Four, 3824 false); 3825 return; 3826 case Intrinsic::aarch64_neon_tbx2: 3827 SelectTable(Node, 2, 3828 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, 3829 true); 3830 return; 3831 case Intrinsic::aarch64_neon_tbx3: 3832 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 3833 : AArch64::TBXv16i8Three, 3834 true); 3835 return; 3836 case Intrinsic::aarch64_neon_tbx4: 3837 SelectTable(Node, 4, VT == MVT::v8i8 ? 
AArch64::TBXv8i8Four 3838 : AArch64::TBXv16i8Four, 3839 true); 3840 return; 3841 case Intrinsic::aarch64_neon_smull: 3842 case Intrinsic::aarch64_neon_umull: 3843 if (tryMULLV64LaneV128(IntNo, Node)) 3844 return; 3845 break; 3846 } 3847 break; 3848 } 3849 case ISD::INTRINSIC_VOID: { 3850 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 3851 if (Node->getNumOperands() >= 3) 3852 VT = Node->getOperand(2)->getValueType(0); 3853 switch (IntNo) { 3854 default: 3855 break; 3856 case Intrinsic::aarch64_neon_st1x2: { 3857 if (VT == MVT::v8i8) { 3858 SelectStore(Node, 2, AArch64::ST1Twov8b); 3859 return; 3860 } else if (VT == MVT::v16i8) { 3861 SelectStore(Node, 2, AArch64::ST1Twov16b); 3862 return; 3863 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3864 VT == MVT::v4bf16) { 3865 SelectStore(Node, 2, AArch64::ST1Twov4h); 3866 return; 3867 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 3868 VT == MVT::v8bf16) { 3869 SelectStore(Node, 2, AArch64::ST1Twov8h); 3870 return; 3871 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3872 SelectStore(Node, 2, AArch64::ST1Twov2s); 3873 return; 3874 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3875 SelectStore(Node, 2, AArch64::ST1Twov4s); 3876 return; 3877 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3878 SelectStore(Node, 2, AArch64::ST1Twov2d); 3879 return; 3880 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3881 SelectStore(Node, 2, AArch64::ST1Twov1d); 3882 return; 3883 } 3884 break; 3885 } 3886 case Intrinsic::aarch64_neon_st1x3: { 3887 if (VT == MVT::v8i8) { 3888 SelectStore(Node, 3, AArch64::ST1Threev8b); 3889 return; 3890 } else if (VT == MVT::v16i8) { 3891 SelectStore(Node, 3, AArch64::ST1Threev16b); 3892 return; 3893 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3894 VT == MVT::v4bf16) { 3895 SelectStore(Node, 3, AArch64::ST1Threev4h); 3896 return; 3897 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 3898 VT == MVT::v8bf16) { 3899 SelectStore(Node, 3, AArch64::ST1Threev8h); 3900 return; 3901 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3902 SelectStore(Node, 3, AArch64::ST1Threev2s); 3903 return; 3904 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3905 SelectStore(Node, 3, AArch64::ST1Threev4s); 3906 return; 3907 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3908 SelectStore(Node, 3, AArch64::ST1Threev2d); 3909 return; 3910 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3911 SelectStore(Node, 3, AArch64::ST1Threev1d); 3912 return; 3913 } 3914 break; 3915 } 3916 case Intrinsic::aarch64_neon_st1x4: { 3917 if (VT == MVT::v8i8) { 3918 SelectStore(Node, 4, AArch64::ST1Fourv8b); 3919 return; 3920 } else if (VT == MVT::v16i8) { 3921 SelectStore(Node, 4, AArch64::ST1Fourv16b); 3922 return; 3923 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3924 VT == MVT::v4bf16) { 3925 SelectStore(Node, 4, AArch64::ST1Fourv4h); 3926 return; 3927 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 3928 VT == MVT::v8bf16) { 3929 SelectStore(Node, 4, AArch64::ST1Fourv8h); 3930 return; 3931 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3932 SelectStore(Node, 4, AArch64::ST1Fourv2s); 3933 return; 3934 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3935 SelectStore(Node, 4, AArch64::ST1Fourv4s); 3936 return; 3937 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3938 SelectStore(Node, 4, AArch64::ST1Fourv2d); 3939 return; 3940 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3941 SelectStore(Node, 4, AArch64::ST1Fourv1d); 3942 return; 3943 } 3944 break; 3945 } 3946 case 
Intrinsic::aarch64_neon_st2: { 3947 if (VT == MVT::v8i8) { 3948 SelectStore(Node, 2, AArch64::ST2Twov8b); 3949 return; 3950 } else if (VT == MVT::v16i8) { 3951 SelectStore(Node, 2, AArch64::ST2Twov16b); 3952 return; 3953 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3954 VT == MVT::v4bf16) { 3955 SelectStore(Node, 2, AArch64::ST2Twov4h); 3956 return; 3957 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 3958 VT == MVT::v8bf16) { 3959 SelectStore(Node, 2, AArch64::ST2Twov8h); 3960 return; 3961 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3962 SelectStore(Node, 2, AArch64::ST2Twov2s); 3963 return; 3964 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3965 SelectStore(Node, 2, AArch64::ST2Twov4s); 3966 return; 3967 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3968 SelectStore(Node, 2, AArch64::ST2Twov2d); 3969 return; 3970 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3971 SelectStore(Node, 2, AArch64::ST1Twov1d); 3972 return; 3973 } 3974 break; 3975 } 3976 case Intrinsic::aarch64_neon_st3: { 3977 if (VT == MVT::v8i8) { 3978 SelectStore(Node, 3, AArch64::ST3Threev8b); 3979 return; 3980 } else if (VT == MVT::v16i8) { 3981 SelectStore(Node, 3, AArch64::ST3Threev16b); 3982 return; 3983 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3984 VT == MVT::v4bf16) { 3985 SelectStore(Node, 3, AArch64::ST3Threev4h); 3986 return; 3987 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 3988 VT == MVT::v8bf16) { 3989 SelectStore(Node, 3, AArch64::ST3Threev8h); 3990 return; 3991 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3992 SelectStore(Node, 3, AArch64::ST3Threev2s); 3993 return; 3994 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3995 SelectStore(Node, 3, AArch64::ST3Threev4s); 3996 return; 3997 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3998 SelectStore(Node, 3, AArch64::ST3Threev2d); 3999 return; 4000 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4001 SelectStore(Node, 3, AArch64::ST1Threev1d); 4002 return; 4003 } 4004 break; 4005 } 4006 case Intrinsic::aarch64_neon_st4: { 4007 if (VT == MVT::v8i8) { 4008 SelectStore(Node, 4, AArch64::ST4Fourv8b); 4009 return; 4010 } else if (VT == MVT::v16i8) { 4011 SelectStore(Node, 4, AArch64::ST4Fourv16b); 4012 return; 4013 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4014 VT == MVT::v4bf16) { 4015 SelectStore(Node, 4, AArch64::ST4Fourv4h); 4016 return; 4017 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4018 VT == MVT::v8bf16) { 4019 SelectStore(Node, 4, AArch64::ST4Fourv8h); 4020 return; 4021 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4022 SelectStore(Node, 4, AArch64::ST4Fourv2s); 4023 return; 4024 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4025 SelectStore(Node, 4, AArch64::ST4Fourv4s); 4026 return; 4027 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4028 SelectStore(Node, 4, AArch64::ST4Fourv2d); 4029 return; 4030 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4031 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4032 return; 4033 } 4034 break; 4035 } 4036 case Intrinsic::aarch64_neon_st2lane: { 4037 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4038 SelectStoreLane(Node, 2, AArch64::ST2i8); 4039 return; 4040 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4041 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4042 SelectStoreLane(Node, 2, AArch64::ST2i16); 4043 return; 4044 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4045 VT == MVT::v2f32) { 4046 SelectStoreLane(Node, 2, AArch64::ST2i32); 4047 return; 4048 } else if (VT == 
MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4049 VT == MVT::v1f64) { 4050 SelectStoreLane(Node, 2, AArch64::ST2i64); 4051 return; 4052 } 4053 break; 4054 } 4055 case Intrinsic::aarch64_neon_st3lane: { 4056 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4057 SelectStoreLane(Node, 3, AArch64::ST3i8); 4058 return; 4059 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4060 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4061 SelectStoreLane(Node, 3, AArch64::ST3i16); 4062 return; 4063 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4064 VT == MVT::v2f32) { 4065 SelectStoreLane(Node, 3, AArch64::ST3i32); 4066 return; 4067 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4068 VT == MVT::v1f64) { 4069 SelectStoreLane(Node, 3, AArch64::ST3i64); 4070 return; 4071 } 4072 break; 4073 } 4074 case Intrinsic::aarch64_neon_st4lane: { 4075 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4076 SelectStoreLane(Node, 4, AArch64::ST4i8); 4077 return; 4078 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4079 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4080 SelectStoreLane(Node, 4, AArch64::ST4i16); 4081 return; 4082 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4083 VT == MVT::v2f32) { 4084 SelectStoreLane(Node, 4, AArch64::ST4i32); 4085 return; 4086 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4087 VT == MVT::v1f64) { 4088 SelectStoreLane(Node, 4, AArch64::ST4i64); 4089 return; 4090 } 4091 break; 4092 } 4093 case Intrinsic::aarch64_sve_st2: { 4094 if (VT == MVT::nxv16i8) { 4095 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); 4096 return; 4097 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4098 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4099 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); 4100 return; 4101 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4102 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); 4103 return; 4104 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4105 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); 4106 return; 4107 } 4108 break; 4109 } 4110 case Intrinsic::aarch64_sve_st3: { 4111 if (VT == MVT::nxv16i8) { 4112 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); 4113 return; 4114 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4115 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4116 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); 4117 return; 4118 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4119 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); 4120 return; 4121 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4122 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM); 4123 return; 4124 } 4125 break; 4126 } 4127 case Intrinsic::aarch64_sve_st4: { 4128 if (VT == MVT::nxv16i8) { 4129 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); 4130 return; 4131 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4132 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4133 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); 4134 return; 4135 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4136 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); 4137 return; 4138 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4139 SelectPredicatedStore(Node, 4, 3, 
AArch64::ST4D, AArch64::ST4D_IMM); 4140 return; 4141 } 4142 break; 4143 } 4144 } 4145 break; 4146 } 4147 case AArch64ISD::LD2post: { 4148 if (VT == MVT::v8i8) { 4149 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 4150 return; 4151 } else if (VT == MVT::v16i8) { 4152 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 4153 return; 4154 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4155 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 4156 return; 4157 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4158 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 4159 return; 4160 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4161 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 4162 return; 4163 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4164 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 4165 return; 4166 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4167 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4168 return; 4169 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4170 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 4171 return; 4172 } 4173 break; 4174 } 4175 case AArch64ISD::LD3post: { 4176 if (VT == MVT::v8i8) { 4177 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 4178 return; 4179 } else if (VT == MVT::v16i8) { 4180 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 4181 return; 4182 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4183 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 4184 return; 4185 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4186 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 4187 return; 4188 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4189 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 4190 return; 4191 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4192 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 4193 return; 4194 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4195 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4196 return; 4197 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4198 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 4199 return; 4200 } 4201 break; 4202 } 4203 case AArch64ISD::LD4post: { 4204 if (VT == MVT::v8i8) { 4205 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 4206 return; 4207 } else if (VT == MVT::v16i8) { 4208 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 4209 return; 4210 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4211 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 4212 return; 4213 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4214 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 4215 return; 4216 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4217 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 4218 return; 4219 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4220 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 4221 return; 4222 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4223 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4224 return; 4225 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4226 
SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 4227 return; 4228 } 4229 break; 4230 } 4231 case AArch64ISD::LD1x2post: { 4232 if (VT == MVT::v8i8) { 4233 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 4234 return; 4235 } else if (VT == MVT::v16i8) { 4236 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 4237 return; 4238 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4239 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 4240 return; 4241 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4242 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 4243 return; 4244 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4245 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 4246 return; 4247 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4248 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 4249 return; 4250 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4251 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4252 return; 4253 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4254 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 4255 return; 4256 } 4257 break; 4258 } 4259 case AArch64ISD::LD1x3post: { 4260 if (VT == MVT::v8i8) { 4261 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 4262 return; 4263 } else if (VT == MVT::v16i8) { 4264 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 4265 return; 4266 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4267 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 4268 return; 4269 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4270 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 4271 return; 4272 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4273 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 4274 return; 4275 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4276 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 4277 return; 4278 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4279 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4280 return; 4281 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4282 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 4283 return; 4284 } 4285 break; 4286 } 4287 case AArch64ISD::LD1x4post: { 4288 if (VT == MVT::v8i8) { 4289 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 4290 return; 4291 } else if (VT == MVT::v16i8) { 4292 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); 4293 return; 4294 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4295 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 4296 return; 4297 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4298 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 4299 return; 4300 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4301 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 4302 return; 4303 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4304 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 4305 return; 4306 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4307 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4308 return; 4309 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4310 
SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 4311 return; 4312 } 4313 break; 4314 } 4315 case AArch64ISD::LD1DUPpost: { 4316 if (VT == MVT::v8i8) { 4317 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 4318 return; 4319 } else if (VT == MVT::v16i8) { 4320 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 4321 return; 4322 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4323 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 4324 return; 4325 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4326 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 4327 return; 4328 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4329 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 4330 return; 4331 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4332 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 4333 return; 4334 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4335 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 4336 return; 4337 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4338 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 4339 return; 4340 } 4341 break; 4342 } 4343 case AArch64ISD::LD2DUPpost: { 4344 if (VT == MVT::v8i8) { 4345 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 4346 return; 4347 } else if (VT == MVT::v16i8) { 4348 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 4349 return; 4350 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4351 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 4352 return; 4353 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4354 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 4355 return; 4356 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4357 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 4358 return; 4359 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4360 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 4361 return; 4362 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4363 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 4364 return; 4365 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4366 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 4367 return; 4368 } 4369 break; 4370 } 4371 case AArch64ISD::LD3DUPpost: { 4372 if (VT == MVT::v8i8) { 4373 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 4374 return; 4375 } else if (VT == MVT::v16i8) { 4376 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 4377 return; 4378 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4379 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); 4380 return; 4381 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4382 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 4383 return; 4384 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4385 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 4386 return; 4387 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4388 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 4389 return; 4390 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4391 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 4392 return; 4393 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4394 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 4395 
return; 4396 } 4397 break; 4398 } 4399 case AArch64ISD::LD4DUPpost: { 4400 if (VT == MVT::v8i8) { 4401 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 4402 return; 4403 } else if (VT == MVT::v16i8) { 4404 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 4405 return; 4406 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4407 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 4408 return; 4409 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4410 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 4411 return; 4412 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4413 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 4414 return; 4415 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4416 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 4417 return; 4418 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4419 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 4420 return; 4421 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4422 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 4423 return; 4424 } 4425 break; 4426 } 4427 case AArch64ISD::LD1LANEpost: { 4428 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4429 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 4430 return; 4431 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4432 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4433 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 4434 return; 4435 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4436 VT == MVT::v2f32) { 4437 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 4438 return; 4439 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4440 VT == MVT::v1f64) { 4441 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); 4442 return; 4443 } 4444 break; 4445 } 4446 case AArch64ISD::LD2LANEpost: { 4447 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4448 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); 4449 return; 4450 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4451 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4452 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); 4453 return; 4454 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4455 VT == MVT::v2f32) { 4456 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); 4457 return; 4458 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4459 VT == MVT::v1f64) { 4460 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); 4461 return; 4462 } 4463 break; 4464 } 4465 case AArch64ISD::LD3LANEpost: { 4466 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4467 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); 4468 return; 4469 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4470 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4471 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); 4472 return; 4473 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4474 VT == MVT::v2f32) { 4475 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); 4476 return; 4477 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4478 VT == MVT::v1f64) { 4479 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); 4480 return; 4481 } 4482 break; 4483 } 4484 case AArch64ISD::LD4LANEpost: { 4485 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4486 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); 4487 return; 4488 } else 
if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4489 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4490 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); 4491 return; 4492 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4493 VT == MVT::v2f32) { 4494 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); 4495 return; 4496 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4497 VT == MVT::v1f64) { 4498 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); 4499 return; 4500 } 4501 break; 4502 } 4503 case AArch64ISD::ST2post: { 4504 VT = Node->getOperand(1).getValueType(); 4505 if (VT == MVT::v8i8) { 4506 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); 4507 return; 4508 } else if (VT == MVT::v16i8) { 4509 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); 4510 return; 4511 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4512 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); 4513 return; 4514 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4515 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); 4516 return; 4517 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4518 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); 4519 return; 4520 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4521 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); 4522 return; 4523 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4524 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); 4525 return; 4526 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4527 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4528 return; 4529 } 4530 break; 4531 } 4532 case AArch64ISD::ST3post: { 4533 VT = Node->getOperand(1).getValueType(); 4534 if (VT == MVT::v8i8) { 4535 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); 4536 return; 4537 } else if (VT == MVT::v16i8) { 4538 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); 4539 return; 4540 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4541 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); 4542 return; 4543 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4544 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); 4545 return; 4546 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4547 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); 4548 return; 4549 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4550 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); 4551 return; 4552 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4553 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); 4554 return; 4555 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4556 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4557 return; 4558 } 4559 break; 4560 } 4561 case AArch64ISD::ST4post: { 4562 VT = Node->getOperand(1).getValueType(); 4563 if (VT == MVT::v8i8) { 4564 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); 4565 return; 4566 } else if (VT == MVT::v16i8) { 4567 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); 4568 return; 4569 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4570 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); 4571 return; 4572 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4573 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); 4574 return; 4575 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4576 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); 4577 return; 4578 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4579 
SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); 4580 return; 4581 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4582 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); 4583 return; 4584 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4585 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 4586 return; 4587 } 4588 break; 4589 } 4590 case AArch64ISD::ST1x2post: { 4591 VT = Node->getOperand(1).getValueType(); 4592 if (VT == MVT::v8i8) { 4593 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); 4594 return; 4595 } else if (VT == MVT::v16i8) { 4596 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); 4597 return; 4598 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4599 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); 4600 return; 4601 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4602 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); 4603 return; 4604 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4605 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); 4606 return; 4607 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4608 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); 4609 return; 4610 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4611 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4612 return; 4613 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4614 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); 4615 return; 4616 } 4617 break; 4618 } 4619 case AArch64ISD::ST1x3post: { 4620 VT = Node->getOperand(1).getValueType(); 4621 if (VT == MVT::v8i8) { 4622 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); 4623 return; 4624 } else if (VT == MVT::v16i8) { 4625 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); 4626 return; 4627 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4628 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); 4629 return; 4630 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) { 4631 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); 4632 return; 4633 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4634 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); 4635 return; 4636 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4637 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); 4638 return; 4639 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4640 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4641 return; 4642 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4643 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); 4644 return; 4645 } 4646 break; 4647 } 4648 case AArch64ISD::ST1x4post: { 4649 VT = Node->getOperand(1).getValueType(); 4650 if (VT == MVT::v8i8) { 4651 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); 4652 return; 4653 } else if (VT == MVT::v16i8) { 4654 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); 4655 return; 4656 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4657 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); 4658 return; 4659 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4660 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); 4661 return; 4662 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4663 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); 4664 return; 4665 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4666 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); 4667 return; 4668 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4669 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 4670 return; 4671 } else if (VT == MVT::v2i64 
|| VT == MVT::v2f64) { 4672 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); 4673 return; 4674 } 4675 break; 4676 } 4677 case AArch64ISD::ST2LANEpost: { 4678 VT = Node->getOperand(1).getValueType(); 4679 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4680 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); 4681 return; 4682 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4683 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4684 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); 4685 return; 4686 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4687 VT == MVT::v2f32) { 4688 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); 4689 return; 4690 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4691 VT == MVT::v1f64) { 4692 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); 4693 return; 4694 } 4695 break; 4696 } 4697 case AArch64ISD::ST3LANEpost: { 4698 VT = Node->getOperand(1).getValueType(); 4699 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4700 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); 4701 return; 4702 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4703 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4704 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); 4705 return; 4706 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4707 VT == MVT::v2f32) { 4708 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); 4709 return; 4710 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4711 VT == MVT::v1f64) { 4712 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); 4713 return; 4714 } 4715 break; 4716 } 4717 case AArch64ISD::ST4LANEpost: { 4718 VT = Node->getOperand(1).getValueType(); 4719 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4720 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); 4721 return; 4722 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4723 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4724 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); 4725 return; 4726 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4727 VT == MVT::v2f32) { 4728 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); 4729 return; 4730 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4731 VT == MVT::v1f64) { 4732 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); 4733 return; 4734 } 4735 break; 4736 } 4737 case AArch64ISD::SVE_LD2_MERGE_ZERO: { 4738 if (VT == MVT::nxv16i8) { 4739 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B); 4740 return; 4741 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4742 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4743 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H); 4744 return; 4745 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4746 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W); 4747 return; 4748 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4749 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D); 4750 return; 4751 } 4752 break; 4753 } 4754 case AArch64ISD::SVE_LD3_MERGE_ZERO: { 4755 if (VT == MVT::nxv16i8) { 4756 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B); 4757 return; 4758 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4759 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4760 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H); 4761 return; 4762 } else if (VT == MVT::nxv4i32 || VT == 
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.Min);
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated with a memory operation in
/// \p Root. If such EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID)
    return EVT();

  const unsigned IntNo =
      cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
  if (IntNo != Intrinsic::aarch64_sve_prf)
    return EVT();

  // We are using an SVE prefetch intrinsic. Type must be inferred
  // from the width of the predicate.
  return getPackedVectorTypeFromPredicateType(
      Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
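///
/// For example (illustrative): with MemVT == nxv4i32 the memory width is
/// 16 bytes per vscale increment, so an address of the form
/// (add Base, (vscale 48)) yields OffImm == 3 (the "#3, mul vl" form),
/// provided 3 lies within [Min, Max].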
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8-bit data does not come with the SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}
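
// Note (illustrative): with Scale == 2, SelectSVERegRegAddrMode above matches
// (add Base, (shl Idx, 2)), i.e. a base register plus an index register
// scaled by the 4-byte element size; with Scale == 0 any (add Base, Offset)
// is accepted unscaled.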