1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the AArch64 target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64MachineFunctionInfo.h" 14 #include "AArch64TargetMachine.h" 15 #include "MCTargetDesc/AArch64AddressingModes.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/CodeGen/SelectionDAGISel.h" 18 #include "llvm/IR/Function.h" // To access function attributes. 19 #include "llvm/IR/GlobalValue.h" 20 #include "llvm/IR/Intrinsics.h" 21 #include "llvm/IR/IntrinsicsAArch64.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/ErrorHandling.h" 24 #include "llvm/Support/KnownBits.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "aarch64-isel" 31 32 //===--------------------------------------------------------------------===// 33 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine 34 /// instructions for SelectionDAG operations. 35 /// 36 namespace { 37 38 class AArch64DAGToDAGISel : public SelectionDAGISel { 39 40 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 41 /// make the right decision when generating code for different targets. 42 const AArch64Subtarget *Subtarget; 43 44 public: 45 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, 46 CodeGenOpt::Level OptLevel) 47 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {} 48 49 StringRef getPassName() const override { 50 return "AArch64 Instruction Selection"; 51 } 52 53 bool runOnMachineFunction(MachineFunction &MF) override { 54 Subtarget = &MF.getSubtarget<AArch64Subtarget>(); 55 return SelectionDAGISel::runOnMachineFunction(MF); 56 } 57 58 void Select(SDNode *Node) override; 59 60 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 61 /// inline asm expressions. 
62 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 63 unsigned ConstraintID, 64 std::vector<SDValue> &OutOps) override; 65 66 template <signed Low, signed High, signed Scale> 67 bool SelectRDVLImm(SDValue N, SDValue &Imm); 68 69 bool tryMLAV64LaneV128(SDNode *N); 70 bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N); 71 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); 72 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 73 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 74 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 75 return SelectShiftedRegister(N, false, Reg, Shift); 76 } 77 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 78 return SelectShiftedRegister(N, true, Reg, Shift); 79 } 80 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { 81 return SelectAddrModeIndexed7S(N, 1, Base, OffImm); 82 } 83 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { 84 return SelectAddrModeIndexed7S(N, 2, Base, OffImm); 85 } 86 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { 87 return SelectAddrModeIndexed7S(N, 4, Base, OffImm); 88 } 89 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { 90 return SelectAddrModeIndexed7S(N, 8, Base, OffImm); 91 } 92 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { 93 return SelectAddrModeIndexed7S(N, 16, Base, OffImm); 94 } 95 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) { 96 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm); 97 } 98 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) { 99 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm); 100 } 101 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { 102 return SelectAddrModeIndexed(N, 1, Base, OffImm); 103 } 104 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { 105 return SelectAddrModeIndexed(N, 2, Base, OffImm); 106 } 107 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { 108 return SelectAddrModeIndexed(N, 4, Base, OffImm); 109 } 110 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { 111 return SelectAddrModeIndexed(N, 8, Base, OffImm); 112 } 113 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { 114 return SelectAddrModeIndexed(N, 16, Base, OffImm); 115 } 116 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { 117 return SelectAddrModeUnscaled(N, 1, Base, OffImm); 118 } 119 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { 120 return SelectAddrModeUnscaled(N, 2, Base, OffImm); 121 } 122 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { 123 return SelectAddrModeUnscaled(N, 4, Base, OffImm); 124 } 125 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { 126 return SelectAddrModeUnscaled(N, 8, Base, OffImm); 127 } 128 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { 129 return SelectAddrModeUnscaled(N, 16, Base, OffImm); 130 } 131 132 template<int Width> 133 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, 134 SDValue &SignExtend, SDValue &DoShift) { 135 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 136 } 137 138 template<int Width> 139 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, 140 
SDValue &SignExtend, SDValue &DoShift) { 141 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 142 } 143 144 bool SelectDupZeroOrUndef(SDValue N) { 145 switch(N->getOpcode()) { 146 case ISD::UNDEF: 147 return true; 148 case AArch64ISD::DUP: 149 case ISD::SPLAT_VECTOR: { 150 auto Opnd0 = N->getOperand(0); 151 if (auto CN = dyn_cast<ConstantSDNode>(Opnd0)) 152 if (CN->isNullValue()) 153 return true; 154 if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0)) 155 if (CN->isZero()) 156 return true; 157 break; 158 } 159 default: 160 break; 161 } 162 163 return false; 164 } 165 166 bool SelectDupZero(SDValue N) { 167 switch(N->getOpcode()) { 168 case AArch64ISD::DUP: 169 case ISD::SPLAT_VECTOR: { 170 auto Opnd0 = N->getOperand(0); 171 if (auto CN = dyn_cast<ConstantSDNode>(Opnd0)) 172 if (CN->isNullValue()) 173 return true; 174 if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0)) 175 if (CN->isZero()) 176 return true; 177 break; 178 } 179 } 180 181 return false; 182 } 183 184 template<MVT::SimpleValueType VT> 185 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) { 186 return SelectSVEAddSubImm(N, VT, Imm, Shift); 187 } 188 189 template<MVT::SimpleValueType VT> 190 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) { 191 return SelectSVELogicalImm(N, VT, Imm); 192 } 193 194 template <MVT::SimpleValueType VT> 195 bool SelectSVEArithImm(SDValue N, SDValue &Imm) { 196 return SelectSVEArithImm(N, VT, Imm); 197 } 198 199 template <unsigned Low, unsigned High, bool AllowSaturation = false> 200 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { 201 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); 202 } 203 204 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 205 template<signed Min, signed Max, signed Scale, bool Shift> 206 bool SelectCntImm(SDValue N, SDValue &Imm) { 207 if (!isa<ConstantSDNode>(N)) 208 return false; 209 210 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 211 if (Shift) 212 MulImm = 1LL << MulImm; 213 214 if ((MulImm % std::abs(Scale)) != 0) 215 return false; 216 217 MulImm /= Scale; 218 if ((MulImm >= Min) && (MulImm <= Max)) { 219 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); 220 return true; 221 } 222 223 return false; 224 } 225 226 /// Form sequences of consecutive 64/128-bit registers for use in NEON 227 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have 228 /// between 1 and 4 elements. If it contains a single element that is returned 229 /// unchanged; otherwise a REG_SEQUENCE value is returned. 230 SDValue createDTuple(ArrayRef<SDValue> Vecs); 231 SDValue createQTuple(ArrayRef<SDValue> Vecs); 232 // Form a sequence of SVE registers for instructions using list of vectors, 233 // e.g. structured loads and stores (ldN, stN). 234 SDValue createZTuple(ArrayRef<SDValue> Vecs); 235 236 /// Generic helper for the createDTuple/createQTuple 237 /// functions. Those should almost always be called instead. 
238 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[], 239 const unsigned SubRegs[]); 240 241 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); 242 243 bool tryIndexedLoad(SDNode *N); 244 245 bool trySelectStackSlotTagP(SDNode *N); 246 void SelectTagP(SDNode *N); 247 248 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 249 unsigned SubRegIdx); 250 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 251 unsigned SubRegIdx); 252 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 253 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 254 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, 255 unsigned Opc_rr, unsigned Opc_ri); 256 257 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); 258 /// SVE Reg+Imm addressing mode. 259 template <int64_t Min, int64_t Max> 260 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base, 261 SDValue &OffImm); 262 /// SVE Reg+Reg address mode. 263 template <unsigned Scale> 264 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) { 265 return SelectSVERegRegAddrMode(N, Scale, Base, Offset); 266 } 267 268 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); 269 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); 270 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 271 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 272 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale, 273 unsigned Opc_rr, unsigned Opc_ri); 274 std::tuple<unsigned, SDValue, SDValue> 275 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri, 276 const SDValue &OldBase, const SDValue &OldOffset, 277 unsigned Scale); 278 279 bool tryBitfieldExtractOp(SDNode *N); 280 bool tryBitfieldExtractOpFromSExt(SDNode *N); 281 bool tryBitfieldInsertOp(SDNode *N); 282 bool tryBitfieldInsertInZeroOp(SDNode *N); 283 bool tryShiftAmountMod(SDNode *N); 284 bool tryHighFPExt(SDNode *N); 285 286 bool tryReadRegister(SDNode *N); 287 bool tryWriteRegister(SDNode *N); 288 289 // Include the pieces autogenerated from the target description. 
290 #include "AArch64GenDAGISel.inc" 291 292 private: 293 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, 294 SDValue &Shift); 295 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, 296 SDValue &OffImm) { 297 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm); 298 } 299 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW, 300 unsigned Size, SDValue &Base, 301 SDValue &OffImm); 302 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, 303 SDValue &OffImm); 304 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, 305 SDValue &OffImm); 306 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, 307 SDValue &Offset, SDValue &SignExtend, 308 SDValue &DoShift); 309 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, 310 SDValue &Offset, SDValue &SignExtend, 311 SDValue &DoShift); 312 bool isWorthFolding(SDValue V) const; 313 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, 314 SDValue &Offset, SDValue &SignExtend); 315 316 template<unsigned RegWidth> 317 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { 318 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); 319 } 320 321 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); 322 323 bool SelectCMP_SWAP(SDNode *N); 324 325 bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift); 326 327 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); 328 329 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm); 330 331 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); 332 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High, 333 bool AllowSaturation, SDValue &Imm); 334 335 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm); 336 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, 337 SDValue &Offset); 338 }; 339 } // end anonymous namespace 340 341 /// isIntImmediate - This method tests to see if the node is a constant 342 /// operand. If so Imm will receive the 32-bit value. 343 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { 344 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { 345 Imm = C->getZExtValue(); 346 return true; 347 } 348 return false; 349 } 350 351 // isIntImmediate - This method tests to see if a constant operand. 352 // If so Imm will receive the value. 353 static bool isIntImmediate(SDValue N, uint64_t &Imm) { 354 return isIntImmediate(N.getNode(), Imm); 355 } 356 357 // isOpcWithIntImmediate - This method tests to see if the node is a specific 358 // opcode and that it has a immediate integer right operand. 359 // If so Imm will receive the 32 bit value. 360 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, 361 uint64_t &Imm) { 362 return N->getOpcode() == Opc && 363 isIntImmediate(N->getOperand(1).getNode(), Imm); 364 } 365 366 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( 367 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 368 switch(ConstraintID) { 369 default: 370 llvm_unreachable("Unexpected asm memory constraint"); 371 case InlineAsm::Constraint_m: 372 case InlineAsm::Constraint_Q: 373 // We need to make sure that this one operand does not end up in XZR, thus 374 // require the address to be in a PointerRegClass register. 
375 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); 376 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); 377 SDLoc dl(Op); 378 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); 379 SDValue NewOp = 380 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 381 dl, Op.getValueType(), 382 Op, RC), 0); 383 OutOps.push_back(NewOp); 384 return false; 385 } 386 return true; 387 } 388 389 /// SelectArithImmed - Select an immediate value that can be represented as 390 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 391 /// Val set to the 12-bit value and Shift set to the shifter operand. 392 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, 393 SDValue &Shift) { 394 // This function is called from the addsub_shifted_imm ComplexPattern, 395 // which lists [imm] as the list of opcode it's interested in, however 396 // we still need to check whether the operand is actually an immediate 397 // here because the ComplexPattern opcode list is only used in 398 // root-level opcode matching. 399 if (!isa<ConstantSDNode>(N.getNode())) 400 return false; 401 402 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 403 unsigned ShiftAmt; 404 405 if (Immed >> 12 == 0) { 406 ShiftAmt = 0; 407 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 408 ShiftAmt = 12; 409 Immed = Immed >> 12; 410 } else 411 return false; 412 413 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 414 SDLoc dl(N); 415 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); 416 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); 417 return true; 418 } 419 420 /// SelectNegArithImmed - As above, but negates the value before trying to 421 /// select it. 422 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, 423 SDValue &Shift) { 424 // This function is called from the addsub_shifted_imm ComplexPattern, 425 // which lists [imm] as the list of opcode it's interested in, however 426 // we still need to check whether the operand is actually an immediate 427 // here because the ComplexPattern opcode list is only used in 428 // root-level opcode matching. 429 if (!isa<ConstantSDNode>(N.getNode())) 430 return false; 431 432 // The immediate operand must be a 24-bit zero-extended immediate. 433 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 434 435 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 436 // have the opposite effect on the C flag, so this pattern mustn't match under 437 // those circumstances. 438 if (Immed == 0) 439 return false; 440 441 if (N.getValueType() == MVT::i32) 442 Immed = ~((uint32_t)Immed) + 1; 443 else 444 Immed = ~Immed + 1ULL; 445 if (Immed & 0xFFFFFFFFFF000000ULL) 446 return false; 447 448 Immed &= 0xFFFFFFULL; 449 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, 450 Shift); 451 } 452 453 /// getShiftTypeForNode - Translate a shift node to the corresponding 454 /// ShiftType value. 455 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { 456 switch (N.getOpcode()) { 457 default: 458 return AArch64_AM::InvalidShiftExtend; 459 case ISD::SHL: 460 return AArch64_AM::LSL; 461 case ISD::SRL: 462 return AArch64_AM::LSR; 463 case ISD::SRA: 464 return AArch64_AM::ASR; 465 case ISD::ROTR: 466 return AArch64_AM::ROR; 467 } 468 } 469 470 /// Determine whether it is worth it to fold SHL into the addressing 471 /// mode. 
472 static bool isWorthFoldingSHL(SDValue V) { 473 assert(V.getOpcode() == ISD::SHL && "invalid opcode"); 474 // It is worth folding logical shift of up to three places. 475 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1)); 476 if (!CSD) 477 return false; 478 unsigned ShiftVal = CSD->getZExtValue(); 479 if (ShiftVal > 3) 480 return false; 481 482 // Check if this particular node is reused in any non-memory related 483 // operation. If yes, do not try to fold this node into the address 484 // computation, since the computation will be kept. 485 const SDNode *Node = V.getNode(); 486 for (SDNode *UI : Node->uses()) 487 if (!isa<MemSDNode>(*UI)) 488 for (SDNode *UII : UI->uses()) 489 if (!isa<MemSDNode>(*UII)) 490 return false; 491 return true; 492 } 493 494 /// Determine whether it is worth to fold V into an extended register. 495 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { 496 // Trivial if we are optimizing for code size or if there is only 497 // one use of the value. 498 if (CurDAG->shouldOptForSize() || V.hasOneUse()) 499 return true; 500 // If a subtarget has a fastpath LSL we can fold a logical shift into 501 // the addressing mode and save a cycle. 502 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL && 503 isWorthFoldingSHL(V)) 504 return true; 505 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) { 506 const SDValue LHS = V.getOperand(0); 507 const SDValue RHS = V.getOperand(1); 508 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) 509 return true; 510 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) 511 return true; 512 } 513 514 // It hurts otherwise, since the value will be reused. 515 return false; 516 } 517 518 /// SelectShiftedRegister - Select a "shifted register" operand. If the value 519 /// is not shifted, set the Shift operand to default of "LSL 0". The logical 520 /// instructions allow the shifted register to be rotated, but the arithmetic 521 /// instructions do not. The AllowROR parameter specifies whether ROR is 522 /// supported. 523 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, 524 SDValue &Reg, SDValue &Shift) { 525 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); 526 if (ShType == AArch64_AM::InvalidShiftExtend) 527 return false; 528 if (!AllowROR && ShType == AArch64_AM::ROR) 529 return false; 530 531 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 532 unsigned BitSize = N.getValueSizeInBits(); 533 unsigned Val = RHS->getZExtValue() & (BitSize - 1); 534 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); 535 536 Reg = N.getOperand(0); 537 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); 538 return isWorthFolding(N); 539 } 540 541 return false; 542 } 543 544 /// getExtendTypeForNode - Translate an extend node to the corresponding 545 /// ExtendType value. 
546 static AArch64_AM::ShiftExtendType 547 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { 548 if (N.getOpcode() == ISD::SIGN_EXTEND || 549 N.getOpcode() == ISD::SIGN_EXTEND_INREG) { 550 EVT SrcVT; 551 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) 552 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); 553 else 554 SrcVT = N.getOperand(0).getValueType(); 555 556 if (!IsLoadStore && SrcVT == MVT::i8) 557 return AArch64_AM::SXTB; 558 else if (!IsLoadStore && SrcVT == MVT::i16) 559 return AArch64_AM::SXTH; 560 else if (SrcVT == MVT::i32) 561 return AArch64_AM::SXTW; 562 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 563 564 return AArch64_AM::InvalidShiftExtend; 565 } else if (N.getOpcode() == ISD::ZERO_EXTEND || 566 N.getOpcode() == ISD::ANY_EXTEND) { 567 EVT SrcVT = N.getOperand(0).getValueType(); 568 if (!IsLoadStore && SrcVT == MVT::i8) 569 return AArch64_AM::UXTB; 570 else if (!IsLoadStore && SrcVT == MVT::i16) 571 return AArch64_AM::UXTH; 572 else if (SrcVT == MVT::i32) 573 return AArch64_AM::UXTW; 574 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 575 576 return AArch64_AM::InvalidShiftExtend; 577 } else if (N.getOpcode() == ISD::AND) { 578 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 579 if (!CSD) 580 return AArch64_AM::InvalidShiftExtend; 581 uint64_t AndMask = CSD->getZExtValue(); 582 583 switch (AndMask) { 584 default: 585 return AArch64_AM::InvalidShiftExtend; 586 case 0xFF: 587 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 588 case 0xFFFF: 589 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 590 case 0xFFFFFFFF: 591 return AArch64_AM::UXTW; 592 } 593 } 594 595 return AArch64_AM::InvalidShiftExtend; 596 } 597 598 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. 599 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { 600 if (DL->getOpcode() != AArch64ISD::DUPLANE16 && 601 DL->getOpcode() != AArch64ISD::DUPLANE32) 602 return false; 603 604 SDValue SV = DL->getOperand(0); 605 if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) 606 return false; 607 608 SDValue EV = SV.getOperand(1); 609 if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) 610 return false; 611 612 ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); 613 ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); 614 LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); 615 LaneOp = EV.getOperand(0); 616 617 return true; 618 } 619 620 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a 621 // high lane extract. 622 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, 623 SDValue &LaneOp, int &LaneIdx) { 624 625 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { 626 std::swap(Op0, Op1); 627 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) 628 return false; 629 } 630 StdOp = Op1; 631 return true; 632 } 633 634 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand 635 /// is a lane in the upper half of a 128-bit vector. Recognize and select this 636 /// so that we don't emit unnecessary lane extracts. 637 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) { 638 SDLoc dl(N); 639 SDValue Op0 = N->getOperand(0); 640 SDValue Op1 = N->getOperand(1); 641 SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. 642 SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. 643 int LaneIdx = -1; // Will hold the lane index. 
644 645 if (Op1.getOpcode() != ISD::MUL || 646 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 647 LaneIdx)) { 648 std::swap(Op0, Op1); 649 if (Op1.getOpcode() != ISD::MUL || 650 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 651 LaneIdx)) 652 return false; 653 } 654 655 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); 656 657 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; 658 659 unsigned MLAOpc = ~0U; 660 661 switch (N->getSimpleValueType(0).SimpleTy) { 662 default: 663 llvm_unreachable("Unrecognized MLA."); 664 case MVT::v4i16: 665 MLAOpc = AArch64::MLAv4i16_indexed; 666 break; 667 case MVT::v8i16: 668 MLAOpc = AArch64::MLAv8i16_indexed; 669 break; 670 case MVT::v2i32: 671 MLAOpc = AArch64::MLAv2i32_indexed; 672 break; 673 case MVT::v4i32: 674 MLAOpc = AArch64::MLAv4i32_indexed; 675 break; 676 } 677 678 ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops)); 679 return true; 680 } 681 682 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) { 683 SDLoc dl(N); 684 SDValue SMULLOp0; 685 SDValue SMULLOp1; 686 int LaneIdx; 687 688 if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, 689 LaneIdx)) 690 return false; 691 692 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); 693 694 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; 695 696 unsigned SMULLOpc = ~0U; 697 698 if (IntNo == Intrinsic::aarch64_neon_smull) { 699 switch (N->getSimpleValueType(0).SimpleTy) { 700 default: 701 llvm_unreachable("Unrecognized SMULL."); 702 case MVT::v4i32: 703 SMULLOpc = AArch64::SMULLv4i16_indexed; 704 break; 705 case MVT::v2i64: 706 SMULLOpc = AArch64::SMULLv2i32_indexed; 707 break; 708 } 709 } else if (IntNo == Intrinsic::aarch64_neon_umull) { 710 switch (N->getSimpleValueType(0).SimpleTy) { 711 default: 712 llvm_unreachable("Unrecognized SMULL."); 713 case MVT::v4i32: 714 SMULLOpc = AArch64::UMULLv4i16_indexed; 715 break; 716 case MVT::v2i64: 717 SMULLOpc = AArch64::UMULLv2i32_indexed; 718 break; 719 } 720 } else 721 llvm_unreachable("Unrecognized intrinsic."); 722 723 ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops)); 724 return true; 725 } 726 727 /// Instructions that accept extend modifiers like UXTW expect the register 728 /// being extended to be a GPR32, but the incoming DAG might be acting on a 729 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if 730 /// this is the case. 731 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { 732 if (N.getValueType() == MVT::i32) 733 return N; 734 735 SDLoc dl(N); 736 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 737 MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 738 dl, MVT::i32, N, SubReg); 739 return SDValue(Node, 0); 740 } 741 742 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 743 template<signed Low, signed High, signed Scale> 744 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) { 745 if (!isa<ConstantSDNode>(N)) 746 return false; 747 748 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 749 if ((MulImm % std::abs(Scale)) == 0) { 750 int64_t RDVLImm = MulImm / Scale; 751 if ((RDVLImm >= Low) && (RDVLImm <= High)) { 752 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32); 753 return true; 754 } 755 } 756 757 return false; 758 } 759 760 /// SelectArithExtendedRegister - Select a "extended register" operand. 
This 761 /// operand folds in an extend followed by an optional left shift. 762 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, 763 SDValue &Shift) { 764 unsigned ShiftVal = 0; 765 AArch64_AM::ShiftExtendType Ext; 766 767 if (N.getOpcode() == ISD::SHL) { 768 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 769 if (!CSD) 770 return false; 771 ShiftVal = CSD->getZExtValue(); 772 if (ShiftVal > 4) 773 return false; 774 775 Ext = getExtendTypeForNode(N.getOperand(0)); 776 if (Ext == AArch64_AM::InvalidShiftExtend) 777 return false; 778 779 Reg = N.getOperand(0).getOperand(0); 780 } else { 781 Ext = getExtendTypeForNode(N); 782 if (Ext == AArch64_AM::InvalidShiftExtend) 783 return false; 784 785 Reg = N.getOperand(0); 786 787 // Don't match if free 32-bit -> 64-bit zext can be used instead. 788 if (Ext == AArch64_AM::UXTW && 789 Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode())) 790 return false; 791 } 792 793 // AArch64 mandates that the RHS of the operation must use the smallest 794 // register class that could contain the size being extended from. Thus, 795 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though 796 // there might not be an actual 32-bit value in the program. We can 797 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. 798 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); 799 Reg = narrowIfNeeded(CurDAG, Reg); 800 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), 801 MVT::i32); 802 return isWorthFolding(N); 803 } 804 805 /// If there's a use of this ADDlow that's not itself a load/store then we'll 806 /// need to create a real ADD instruction from it anyway and there's no point in 807 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's 808 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding 809 /// leads to duplicated ADRP instructions. 810 static bool isWorthFoldingADDlow(SDValue N) { 811 for (auto Use : N->uses()) { 812 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && 813 Use->getOpcode() != ISD::ATOMIC_LOAD && 814 Use->getOpcode() != ISD::ATOMIC_STORE) 815 return false; 816 817 // ldar and stlr have much more restrictive addressing modes (just a 818 // register). 819 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering())) 820 return false; 821 } 822 823 return true; 824 } 825 826 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit 827 /// immediate" address. The "Size" argument is the size in bytes of the memory 828 /// reference, which determines the scale. 829 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, 830 unsigned BW, unsigned Size, 831 SDValue &Base, 832 SDValue &OffImm) { 833 SDLoc dl(N); 834 const DataLayout &DL = CurDAG->getDataLayout(); 835 const TargetLowering *TLI = getTargetLowering(); 836 if (N.getOpcode() == ISD::FrameIndex) { 837 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 838 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 839 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 840 return true; 841 } 842 843 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed 844 // selected here doesn't support labels/immediates, only base+offset. 
845 if (CurDAG->isBaseWithConstantOffset(N)) { 846 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 847 if (IsSignedImm) { 848 int64_t RHSC = RHS->getSExtValue(); 849 unsigned Scale = Log2_32(Size); 850 int64_t Range = 0x1LL << (BW - 1); 851 852 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && 853 RHSC < (Range << Scale)) { 854 Base = N.getOperand(0); 855 if (Base.getOpcode() == ISD::FrameIndex) { 856 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 857 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 858 } 859 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 860 return true; 861 } 862 } else { 863 // unsigned Immediate 864 uint64_t RHSC = RHS->getZExtValue(); 865 unsigned Scale = Log2_32(Size); 866 uint64_t Range = 0x1ULL << BW; 867 868 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { 869 Base = N.getOperand(0); 870 if (Base.getOpcode() == ISD::FrameIndex) { 871 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 872 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 873 } 874 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 875 return true; 876 } 877 } 878 } 879 } 880 // Base only. The address will be materialized into a register before 881 // the memory is accessed. 882 // add x0, Xbase, #offset 883 // stp x1, x2, [x0] 884 Base = N; 885 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 886 return true; 887 } 888 889 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit 890 /// immediate" address. The "Size" argument is the size in bytes of the memory 891 /// reference, which determines the scale. 892 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, 893 SDValue &Base, SDValue &OffImm) { 894 SDLoc dl(N); 895 const DataLayout &DL = CurDAG->getDataLayout(); 896 const TargetLowering *TLI = getTargetLowering(); 897 if (N.getOpcode() == ISD::FrameIndex) { 898 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 899 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 900 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 901 return true; 902 } 903 904 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { 905 GlobalAddressSDNode *GAN = 906 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); 907 Base = N.getOperand(0); 908 OffImm = N.getOperand(1); 909 if (!GAN) 910 return true; 911 912 if (GAN->getOffset() % Size == 0 && 913 GAN->getGlobal()->getPointerAlignment(DL) >= Size) 914 return true; 915 } 916 917 if (CurDAG->isBaseWithConstantOffset(N)) { 918 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 919 int64_t RHSC = (int64_t)RHS->getZExtValue(); 920 unsigned Scale = Log2_32(Size); 921 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 922 Base = N.getOperand(0); 923 if (Base.getOpcode() == ISD::FrameIndex) { 924 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 925 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 926 } 927 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 928 return true; 929 } 930 } 931 } 932 933 // Before falling back to our general case, check if the unscaled 934 // instructions can handle this. If so, that's preferable. 935 if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) 936 return false; 937 938 // Base only. The address will be materialized into a register before 939 // the memory is accessed. 
940 // add x0, Xbase, #offset 941 // ldr x0, [x0] 942 Base = N; 943 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 944 return true; 945 } 946 947 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit 948 /// immediate" address. This should only match when there is an offset that 949 /// is not valid for a scaled immediate addressing mode. The "Size" argument 950 /// is the size in bytes of the memory reference, which is needed here to know 951 /// what is valid for a scaled immediate. 952 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, 953 SDValue &Base, 954 SDValue &OffImm) { 955 if (!CurDAG->isBaseWithConstantOffset(N)) 956 return false; 957 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 958 int64_t RHSC = RHS->getSExtValue(); 959 // If the offset is valid as a scaled immediate, don't match here. 960 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && 961 RHSC < (0x1000 << Log2_32(Size))) 962 return false; 963 if (RHSC >= -256 && RHSC < 256) { 964 Base = N.getOperand(0); 965 if (Base.getOpcode() == ISD::FrameIndex) { 966 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 967 const TargetLowering *TLI = getTargetLowering(); 968 Base = CurDAG->getTargetFrameIndex( 969 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 970 } 971 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); 972 return true; 973 } 974 } 975 return false; 976 } 977 978 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { 979 SDLoc dl(N); 980 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 981 SDValue ImpDef = SDValue( 982 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); 983 MachineSDNode *Node = CurDAG->getMachineNode( 984 TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg); 985 return SDValue(Node, 0); 986 } 987 988 /// Check if the given SHL node (\p N), can be used to form an 989 /// extended register for an addressing mode. 990 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, 991 bool WantExtend, SDValue &Offset, 992 SDValue &SignExtend) { 993 assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); 994 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 995 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) 996 return false; 997 998 SDLoc dl(N); 999 if (WantExtend) { 1000 AArch64_AM::ShiftExtendType Ext = 1001 getExtendTypeForNode(N.getOperand(0), true); 1002 if (Ext == AArch64_AM::InvalidShiftExtend) 1003 return false; 1004 1005 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); 1006 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1007 MVT::i32); 1008 } else { 1009 Offset = N.getOperand(0); 1010 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); 1011 } 1012 1013 unsigned LegalShiftVal = Log2_32(Size); 1014 unsigned ShiftVal = CSD->getZExtValue(); 1015 1016 if (ShiftVal != 0 && ShiftVal != LegalShiftVal) 1017 return false; 1018 1019 return isWorthFolding(N); 1020 } 1021 1022 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, 1023 SDValue &Base, SDValue &Offset, 1024 SDValue &SignExtend, 1025 SDValue &DoShift) { 1026 if (N.getOpcode() != ISD::ADD) 1027 return false; 1028 SDValue LHS = N.getOperand(0); 1029 SDValue RHS = N.getOperand(1); 1030 SDLoc dl(N); 1031 1032 // We don't want to match immediate adds here, because they are better lowered 1033 // to the register-immediate addressing modes. 
1034 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) 1035 return false; 1036 1037 // Check if this particular node is reused in any non-memory related 1038 // operation. If yes, do not try to fold this node into the address 1039 // computation, since the computation will be kept. 1040 const SDNode *Node = N.getNode(); 1041 for (SDNode *UI : Node->uses()) { 1042 if (!isa<MemSDNode>(*UI)) 1043 return false; 1044 } 1045 1046 // Remember if it is worth folding N when it produces extended register. 1047 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 1048 1049 // Try to match a shifted extend on the RHS. 1050 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1051 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { 1052 Base = LHS; 1053 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1054 return true; 1055 } 1056 1057 // Try to match a shifted extend on the LHS. 1058 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1059 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { 1060 Base = RHS; 1061 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1062 return true; 1063 } 1064 1065 // There was no shift, whatever else we find. 1066 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); 1067 1068 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; 1069 // Try to match an unshifted extend on the LHS. 1070 if (IsExtendedRegisterWorthFolding && 1071 (Ext = getExtendTypeForNode(LHS, true)) != 1072 AArch64_AM::InvalidShiftExtend) { 1073 Base = RHS; 1074 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); 1075 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1076 MVT::i32); 1077 if (isWorthFolding(LHS)) 1078 return true; 1079 } 1080 1081 // Try to match an unshifted extend on the RHS. 1082 if (IsExtendedRegisterWorthFolding && 1083 (Ext = getExtendTypeForNode(RHS, true)) != 1084 AArch64_AM::InvalidShiftExtend) { 1085 Base = LHS; 1086 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); 1087 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1088 MVT::i32); 1089 if (isWorthFolding(RHS)) 1090 return true; 1091 } 1092 1093 return false; 1094 } 1095 1096 // Check if the given immediate is preferred by ADD. If an immediate can be 1097 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be 1098 // encoded by one MOVZ, return true. 1099 static bool isPreferredADD(int64_t ImmOff) { 1100 // Constant in [0x0, 0xfff] can be encoded in ADD. 1101 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) 1102 return true; 1103 // Check if it can be encoded in an "ADD LSL #12". 1104 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) 1105 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant. 1106 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && 1107 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; 1108 return false; 1109 } 1110 1111 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, 1112 SDValue &Base, SDValue &Offset, 1113 SDValue &SignExtend, 1114 SDValue &DoShift) { 1115 if (N.getOpcode() != ISD::ADD) 1116 return false; 1117 SDValue LHS = N.getOperand(0); 1118 SDValue RHS = N.getOperand(1); 1119 SDLoc DL(N); 1120 1121 // Check if this particular node is reused in any non-memory related 1122 // operation. If yes, do not try to fold this node into the address 1123 // computation, since the computation will be kept. 
1124 const SDNode *Node = N.getNode(); 1125 for (SDNode *UI : Node->uses()) { 1126 if (!isa<MemSDNode>(*UI)) 1127 return false; 1128 } 1129 1130 // Watch out if RHS is a wide immediate, it can not be selected into 1131 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into 1132 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate 1133 // instructions like: 1134 // MOV X0, WideImmediate 1135 // ADD X1, BaseReg, X0 1136 // LDR X2, [X1, 0] 1137 // For such situation, using [BaseReg, XReg] addressing mode can save one 1138 // ADD/SUB: 1139 // MOV X0, WideImmediate 1140 // LDR X2, [BaseReg, X0] 1141 if (isa<ConstantSDNode>(RHS)) { 1142 int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); 1143 unsigned Scale = Log2_32(Size); 1144 // Skip the immediate can be selected by load/store addressing mode. 1145 // Also skip the immediate can be encoded by a single ADD (SUB is also 1146 // checked by using -ImmOff). 1147 if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || 1148 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) 1149 return false; 1150 1151 SDValue Ops[] = { RHS }; 1152 SDNode *MOVI = 1153 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); 1154 SDValue MOVIV = SDValue(MOVI, 0); 1155 // This ADD of two X register will be selected into [Reg+Reg] mode. 1156 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); 1157 } 1158 1159 // Remember if it is worth folding N when it produces extended register. 1160 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 1161 1162 // Try to match a shifted extend on the RHS. 1163 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1164 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { 1165 Base = LHS; 1166 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1167 return true; 1168 } 1169 1170 // Try to match a shifted extend on the LHS. 1171 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1172 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { 1173 Base = RHS; 1174 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1175 return true; 1176 } 1177 1178 // Match any non-shifted, non-extend, non-immediate add expression. 1179 Base = LHS; 1180 Offset = RHS; 1181 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); 1182 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); 1183 // Reg1 + Reg2 is free: no check needed. 
1184 return true; 1185 } 1186 1187 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { 1188 static const unsigned RegClassIDs[] = { 1189 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; 1190 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, 1191 AArch64::dsub2, AArch64::dsub3}; 1192 1193 return createTuple(Regs, RegClassIDs, SubRegs); 1194 } 1195 1196 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { 1197 static const unsigned RegClassIDs[] = { 1198 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; 1199 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, 1200 AArch64::qsub2, AArch64::qsub3}; 1201 1202 return createTuple(Regs, RegClassIDs, SubRegs); 1203 } 1204 1205 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) { 1206 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID, 1207 AArch64::ZPR3RegClassID, 1208 AArch64::ZPR4RegClassID}; 1209 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, 1210 AArch64::zsub2, AArch64::zsub3}; 1211 1212 return createTuple(Regs, RegClassIDs, SubRegs); 1213 } 1214 1215 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, 1216 const unsigned RegClassIDs[], 1217 const unsigned SubRegs[]) { 1218 // There's no special register-class for a vector-list of 1 element: it's just 1219 // a vector. 1220 if (Regs.size() == 1) 1221 return Regs[0]; 1222 1223 assert(Regs.size() >= 2 && Regs.size() <= 4); 1224 1225 SDLoc DL(Regs[0]); 1226 1227 SmallVector<SDValue, 4> Ops; 1228 1229 // First operand of REG_SEQUENCE is the desired RegClass. 1230 Ops.push_back( 1231 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); 1232 1233 // Then we get pairs of source & subregister-position for the components. 1234 for (unsigned i = 0; i < Regs.size(); ++i) { 1235 Ops.push_back(Regs[i]); 1236 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); 1237 } 1238 1239 SDNode *N = 1240 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 1241 return SDValue(N, 0); 1242 } 1243 1244 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, 1245 bool isExt) { 1246 SDLoc dl(N); 1247 EVT VT = N->getValueType(0); 1248 1249 unsigned ExtOff = isExt; 1250 1251 // Form a REG_SEQUENCE to force register allocation. 1252 unsigned Vec0Off = ExtOff + 1; 1253 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, 1254 N->op_begin() + Vec0Off + NumVecs); 1255 SDValue RegSeq = createQTuple(Regs); 1256 1257 SmallVector<SDValue, 6> Ops; 1258 if (isExt) 1259 Ops.push_back(N->getOperand(1)); 1260 Ops.push_back(RegSeq); 1261 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); 1262 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 1263 } 1264 1265 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { 1266 LoadSDNode *LD = cast<LoadSDNode>(N); 1267 if (LD->isUnindexed()) 1268 return false; 1269 EVT VT = LD->getMemoryVT(); 1270 EVT DstVT = N->getValueType(0); 1271 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1272 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; 1273 1274 // We're not doing validity checking here. That was done when checking 1275 // if we should mark the load as indexed or not. We're just selecting 1276 // the right instruction. 1277 unsigned Opcode = 0; 1278 1279 ISD::LoadExtType ExtType = LD->getExtensionType(); 1280 bool InsertTo64 = false; 1281 if (VT == MVT::i64) 1282 Opcode = IsPre ? 
AArch64::LDRXpre : AArch64::LDRXpost; 1283 else if (VT == MVT::i32) { 1284 if (ExtType == ISD::NON_EXTLOAD) 1285 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1286 else if (ExtType == ISD::SEXTLOAD) 1287 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; 1288 else { 1289 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1290 InsertTo64 = true; 1291 // The result of the load is only i32. It's the subreg_to_reg that makes 1292 // it into an i64. 1293 DstVT = MVT::i32; 1294 } 1295 } else if (VT == MVT::i16) { 1296 if (ExtType == ISD::SEXTLOAD) { 1297 if (DstVT == MVT::i64) 1298 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; 1299 else 1300 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; 1301 } else { 1302 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; 1303 InsertTo64 = DstVT == MVT::i64; 1304 // The result of the load is only i32. It's the subreg_to_reg that makes 1305 // it into an i64. 1306 DstVT = MVT::i32; 1307 } 1308 } else if (VT == MVT::i8) { 1309 if (ExtType == ISD::SEXTLOAD) { 1310 if (DstVT == MVT::i64) 1311 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; 1312 else 1313 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; 1314 } else { 1315 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; 1316 InsertTo64 = DstVT == MVT::i64; 1317 // The result of the load is only i32. It's the subreg_to_reg that makes 1318 // it into an i64. 1319 DstVT = MVT::i32; 1320 } 1321 } else if (VT == MVT::f16) { 1322 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1323 } else if (VT == MVT::bf16) { 1324 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1325 } else if (VT == MVT::f32) { 1326 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; 1327 } else if (VT == MVT::f64 || VT.is64BitVector()) { 1328 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; 1329 } else if (VT.is128BitVector()) { 1330 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; 1331 } else 1332 return false; 1333 SDValue Chain = LD->getChain(); 1334 SDValue Base = LD->getBasePtr(); 1335 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); 1336 int OffsetVal = (int)OffsetOp->getZExtValue(); 1337 SDLoc dl(N); 1338 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); 1339 SDValue Ops[] = { Base, Offset, Chain }; 1340 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, 1341 MVT::Other, Ops); 1342 // Either way, we're replacing the node, so tell the caller that. 
1343 SDValue LoadedVal = SDValue(Res, 1); 1344 if (InsertTo64) { 1345 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 1346 LoadedVal = 1347 SDValue(CurDAG->getMachineNode( 1348 AArch64::SUBREG_TO_REG, dl, MVT::i64, 1349 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, 1350 SubReg), 1351 0); 1352 } 1353 1354 ReplaceUses(SDValue(N, 0), LoadedVal); 1355 ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); 1356 ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); 1357 CurDAG->RemoveDeadNode(N); 1358 return true; 1359 } 1360 1361 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 1362 unsigned SubRegIdx) { 1363 SDLoc dl(N); 1364 EVT VT = N->getValueType(0); 1365 SDValue Chain = N->getOperand(0); 1366 1367 SDValue Ops[] = {N->getOperand(2), // Mem operand; 1368 Chain}; 1369 1370 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1371 1372 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1373 SDValue SuperReg = SDValue(Ld, 0); 1374 for (unsigned i = 0; i < NumVecs; ++i) 1375 ReplaceUses(SDValue(N, i), 1376 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1377 1378 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1379 1380 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one, 1381 // because it's too simple to have needed special treatment during lowering. 1382 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) { 1383 MachineMemOperand *MemOp = MemIntr->getMemOperand(); 1384 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 1385 } 1386 1387 CurDAG->RemoveDeadNode(N); 1388 } 1389 1390 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, 1391 unsigned Opc, unsigned SubRegIdx) { 1392 SDLoc dl(N); 1393 EVT VT = N->getValueType(0); 1394 SDValue Chain = N->getOperand(0); 1395 1396 SDValue Ops[] = {N->getOperand(1), // Mem operand 1397 N->getOperand(2), // Incremental 1398 Chain}; 1399 1400 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1401 MVT::Untyped, MVT::Other}; 1402 1403 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1404 1405 // Update uses of write back register 1406 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1407 1408 // Update uses of vector list 1409 SDValue SuperReg = SDValue(Ld, 1); 1410 if (NumVecs == 1) 1411 ReplaceUses(SDValue(N, 0), SuperReg); 1412 else 1413 for (unsigned i = 0; i < NumVecs; ++i) 1414 ReplaceUses(SDValue(N, i), 1415 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1416 1417 // Update the chain 1418 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1419 CurDAG->RemoveDeadNode(N); 1420 } 1421 1422 /// Optimize \param OldBase and \param OldOffset selecting the best addressing 1423 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the 1424 /// new Base and an SDValue representing the new offset. 1425 std::tuple<unsigned, SDValue, SDValue> 1426 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, 1427 unsigned Opc_ri, 1428 const SDValue &OldBase, 1429 const SDValue &OldOffset, 1430 unsigned Scale) { 1431 SDValue NewBase = OldBase; 1432 SDValue NewOffset = OldOffset; 1433 // Detect a possible Reg+Imm addressing mode. 1434 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>( 1435 N, OldBase, NewBase, NewOffset); 1436 1437 // Detect a possible reg+reg addressing mode, but only if we haven't already 1438 // detected a Reg+Imm one. 
1439 const bool IsRegReg = 1440 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); 1441 1442 // Select the instruction. 1443 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); 1444 } 1445 1446 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, 1447 unsigned Scale, unsigned Opc_ri, 1448 unsigned Opc_rr) { 1449 assert(Scale < 4 && "Invalid scaling value."); 1450 SDLoc DL(N); 1451 EVT VT = N->getValueType(0); 1452 SDValue Chain = N->getOperand(0); 1453 1454 // Optimize addressing mode. 1455 SDValue Base, Offset; 1456 unsigned Opc; 1457 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1458 N, Opc_rr, Opc_ri, N->getOperand(2), 1459 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); 1460 1461 SDValue Ops[] = {N->getOperand(1), // Predicate 1462 Base, // Memory operand 1463 Offset, Chain}; 1464 1465 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1466 1467 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1468 SDValue SuperReg = SDValue(Load, 0); 1469 for (unsigned i = 0; i < NumVecs; ++i) 1470 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1471 AArch64::zsub0 + i, DL, VT, SuperReg)); 1472 1473 // Copy chain 1474 unsigned ChainIdx = NumVecs; 1475 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1476 CurDAG->RemoveDeadNode(N); 1477 } 1478 1479 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, 1480 unsigned Opc) { 1481 SDLoc dl(N); 1482 EVT VT = N->getOperand(2)->getValueType(0); 1483 1484 // Form a REG_SEQUENCE to force register allocation. 1485 bool Is128Bit = VT.getSizeInBits() == 128; 1486 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1487 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1488 1489 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; 1490 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1491 1492 // Transfer memoperands. 1493 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1494 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1495 1496 ReplaceNode(N, St); 1497 } 1498 1499 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, 1500 unsigned Scale, unsigned Opc_rr, 1501 unsigned Opc_ri) { 1502 SDLoc dl(N); 1503 1504 // Form a REG_SEQUENCE to force register allocation. 1505 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1506 SDValue RegSeq = createZTuple(Regs); 1507 1508 // Optimize addressing mode. 
1509 unsigned Opc; 1510 SDValue Offset, Base; 1511 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1512 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), 1513 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); 1514 1515 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate 1516 Base, // address 1517 Offset, // offset 1518 N->getOperand(0)}; // chain 1519 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1520 1521 ReplaceNode(N, St); 1522 } 1523 1524 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, 1525 SDValue &OffImm) { 1526 SDLoc dl(N); 1527 const DataLayout &DL = CurDAG->getDataLayout(); 1528 const TargetLowering *TLI = getTargetLowering(); 1529 1530 // Try to match it for the frame address 1531 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) { 1532 int FI = FINode->getIndex(); 1533 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1534 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1535 return true; 1536 } 1537 1538 return false; 1539 } 1540 1541 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 1542 unsigned Opc) { 1543 SDLoc dl(N); 1544 EVT VT = N->getOperand(2)->getValueType(0); 1545 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1546 MVT::Other}; // Type for the Chain 1547 1548 // Form a REG_SEQUENCE to force register allocation. 1549 bool Is128Bit = VT.getSizeInBits() == 128; 1550 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1551 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1552 1553 SDValue Ops[] = {RegSeq, 1554 N->getOperand(NumVecs + 1), // base register 1555 N->getOperand(NumVecs + 2), // Incremental 1556 N->getOperand(0)}; // Chain 1557 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1558 1559 ReplaceNode(N, St); 1560 } 1561 1562 namespace { 1563 /// WidenVector - Given a value in the V64 register class, produce the 1564 /// equivalent value in the V128 register class. 1565 class WidenVector { 1566 SelectionDAG &DAG; 1567 1568 public: 1569 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 1570 1571 SDValue operator()(SDValue V64Reg) { 1572 EVT VT = V64Reg.getValueType(); 1573 unsigned NarrowSize = VT.getVectorNumElements(); 1574 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1575 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 1576 SDLoc DL(V64Reg); 1577 1578 SDValue Undef = 1579 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 1580 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 1581 } 1582 }; 1583 } // namespace 1584 1585 /// NarrowVector - Given a value in the V128 register class, produce the 1586 /// equivalent value in the V64 register class. 1587 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 1588 EVT VT = V128Reg.getValueType(); 1589 unsigned WideSize = VT.getVectorNumElements(); 1590 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1591 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 1592 1593 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 1594 V128Reg); 1595 } 1596 1597 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 1598 unsigned Opc) { 1599 SDLoc dl(N); 1600 EVT VT = N->getValueType(0); 1601 bool Narrow = VT.getSizeInBits() == 64; 1602 1603 // Form a REG_SEQUENCE to force register allocation. 
1604 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1605 1606 if (Narrow) 1607 transform(Regs, Regs.begin(), 1608 WidenVector(*CurDAG)); 1609 1610 SDValue RegSeq = createQTuple(Regs); 1611 1612 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1613 1614 unsigned LaneNo = 1615 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1616 1617 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1618 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1619 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1620 SDValue SuperReg = SDValue(Ld, 0); 1621 1622 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1623 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1624 AArch64::qsub2, AArch64::qsub3 }; 1625 for (unsigned i = 0; i < NumVecs; ++i) { 1626 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 1627 if (Narrow) 1628 NV = NarrowVector(NV, *CurDAG); 1629 ReplaceUses(SDValue(N, i), NV); 1630 } 1631 1632 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1633 CurDAG->RemoveDeadNode(N); 1634 } 1635 1636 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 1637 unsigned Opc) { 1638 SDLoc dl(N); 1639 EVT VT = N->getValueType(0); 1640 bool Narrow = VT.getSizeInBits() == 64; 1641 1642 // Form a REG_SEQUENCE to force register allocation. 1643 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1644 1645 if (Narrow) 1646 transform(Regs, Regs.begin(), 1647 WidenVector(*CurDAG)); 1648 1649 SDValue RegSeq = createQTuple(Regs); 1650 1651 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1652 RegSeq->getValueType(0), MVT::Other}; 1653 1654 unsigned LaneNo = 1655 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1656 1657 SDValue Ops[] = {RegSeq, 1658 CurDAG->getTargetConstant(LaneNo, dl, 1659 MVT::i64), // Lane Number 1660 N->getOperand(NumVecs + 2), // Base register 1661 N->getOperand(NumVecs + 3), // Incremental 1662 N->getOperand(0)}; 1663 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1664 1665 // Update uses of the write back register 1666 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1667 1668 // Update uses of the vector list 1669 SDValue SuperReg = SDValue(Ld, 1); 1670 if (NumVecs == 1) { 1671 ReplaceUses(SDValue(N, 0), 1672 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 1673 } else { 1674 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1675 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1676 AArch64::qsub2, AArch64::qsub3 }; 1677 for (unsigned i = 0; i < NumVecs; ++i) { 1678 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 1679 SuperReg); 1680 if (Narrow) 1681 NV = NarrowVector(NV, *CurDAG); 1682 ReplaceUses(SDValue(N, i), NV); 1683 } 1684 } 1685 1686 // Update the Chain 1687 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1688 CurDAG->RemoveDeadNode(N); 1689 } 1690 1691 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 1692 unsigned Opc) { 1693 SDLoc dl(N); 1694 EVT VT = N->getOperand(2)->getValueType(0); 1695 bool Narrow = VT.getSizeInBits() == 64; 1696 1697 // Form a REG_SEQUENCE to force register allocation. 
1698 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1699 1700 if (Narrow) 1701 transform(Regs, Regs.begin(), 1702 WidenVector(*CurDAG)); 1703 1704 SDValue RegSeq = createQTuple(Regs); 1705 1706 unsigned LaneNo = 1707 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1708 1709 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1710 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1711 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 1712 1713 // Transfer memoperands. 1714 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1715 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1716 1717 ReplaceNode(N, St); 1718 } 1719 1720 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 1721 unsigned Opc) { 1722 SDLoc dl(N); 1723 EVT VT = N->getOperand(2)->getValueType(0); 1724 bool Narrow = VT.getSizeInBits() == 64; 1725 1726 // Form a REG_SEQUENCE to force register allocation. 1727 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1728 1729 if (Narrow) 1730 transform(Regs, Regs.begin(), 1731 WidenVector(*CurDAG)); 1732 1733 SDValue RegSeq = createQTuple(Regs); 1734 1735 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1736 MVT::Other}; 1737 1738 unsigned LaneNo = 1739 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1740 1741 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1742 N->getOperand(NumVecs + 2), // Base Register 1743 N->getOperand(NumVecs + 3), // Incremental 1744 N->getOperand(0)}; 1745 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1746 1747 // Transfer memoperands. 1748 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1749 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1750 1751 ReplaceNode(N, St); 1752 } 1753 1754 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 1755 unsigned &Opc, SDValue &Opd0, 1756 unsigned &LSB, unsigned &MSB, 1757 unsigned NumberOfIgnoredLowBits, 1758 bool BiggerPattern) { 1759 assert(N->getOpcode() == ISD::AND && 1760 "N must be a AND operation to call this function"); 1761 1762 EVT VT = N->getValueType(0); 1763 1764 // Here we can test the type of VT and return false when the type does not 1765 // match, but since it is done prior to that call in the current context 1766 // we turned that into an assert to avoid redundant code. 1767 assert((VT == MVT::i32 || VT == MVT::i64) && 1768 "Type checking must have been done before calling this function"); 1769 1770 // FIXME: simplify-demanded-bits in DAGCombine will probably have 1771 // changed the AND node to a 32-bit mask operation. We'll have to 1772 // undo that as part of the transform here if we want to catch all 1773 // the opportunities. 1774 // Currently the NumberOfIgnoredLowBits argument helps to recover 1775 // form these situations when matching bigger pattern (bitfield insert). 1776 1777 // For unsigned extracts, check for a shift right and mask 1778 uint64_t AndImm = 0; 1779 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) 1780 return false; 1781 1782 const SDNode *Op0 = N->getOperand(0).getNode(); 1783 1784 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 1785 // simplified. 
Try to undo that 1786 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 1787 1788 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 1789 if (AndImm & (AndImm + 1)) 1790 return false; 1791 1792 bool ClampMSB = false; 1793 uint64_t SrlImm = 0; 1794 // Handle the SRL + ANY_EXTEND case. 1795 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 1796 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 1797 // Extend the incoming operand of the SRL to 64-bit. 1798 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 1799 // Make sure to clamp the MSB so that we preserve the semantics of the 1800 // original operations. 1801 ClampMSB = true; 1802 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 1803 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 1804 SrlImm)) { 1805 // If the shift result was truncated, we can still combine them. 1806 Opd0 = Op0->getOperand(0).getOperand(0); 1807 1808 // Use the type of SRL node. 1809 VT = Opd0->getValueType(0); 1810 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 1811 Opd0 = Op0->getOperand(0); 1812 } else if (BiggerPattern) { 1813 // Let's pretend a 0 shift right has been performed. 1814 // The resulting code will be at least as good as the original one 1815 // plus it may expose more opportunities for bitfield insert pattern. 1816 // FIXME: Currently we limit this to the bigger pattern, because 1817 // some optimizations expect AND and not UBFM. 1818 Opd0 = N->getOperand(0); 1819 } else 1820 return false; 1821 1822 // Bail out on large immediates. This happens when no proper 1823 // combining/constant folding was performed. 1824 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { 1825 LLVM_DEBUG( 1826 (dbgs() << N 1827 << ": Found large shift immediate, this should not happen\n")); 1828 return false; 1829 } 1830 1831 LSB = SrlImm; 1832 MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm) 1833 : countTrailingOnes<uint64_t>(AndImm)) - 1834 1; 1835 if (ClampMSB) 1836 // Since we're moving the extend before the right shift operation, we need 1837 // to clamp the MSB to make sure we don't shift in undefined bits instead of 1838 // the zeros which would get shifted in with the original right shift 1839 // operation. 1840 MSB = MSB > 31 ? 31 : MSB; 1841 1842 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 1843 return true; 1844 } 1845 1846 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, 1847 SDValue &Opd0, unsigned &Immr, 1848 unsigned &Imms) { 1849 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); 1850 1851 EVT VT = N->getValueType(0); 1852 unsigned BitWidth = VT.getSizeInBits(); 1853 assert((VT == MVT::i32 || VT == MVT::i64) && 1854 "Type checking must have been done before calling this function"); 1855 1856 SDValue Op = N->getOperand(0); 1857 if (Op->getOpcode() == ISD::TRUNCATE) { 1858 Op = Op->getOperand(0); 1859 VT = Op->getValueType(0); 1860 BitWidth = VT.getSizeInBits(); 1861 } 1862 1863 uint64_t ShiftImm; 1864 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && 1865 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 1866 return false; 1867 1868 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1869 if (ShiftImm + Width > BitWidth) 1870 return false; 1871 1872 Opc = (VT == MVT::i32) ? 
AArch64::SBFMWri : AArch64::SBFMXri; 1873 Opd0 = Op.getOperand(0); 1874 Immr = ShiftImm; 1875 Imms = ShiftImm + Width - 1; 1876 return true; 1877 } 1878 1879 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 1880 SDValue &Opd0, unsigned &LSB, 1881 unsigned &MSB) { 1882 // We are looking for the following pattern which basically extracts several 1883 // contiguous bits from the source value and places them at the LSB of the 1884 // destination value; all other bits of the destination value are set to zero: 1885 // 1886 // Value2 = AND Value, MaskImm 1887 // SRL Value2, ShiftImm 1888 // 1889 // where MaskImm >> ShiftImm gives the width of the extracted bitfield. 1890 // 1891 // This gets selected into a single UBFM: 1892 // 1893 // UBFM Value, ShiftImm, BitWide + ShiftImm - 1 1894 // 1895 1896 if (N->getOpcode() != ISD::SRL) 1897 return false; 1898 1899 uint64_t AndMask = 0; 1900 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) 1901 return false; 1902 1903 Opd0 = N->getOperand(0).getOperand(0); 1904 1905 uint64_t SrlImm = 0; 1906 if (!isIntImmediate(N->getOperand(1), SrlImm)) 1907 return false; 1908 1909 // Check whether we really have a several-bits extract here. 1910 unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm)); 1911 if (BitWide && isMask_64(AndMask >> SrlImm)) { 1912 if (N->getValueType(0) == MVT::i32) 1913 Opc = AArch64::UBFMWri; 1914 else 1915 Opc = AArch64::UBFMXri; 1916 1917 LSB = SrlImm; 1918 MSB = BitWide + SrlImm - 1; 1919 return true; 1920 } 1921 1922 return false; 1923 } 1924 1925 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 1926 unsigned &Immr, unsigned &Imms, 1927 bool BiggerPattern) { 1928 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 1929 "N must be a SHR/SRA operation to call this function"); 1930 1931 EVT VT = N->getValueType(0); 1932 1933 // Here we can test the type of VT and return false when the type does not 1934 // match, but since it is done prior to that call in the current context 1935 // we turned that into an assert to avoid redundant code. 1936 assert((VT == MVT::i32 || VT == MVT::i64) && 1937 "Type checking must have been done before calling this function"); 1938 1939 // Check for an AND + SRL doing a several-bits extract. 1940 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 1941 return true; 1942 1943 // We're looking for a shift of a shift. 1944 uint64_t ShlImm = 0; 1945 uint64_t TruncBits = 0; 1946 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 1947 Opd0 = N->getOperand(0).getOperand(0); 1948 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 1949 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 1950 // We are looking for a shift of a truncate. Truncating from i64 to i32 can 1951 // be considered as setting the high 32 bits to zero. Our strategy here is to 1952 // always generate a 64-bit UBFM. This consistency will help the CSE pass 1953 // later find more redundancy. 1954 Opd0 = N->getOperand(0).getOperand(0); 1955 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 1956 VT = Opd0.getValueType(); 1957 assert(VT == MVT::i64 && "the promoted type should be i64"); 1958 } else if (BiggerPattern) { 1959 // Let's pretend a 0 shift left has been performed.
1960 // FIXME: Currently we limit this to the bigger pattern case, 1961 // because some optimizations expect AND and not UBFM 1962 Opd0 = N->getOperand(0); 1963 } else 1964 return false; 1965 1966 // Missing combines/constant folding may have left us with strange 1967 // constants. 1968 if (ShlImm >= VT.getSizeInBits()) { 1969 LLVM_DEBUG( 1970 (dbgs() << N 1971 << ": Found large shift immediate, this should not happen\n")); 1972 return false; 1973 } 1974 1975 uint64_t SrlImm = 0; 1976 if (!isIntImmediate(N->getOperand(1), SrlImm)) 1977 return false; 1978 1979 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 1980 "bad amount in shift node!"); 1981 int immr = SrlImm - ShlImm; 1982 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 1983 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 1984 // SRA requires a signed extraction 1985 if (VT == MVT::i32) 1986 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 1987 else 1988 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; 1989 return true; 1990 } 1991 1992 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 1993 assert(N->getOpcode() == ISD::SIGN_EXTEND); 1994 1995 EVT VT = N->getValueType(0); 1996 EVT NarrowVT = N->getOperand(0)->getValueType(0); 1997 if (VT != MVT::i64 || NarrowVT != MVT::i32) 1998 return false; 1999 2000 uint64_t ShiftImm; 2001 SDValue Op = N->getOperand(0); 2002 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2003 return false; 2004 2005 SDLoc dl(N); 2006 // Extend the incoming operand of the shift to 64-bits. 2007 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 2008 unsigned Immr = ShiftImm; 2009 unsigned Imms = NarrowVT.getSizeInBits() - 1; 2010 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2011 CurDAG->getTargetConstant(Imms, dl, VT)}; 2012 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 2013 return true; 2014 } 2015 2016 /// Try to form fcvtl2 instructions from a floating-point extend of a high-half 2017 /// extract of a subvector. 2018 bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) { 2019 assert(N->getOpcode() == ISD::FP_EXTEND); 2020 2021 // There are 2 forms of fcvtl2 - extend to double or extend to float. 2022 SDValue Extract = N->getOperand(0); 2023 EVT VT = N->getValueType(0); 2024 EVT NarrowVT = Extract.getValueType(); 2025 if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) && 2026 (VT != MVT::v4f32 || NarrowVT != MVT::v4f16)) 2027 return false; 2028 2029 // Optionally look past a bitcast. 2030 Extract = peekThroughBitcasts(Extract); 2031 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) 2032 return false; 2033 2034 // Match extract from start of high half index. 2035 // Example: v8i16 -> v4i16 means the extract must begin at index 4. 2036 unsigned ExtractIndex = Extract.getConstantOperandVal(1); 2037 if (ExtractIndex != Extract.getValueType().getVectorNumElements()) 2038 return false; 2039 2040 auto Opcode = VT == MVT::v2f64 ? 
AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16; 2041 CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0)); 2042 return true; 2043 } 2044 2045 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 2046 SDValue &Opd0, unsigned &Immr, unsigned &Imms, 2047 unsigned NumberOfIgnoredLowBits = 0, 2048 bool BiggerPattern = false) { 2049 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 2050 return false; 2051 2052 switch (N->getOpcode()) { 2053 default: 2054 if (!N->isMachineOpcode()) 2055 return false; 2056 break; 2057 case ISD::AND: 2058 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, 2059 NumberOfIgnoredLowBits, BiggerPattern); 2060 case ISD::SRL: 2061 case ISD::SRA: 2062 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); 2063 2064 case ISD::SIGN_EXTEND_INREG: 2065 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); 2066 } 2067 2068 unsigned NOpc = N->getMachineOpcode(); 2069 switch (NOpc) { 2070 default: 2071 return false; 2072 case AArch64::SBFMWri: 2073 case AArch64::UBFMWri: 2074 case AArch64::SBFMXri: 2075 case AArch64::UBFMXri: 2076 Opc = NOpc; 2077 Opd0 = N->getOperand(0); 2078 Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 2079 Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 2080 return true; 2081 } 2082 // Unreachable 2083 return false; 2084 } 2085 2086 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { 2087 unsigned Opc, Immr, Imms; 2088 SDValue Opd0; 2089 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) 2090 return false; 2091 2092 EVT VT = N->getValueType(0); 2093 SDLoc dl(N); 2094 2095 // If the bit extract operation is 64-bit but the original type is 32-bit, we 2096 // need to add an EXTRACT_SUBREG. 2097 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { 2098 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), 2099 CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; 2100 2101 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); 2102 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 2103 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, 2104 MVT::i32, SDValue(BFM, 0), SubReg)); 2105 return true; 2106 } 2107 2108 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2109 CurDAG->getTargetConstant(Imms, dl, VT)}; 2110 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2111 return true; 2112 } 2113 2114 /// Does DstMask form a complementary pair with the mask provided by 2115 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking, 2116 /// this asks whether DstMask zeroes precisely those bits that will be set by 2117 /// the other half. 2118 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, 2119 unsigned NumberOfIgnoredHighBits, EVT VT) { 2120 assert((VT == MVT::i32 || VT == MVT::i64) && 2121 "i32 or i64 mask type expected!"); 2122 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; 2123 2124 APInt SignificantDstMask = APInt(BitWidth, DstMask); 2125 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); 2126 2127 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && 2128 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue(); 2129 } 2130 2131 // Look for bits that will be useful for later uses. 2132 // A bit is considered useless as soon as it is dropped and never used 2133 // before it has been dropped.
2134 // E.g., looking for the useful bits of x 2135 // 1. y = x & 0x7 2136 // 2. z = y >> 2 2137 // After #1, the useful bits of x are 0x7; these useful bits then live through 2138 // y. 2139 // After #2, the useful bits of x are 0x4. 2140 // However, if x is used by an unpredictable instruction, then all its bits 2141 // are useful. 2142 // E.g. 2143 // 1. y = x & 0x7 2144 // 2. z = y >> 2 2145 // 3. str x, [@x] 2146 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); 2147 2148 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, 2149 unsigned Depth) { 2150 uint64_t Imm = 2151 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2152 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); 2153 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); 2154 getUsefulBits(Op, UsefulBits, Depth + 1); 2155 } 2156 2157 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, 2158 uint64_t Imm, uint64_t MSB, 2159 unsigned Depth) { 2160 // inherit the bitwidth value 2161 APInt OpUsefulBits(UsefulBits); 2162 OpUsefulBits = 1; 2163 2164 if (MSB >= Imm) { 2165 OpUsefulBits <<= MSB - Imm + 1; 2166 --OpUsefulBits; 2167 // The interesting part will be in the lower part of the result 2168 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2169 // The interesting part was starting at Imm in the argument 2170 OpUsefulBits <<= Imm; 2171 } else { 2172 OpUsefulBits <<= MSB + 1; 2173 --OpUsefulBits; 2174 // The interesting part will be shifted in the result 2175 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; 2176 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2177 // The interesting part was at zero in the argument 2178 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); 2179 } 2180 2181 UsefulBits &= OpUsefulBits; 2182 } 2183 2184 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, 2185 unsigned Depth) { 2186 uint64_t Imm = 2187 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2188 uint64_t MSB = 2189 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2190 2191 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 2192 } 2193 2194 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, 2195 unsigned Depth) { 2196 uint64_t ShiftTypeAndValue = 2197 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2198 APInt Mask(UsefulBits); 2199 Mask.clearAllBits(); 2200 Mask.flipAllBits(); 2201 2202 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { 2203 // Shift Left 2204 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2205 Mask <<= ShiftAmt; 2206 getUsefulBits(Op, Mask, Depth + 1); 2207 Mask.lshrInPlace(ShiftAmt); 2208 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 2209 // Shift Right 2210 // We do not handle AArch64_AM::ASR, because the sign will change the 2211 // number of useful bits 2212 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2213 Mask.lshrInPlace(ShiftAmt); 2214 getUsefulBits(Op, Mask, Depth + 1); 2215 Mask <<= ShiftAmt; 2216 } else 2217 return; 2218 2219 UsefulBits &= Mask; 2220 } 2221 2222 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 2223 unsigned Depth) { 2224 uint64_t Imm = 2225 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2226 uint64_t MSB = 2227 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 2228 2229 APInt
OpUsefulBits(UsefulBits); 2230 OpUsefulBits = 1; 2231 2232 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 2233 ResultUsefulBits.flipAllBits(); 2234 APInt Mask(UsefulBits.getBitWidth(), 0); 2235 2236 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 2237 2238 if (MSB >= Imm) { 2239 // The instruction is a BFXIL. 2240 uint64_t Width = MSB - Imm + 1; 2241 uint64_t LSB = Imm; 2242 2243 OpUsefulBits <<= Width; 2244 --OpUsefulBits; 2245 2246 if (Op.getOperand(1) == Orig) { 2247 // Copy the low bits from the result to bits starting from LSB. 2248 Mask = ResultUsefulBits & OpUsefulBits; 2249 Mask <<= LSB; 2250 } 2251 2252 if (Op.getOperand(0) == Orig) 2253 // Bits starting from LSB in the input contribute to the result. 2254 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2255 } else { 2256 // The instruction is a BFI. 2257 uint64_t Width = MSB + 1; 2258 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 2259 2260 OpUsefulBits <<= Width; 2261 --OpUsefulBits; 2262 OpUsefulBits <<= LSB; 2263 2264 if (Op.getOperand(1) == Orig) { 2265 // Copy the bits from the result to the zero bits. 2266 Mask = ResultUsefulBits & OpUsefulBits; 2267 Mask.lshrInPlace(LSB); 2268 } 2269 2270 if (Op.getOperand(0) == Orig) 2271 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2272 } 2273 2274 UsefulBits &= Mask; 2275 } 2276 2277 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2278 SDValue Orig, unsigned Depth) { 2279 2280 // Users of this node should have already been instruction selected 2281 // FIXME: Can we turn that into an assert? 2282 if (!UserNode->isMachineOpcode()) 2283 return; 2284 2285 switch (UserNode->getMachineOpcode()) { 2286 default: 2287 return; 2288 case AArch64::ANDSWri: 2289 case AArch64::ANDSXri: 2290 case AArch64::ANDWri: 2291 case AArch64::ANDXri: 2292 // We increment Depth only when we call the getUsefulBits 2293 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2294 Depth); 2295 case AArch64::UBFMWri: 2296 case AArch64::UBFMXri: 2297 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2298 2299 case AArch64::ORRWrs: 2300 case AArch64::ORRXrs: 2301 if (UserNode->getOperand(1) != Orig) 2302 return; 2303 return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2304 Depth); 2305 case AArch64::BFMWri: 2306 case AArch64::BFMXri: 2307 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2308 2309 case AArch64::STRBBui: 2310 case AArch64::STURBBi: 2311 if (UserNode->getOperand(0) != Orig) 2312 return; 2313 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); 2314 return; 2315 2316 case AArch64::STRHHui: 2317 case AArch64::STURHHi: 2318 if (UserNode->getOperand(0) != Orig) 2319 return; 2320 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2321 return; 2322 } 2323 } 2324 2325 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2326 if (Depth >= SelectionDAG::MaxRecursionDepth) 2327 return; 2328 // Initialize UsefulBits 2329 if (!Depth) { 2330 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2331 // At the beginning, assume every produced bits is useful 2332 UsefulBits = APInt(Bitwidth, 0); 2333 UsefulBits.flipAllBits(); 2334 } 2335 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2336 2337 for (SDNode *Node : Op.getNode()->uses()) { 2338 // A use cannot produce useful bits 2339 APInt UsefulBitsForUse = APInt(UsefulBits); 2340 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 2341 UsersUsefulBits |= UsefulBitsForUse; 2342 } 2343 // UsefulBits contains the produced bits that 
are meaningful for the 2344 // current definition, thus a user cannot make a bit meaningful at 2345 // this point 2346 UsefulBits &= UsersUsefulBits; 2347 } 2348 2349 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 2350 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 2351 /// 0, return Op unchanged. 2352 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 2353 if (ShlAmount == 0) 2354 return Op; 2355 2356 EVT VT = Op.getValueType(); 2357 SDLoc dl(Op); 2358 unsigned BitWidth = VT.getSizeInBits(); 2359 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2360 2361 SDNode *ShiftNode; 2362 if (ShlAmount > 0) { 2363 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 2364 ShiftNode = CurDAG->getMachineNode( 2365 UBFMOpc, dl, VT, Op, 2366 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 2367 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 2368 } else { 2369 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 2370 assert(ShlAmount < 0 && "expected right shift"); 2371 int ShrAmount = -ShlAmount; 2372 ShiftNode = CurDAG->getMachineNode( 2373 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 2374 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 2375 } 2376 2377 return SDValue(ShiftNode, 0); 2378 } 2379 2380 /// Does this tree qualify as an attempt to move a bitfield into position, 2381 /// essentially "(and (shl VAL, N), Mask)". 2382 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 2383 bool BiggerPattern, 2384 SDValue &Src, int &ShiftAmount, 2385 int &MaskWidth) { 2386 EVT VT = Op.getValueType(); 2387 unsigned BitWidth = VT.getSizeInBits(); 2388 (void)BitWidth; 2389 assert(BitWidth == 32 || BitWidth == 64); 2390 2391 KnownBits Known = CurDAG->computeKnownBits(Op); 2392 2393 // Non-zero in the sense that they're not provably zero, which is the key 2394 // point if we want to use this value 2395 uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 2396 2397 // Discard a constant AND mask if present. It's safe because the node will 2398 // already have been factored into the computeKnownBits calculation above. 2399 uint64_t AndImm; 2400 if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { 2401 assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0); 2402 Op = Op.getOperand(0); 2403 } 2404 2405 // Don't match if the SHL has more than one use, since then we'll end up 2406 // generating SHL+UBFIZ instead of just keeping SHL+AND. 2407 if (!BiggerPattern && !Op.hasOneUse()) 2408 return false; 2409 2410 uint64_t ShlImm; 2411 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 2412 return false; 2413 Op = Op.getOperand(0); 2414 2415 if (!isShiftedMask_64(NonZeroBits)) 2416 return false; 2417 2418 ShiftAmount = countTrailingZeros(NonZeroBits); 2419 MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); 2420 2421 // BFI encompasses sufficiently many nodes that it's worth inserting an extra 2422 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL 2423 // amount. BiggerPattern is true when this pattern is being matched for BFI, 2424 // BiggerPattern is false when this pattern is being matched for UBFIZ, in 2425 // which case it is not profitable to insert an extra shift. 
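// For illustration (hypothetical operands): with Op = (and (shl x, 3), 0x78), NonZeroBits is
// 0x78, giving ShiftAmount = 3 and MaskWidth = 4; since ShlImm (3) equals ShiftAmount, no
// extra shift is needed and Src ends up being x itself.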
2426 if (ShlImm - ShiftAmount != 0 && !BiggerPattern) 2427 return false; 2428 Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); 2429 2430 return true; 2431 } 2432 2433 static bool isShiftedMask(uint64_t Mask, EVT VT) { 2434 assert(VT == MVT::i32 || VT == MVT::i64); 2435 if (VT == MVT::i32) 2436 return isShiftedMask_32(Mask); 2437 return isShiftedMask_64(Mask); 2438 } 2439 2440 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 2441 // inserted only sets known zero bits. 2442 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 2443 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2444 2445 EVT VT = N->getValueType(0); 2446 if (VT != MVT::i32 && VT != MVT::i64) 2447 return false; 2448 2449 unsigned BitWidth = VT.getSizeInBits(); 2450 2451 uint64_t OrImm; 2452 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 2453 return false; 2454 2455 // Skip this transformation if the ORR immediate can be encoded in the ORR. 2456 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 2457 // performance neutral. 2458 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 2459 return false; 2460 2461 uint64_t MaskImm; 2462 SDValue And = N->getOperand(0); 2463 // Must be a single use AND with an immediate operand. 2464 if (!And.hasOneUse() || 2465 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 2466 return false; 2467 2468 // Compute the Known Zero for the AND as this allows us to catch more general 2469 // cases than just looking for AND with imm. 2470 KnownBits Known = CurDAG->computeKnownBits(And); 2471 2472 // Non-zero in the sense that they're not provably zero, which is the key 2473 // point if we want to use this value. 2474 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 2475 2476 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 2477 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 2478 return false; 2479 2480 // The bits being inserted must only set those bits that are known to be zero. 2481 if ((OrImm & NotKnownZero) != 0) { 2482 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 2483 // currently handle this case. 2484 return false; 2485 } 2486 2487 // BFI/BFXIL dst, src, #lsb, #width. 2488 int LSB = countTrailingOnes(NotKnownZero); 2489 int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation(); 2490 2491 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 2492 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2493 unsigned ImmS = Width - 1; 2494 2495 // If we're creating a BFI instruction avoid cases where we need more 2496 // instructions to materialize the BFI constant as compared to the original 2497 // ORR. A BFXIL will use the same constant as the original ORR, so the code 2498 // should be no worse in this case. 2499 bool IsBFI = LSB != 0; 2500 uint64_t BFIImm = OrImm >> LSB; 2501 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 2502 // We have a BFI instruction and we know the constant can't be materialized 2503 // with a ORR-immediate with the zero register. 2504 unsigned OrChunks = 0, BFIChunks = 0; 2505 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 2506 if (((OrImm >> Shift) & 0xFFFF) != 0) 2507 ++OrChunks; 2508 if (((BFIImm >> Shift) & 0xFFFF) != 0) 2509 ++BFIChunks; 2510 } 2511 if (BFIChunks > OrChunks) 2512 return false; 2513 } 2514 2515 // Materialize the constant to be inserted. 2516 SDLoc DL(N); 2517 unsigned MOVIOpc = VT == MVT::i32 ? 
AArch64::MOVi32imm : AArch64::MOVi64imm; 2518 SDNode *MOVI = CurDAG->getMachineNode( 2519 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); 2520 2521 // Create the BFI/BFXIL instruction. 2522 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), 2523 CurDAG->getTargetConstant(ImmR, DL, VT), 2524 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2525 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2526 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2527 return true; 2528 } 2529 2530 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, 2531 SelectionDAG *CurDAG) { 2532 assert(N->getOpcode() == ISD::OR && "Expect an OR operation"); 2533 2534 EVT VT = N->getValueType(0); 2535 if (VT != MVT::i32 && VT != MVT::i64) 2536 return false; 2537 2538 unsigned BitWidth = VT.getSizeInBits(); 2539 2540 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 2541 // have the expected shape. Try to undo that. 2542 2543 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); 2544 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); 2545 2546 // Given an OR operation, check if we have the following pattern: 2547 // ubfm c, b, imm, imm2 (or something that does the same job, see 2548 // isBitfieldExtractOp) 2549 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and 2550 // countTrailingZeros(mask2) == imm2 - imm + 1 2551 // f = d | c 2552 // If yes, replace the OR instruction with: 2553 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2 2554 2555 // OR is commutative; check all combinations of operand order and values of 2556 // BiggerPattern, i.e. 2557 // Opd0, Opd1, BiggerPattern=false 2558 // Opd1, Opd0, BiggerPattern=false 2559 // Opd0, Opd1, BiggerPattern=true 2560 // Opd1, Opd0, BiggerPattern=true 2561 // Several of these combinations may match, so check with BiggerPattern=false 2562 // first since that will produce better results by matching more instructions 2563 // and/or inserting fewer extra instructions. 2564 for (int I = 0; I < 4; ++I) { 2565 2566 SDValue Dst, Src; 2567 unsigned ImmR, ImmS; 2568 bool BiggerPattern = I / 2; 2569 SDValue OrOpd0Val = N->getOperand(I % 2); 2570 SDNode *OrOpd0 = OrOpd0Val.getNode(); 2571 SDValue OrOpd1Val = N->getOperand((I + 1) % 2); 2572 SDNode *OrOpd1 = OrOpd1Val.getNode(); 2573 2574 unsigned BFXOpc; 2575 int DstLSB, Width; 2576 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, 2577 NumberOfIgnoredLowBits, BiggerPattern)) { 2578 // Check that the returned opcode is compatible with the pattern, 2579 // i.e., same type and zero extended (U and not S) 2580 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || 2581 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) 2582 continue; 2583 2584 // Compute the width of the bitfield insertion 2585 DstLSB = 0; 2586 Width = ImmS - ImmR + 1; 2587 // FIXME: This constraint is to catch bitfield insertion; we may 2588 // want to widen the pattern if we want to grab the general bitfield 2589 // move case 2590 if (Width <= 0) 2591 continue; 2592 2593 // If the mask on the insertee is correct, we have a BFXIL operation. We 2594 // can share the ImmR and ImmS values from the already-computed UBFM.
2595 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, 2596 BiggerPattern, 2597 Src, DstLSB, Width)) { 2598 ImmR = (BitWidth - DstLSB) % BitWidth; 2599 ImmS = Width - 1; 2600 } else 2601 continue; 2602 2603 // Check the second part of the pattern 2604 EVT VT = OrOpd1Val.getValueType(); 2605 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); 2606 2607 // Compute the Known Zero for the candidate of the first operand. 2608 // This allows to catch more general case than just looking for 2609 // AND with imm. Indeed, simplify-demanded-bits may have removed 2610 // the AND instruction because it proves it was useless. 2611 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 2612 2613 // Check if there is enough room for the second operand to appear 2614 // in the first one 2615 APInt BitsToBeInserted = 2616 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 2617 2618 if ((BitsToBeInserted & ~Known.Zero) != 0) 2619 continue; 2620 2621 // Set the first operand 2622 uint64_t Imm; 2623 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 2624 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 2625 // In that case, we can eliminate the AND 2626 Dst = OrOpd1->getOperand(0); 2627 else 2628 // Maybe the AND has been removed by simplify-demanded-bits 2629 // or is useful because it discards more bits 2630 Dst = OrOpd1Val; 2631 2632 // both parts match 2633 SDLoc DL(N); 2634 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 2635 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2636 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2637 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2638 return true; 2639 } 2640 2641 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 2642 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 2643 // mask (e.g., 0x000ffff0). 2644 uint64_t Mask0Imm, Mask1Imm; 2645 SDValue And0 = N->getOperand(0); 2646 SDValue And1 = N->getOperand(1); 2647 if (And0.hasOneUse() && And1.hasOneUse() && 2648 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 2649 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 2650 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 2651 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 2652 2653 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 2654 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 2655 // bits to be inserted. 2656 if (isShiftedMask(Mask0Imm, VT)) { 2657 std::swap(And0, And1); 2658 std::swap(Mask0Imm, Mask1Imm); 2659 } 2660 2661 SDValue Src = And1->getOperand(0); 2662 SDValue Dst = And0->getOperand(0); 2663 unsigned LSB = countTrailingZeros(Mask1Imm); 2664 int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation(); 2665 2666 // The BFXIL inserts the low-order bits from a source register, so right 2667 // shift the needed bits into place. 2668 SDLoc DL(N); 2669 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2670 SDNode *LSR = CurDAG->getMachineNode( 2671 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT), 2672 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 2673 2674 // BFXIL is an alias of BFM, so translate to BFM operands. 2675 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2676 unsigned ImmS = Width - 1; 2677 2678 // Create the BFXIL instruction. 
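// (Worked example with hypothetical i32 masks: for 'or (and X, 0xffff000f), (and Y, 0x0000fff0)',
// Mask1Imm = 0xfff0 is the shifted mask, so LSB = 4 and Width = 12; Y is first shifted right
// with UBFM Y, #4, #31, and the BFM below is formed with ImmR = 28 and ImmS = 11.)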
2679 SDValue Ops[] = {Dst, SDValue(LSR, 0), 2680 CurDAG->getTargetConstant(ImmR, DL, VT), 2681 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2682 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2683 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2684 return true; 2685 } 2686 2687 return false; 2688 } 2689 2690 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 2691 if (N->getOpcode() != ISD::OR) 2692 return false; 2693 2694 APInt NUsefulBits; 2695 getUsefulBits(SDValue(N, 0), NUsefulBits); 2696 2697 // If all bits are not useful, just return UNDEF. 2698 if (!NUsefulBits) { 2699 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 2700 return true; 2701 } 2702 2703 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 2704 return true; 2705 2706 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 2707 } 2708 2709 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 2710 /// equivalent of a left shift by a constant amount followed by an and masking 2711 /// out a contiguous set of bits. 2712 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 2713 if (N->getOpcode() != ISD::AND) 2714 return false; 2715 2716 EVT VT = N->getValueType(0); 2717 if (VT != MVT::i32 && VT != MVT::i64) 2718 return false; 2719 2720 SDValue Op0; 2721 int DstLSB, Width; 2722 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 2723 Op0, DstLSB, Width)) 2724 return false; 2725 2726 // ImmR is the rotate right amount. 2727 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 2728 // ImmS is the most significant bit of the source to be moved. 2729 unsigned ImmS = Width - 1; 2730 2731 SDLoc DL(N); 2732 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 2733 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2734 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2735 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2736 return true; 2737 } 2738 2739 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 2740 /// variable shift/rotate instructions. 2741 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 2742 EVT VT = N->getValueType(0); 2743 2744 unsigned Opc; 2745 switch (N->getOpcode()) { 2746 case ISD::ROTR: 2747 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 2748 break; 2749 case ISD::SHL: 2750 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 2751 break; 2752 case ISD::SRL: 2753 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 2754 break; 2755 case ISD::SRA: 2756 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 2757 break; 2758 default: 2759 return false; 2760 } 2761 2762 uint64_t Size; 2763 uint64_t Bits; 2764 if (VT == MVT::i32) { 2765 Bits = 5; 2766 Size = 32; 2767 } else if (VT == MVT::i64) { 2768 Bits = 6; 2769 Size = 64; 2770 } else 2771 return false; 2772 2773 SDValue ShiftAmt = N->getOperand(1); 2774 SDLoc DL(N); 2775 SDValue NewShiftAmt; 2776 2777 // Skip over an extend of the shift amount. 2778 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 2779 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 2780 ShiftAmt = ShiftAmt->getOperand(0); 2781 2782 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 2783 SDValue Add0 = ShiftAmt->getOperand(0); 2784 SDValue Add1 = ShiftAmt->getOperand(1); 2785 uint64_t Add0Imm; 2786 uint64_t Add1Imm; 2787 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 2788 // to avoid the ADD/SUB. 
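// For example, an i64 shift by (add X, 64) becomes a shift by X alone, since the
// variable-shift instructions only use the low 6 bits of an i64 shift amount.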
2789 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) 2790 NewShiftAmt = Add0; 2791 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2792 // generate a NEG instead of a SUB of a constant. 2793 else if (ShiftAmt->getOpcode() == ISD::SUB && 2794 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 2795 (Add0Imm % Size == 0)) { 2796 unsigned NegOpc; 2797 unsigned ZeroReg; 2798 EVT SubVT = ShiftAmt->getValueType(0); 2799 if (SubVT == MVT::i32) { 2800 NegOpc = AArch64::SUBWrr; 2801 ZeroReg = AArch64::WZR; 2802 } else { 2803 assert(SubVT == MVT::i64); 2804 NegOpc = AArch64::SUBXrr; 2805 ZeroReg = AArch64::XZR; 2806 } 2807 SDValue Zero = 2808 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 2809 MachineSDNode *Neg = 2810 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 2811 NewShiftAmt = SDValue(Neg, 0); 2812 } else 2813 return false; 2814 } else { 2815 // If the shift amount is masked with an AND, check that the mask covers the 2816 // bits that are implicitly ANDed off by the above opcodes and if so, skip 2817 // the AND. 2818 uint64_t MaskImm; 2819 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && 2820 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) 2821 return false; 2822 2823 if (countTrailingOnes(MaskImm) < Bits) 2824 return false; 2825 2826 NewShiftAmt = ShiftAmt->getOperand(0); 2827 } 2828 2829 // Narrow/widen the shift amount to match the size of the shift operation. 2830 if (VT == MVT::i32) 2831 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 2832 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 2833 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 2834 MachineSDNode *Ext = CurDAG->getMachineNode( 2835 AArch64::SUBREG_TO_REG, DL, VT, 2836 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 2837 NewShiftAmt = SDValue(Ext, 0); 2838 } 2839 2840 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 2841 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2842 return true; 2843 } 2844 2845 bool 2846 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 2847 unsigned RegWidth) { 2848 APFloat FVal(0.0); 2849 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 2850 FVal = CN->getValueAPF(); 2851 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 2852 // Some otherwise illegal constants are allowed in this case. 2853 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || 2854 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) 2855 return false; 2856 2857 ConstantPoolSDNode *CN = 2858 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); 2859 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); 2860 } else 2861 return false; 2862 2863 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits 2864 // is between 1 and 32 for a destination w-register, or 1 and 64 for an 2865 // x-register. 2866 // 2867 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we 2868 // want THIS_NODE to be 2^fbits. This is much easier to deal with using 2869 // integers. 2870 bool IsExact; 2871 2872 // fbits is between 1 and 64 in the worst-case, which means the fmul 2873 // could have 2^64 as an actual operand. Need 65 bits of precision. 2874 APSInt IntVal(65, true); 2875 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); 2876 2877 // N.b. isPowerOf2 also checks for > 0. 
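// Illustrative example: for (fp_to_sint (fmul Val, 16.0)) with a 32-bit destination,
// IntVal is 16, so FBits becomes 4 and the conversion uses 4 fractional bits.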
2878 if (!IsExact || !IntVal.isPowerOf2()) return false; 2879 unsigned FBits = IntVal.logBase2(); 2880 2881 // Checks above should have guaranteed that we haven't lost information in 2882 // finding FBits, but it must still be in range. 2883 if (FBits == 0 || FBits > RegWidth) return false; 2884 2885 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32); 2886 return true; 2887 } 2888 2889 // Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields 2890 // of the string, obtains the integer values from them, and combines these 2891 // into a single value to be used in the MRS/MSR instruction. 2892 static int getIntOperandFromRegisterString(StringRef RegString) { 2893 SmallVector<StringRef, 5> Fields; 2894 RegString.split(Fields, ':'); 2895 2896 if (Fields.size() == 1) 2897 return -1; 2898 2899 assert(Fields.size() == 5 2900 && "Invalid number of fields in read register string"); 2901 2902 SmallVector<int, 5> Ops; 2903 bool AllIntFields = true; 2904 2905 for (StringRef Field : Fields) { 2906 unsigned IntField; 2907 AllIntFields &= !Field.getAsInteger(10, IntField); 2908 Ops.push_back(IntField); 2909 } 2910 2911 assert(AllIntFields && 2912 "Unexpected non-integer value in special register string."); 2913 2914 // Need to combine the integer fields of the string into a single value 2915 // based on the bit encoding of the MRS/MSR instruction. 2916 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | 2917 (Ops[3] << 3) | (Ops[4]); 2918 } 2919 2920 // Lower the read_register intrinsic to an MRS instruction node if the special 2921 // register string argument is either of the form detailed in the ACLE (the 2922 // form described in getIntOperandFromRegisterString) or is a named register 2923 // known by the MRS SysReg mapper. 2924 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) { 2925 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 2926 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 2927 SDLoc DL(N); 2928 2929 int Reg = getIntOperandFromRegisterString(RegString->getString()); 2930 if (Reg != -1) { 2931 ReplaceNode(N, CurDAG->getMachineNode( 2932 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other, 2933 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2934 N->getOperand(0))); 2935 return true; 2936 } 2937 2938 // Use the sysreg mapper to map the remaining possible strings to the 2939 // value for the register to be used for the instruction operand.
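// (For reference, the numeric form handled above packs the fields as
// (op0<<14)|(op1<<11)|(CRn<<7)|(CRm<<3)|op2, so a string such as "3:3:13:0:2", which happens
// to name TPIDR_EL0, yields 0xDE82; a string without that five-field colon form returns -1
// and falls through to the name-based lookups here.)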
2940 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 2941 if (TheReg && TheReg->Readable && 2942 TheReg->haveFeatures(Subtarget->getFeatureBits())) 2943 Reg = TheReg->Encoding; 2944 else 2945 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 2946 2947 if (Reg != -1) { 2948 ReplaceNode(N, CurDAG->getMachineNode( 2949 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other, 2950 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2951 N->getOperand(0))); 2952 return true; 2953 } 2954 2955 if (RegString->getString() == "pc") { 2956 ReplaceNode(N, CurDAG->getMachineNode( 2957 AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other, 2958 CurDAG->getTargetConstant(0, DL, MVT::i32), 2959 N->getOperand(0))); 2960 return true; 2961 } 2962 2963 return false; 2964 } 2965 2966 // Lower the write_register intrinsic to an MSR instruction node if the special 2967 // register string argument is either of the form detailed in the ACLE (the 2968 // form described in getIntOperandFromRegisterString) or is a named register 2969 // known by the MSR SysReg mapper. 2970 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { 2971 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 2972 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 2973 SDLoc DL(N); 2974 2975 int Reg = getIntOperandFromRegisterString(RegString->getString()); 2976 if (Reg != -1) { 2977 ReplaceNode( 2978 N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, 2979 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2980 N->getOperand(2), N->getOperand(0))); 2981 return true; 2982 } 2983 2984 // Check if the register was one of those allowed as the pstatefield value in 2985 // the MSR (immediate) instruction. To accept the values allowed in the 2986 // pstatefield for the MSR (immediate) instruction, we also require that an 2987 // immediate value has been provided as an argument; we know that this is 2988 // the case, as it has been ensured by semantic checking. 2989 auto PMapper = AArch64PState::lookupPStateByName(RegString->getString()); 2990 if (PMapper) { 2991 assert (isa<ConstantSDNode>(N->getOperand(2)) 2992 && "Expected a constant integer expression."); 2993 unsigned Reg = PMapper->Encoding; 2994 uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 2995 unsigned State; 2996 if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) { 2997 assert(Immed < 2 && "Bad imm"); 2998 State = AArch64::MSRpstateImm1; 2999 } else { 3000 assert(Immed < 16 && "Bad imm"); 3001 State = AArch64::MSRpstateImm4; 3002 } 3003 ReplaceNode(N, CurDAG->getMachineNode( 3004 State, DL, MVT::Other, 3005 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3006 CurDAG->getTargetConstant(Immed, DL, MVT::i16), 3007 N->getOperand(0))); 3008 return true; 3009 } 3010 3011 // Use the sysreg mapper to attempt to map the remaining possible strings 3012 // to the value for the register to be used for the MSR (register) 3013 // instruction operand.
3014 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 3015 if (TheReg && TheReg->Writeable && 3016 TheReg->haveFeatures(Subtarget->getFeatureBits())) 3017 Reg = TheReg->Encoding; 3018 else 3019 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 3020 if (Reg != -1) { 3021 ReplaceNode(N, CurDAG->getMachineNode( 3022 AArch64::MSR, DL, MVT::Other, 3023 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3024 N->getOperand(2), N->getOperand(0))); 3025 return true; 3026 } 3027 3028 return false; 3029 } 3030 3031 /// We've got special pseudo-instructions for these 3032 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3033 unsigned Opcode; 3034 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3035 3036 // Leave IR for LSE if subtarget supports it. 3037 if (Subtarget->hasLSE()) return false; 3038 3039 if (MemTy == MVT::i8) 3040 Opcode = AArch64::CMP_SWAP_8; 3041 else if (MemTy == MVT::i16) 3042 Opcode = AArch64::CMP_SWAP_16; 3043 else if (MemTy == MVT::i32) 3044 Opcode = AArch64::CMP_SWAP_32; 3045 else if (MemTy == MVT::i64) 3046 Opcode = AArch64::CMP_SWAP_64; 3047 else 3048 llvm_unreachable("Unknown AtomicCmpSwap type"); 3049 3050 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32; 3051 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3052 N->getOperand(0)}; 3053 SDNode *CmpSwap = CurDAG->getMachineNode( 3054 Opcode, SDLoc(N), 3055 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 3056 3057 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3058 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3059 3060 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3061 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3062 CurDAG->RemoveDeadNode(N); 3063 3064 return true; 3065 } 3066 3067 bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base, 3068 SDValue &Offset) { 3069 auto C = dyn_cast<ConstantSDNode>(N); 3070 if (!C) 3071 return false; 3072 3073 auto Ty = N->getValueType(0); 3074 3075 int64_t Imm = C->getSExtValue(); 3076 SDLoc DL(N); 3077 3078 if ((Imm >= -128) && (Imm <= 127)) { 3079 Base = CurDAG->getTargetConstant(Imm, DL, Ty); 3080 Offset = CurDAG->getTargetConstant(0, DL, Ty); 3081 return true; 3082 } 3083 3084 if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) { 3085 Base = CurDAG->getTargetConstant(Imm/256, DL, Ty); 3086 Offset = CurDAG->getTargetConstant(8, DL, Ty); 3087 return true; 3088 } 3089 3090 return false; 3091 } 3092 3093 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) { 3094 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3095 const int64_t ImmVal = CNode->getZExtValue(); 3096 SDLoc DL(N); 3097 3098 switch (VT.SimpleTy) { 3099 case MVT::i8: 3100 if ((ImmVal & 0xFF) == ImmVal) { 3101 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3102 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3103 return true; 3104 } 3105 break; 3106 case MVT::i16: 3107 case MVT::i32: 3108 case MVT::i64: 3109 if ((ImmVal & 0xFF) == ImmVal) { 3110 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3111 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3112 return true; 3113 } else if ((ImmVal & 0xFF00) == ImmVal) { 3114 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3115 Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32); 3116 return true; 3117 } 3118 break; 3119 default: 3120 break; 3121 } 3122 } 3123 3124 return false; 3125 } 3126 3127 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { 3128 if (auto 
CNode = dyn_cast<ConstantSDNode>(N)) { 3129 int64_t ImmVal = CNode->getSExtValue(); 3130 SDLoc DL(N); 3131 if (ImmVal >= -128 && ImmVal < 128) { 3132 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3133 return true; 3134 } 3135 } 3136 return false; 3137 } 3138 3139 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { 3140 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3141 uint64_t ImmVal = CNode->getZExtValue(); 3142 3143 switch (VT.SimpleTy) { 3144 case MVT::i8: 3145 ImmVal &= 0xFF; 3146 break; 3147 case MVT::i16: 3148 ImmVal &= 0xFFFF; 3149 break; 3150 case MVT::i32: 3151 ImmVal &= 0xFFFFFFFF; 3152 break; 3153 case MVT::i64: 3154 break; 3155 default: 3156 llvm_unreachable("Unexpected type"); 3157 } 3158 3159 if (ImmVal < 256) { 3160 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3161 return true; 3162 } 3163 } 3164 return false; 3165 } 3166 3167 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) { 3168 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3169 uint64_t ImmVal = CNode->getZExtValue(); 3170 SDLoc DL(N); 3171 3172 // Shift mask depending on type size. 3173 switch (VT.SimpleTy) { 3174 case MVT::i8: 3175 ImmVal &= 0xFF; 3176 ImmVal |= ImmVal << 8; 3177 ImmVal |= ImmVal << 16; 3178 ImmVal |= ImmVal << 32; 3179 break; 3180 case MVT::i16: 3181 ImmVal &= 0xFFFF; 3182 ImmVal |= ImmVal << 16; 3183 ImmVal |= ImmVal << 32; 3184 break; 3185 case MVT::i32: 3186 ImmVal &= 0xFFFFFFFF; 3187 ImmVal |= ImmVal << 32; 3188 break; 3189 case MVT::i64: 3190 break; 3191 default: 3192 llvm_unreachable("Unexpected type"); 3193 } 3194 3195 uint64_t encoding; 3196 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { 3197 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); 3198 return true; 3199 } 3200 } 3201 return false; 3202 } 3203 3204 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. 3205 // Rather than attempt to normalise everything we can sometimes saturate the 3206 // shift amount during selection. This function also allows for consistent 3207 // isel patterns by ensuring the resulting "Imm" node is of the i32 type 3208 // required by the instructions. 3209 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, 3210 uint64_t High, bool AllowSaturation, 3211 SDValue &Imm) { 3212 if (auto *CN = dyn_cast<ConstantSDNode>(N)) { 3213 uint64_t ImmVal = CN->getZExtValue(); 3214 3215 // Reject shift amounts that are too small. 3216 if (ImmVal < Low) 3217 return false; 3218 3219 // Reject or saturate shift amounts that are too big. 3220 if (ImmVal > High) { 3221 if (!AllowSaturation) 3222 return false; 3223 ImmVal = High; 3224 } 3225 3226 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3227 return true; 3228 } 3229 3230 return false; 3231 } 3232 3233 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 3234 // tagp(FrameIndex, IRGstack, tag_offset): 3235 // since the offset between FrameIndex and IRGstack is a compile-time 3236 // constant, this can be lowered to a single ADDG instruction. 
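// Sketch of the node built below: TAGPstack FI, #0, IRGstack, #tag_offset. The constant
// frame offset itself is presumably folded in later, when the frame index is eliminated,
// which is why a zero offset immediate is emitted here.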
3237 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 3238 return false; 3239 } 3240 3241 SDValue IRG_SP = N->getOperand(2); 3242 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 3243 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != 3244 Intrinsic::aarch64_irg_sp) { 3245 return false; 3246 } 3247 3248 const TargetLowering *TLI = getTargetLowering(); 3249 SDLoc DL(N); 3250 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 3251 SDValue FiOp = CurDAG->getTargetFrameIndex( 3252 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3253 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3254 3255 SDNode *Out = CurDAG->getMachineNode( 3256 AArch64::TAGPstack, DL, MVT::i64, 3257 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 3258 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3259 ReplaceNode(N, Out); 3260 return true; 3261 } 3262 3263 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 3264 assert(isa<ConstantSDNode>(N->getOperand(3)) && 3265 "llvm.aarch64.tagp third argument must be an immediate"); 3266 if (trySelectStackSlotTagP(N)) 3267 return; 3268 // FIXME: above applies in any case when offset between Op1 and Op2 is a 3269 // compile-time constant, not just for stack allocations. 3270 3271 // General case for unrelated pointers in Op1 and Op2. 3272 SDLoc DL(N); 3273 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3274 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 3275 {N->getOperand(1), N->getOperand(2)}); 3276 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 3277 {SDValue(N1, 0), N->getOperand(2)}); 3278 SDNode *N3 = CurDAG->getMachineNode( 3279 AArch64::ADDG, DL, MVT::i64, 3280 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 3281 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3282 ReplaceNode(N, N3); 3283 } 3284 3285 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length 3286 // vector types larger than NEON don't have a matching SubRegIndex. 3287 static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) { 3288 assert(V.getValueType().isScalableVector() && 3289 V.getValueType().getSizeInBits().getKnownMinSize() == 3290 AArch64::SVEBitsPerBlock && 3291 "Expected to extract from a packed scalable vector!"); 3292 assert(VT.isFixedLengthVector() && 3293 "Expected to extract a fixed length vector!"); 3294 3295 SDLoc DL(V); 3296 switch (VT.getSizeInBits()) { 3297 case 64: { 3298 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); 3299 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg); 3300 } 3301 case 128: { 3302 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32); 3303 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg); 3304 } 3305 default: { 3306 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 3307 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 3308 } 3309 } 3310 } 3311 3312 // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length 3313 // vector types larger than NEON don't have a matching SubRegIndex. 
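// For example (illustrative): a 256-bit fixed-length vector such as v8i32 has
// no matching sub-register index, so the default case in the helpers below
// falls back to COPY_TO_REGCLASS rather than an INSERT_SUBREG of dsub/zsub.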
static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
  assert(VT.isScalableVector() &&
         VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
         "Expected to insert into a packed scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected to insert a fixed length vector!");

  SDLoc DL(V);
  switch (V.getValueType().getSizeInBits()) {
  case 64: {
    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
                               SDValue(Container, 0), V, SubReg);
  }
  case 128: {
    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
                               SDValue(Container, 0), V, SubReg);
  }
  default: {
    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
  }
  }
}

void AArch64DAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, it has already been selected; nothing to do.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // A few opcodes need custom selection.
  EVT VT = Node->getValueType(0);

  switch (Node->getOpcode()) {
  default:
    break;

  case ISD::ATOMIC_CMP_SWAP:
    if (SelectCMP_SWAP(Node))
      return;
    break;

  case ISD::READ_REGISTER:
    if (tryReadRegister(Node))
      return;
    break;

  case ISD::WRITE_REGISTER:
    if (tryWriteRegister(Node))
      return;
    break;

  case ISD::ADD:
    if (tryMLAV64LaneV128(Node))
      return;
    break;

  case ISD::LOAD: {
    // Try to select as an indexed load. Fall through to normal processing
    // if we can't.
    if (tryIndexedLoad(Node))
      return;
    break;
  }

  case ISD::SRL:
  case ISD::AND:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (tryBitfieldExtractOp(Node))
      return;
    if (tryBitfieldInsertInZeroOp(Node))
      return;
    LLVM_FALLTHROUGH;
  case ISD::ROTR:
  case ISD::SHL:
    if (tryShiftAmountMod(Node))
      return;
    break;

  case ISD::SIGN_EXTEND:
    if (tryBitfieldExtractOpFromSExt(Node))
      return;
    break;

  case ISD::FP_EXTEND:
    if (tryHighFPExt(Node))
      return;
    break;

  case ISD::OR:
    if (tryBitfieldInsertOp(Node))
      return;
    break;

  case ISD::EXTRACT_SUBVECTOR: {
    // Bail when not a "cast" like extract_subvector.
    if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
      break;

    // Bail when normal isel can do the job.
    EVT InVT = Node->getOperand(0).getValueType();
    if (VT.isScalableVector() || InVT.isFixedLengthVector())
      break;

    // NOTE: We can only get here when doing fixed length SVE code generation.
    // We do manual selection because the types involved are not linked to real
    // registers (despite being legal) and must be coerced into SVE registers.
3428 // 3429 // NOTE: If the above changes, be aware that selection will still not work 3430 // because the td definition of extract_vector does not support extracting 3431 // a fixed length vector from a scalable vector. 3432 3433 ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0))); 3434 return; 3435 } 3436 3437 case ISD::INSERT_SUBVECTOR: { 3438 // Bail when not a "cast" like insert_subvector. 3439 if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0) 3440 break; 3441 if (!Node->getOperand(0).isUndef()) 3442 break; 3443 3444 // Bail when normal isel should do the job. 3445 EVT InVT = Node->getOperand(1).getValueType(); 3446 if (VT.isFixedLengthVector() || InVT.isScalableVector()) 3447 break; 3448 3449 // NOTE: We can only get here when doing fixed length SVE code generation. 3450 // We do manual selection because the types involved are not linked to real 3451 // registers (despite being legal) and must be coerced into SVE registers. 3452 // 3453 // NOTE: If the above changes, be aware that selection will still not work 3454 // because the td definition of insert_vector does not support inserting a 3455 // fixed length vector into a scalable vector. 3456 3457 ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1))); 3458 return; 3459 } 3460 3461 case ISD::Constant: { 3462 // Materialize zero constants as copies from WZR/XZR. This allows 3463 // the coalescer to propagate these into other instructions. 3464 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 3465 if (ConstNode->isNullValue()) { 3466 if (VT == MVT::i32) { 3467 SDValue New = CurDAG->getCopyFromReg( 3468 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 3469 ReplaceNode(Node, New.getNode()); 3470 return; 3471 } else if (VT == MVT::i64) { 3472 SDValue New = CurDAG->getCopyFromReg( 3473 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 3474 ReplaceNode(Node, New.getNode()); 3475 return; 3476 } 3477 } 3478 break; 3479 } 3480 3481 case ISD::FrameIndex: { 3482 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 3483 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 3484 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 3485 const TargetLowering *TLI = getTargetLowering(); 3486 SDValue TFI = CurDAG->getTargetFrameIndex( 3487 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3488 SDLoc DL(Node); 3489 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 3490 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 3491 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 3492 return; 3493 } 3494 case ISD::INTRINSIC_W_CHAIN: { 3495 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 3496 switch (IntNo) { 3497 default: 3498 break; 3499 case Intrinsic::aarch64_ldaxp: 3500 case Intrinsic::aarch64_ldxp: { 3501 unsigned Op = 3502 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 3503 SDValue MemAddr = Node->getOperand(2); 3504 SDLoc DL(Node); 3505 SDValue Chain = Node->getOperand(0); 3506 3507 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 3508 MVT::Other, MemAddr, Chain); 3509 3510 // Transfer memoperands. 3511 MachineMemOperand *MemOp = 3512 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3513 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 3514 ReplaceNode(Node, Ld); 3515 return; 3516 } 3517 case Intrinsic::aarch64_stlxp: 3518 case Intrinsic::aarch64_stxp: { 3519 unsigned Op = 3520 IntNo == Intrinsic::aarch64_stlxp ? 
AArch64::STLXPX : AArch64::STXPX; 3521 SDLoc DL(Node); 3522 SDValue Chain = Node->getOperand(0); 3523 SDValue ValLo = Node->getOperand(2); 3524 SDValue ValHi = Node->getOperand(3); 3525 SDValue MemAddr = Node->getOperand(4); 3526 3527 // Place arguments in the right order. 3528 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 3529 3530 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 3531 // Transfer memoperands. 3532 MachineMemOperand *MemOp = 3533 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3534 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 3535 3536 ReplaceNode(Node, St); 3537 return; 3538 } 3539 case Intrinsic::aarch64_neon_ld1x2: 3540 if (VT == MVT::v8i8) { 3541 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 3542 return; 3543 } else if (VT == MVT::v16i8) { 3544 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 3545 return; 3546 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3547 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 3548 return; 3549 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3550 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 3551 return; 3552 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3553 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 3554 return; 3555 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3556 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 3557 return; 3558 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3559 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3560 return; 3561 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3562 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 3563 return; 3564 } 3565 break; 3566 case Intrinsic::aarch64_neon_ld1x3: 3567 if (VT == MVT::v8i8) { 3568 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 3569 return; 3570 } else if (VT == MVT::v16i8) { 3571 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 3572 return; 3573 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3574 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 3575 return; 3576 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3577 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 3578 return; 3579 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3580 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 3581 return; 3582 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3583 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 3584 return; 3585 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3586 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3587 return; 3588 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3589 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 3590 return; 3591 } 3592 break; 3593 case Intrinsic::aarch64_neon_ld1x4: 3594 if (VT == MVT::v8i8) { 3595 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 3596 return; 3597 } else if (VT == MVT::v16i8) { 3598 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 3599 return; 3600 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3601 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 3602 return; 3603 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3604 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 3605 return; 3606 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3607 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 3608 
return; 3609 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3610 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 3611 return; 3612 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3613 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3614 return; 3615 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3616 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 3617 return; 3618 } 3619 break; 3620 case Intrinsic::aarch64_neon_ld2: 3621 if (VT == MVT::v8i8) { 3622 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 3623 return; 3624 } else if (VT == MVT::v16i8) { 3625 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 3626 return; 3627 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3628 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 3629 return; 3630 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3631 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 3632 return; 3633 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3634 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 3635 return; 3636 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3637 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 3638 return; 3639 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3640 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3641 return; 3642 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3643 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 3644 return; 3645 } 3646 break; 3647 case Intrinsic::aarch64_neon_ld3: 3648 if (VT == MVT::v8i8) { 3649 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 3650 return; 3651 } else if (VT == MVT::v16i8) { 3652 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 3653 return; 3654 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3655 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 3656 return; 3657 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3658 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 3659 return; 3660 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3661 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 3662 return; 3663 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3664 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 3665 return; 3666 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3667 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3668 return; 3669 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3670 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 3671 return; 3672 } 3673 break; 3674 case Intrinsic::aarch64_neon_ld4: 3675 if (VT == MVT::v8i8) { 3676 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 3677 return; 3678 } else if (VT == MVT::v16i8) { 3679 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 3680 return; 3681 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3682 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 3683 return; 3684 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3685 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 3686 return; 3687 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3688 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 3689 return; 3690 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3691 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 3692 return; 3693 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3694 SelectLoad(Node, 4, 
AArch64::LD1Fourv1d, AArch64::dsub0); 3695 return; 3696 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3697 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 3698 return; 3699 } 3700 break; 3701 case Intrinsic::aarch64_neon_ld2r: 3702 if (VT == MVT::v8i8) { 3703 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 3704 return; 3705 } else if (VT == MVT::v16i8) { 3706 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 3707 return; 3708 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3709 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 3710 return; 3711 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3712 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 3713 return; 3714 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3715 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 3716 return; 3717 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3718 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 3719 return; 3720 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3721 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 3722 return; 3723 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3724 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 3725 return; 3726 } 3727 break; 3728 case Intrinsic::aarch64_neon_ld3r: 3729 if (VT == MVT::v8i8) { 3730 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 3731 return; 3732 } else if (VT == MVT::v16i8) { 3733 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 3734 return; 3735 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3736 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 3737 return; 3738 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3739 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 3740 return; 3741 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3742 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 3743 return; 3744 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3745 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 3746 return; 3747 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3748 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 3749 return; 3750 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3751 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 3752 return; 3753 } 3754 break; 3755 case Intrinsic::aarch64_neon_ld4r: 3756 if (VT == MVT::v8i8) { 3757 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 3758 return; 3759 } else if (VT == MVT::v16i8) { 3760 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 3761 return; 3762 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3763 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 3764 return; 3765 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3766 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 3767 return; 3768 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3769 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 3770 return; 3771 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3772 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 3773 return; 3774 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3775 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 3776 return; 3777 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3778 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 3779 return; 3780 } 3781 break; 3782 case Intrinsic::aarch64_neon_ld2lane: 3783 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 
3784 SelectLoadLane(Node, 2, AArch64::LD2i8); 3785 return; 3786 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3787 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3788 SelectLoadLane(Node, 2, AArch64::LD2i16); 3789 return; 3790 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3791 VT == MVT::v2f32) { 3792 SelectLoadLane(Node, 2, AArch64::LD2i32); 3793 return; 3794 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3795 VT == MVT::v1f64) { 3796 SelectLoadLane(Node, 2, AArch64::LD2i64); 3797 return; 3798 } 3799 break; 3800 case Intrinsic::aarch64_neon_ld3lane: 3801 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3802 SelectLoadLane(Node, 3, AArch64::LD3i8); 3803 return; 3804 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3805 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3806 SelectLoadLane(Node, 3, AArch64::LD3i16); 3807 return; 3808 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3809 VT == MVT::v2f32) { 3810 SelectLoadLane(Node, 3, AArch64::LD3i32); 3811 return; 3812 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3813 VT == MVT::v1f64) { 3814 SelectLoadLane(Node, 3, AArch64::LD3i64); 3815 return; 3816 } 3817 break; 3818 case Intrinsic::aarch64_neon_ld4lane: 3819 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3820 SelectLoadLane(Node, 4, AArch64::LD4i8); 3821 return; 3822 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3823 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3824 SelectLoadLane(Node, 4, AArch64::LD4i16); 3825 return; 3826 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3827 VT == MVT::v2f32) { 3828 SelectLoadLane(Node, 4, AArch64::LD4i32); 3829 return; 3830 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3831 VT == MVT::v1f64) { 3832 SelectLoadLane(Node, 4, AArch64::LD4i64); 3833 return; 3834 } 3835 break; 3836 case Intrinsic::aarch64_ld64b: 3837 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); 3838 return; 3839 } 3840 } break; 3841 case ISD::INTRINSIC_WO_CHAIN: { 3842 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 3843 switch (IntNo) { 3844 default: 3845 break; 3846 case Intrinsic::aarch64_tagp: 3847 SelectTagP(Node); 3848 return; 3849 case Intrinsic::aarch64_neon_tbl2: 3850 SelectTable(Node, 2, 3851 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, 3852 false); 3853 return; 3854 case Intrinsic::aarch64_neon_tbl3: 3855 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 3856 : AArch64::TBLv16i8Three, 3857 false); 3858 return; 3859 case Intrinsic::aarch64_neon_tbl4: 3860 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 3861 : AArch64::TBLv16i8Four, 3862 false); 3863 return; 3864 case Intrinsic::aarch64_neon_tbx2: 3865 SelectTable(Node, 2, 3866 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, 3867 true); 3868 return; 3869 case Intrinsic::aarch64_neon_tbx3: 3870 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 3871 : AArch64::TBXv16i8Three, 3872 true); 3873 return; 3874 case Intrinsic::aarch64_neon_tbx4: 3875 SelectTable(Node, 4, VT == MVT::v8i8 ? 
AArch64::TBXv8i8Four 3876 : AArch64::TBXv16i8Four, 3877 true); 3878 return; 3879 case Intrinsic::aarch64_neon_smull: 3880 case Intrinsic::aarch64_neon_umull: 3881 if (tryMULLV64LaneV128(IntNo, Node)) 3882 return; 3883 break; 3884 } 3885 break; 3886 } 3887 case ISD::INTRINSIC_VOID: { 3888 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 3889 if (Node->getNumOperands() >= 3) 3890 VT = Node->getOperand(2)->getValueType(0); 3891 switch (IntNo) { 3892 default: 3893 break; 3894 case Intrinsic::aarch64_neon_st1x2: { 3895 if (VT == MVT::v8i8) { 3896 SelectStore(Node, 2, AArch64::ST1Twov8b); 3897 return; 3898 } else if (VT == MVT::v16i8) { 3899 SelectStore(Node, 2, AArch64::ST1Twov16b); 3900 return; 3901 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3902 VT == MVT::v4bf16) { 3903 SelectStore(Node, 2, AArch64::ST1Twov4h); 3904 return; 3905 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 3906 VT == MVT::v8bf16) { 3907 SelectStore(Node, 2, AArch64::ST1Twov8h); 3908 return; 3909 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3910 SelectStore(Node, 2, AArch64::ST1Twov2s); 3911 return; 3912 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3913 SelectStore(Node, 2, AArch64::ST1Twov4s); 3914 return; 3915 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3916 SelectStore(Node, 2, AArch64::ST1Twov2d); 3917 return; 3918 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3919 SelectStore(Node, 2, AArch64::ST1Twov1d); 3920 return; 3921 } 3922 break; 3923 } 3924 case Intrinsic::aarch64_neon_st1x3: { 3925 if (VT == MVT::v8i8) { 3926 SelectStore(Node, 3, AArch64::ST1Threev8b); 3927 return; 3928 } else if (VT == MVT::v16i8) { 3929 SelectStore(Node, 3, AArch64::ST1Threev16b); 3930 return; 3931 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3932 VT == MVT::v4bf16) { 3933 SelectStore(Node, 3, AArch64::ST1Threev4h); 3934 return; 3935 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 3936 VT == MVT::v8bf16) { 3937 SelectStore(Node, 3, AArch64::ST1Threev8h); 3938 return; 3939 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3940 SelectStore(Node, 3, AArch64::ST1Threev2s); 3941 return; 3942 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3943 SelectStore(Node, 3, AArch64::ST1Threev4s); 3944 return; 3945 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3946 SelectStore(Node, 3, AArch64::ST1Threev2d); 3947 return; 3948 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3949 SelectStore(Node, 3, AArch64::ST1Threev1d); 3950 return; 3951 } 3952 break; 3953 } 3954 case Intrinsic::aarch64_neon_st1x4: { 3955 if (VT == MVT::v8i8) { 3956 SelectStore(Node, 4, AArch64::ST1Fourv8b); 3957 return; 3958 } else if (VT == MVT::v16i8) { 3959 SelectStore(Node, 4, AArch64::ST1Fourv16b); 3960 return; 3961 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3962 VT == MVT::v4bf16) { 3963 SelectStore(Node, 4, AArch64::ST1Fourv4h); 3964 return; 3965 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 3966 VT == MVT::v8bf16) { 3967 SelectStore(Node, 4, AArch64::ST1Fourv8h); 3968 return; 3969 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3970 SelectStore(Node, 4, AArch64::ST1Fourv2s); 3971 return; 3972 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3973 SelectStore(Node, 4, AArch64::ST1Fourv4s); 3974 return; 3975 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3976 SelectStore(Node, 4, AArch64::ST1Fourv2d); 3977 return; 3978 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3979 SelectStore(Node, 4, AArch64::ST1Fourv1d); 3980 return; 3981 } 3982 break; 3983 } 3984 case 
Intrinsic::aarch64_neon_st2: { 3985 if (VT == MVT::v8i8) { 3986 SelectStore(Node, 2, AArch64::ST2Twov8b); 3987 return; 3988 } else if (VT == MVT::v16i8) { 3989 SelectStore(Node, 2, AArch64::ST2Twov16b); 3990 return; 3991 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3992 VT == MVT::v4bf16) { 3993 SelectStore(Node, 2, AArch64::ST2Twov4h); 3994 return; 3995 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 3996 VT == MVT::v8bf16) { 3997 SelectStore(Node, 2, AArch64::ST2Twov8h); 3998 return; 3999 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4000 SelectStore(Node, 2, AArch64::ST2Twov2s); 4001 return; 4002 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4003 SelectStore(Node, 2, AArch64::ST2Twov4s); 4004 return; 4005 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4006 SelectStore(Node, 2, AArch64::ST2Twov2d); 4007 return; 4008 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4009 SelectStore(Node, 2, AArch64::ST1Twov1d); 4010 return; 4011 } 4012 break; 4013 } 4014 case Intrinsic::aarch64_neon_st3: { 4015 if (VT == MVT::v8i8) { 4016 SelectStore(Node, 3, AArch64::ST3Threev8b); 4017 return; 4018 } else if (VT == MVT::v16i8) { 4019 SelectStore(Node, 3, AArch64::ST3Threev16b); 4020 return; 4021 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4022 VT == MVT::v4bf16) { 4023 SelectStore(Node, 3, AArch64::ST3Threev4h); 4024 return; 4025 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4026 VT == MVT::v8bf16) { 4027 SelectStore(Node, 3, AArch64::ST3Threev8h); 4028 return; 4029 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4030 SelectStore(Node, 3, AArch64::ST3Threev2s); 4031 return; 4032 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4033 SelectStore(Node, 3, AArch64::ST3Threev4s); 4034 return; 4035 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4036 SelectStore(Node, 3, AArch64::ST3Threev2d); 4037 return; 4038 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4039 SelectStore(Node, 3, AArch64::ST1Threev1d); 4040 return; 4041 } 4042 break; 4043 } 4044 case Intrinsic::aarch64_neon_st4: { 4045 if (VT == MVT::v8i8) { 4046 SelectStore(Node, 4, AArch64::ST4Fourv8b); 4047 return; 4048 } else if (VT == MVT::v16i8) { 4049 SelectStore(Node, 4, AArch64::ST4Fourv16b); 4050 return; 4051 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4052 VT == MVT::v4bf16) { 4053 SelectStore(Node, 4, AArch64::ST4Fourv4h); 4054 return; 4055 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4056 VT == MVT::v8bf16) { 4057 SelectStore(Node, 4, AArch64::ST4Fourv8h); 4058 return; 4059 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4060 SelectStore(Node, 4, AArch64::ST4Fourv2s); 4061 return; 4062 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4063 SelectStore(Node, 4, AArch64::ST4Fourv4s); 4064 return; 4065 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4066 SelectStore(Node, 4, AArch64::ST4Fourv2d); 4067 return; 4068 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4069 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4070 return; 4071 } 4072 break; 4073 } 4074 case Intrinsic::aarch64_neon_st2lane: { 4075 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4076 SelectStoreLane(Node, 2, AArch64::ST2i8); 4077 return; 4078 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4079 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4080 SelectStoreLane(Node, 2, AArch64::ST2i16); 4081 return; 4082 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4083 VT == MVT::v2f32) { 4084 SelectStoreLane(Node, 2, AArch64::ST2i32); 4085 return; 4086 } else if (VT == 
MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4087 VT == MVT::v1f64) { 4088 SelectStoreLane(Node, 2, AArch64::ST2i64); 4089 return; 4090 } 4091 break; 4092 } 4093 case Intrinsic::aarch64_neon_st3lane: { 4094 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4095 SelectStoreLane(Node, 3, AArch64::ST3i8); 4096 return; 4097 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4098 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4099 SelectStoreLane(Node, 3, AArch64::ST3i16); 4100 return; 4101 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4102 VT == MVT::v2f32) { 4103 SelectStoreLane(Node, 3, AArch64::ST3i32); 4104 return; 4105 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4106 VT == MVT::v1f64) { 4107 SelectStoreLane(Node, 3, AArch64::ST3i64); 4108 return; 4109 } 4110 break; 4111 } 4112 case Intrinsic::aarch64_neon_st4lane: { 4113 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4114 SelectStoreLane(Node, 4, AArch64::ST4i8); 4115 return; 4116 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4117 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4118 SelectStoreLane(Node, 4, AArch64::ST4i16); 4119 return; 4120 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4121 VT == MVT::v2f32) { 4122 SelectStoreLane(Node, 4, AArch64::ST4i32); 4123 return; 4124 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4125 VT == MVT::v1f64) { 4126 SelectStoreLane(Node, 4, AArch64::ST4i64); 4127 return; 4128 } 4129 break; 4130 } 4131 case Intrinsic::aarch64_sve_st2: { 4132 if (VT == MVT::nxv16i8) { 4133 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); 4134 return; 4135 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4136 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4137 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); 4138 return; 4139 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4140 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); 4141 return; 4142 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4143 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); 4144 return; 4145 } 4146 break; 4147 } 4148 case Intrinsic::aarch64_sve_st3: { 4149 if (VT == MVT::nxv16i8) { 4150 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); 4151 return; 4152 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4153 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4154 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); 4155 return; 4156 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4157 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); 4158 return; 4159 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4160 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM); 4161 return; 4162 } 4163 break; 4164 } 4165 case Intrinsic::aarch64_sve_st4: { 4166 if (VT == MVT::nxv16i8) { 4167 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); 4168 return; 4169 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4170 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4171 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); 4172 return; 4173 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4174 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); 4175 return; 4176 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4177 SelectPredicatedStore(Node, 4, 3, 
AArch64::ST4D, AArch64::ST4D_IMM); 4178 return; 4179 } 4180 break; 4181 } 4182 } 4183 break; 4184 } 4185 case AArch64ISD::LD2post: { 4186 if (VT == MVT::v8i8) { 4187 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 4188 return; 4189 } else if (VT == MVT::v16i8) { 4190 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 4191 return; 4192 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4193 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 4194 return; 4195 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4196 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 4197 return; 4198 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4199 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 4200 return; 4201 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4202 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 4203 return; 4204 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4205 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4206 return; 4207 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4208 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 4209 return; 4210 } 4211 break; 4212 } 4213 case AArch64ISD::LD3post: { 4214 if (VT == MVT::v8i8) { 4215 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 4216 return; 4217 } else if (VT == MVT::v16i8) { 4218 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 4219 return; 4220 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4221 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 4222 return; 4223 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4224 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 4225 return; 4226 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4227 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 4228 return; 4229 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4230 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 4231 return; 4232 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4233 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4234 return; 4235 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4236 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 4237 return; 4238 } 4239 break; 4240 } 4241 case AArch64ISD::LD4post: { 4242 if (VT == MVT::v8i8) { 4243 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 4244 return; 4245 } else if (VT == MVT::v16i8) { 4246 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 4247 return; 4248 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4249 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 4250 return; 4251 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4252 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 4253 return; 4254 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4255 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 4256 return; 4257 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4258 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 4259 return; 4260 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4261 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4262 return; 4263 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4264 
SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 4265 return; 4266 } 4267 break; 4268 } 4269 case AArch64ISD::LD1x2post: { 4270 if (VT == MVT::v8i8) { 4271 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 4272 return; 4273 } else if (VT == MVT::v16i8) { 4274 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 4275 return; 4276 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4277 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 4278 return; 4279 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4280 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 4281 return; 4282 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4283 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 4284 return; 4285 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4286 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 4287 return; 4288 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4289 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4290 return; 4291 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4292 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 4293 return; 4294 } 4295 break; 4296 } 4297 case AArch64ISD::LD1x3post: { 4298 if (VT == MVT::v8i8) { 4299 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 4300 return; 4301 } else if (VT == MVT::v16i8) { 4302 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 4303 return; 4304 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4305 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 4306 return; 4307 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4308 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 4309 return; 4310 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4311 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 4312 return; 4313 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4314 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 4315 return; 4316 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4317 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4318 return; 4319 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4320 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 4321 return; 4322 } 4323 break; 4324 } 4325 case AArch64ISD::LD1x4post: { 4326 if (VT == MVT::v8i8) { 4327 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 4328 return; 4329 } else if (VT == MVT::v16i8) { 4330 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); 4331 return; 4332 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4333 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 4334 return; 4335 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4336 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 4337 return; 4338 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4339 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 4340 return; 4341 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4342 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 4343 return; 4344 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4345 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4346 return; 4347 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4348 
SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 4349 return; 4350 } 4351 break; 4352 } 4353 case AArch64ISD::LD1DUPpost: { 4354 if (VT == MVT::v8i8) { 4355 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 4356 return; 4357 } else if (VT == MVT::v16i8) { 4358 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 4359 return; 4360 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4361 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 4362 return; 4363 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4364 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 4365 return; 4366 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4367 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 4368 return; 4369 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4370 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 4371 return; 4372 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4373 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 4374 return; 4375 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4376 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 4377 return; 4378 } 4379 break; 4380 } 4381 case AArch64ISD::LD2DUPpost: { 4382 if (VT == MVT::v8i8) { 4383 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 4384 return; 4385 } else if (VT == MVT::v16i8) { 4386 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 4387 return; 4388 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4389 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 4390 return; 4391 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4392 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 4393 return; 4394 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4395 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 4396 return; 4397 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4398 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 4399 return; 4400 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4401 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 4402 return; 4403 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4404 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 4405 return; 4406 } 4407 break; 4408 } 4409 case AArch64ISD::LD3DUPpost: { 4410 if (VT == MVT::v8i8) { 4411 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 4412 return; 4413 } else if (VT == MVT::v16i8) { 4414 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 4415 return; 4416 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4417 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); 4418 return; 4419 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4420 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 4421 return; 4422 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4423 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 4424 return; 4425 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4426 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 4427 return; 4428 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4429 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 4430 return; 4431 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4432 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 4433 
return; 4434 } 4435 break; 4436 } 4437 case AArch64ISD::LD4DUPpost: { 4438 if (VT == MVT::v8i8) { 4439 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 4440 return; 4441 } else if (VT == MVT::v16i8) { 4442 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 4443 return; 4444 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4445 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 4446 return; 4447 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4448 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 4449 return; 4450 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4451 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 4452 return; 4453 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4454 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 4455 return; 4456 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4457 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 4458 return; 4459 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4460 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 4461 return; 4462 } 4463 break; 4464 } 4465 case AArch64ISD::LD1LANEpost: { 4466 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4467 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 4468 return; 4469 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4470 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4471 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 4472 return; 4473 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4474 VT == MVT::v2f32) { 4475 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 4476 return; 4477 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4478 VT == MVT::v1f64) { 4479 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); 4480 return; 4481 } 4482 break; 4483 } 4484 case AArch64ISD::LD2LANEpost: { 4485 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4486 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); 4487 return; 4488 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4489 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4490 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); 4491 return; 4492 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4493 VT == MVT::v2f32) { 4494 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); 4495 return; 4496 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4497 VT == MVT::v1f64) { 4498 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); 4499 return; 4500 } 4501 break; 4502 } 4503 case AArch64ISD::LD3LANEpost: { 4504 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4505 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); 4506 return; 4507 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4508 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4509 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); 4510 return; 4511 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4512 VT == MVT::v2f32) { 4513 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); 4514 return; 4515 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4516 VT == MVT::v1f64) { 4517 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); 4518 return; 4519 } 4520 break; 4521 } 4522 case AArch64ISD::LD4LANEpost: { 4523 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4524 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); 4525 return; 4526 } else 
if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4527 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4528 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); 4529 return; 4530 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4531 VT == MVT::v2f32) { 4532 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); 4533 return; 4534 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4535 VT == MVT::v1f64) { 4536 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); 4537 return; 4538 } 4539 break; 4540 } 4541 case AArch64ISD::ST2post: { 4542 VT = Node->getOperand(1).getValueType(); 4543 if (VT == MVT::v8i8) { 4544 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); 4545 return; 4546 } else if (VT == MVT::v16i8) { 4547 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); 4548 return; 4549 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4550 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); 4551 return; 4552 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4553 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); 4554 return; 4555 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4556 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); 4557 return; 4558 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4559 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); 4560 return; 4561 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4562 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); 4563 return; 4564 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4565 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4566 return; 4567 } 4568 break; 4569 } 4570 case AArch64ISD::ST3post: { 4571 VT = Node->getOperand(1).getValueType(); 4572 if (VT == MVT::v8i8) { 4573 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); 4574 return; 4575 } else if (VT == MVT::v16i8) { 4576 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); 4577 return; 4578 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4579 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); 4580 return; 4581 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4582 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); 4583 return; 4584 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4585 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); 4586 return; 4587 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4588 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); 4589 return; 4590 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4591 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); 4592 return; 4593 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4594 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4595 return; 4596 } 4597 break; 4598 } 4599 case AArch64ISD::ST4post: { 4600 VT = Node->getOperand(1).getValueType(); 4601 if (VT == MVT::v8i8) { 4602 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); 4603 return; 4604 } else if (VT == MVT::v16i8) { 4605 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); 4606 return; 4607 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4608 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); 4609 return; 4610 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4611 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); 4612 return; 4613 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4614 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); 4615 return; 4616 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4617 
SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); 4618 return; 4619 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4620 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); 4621 return; 4622 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4623 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 4624 return; 4625 } 4626 break; 4627 } 4628 case AArch64ISD::ST1x2post: { 4629 VT = Node->getOperand(1).getValueType(); 4630 if (VT == MVT::v8i8) { 4631 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); 4632 return; 4633 } else if (VT == MVT::v16i8) { 4634 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); 4635 return; 4636 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4637 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); 4638 return; 4639 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4640 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); 4641 return; 4642 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4643 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); 4644 return; 4645 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4646 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); 4647 return; 4648 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4649 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4650 return; 4651 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4652 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); 4653 return; 4654 } 4655 break; 4656 } 4657 case AArch64ISD::ST1x3post: { 4658 VT = Node->getOperand(1).getValueType(); 4659 if (VT == MVT::v8i8) { 4660 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); 4661 return; 4662 } else if (VT == MVT::v16i8) { 4663 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); 4664 return; 4665 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4666 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); 4667 return; 4668 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) { 4669 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); 4670 return; 4671 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4672 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); 4673 return; 4674 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4675 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); 4676 return; 4677 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4678 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4679 return; 4680 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4681 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); 4682 return; 4683 } 4684 break; 4685 } 4686 case AArch64ISD::ST1x4post: { 4687 VT = Node->getOperand(1).getValueType(); 4688 if (VT == MVT::v8i8) { 4689 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); 4690 return; 4691 } else if (VT == MVT::v16i8) { 4692 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); 4693 return; 4694 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4695 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); 4696 return; 4697 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4698 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); 4699 return; 4700 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4701 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); 4702 return; 4703 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4704 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); 4705 return; 4706 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4707 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 4708 return; 4709 } else if (VT == MVT::v2i64 
|| VT == MVT::v2f64) { 4710 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); 4711 return; 4712 } 4713 break; 4714 } 4715 case AArch64ISD::ST2LANEpost: { 4716 VT = Node->getOperand(1).getValueType(); 4717 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4718 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); 4719 return; 4720 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4721 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4722 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); 4723 return; 4724 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4725 VT == MVT::v2f32) { 4726 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); 4727 return; 4728 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4729 VT == MVT::v1f64) { 4730 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); 4731 return; 4732 } 4733 break; 4734 } 4735 case AArch64ISD::ST3LANEpost: { 4736 VT = Node->getOperand(1).getValueType(); 4737 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4738 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); 4739 return; 4740 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4741 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4742 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); 4743 return; 4744 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4745 VT == MVT::v2f32) { 4746 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); 4747 return; 4748 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4749 VT == MVT::v1f64) { 4750 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); 4751 return; 4752 } 4753 break; 4754 } 4755 case AArch64ISD::ST4LANEpost: { 4756 VT = Node->getOperand(1).getValueType(); 4757 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4758 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); 4759 return; 4760 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4761 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4762 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); 4763 return; 4764 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4765 VT == MVT::v2f32) { 4766 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); 4767 return; 4768 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4769 VT == MVT::v1f64) { 4770 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); 4771 return; 4772 } 4773 break; 4774 } 4775 case AArch64ISD::SVE_LD2_MERGE_ZERO: { 4776 if (VT == MVT::nxv16i8) { 4777 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B); 4778 return; 4779 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4780 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4781 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H); 4782 return; 4783 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4784 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W); 4785 return; 4786 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4787 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D); 4788 return; 4789 } 4790 break; 4791 } 4792 case AArch64ISD::SVE_LD3_MERGE_ZERO: { 4793 if (VT == MVT::nxv16i8) { 4794 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B); 4795 return; 4796 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4797 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4798 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H); 4799 return; 4800 } else if (VT == MVT::nxv4i32 || VT == 
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated with a memory operation in \p
/// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID)
    return EVT();

  const unsigned IntNo =
      cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
  if (IntNo != Intrinsic::aarch64_sve_prf)
    return EVT();

  // We are using an SVE prefetch intrinsic. Type must be inferred
  // from the width of the predicate.
  return getPackedVectorTypeFromPredicateType(
      Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max,
/// where Root is the memory access using N for its address.
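/// For example, for MemVT == nxv4i32 (16 data bytes per 128-bit block), an
/// address of the form (add Base, (vscale 32)) selects Base with OffImm == 2,
/// provided 2 lies within [Min, Max].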
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8-bit data does not come with an SHL node, so it is treated separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}