//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}

  StringRef getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  bool tryMLAV64LaneV128(SDNode *N);
  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }
  template <unsigned Size, unsigned Max>
  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
    // Test if there is an appropriate addressing mode and check if the
    // immediate fits.
    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (Found) {
      if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
        int64_t C = CI->getSExtValue();
        if (C <= Max)
          return true;
      }
    }

    // Otherwise, base only, materialize address in register.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  bool SelectDupZeroOrUndef(SDValue N) {
    switch(N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
        if (CN->isZero())
          return true;
      if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
        if (CN->isZero())
          return true;
      break;
    }
    default:
      break;
    }

    return false;
  }

  bool SelectDupZero(SDValue N) {
    switch(N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
        if (CN->isZero())
          return true;
      if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
        if (CN->isZero())
          return true;
      break;
    }
    }

    return false;
  }

  template<MVT::SimpleValueType VT>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Invert = false>
  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    return SelectSVELogicalImm(N, VT, Imm, Invert);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    return SelectSVEArithImm(N, VT, Imm);
  }

  template <unsigned Low, unsigned High, bool AllowSaturation = false>
  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  }

  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
  template<signed Min, signed Max, signed Scale, bool Shift>
  bool SelectCntImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    if (Shift)
      MulImm = 1LL << MulImm;

    if ((MulImm % std::abs(Scale)) != 0)
      return false;

    MulImm /= Scale;
    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  template <signed Max, signed Scale>
  bool SelectEXTImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();

    if (MulImm >= 0 && MulImm <= Max) {
      MulImm *= Scale;
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that element
  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);
  // Form a sequence of SVE registers for instructions using a list of vectors,
  // e.g. structured loads and stores (ldN, stN).
  SDValue createZTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri,
                            bool IsIntr = false);

  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  /// SVE Reg+Imm addressing mode.
  template <int64_t Min, int64_t Max>
  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
                                SDValue &OffImm);
  /// SVE Reg+Reg address mode.
  template <unsigned Scale>
  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  }

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
                           const SDValue &OldBase, const SDValue &OldOffset,
                           unsigned Scale);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);
  bool tryHighFPExt(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);

  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);

  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);

  bool SelectAllActivePredicate(SDValue N);
};
} // end anonymous namespace

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so, Imm will receive the 32-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if the value is a constant
// operand. If so, Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding a logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}

/// Determine whether it is worth folding V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;
  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the addressing mode and save a cycle.
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
      isWorthFoldingSHL(V))
    return true;
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to the default of "LSL 0". The
/// logical instructions allow the shifted register to be rotated, but the
/// arithmetic instructions do not. The AllowROR parameter specifies whether
/// ROR is supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFolding(N);
  }

  return false;
}

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}

// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
      DL->getOpcode() != AArch64ISD::DUPLANE32)
    return false;

  SDValue SV = DL->getOperand(0);
  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
    return false;

  SDValue EV = SV.getOperand(1);
  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
  LaneOp = EV.getOperand(0);

  return true;
}

// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is
// a high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
                             SDValue &LaneOp, int &LaneIdx) {

  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    std::swap(Op0, Op1);
    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
      return false;
  }
  StdOp = Op1;
  return true;
}

/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
/// is a lane in the upper half of a 128-bit vector. Recognize and select this
/// so that we don't emit unnecessary lane extracts.
bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
  int LaneIdx = -1; // Will hold the lane index.

  if (Op1.getOpcode() != ISD::MUL ||
      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                        LaneIdx)) {
    std::swap(Op0, Op1);
    if (Op1.getOpcode() != ISD::MUL ||
        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                          LaneIdx))
      return false;
  }

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };

  unsigned MLAOpc = ~0U;

  switch (N->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unrecognized MLA.");
  case MVT::v4i16:
    MLAOpc = AArch64::MLAv4i16_indexed;
    break;
  case MVT::v8i16:
    MLAOpc = AArch64::MLAv8i16_indexed;
    break;
  case MVT::v2i32:
    MLAOpc = AArch64::MLAv2i32_indexed;
    break;
  case MVT::v4i32:
    MLAOpc = AArch64::MLAv4i32_indexed;
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
  return true;
}

bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
  SDLoc dl(N);
  SDValue SMULLOp0;
  SDValue SMULLOp1;
  int LaneIdx;

  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
                        LaneIdx))
    return false;

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };

  unsigned SMULLOpc = ~0U;

  if (IntNo == Intrinsic::aarch64_neon_smull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::SMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::SMULLv2i32_indexed;
      break;
    }
  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::UMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::UMULLv2i32_indexed;
      break;
    }
  } else
    llvm_unreachable("Unrecognized intrinsic.");

  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
  return true;
}

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                               dl, MVT::i32, N, SubReg);
  return SDValue(Node, 0);
}

// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template<signed Low, signed High, signed Scale>
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((MulImm % std::abs(Scale)) == 0) {
    int64_t RDVLImm = MulImm / Scale;
    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if free 32-bit -> 64-bit zext can be used instead.
    if (Ext == AArch64_AM::UXTW &&
        Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point
/// in folding it into the mem op. Theoretically, it shouldn't matter, but
/// there's a single pseudo-instruction for an ADRP/ADD pair so over-aggressive
/// folding leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
      return false;
  }

  return true;
}

/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed
/// BW-bit immediate" address. The "Size" argument is the size in bytes of the
/// memory reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
  // signed addressing mode selected here doesn't support labels/immediates,
  // only base+offset.
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      if (IsSignedImm) {
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      } else {
        // unsigned Immediate
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Size);
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base, SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    if (GAN->getOffset() % Size == 0 &&
        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
      return true;
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address. This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode. The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    // If the offset is valid as a scaled immediate, don't match here.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
        RHSC < (0x1000 << Log2_32(Size)))
      return false;
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
      return true;
    }
  }
  return false;
}

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  MachineSDNode *Node = CurDAG->getMachineNode(
      TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
  return SDValue(Node, 0);
}

/// Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFolding(N);
}

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD of LSL #12", ignore such
    // constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable into an
  // ADD/SUB either. In that case the [BaseReg + 0] address mode is used and
  // code like the following is generated:
  //     MOV  X0, WideImmediate
  //     ADD  X1, BaseReg, X0
  //     LDR  X2, [X1, 0]
  // In such a situation, the [BaseReg, XReg] addressing mode saves one
  // ADD/SUB:
  //     MOV  X0, WideImmediate
  //     LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
    unsigned Scale = Log2_32(Size);
    // Skip if the immediate can be selected by the load/store addressing
    // mode, or if it can be encoded by a single ADD (SUB is also covered by
    // checking -ImmOff).
    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
                                         AArch64::ZPR3RegClassID,
                                         AArch64::ZPR4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}

bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDLoc dl(N);
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});

  // Either way, we're replacing the node, so tell the caller that.
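  // Result #1 of the new machine node is the loaded value; if the extending
  // load was widened to an i64 destination above, wrap it in a SUBREG_TO_REG
  // so its users see an i64.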
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                                     unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(2), // Mem operand;
                   Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

  // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
  // because it's too simple to have needed special treatment during lowering.
  if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
    MachineMemOperand *MemOp = MemIntr->getMemOperand();
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
  }

  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(1), // Mem operand
                   N->getOperand(2), // Incremental
                   Chain};

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1)
    ReplaceUses(SDValue(N, 0), SuperReg);
  else
    for (unsigned i = 0; i < NumVecs; ++i)
      ReplaceUses(SDValue(N, i),
                  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT,
                                                 SuperReg));

  // Update the chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Optimize \param OldBase and \param OldOffset selecting the best addressing
/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
/// new Base and an SDValue representing the new offset.
std::tuple<unsigned, SDValue, SDValue>
AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
                                              unsigned Opc_ri,
                                              const SDValue &OldBase,
                                              const SDValue &OldOffset,
                                              unsigned Scale) {
  SDValue NewBase = OldBase;
  SDValue NewOffset = OldOffset;
  // Detect a possible Reg+Imm addressing mode.
  const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
      N, OldBase, NewBase, NewOffset);

  // Detect a possible reg+reg addressing mode, but only if we haven't already
  // detected a Reg+Imm one.
  const bool IsRegReg =
      !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);

  // Select the instruction.
  return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
}

void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
                                               unsigned Scale, unsigned Opc_ri,
                                               unsigned Opc_rr, bool IsIntr) {
  assert(Scale < 4 && "Invalid scaling value.");
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  // Optimize addressing mode.
  SDValue Base, Offset;
  unsigned Opc;
  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
      CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);

  SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
                   Base,                          // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                      unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
                                                unsigned Scale, unsigned Opc_rr,
                                                unsigned Opc_ri) {
  SDLoc dl(N);

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = createZTuple(Regs);

  // Optimize addressing mode.
1552 unsigned Opc; 1553 SDValue Offset, Base; 1554 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1555 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), 1556 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); 1557 1558 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate 1559 Base, // address 1560 Offset, // offset 1561 N->getOperand(0)}; // chain 1562 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1563 1564 ReplaceNode(N, St); 1565 } 1566 1567 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, 1568 SDValue &OffImm) { 1569 SDLoc dl(N); 1570 const DataLayout &DL = CurDAG->getDataLayout(); 1571 const TargetLowering *TLI = getTargetLowering(); 1572 1573 // Try to match it for the frame address 1574 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) { 1575 int FI = FINode->getIndex(); 1576 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1577 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1578 return true; 1579 } 1580 1581 return false; 1582 } 1583 1584 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 1585 unsigned Opc) { 1586 SDLoc dl(N); 1587 EVT VT = N->getOperand(2)->getValueType(0); 1588 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1589 MVT::Other}; // Type for the Chain 1590 1591 // Form a REG_SEQUENCE to force register allocation. 1592 bool Is128Bit = VT.getSizeInBits() == 128; 1593 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1594 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1595 1596 SDValue Ops[] = {RegSeq, 1597 N->getOperand(NumVecs + 1), // base register 1598 N->getOperand(NumVecs + 2), // Incremental 1599 N->getOperand(0)}; // Chain 1600 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1601 1602 ReplaceNode(N, St); 1603 } 1604 1605 namespace { 1606 /// WidenVector - Given a value in the V64 register class, produce the 1607 /// equivalent value in the V128 register class. 1608 class WidenVector { 1609 SelectionDAG &DAG; 1610 1611 public: 1612 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 1613 1614 SDValue operator()(SDValue V64Reg) { 1615 EVT VT = V64Reg.getValueType(); 1616 unsigned NarrowSize = VT.getVectorNumElements(); 1617 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1618 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 1619 SDLoc DL(V64Reg); 1620 1621 SDValue Undef = 1622 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 1623 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 1624 } 1625 }; 1626 } // namespace 1627 1628 /// NarrowVector - Given a value in the V128 register class, produce the 1629 /// equivalent value in the V64 register class. 1630 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 1631 EVT VT = V128Reg.getValueType(); 1632 unsigned WideSize = VT.getVectorNumElements(); 1633 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1634 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 1635 1636 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 1637 V128Reg); 1638 } 1639 1640 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 1641 unsigned Opc) { 1642 SDLoc dl(N); 1643 EVT VT = N->getValueType(0); 1644 bool Narrow = VT.getSizeInBits() == 64; 1645 1646 // Form a REG_SEQUENCE to force register allocation. 
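  // (The structured load-lane instructions consume a list of consecutive
  // vector registers, so the individual operands are first tied together with
  // a REG_SEQUENCE and the register allocator is forced to pick a consecutive
  // run of Q registers.)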
1647 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1648 1649 if (Narrow) 1650 transform(Regs, Regs.begin(), 1651 WidenVector(*CurDAG)); 1652 1653 SDValue RegSeq = createQTuple(Regs); 1654 1655 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1656 1657 unsigned LaneNo = 1658 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1659 1660 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1661 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1662 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1663 SDValue SuperReg = SDValue(Ld, 0); 1664 1665 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1666 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1667 AArch64::qsub2, AArch64::qsub3 }; 1668 for (unsigned i = 0; i < NumVecs; ++i) { 1669 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 1670 if (Narrow) 1671 NV = NarrowVector(NV, *CurDAG); 1672 ReplaceUses(SDValue(N, i), NV); 1673 } 1674 1675 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1676 CurDAG->RemoveDeadNode(N); 1677 } 1678 1679 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 1680 unsigned Opc) { 1681 SDLoc dl(N); 1682 EVT VT = N->getValueType(0); 1683 bool Narrow = VT.getSizeInBits() == 64; 1684 1685 // Form a REG_SEQUENCE to force register allocation. 1686 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1687 1688 if (Narrow) 1689 transform(Regs, Regs.begin(), 1690 WidenVector(*CurDAG)); 1691 1692 SDValue RegSeq = createQTuple(Regs); 1693 1694 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1695 RegSeq->getValueType(0), MVT::Other}; 1696 1697 unsigned LaneNo = 1698 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1699 1700 SDValue Ops[] = {RegSeq, 1701 CurDAG->getTargetConstant(LaneNo, dl, 1702 MVT::i64), // Lane Number 1703 N->getOperand(NumVecs + 2), // Base register 1704 N->getOperand(NumVecs + 3), // Incremental 1705 N->getOperand(0)}; 1706 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1707 1708 // Update uses of the write back register 1709 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1710 1711 // Update uses of the vector list 1712 SDValue SuperReg = SDValue(Ld, 1); 1713 if (NumVecs == 1) { 1714 ReplaceUses(SDValue(N, 0), 1715 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 1716 } else { 1717 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1718 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1719 AArch64::qsub2, AArch64::qsub3 }; 1720 for (unsigned i = 0; i < NumVecs; ++i) { 1721 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 1722 SuperReg); 1723 if (Narrow) 1724 NV = NarrowVector(NV, *CurDAG); 1725 ReplaceUses(SDValue(N, i), NV); 1726 } 1727 } 1728 1729 // Update the Chain 1730 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1731 CurDAG->RemoveDeadNode(N); 1732 } 1733 1734 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 1735 unsigned Opc) { 1736 SDLoc dl(N); 1737 EVT VT = N->getOperand(2)->getValueType(0); 1738 bool Narrow = VT.getSizeInBits() == 64; 1739 1740 // Form a REG_SEQUENCE to force register allocation. 
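  // (The lane forms only operate on 128-bit registers, which is why 64-bit
  // operands are widened via WidenVector below; the lane index is unaffected
  // because the original elements occupy the low half of the wider register.)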
1741 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1742 1743 if (Narrow) 1744 transform(Regs, Regs.begin(), 1745 WidenVector(*CurDAG)); 1746 1747 SDValue RegSeq = createQTuple(Regs); 1748 1749 unsigned LaneNo = 1750 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1751 1752 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1753 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1754 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 1755 1756 // Transfer memoperands. 1757 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1758 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1759 1760 ReplaceNode(N, St); 1761 } 1762 1763 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 1764 unsigned Opc) { 1765 SDLoc dl(N); 1766 EVT VT = N->getOperand(2)->getValueType(0); 1767 bool Narrow = VT.getSizeInBits() == 64; 1768 1769 // Form a REG_SEQUENCE to force register allocation. 1770 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1771 1772 if (Narrow) 1773 transform(Regs, Regs.begin(), 1774 WidenVector(*CurDAG)); 1775 1776 SDValue RegSeq = createQTuple(Regs); 1777 1778 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1779 MVT::Other}; 1780 1781 unsigned LaneNo = 1782 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1783 1784 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1785 N->getOperand(NumVecs + 2), // Base Register 1786 N->getOperand(NumVecs + 3), // Incremental 1787 N->getOperand(0)}; 1788 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1789 1790 // Transfer memoperands. 1791 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1792 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1793 1794 ReplaceNode(N, St); 1795 } 1796 1797 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 1798 unsigned &Opc, SDValue &Opd0, 1799 unsigned &LSB, unsigned &MSB, 1800 unsigned NumberOfIgnoredLowBits, 1801 bool BiggerPattern) { 1802 assert(N->getOpcode() == ISD::AND && 1803 "N must be a AND operation to call this function"); 1804 1805 EVT VT = N->getValueType(0); 1806 1807 // Here we can test the type of VT and return false when the type does not 1808 // match, but since it is done prior to that call in the current context 1809 // we turned that into an assert to avoid redundant code. 1810 assert((VT == MVT::i32 || VT == MVT::i64) && 1811 "Type checking must have been done before calling this function"); 1812 1813 // FIXME: simplify-demanded-bits in DAGCombine will probably have 1814 // changed the AND node to a 32-bit mask operation. We'll have to 1815 // undo that as part of the transform here if we want to catch all 1816 // the opportunities. 1817 // Currently the NumberOfIgnoredLowBits argument helps to recover 1818 // form these situations when matching bigger pattern (bitfield insert). 1819 1820 // For unsigned extracts, check for a shift right and mask 1821 uint64_t AndImm = 0; 1822 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) 1823 return false; 1824 1825 const SDNode *Op0 = N->getOperand(0).getNode(); 1826 1827 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 1828 // simplified. 
Try to undo that 1829 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 1830 1831 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 1832 if (AndImm & (AndImm + 1)) 1833 return false; 1834 1835 bool ClampMSB = false; 1836 uint64_t SrlImm = 0; 1837 // Handle the SRL + ANY_EXTEND case. 1838 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 1839 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 1840 // Extend the incoming operand of the SRL to 64-bit. 1841 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 1842 // Make sure to clamp the MSB so that we preserve the semantics of the 1843 // original operations. 1844 ClampMSB = true; 1845 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 1846 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 1847 SrlImm)) { 1848 // If the shift result was truncated, we can still combine them. 1849 Opd0 = Op0->getOperand(0).getOperand(0); 1850 1851 // Use the type of SRL node. 1852 VT = Opd0->getValueType(0); 1853 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 1854 Opd0 = Op0->getOperand(0); 1855 } else if (BiggerPattern) { 1856 // Let's pretend a 0 shift right has been performed. 1857 // The resulting code will be at least as good as the original one 1858 // plus it may expose more opportunities for bitfield insert pattern. 1859 // FIXME: Currently we limit this to the bigger pattern, because 1860 // some optimizations expect AND and not UBFM. 1861 Opd0 = N->getOperand(0); 1862 } else 1863 return false; 1864 1865 // Bail out on large immediates. This happens when no proper 1866 // combining/constant folding was performed. 1867 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { 1868 LLVM_DEBUG( 1869 (dbgs() << N 1870 << ": Found large shift immediate, this should not happen\n")); 1871 return false; 1872 } 1873 1874 LSB = SrlImm; 1875 MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm) 1876 : countTrailingOnes<uint64_t>(AndImm)) - 1877 1; 1878 if (ClampMSB) 1879 // Since we're moving the extend before the right shift operation, we need 1880 // to clamp the MSB to make sure we don't shift in undefined bits instead of 1881 // the zeros which would get shifted in with the original right shift 1882 // operation. 1883 MSB = MSB > 31 ? 31 : MSB; 1884 1885 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 1886 return true; 1887 } 1888 1889 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, 1890 SDValue &Opd0, unsigned &Immr, 1891 unsigned &Imms) { 1892 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); 1893 1894 EVT VT = N->getValueType(0); 1895 unsigned BitWidth = VT.getSizeInBits(); 1896 assert((VT == MVT::i32 || VT == MVT::i64) && 1897 "Type checking must have been done before calling this function"); 1898 1899 SDValue Op = N->getOperand(0); 1900 if (Op->getOpcode() == ISD::TRUNCATE) { 1901 Op = Op->getOperand(0); 1902 VT = Op->getValueType(0); 1903 BitWidth = VT.getSizeInBits(); 1904 } 1905 1906 uint64_t ShiftImm; 1907 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && 1908 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 1909 return false; 1910 1911 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1912 if (ShiftImm + Width > BitWidth) 1913 return false; 1914 1915 Opc = (VT == MVT::i32) ? 
AArch64::SBFMWri : AArch64::SBFMXri; 1916 Opd0 = Op.getOperand(0); 1917 Immr = ShiftImm; 1918 Imms = ShiftImm + Width - 1; 1919 return true; 1920 } 1921 1922 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 1923 SDValue &Opd0, unsigned &LSB, 1924 unsigned &MSB) { 1925 // We are looking for the following pattern which basically extracts several 1926 // continuous bits from the source value and places it from the LSB of the 1927 // destination value, all other bits of the destination value or set to zero: 1928 // 1929 // Value2 = AND Value, MaskImm 1930 // SRL Value2, ShiftImm 1931 // 1932 // with MaskImm >> ShiftImm to search for the bit width. 1933 // 1934 // This gets selected into a single UBFM: 1935 // 1936 // UBFM Value, ShiftImm, BitWide + SrlImm -1 1937 // 1938 1939 if (N->getOpcode() != ISD::SRL) 1940 return false; 1941 1942 uint64_t AndMask = 0; 1943 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) 1944 return false; 1945 1946 Opd0 = N->getOperand(0).getOperand(0); 1947 1948 uint64_t SrlImm = 0; 1949 if (!isIntImmediate(N->getOperand(1), SrlImm)) 1950 return false; 1951 1952 // Check whether we really have several bits extract here. 1953 unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm)); 1954 if (BitWide && isMask_64(AndMask >> SrlImm)) { 1955 if (N->getValueType(0) == MVT::i32) 1956 Opc = AArch64::UBFMWri; 1957 else 1958 Opc = AArch64::UBFMXri; 1959 1960 LSB = SrlImm; 1961 MSB = BitWide + SrlImm - 1; 1962 return true; 1963 } 1964 1965 return false; 1966 } 1967 1968 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 1969 unsigned &Immr, unsigned &Imms, 1970 bool BiggerPattern) { 1971 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 1972 "N must be a SHR/SRA operation to call this function"); 1973 1974 EVT VT = N->getValueType(0); 1975 1976 // Here we can test the type of VT and return false when the type does not 1977 // match, but since it is done prior to that call in the current context 1978 // we turned that into an assert to avoid redundant code. 1979 assert((VT == MVT::i32 || VT == MVT::i64) && 1980 "Type checking must have been done before calling this function"); 1981 1982 // Check for AND + SRL doing several bits extract. 1983 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 1984 return true; 1985 1986 // We're looking for a shift of a shift. 1987 uint64_t ShlImm = 0; 1988 uint64_t TruncBits = 0; 1989 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 1990 Opd0 = N->getOperand(0).getOperand(0); 1991 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 1992 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 1993 // We are looking for a shift of truncate. Truncate from i64 to i32 could 1994 // be considered as setting high 32 bits as zero. Our strategy here is to 1995 // always generate 64bit UBFM. This consistency will help the CSE pass 1996 // later find more redundancy. 1997 Opd0 = N->getOperand(0).getOperand(0); 1998 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 1999 VT = Opd0.getValueType(); 2000 assert(VT == MVT::i64 && "the promoted type should be i64"); 2001 } else if (BiggerPattern) { 2002 // Let's pretend a 0 shift left has been performed. 
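    // (That is, treat the operand as if it had been shifted left by zero so
    // that the enclosing SRL/SRA can still be folded into a single bitfield
    // move when matching the bigger pattern.)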
2003 // FIXME: Currently we limit this to the bigger pattern case, 2004 // because some optimizations expect AND and not UBFM 2005 Opd0 = N->getOperand(0); 2006 } else 2007 return false; 2008 2009 // Missing combines/constant folding may have left us with strange 2010 // constants. 2011 if (ShlImm >= VT.getSizeInBits()) { 2012 LLVM_DEBUG( 2013 (dbgs() << N 2014 << ": Found large shift immediate, this should not happen\n")); 2015 return false; 2016 } 2017 2018 uint64_t SrlImm = 0; 2019 if (!isIntImmediate(N->getOperand(1), SrlImm)) 2020 return false; 2021 2022 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 2023 "bad amount in shift node!"); 2024 int immr = SrlImm - ShlImm; 2025 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 2026 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 2027 // SRA requires a signed extraction 2028 if (VT == MVT::i32) 2029 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 2030 else 2031 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; 2032 return true; 2033 } 2034 2035 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 2036 assert(N->getOpcode() == ISD::SIGN_EXTEND); 2037 2038 EVT VT = N->getValueType(0); 2039 EVT NarrowVT = N->getOperand(0)->getValueType(0); 2040 if (VT != MVT::i64 || NarrowVT != MVT::i32) 2041 return false; 2042 2043 uint64_t ShiftImm; 2044 SDValue Op = N->getOperand(0); 2045 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2046 return false; 2047 2048 SDLoc dl(N); 2049 // Extend the incoming operand of the shift to 64-bits. 2050 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 2051 unsigned Immr = ShiftImm; 2052 unsigned Imms = NarrowVT.getSizeInBits() - 1; 2053 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2054 CurDAG->getTargetConstant(Imms, dl, VT)}; 2055 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 2056 return true; 2057 } 2058 2059 /// Try to form fcvtl2 instructions from a floating-point extend of a high-half 2060 /// extract of a subvector. 2061 bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) { 2062 assert(N->getOpcode() == ISD::FP_EXTEND); 2063 2064 // There are 2 forms of fcvtl2 - extend to double or extend to float. 2065 SDValue Extract = N->getOperand(0); 2066 EVT VT = N->getValueType(0); 2067 EVT NarrowVT = Extract.getValueType(); 2068 if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) && 2069 (VT != MVT::v4f32 || NarrowVT != MVT::v4f16)) 2070 return false; 2071 2072 // Optionally look past a bitcast. 2073 Extract = peekThroughBitcasts(Extract); 2074 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) 2075 return false; 2076 2077 // Match extract from start of high half index. 2078 // Example: v8i16 -> v4i16 means the extract must begin at index 4. 2079 unsigned ExtractIndex = Extract.getConstantOperandVal(1); 2080 if (ExtractIndex != Extract.getValueType().getVectorNumElements()) 2081 return false; 2082 2083 auto Opcode = VT == MVT::v2f64 ? 
AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16; 2084 CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0)); 2085 return true; 2086 } 2087 2088 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 2089 SDValue &Opd0, unsigned &Immr, unsigned &Imms, 2090 unsigned NumberOfIgnoredLowBits = 0, 2091 bool BiggerPattern = false) { 2092 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 2093 return false; 2094 2095 switch (N->getOpcode()) { 2096 default: 2097 if (!N->isMachineOpcode()) 2098 return false; 2099 break; 2100 case ISD::AND: 2101 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, 2102 NumberOfIgnoredLowBits, BiggerPattern); 2103 case ISD::SRL: 2104 case ISD::SRA: 2105 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); 2106 2107 case ISD::SIGN_EXTEND_INREG: 2108 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); 2109 } 2110 2111 unsigned NOpc = N->getMachineOpcode(); 2112 switch (NOpc) { 2113 default: 2114 return false; 2115 case AArch64::SBFMWri: 2116 case AArch64::UBFMWri: 2117 case AArch64::SBFMXri: 2118 case AArch64::UBFMXri: 2119 Opc = NOpc; 2120 Opd0 = N->getOperand(0); 2121 Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 2122 Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 2123 return true; 2124 } 2125 // Unreachable 2126 return false; 2127 } 2128 2129 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { 2130 unsigned Opc, Immr, Imms; 2131 SDValue Opd0; 2132 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) 2133 return false; 2134 2135 EVT VT = N->getValueType(0); 2136 SDLoc dl(N); 2137 2138 // If the bit extract operation is 64bit but the original type is 32bit, we 2139 // need to add one EXTRACT_SUBREG. 2140 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { 2141 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), 2142 CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; 2143 2144 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); 2145 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 2146 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, 2147 MVT::i32, SDValue(BFM, 0), SubReg)); 2148 return true; 2149 } 2150 2151 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2152 CurDAG->getTargetConstant(Imms, dl, VT)}; 2153 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2154 return true; 2155 } 2156 2157 /// Does DstMask form a complementary pair with the mask provided by 2158 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, 2159 /// this asks whether DstMask zeroes precisely those bits that will be set by 2160 /// the other half. 2161 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, 2162 unsigned NumberOfIgnoredHighBits, EVT VT) { 2163 assert((VT == MVT::i32 || VT == MVT::i64) && 2164 "i32 or i64 mask type expected!"); 2165 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; 2166 2167 APInt SignificantDstMask = APInt(BitWidth, DstMask); 2168 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); 2169 2170 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && 2171 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes(); 2172 } 2173 2174 // Look for bits that will be useful for later uses. 2175 // A bit is consider useless as soon as it is dropped and never used 2176 // before it as been dropped. 
2177 // E.g., looking for useful bit of x 2178 // 1. y = x & 0x7 2179 // 2. z = y >> 2 2180 // After #1, x useful bits are 0x7, then the useful bits of x, live through 2181 // y. 2182 // After #2, the useful bits of x are 0x4. 2183 // However, if x is used on an unpredicatable instruction, then all its bits 2184 // are useful. 2185 // E.g. 2186 // 1. y = x & 0x7 2187 // 2. z = y >> 2 2188 // 3. str x, [@x] 2189 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); 2190 2191 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, 2192 unsigned Depth) { 2193 uint64_t Imm = 2194 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2195 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); 2196 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); 2197 getUsefulBits(Op, UsefulBits, Depth + 1); 2198 } 2199 2200 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, 2201 uint64_t Imm, uint64_t MSB, 2202 unsigned Depth) { 2203 // inherit the bitwidth value 2204 APInt OpUsefulBits(UsefulBits); 2205 OpUsefulBits = 1; 2206 2207 if (MSB >= Imm) { 2208 OpUsefulBits <<= MSB - Imm + 1; 2209 --OpUsefulBits; 2210 // The interesting part will be in the lower part of the result 2211 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2212 // The interesting part was starting at Imm in the argument 2213 OpUsefulBits <<= Imm; 2214 } else { 2215 OpUsefulBits <<= MSB + 1; 2216 --OpUsefulBits; 2217 // The interesting part will be shifted in the result 2218 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; 2219 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2220 // The interesting part was at zero in the argument 2221 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); 2222 } 2223 2224 UsefulBits &= OpUsefulBits; 2225 } 2226 2227 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, 2228 unsigned Depth) { 2229 uint64_t Imm = 2230 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2231 uint64_t MSB = 2232 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2233 2234 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 2235 } 2236 2237 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, 2238 unsigned Depth) { 2239 uint64_t ShiftTypeAndValue = 2240 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2241 APInt Mask(UsefulBits); 2242 Mask.clearAllBits(); 2243 Mask.flipAllBits(); 2244 2245 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { 2246 // Shift Left 2247 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2248 Mask <<= ShiftAmt; 2249 getUsefulBits(Op, Mask, Depth + 1); 2250 Mask.lshrInPlace(ShiftAmt); 2251 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 2252 // Shift Right 2253 // We do not handle AArch64_AM::ASR, because the sign will change the 2254 // number of useful bits 2255 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2256 Mask.lshrInPlace(ShiftAmt); 2257 getUsefulBits(Op, Mask, Depth + 1); 2258 Mask <<= ShiftAmt; 2259 } else 2260 return; 2261 2262 UsefulBits &= Mask; 2263 } 2264 2265 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 2266 unsigned Depth) { 2267 uint64_t Imm = 2268 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2269 uint64_t MSB = 2270 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 2271 2272 APInt 
OpUsefulBits(UsefulBits); 2273 OpUsefulBits = 1; 2274 2275 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 2276 ResultUsefulBits.flipAllBits(); 2277 APInt Mask(UsefulBits.getBitWidth(), 0); 2278 2279 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 2280 2281 if (MSB >= Imm) { 2282 // The instruction is a BFXIL. 2283 uint64_t Width = MSB - Imm + 1; 2284 uint64_t LSB = Imm; 2285 2286 OpUsefulBits <<= Width; 2287 --OpUsefulBits; 2288 2289 if (Op.getOperand(1) == Orig) { 2290 // Copy the low bits from the result to bits starting from LSB. 2291 Mask = ResultUsefulBits & OpUsefulBits; 2292 Mask <<= LSB; 2293 } 2294 2295 if (Op.getOperand(0) == Orig) 2296 // Bits starting from LSB in the input contribute to the result. 2297 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2298 } else { 2299 // The instruction is a BFI. 2300 uint64_t Width = MSB + 1; 2301 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 2302 2303 OpUsefulBits <<= Width; 2304 --OpUsefulBits; 2305 OpUsefulBits <<= LSB; 2306 2307 if (Op.getOperand(1) == Orig) { 2308 // Copy the bits from the result to the zero bits. 2309 Mask = ResultUsefulBits & OpUsefulBits; 2310 Mask.lshrInPlace(LSB); 2311 } 2312 2313 if (Op.getOperand(0) == Orig) 2314 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2315 } 2316 2317 UsefulBits &= Mask; 2318 } 2319 2320 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2321 SDValue Orig, unsigned Depth) { 2322 2323 // Users of this node should have already been instruction selected 2324 // FIXME: Can we turn that into an assert? 2325 if (!UserNode->isMachineOpcode()) 2326 return; 2327 2328 switch (UserNode->getMachineOpcode()) { 2329 default: 2330 return; 2331 case AArch64::ANDSWri: 2332 case AArch64::ANDSXri: 2333 case AArch64::ANDWri: 2334 case AArch64::ANDXri: 2335 // We increment Depth only when we call the getUsefulBits 2336 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2337 Depth); 2338 case AArch64::UBFMWri: 2339 case AArch64::UBFMXri: 2340 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2341 2342 case AArch64::ORRWrs: 2343 case AArch64::ORRXrs: 2344 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig) 2345 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2346 Depth); 2347 return; 2348 case AArch64::BFMWri: 2349 case AArch64::BFMXri: 2350 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2351 2352 case AArch64::STRBBui: 2353 case AArch64::STURBBi: 2354 if (UserNode->getOperand(0) != Orig) 2355 return; 2356 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); 2357 return; 2358 2359 case AArch64::STRHHui: 2360 case AArch64::STURHHi: 2361 if (UserNode->getOperand(0) != Orig) 2362 return; 2363 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2364 return; 2365 } 2366 } 2367 2368 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2369 if (Depth >= SelectionDAG::MaxRecursionDepth) 2370 return; 2371 // Initialize UsefulBits 2372 if (!Depth) { 2373 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2374 // At the beginning, assume every produced bits is useful 2375 UsefulBits = APInt(Bitwidth, 0); 2376 UsefulBits.flipAllBits(); 2377 } 2378 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2379 2380 for (SDNode *Node : Op.getNode()->uses()) { 2381 // A use cannot produce useful bits 2382 APInt UsefulBitsForUse = APInt(UsefulBits); 2383 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 2384 UsersUsefulBits |= UsefulBitsForUse; 2385 } 2386 // UsefulBits 
contains the produced bits that are meaningful for the 2387 // current definition, thus a user cannot make a bit meaningful at 2388 // this point 2389 UsefulBits &= UsersUsefulBits; 2390 } 2391 2392 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 2393 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 2394 /// 0, return Op unchanged. 2395 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 2396 if (ShlAmount == 0) 2397 return Op; 2398 2399 EVT VT = Op.getValueType(); 2400 SDLoc dl(Op); 2401 unsigned BitWidth = VT.getSizeInBits(); 2402 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2403 2404 SDNode *ShiftNode; 2405 if (ShlAmount > 0) { 2406 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 2407 ShiftNode = CurDAG->getMachineNode( 2408 UBFMOpc, dl, VT, Op, 2409 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 2410 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 2411 } else { 2412 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 2413 assert(ShlAmount < 0 && "expected right shift"); 2414 int ShrAmount = -ShlAmount; 2415 ShiftNode = CurDAG->getMachineNode( 2416 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 2417 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 2418 } 2419 2420 return SDValue(ShiftNode, 0); 2421 } 2422 2423 /// Does this tree qualify as an attempt to move a bitfield into position, 2424 /// essentially "(and (shl VAL, N), Mask)". 2425 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 2426 bool BiggerPattern, 2427 SDValue &Src, int &ShiftAmount, 2428 int &MaskWidth) { 2429 EVT VT = Op.getValueType(); 2430 unsigned BitWidth = VT.getSizeInBits(); 2431 (void)BitWidth; 2432 assert(BitWidth == 32 || BitWidth == 64); 2433 2434 KnownBits Known = CurDAG->computeKnownBits(Op); 2435 2436 // Non-zero in the sense that they're not provably zero, which is the key 2437 // point if we want to use this value 2438 uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 2439 2440 // Discard a constant AND mask if present. It's safe because the node will 2441 // already have been factored into the computeKnownBits calculation above. 2442 uint64_t AndImm; 2443 if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { 2444 assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0); 2445 Op = Op.getOperand(0); 2446 } 2447 2448 // Don't match if the SHL has more than one use, since then we'll end up 2449 // generating SHL+UBFIZ instead of just keeping SHL+AND. 2450 if (!BiggerPattern && !Op.hasOneUse()) 2451 return false; 2452 2453 uint64_t ShlImm; 2454 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 2455 return false; 2456 Op = Op.getOperand(0); 2457 2458 if (!isShiftedMask_64(NonZeroBits)) 2459 return false; 2460 2461 ShiftAmount = countTrailingZeros(NonZeroBits); 2462 MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); 2463 2464 // BFI encompasses sufficiently many nodes that it's worth inserting an extra 2465 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL 2466 // amount. BiggerPattern is true when this pattern is being matched for BFI, 2467 // BiggerPattern is false when this pattern is being matched for UBFIZ, in 2468 // which case it is not profitable to insert an extra shift. 
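  // Worked example (sketch): for "(x << 3) & 0x70" the non-zero bits give
  // ShiftAmount == 4 and MaskWidth == 3 while ShlImm == 3, so a one-bit right
  // shift of x is materialized via getLeftShift() below; that extra shift is
  // only considered profitable for the bigger BFI pattern.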
2469 if (ShlImm - ShiftAmount != 0 && !BiggerPattern) 2470 return false; 2471 Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); 2472 2473 return true; 2474 } 2475 2476 static bool isShiftedMask(uint64_t Mask, EVT VT) { 2477 assert(VT == MVT::i32 || VT == MVT::i64); 2478 if (VT == MVT::i32) 2479 return isShiftedMask_32(Mask); 2480 return isShiftedMask_64(Mask); 2481 } 2482 2483 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 2484 // inserted only sets known zero bits. 2485 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 2486 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2487 2488 EVT VT = N->getValueType(0); 2489 if (VT != MVT::i32 && VT != MVT::i64) 2490 return false; 2491 2492 unsigned BitWidth = VT.getSizeInBits(); 2493 2494 uint64_t OrImm; 2495 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 2496 return false; 2497 2498 // Skip this transformation if the ORR immediate can be encoded in the ORR. 2499 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 2500 // performance neutral. 2501 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 2502 return false; 2503 2504 uint64_t MaskImm; 2505 SDValue And = N->getOperand(0); 2506 // Must be a single use AND with an immediate operand. 2507 if (!And.hasOneUse() || 2508 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 2509 return false; 2510 2511 // Compute the Known Zero for the AND as this allows us to catch more general 2512 // cases than just looking for AND with imm. 2513 KnownBits Known = CurDAG->computeKnownBits(And); 2514 2515 // Non-zero in the sense that they're not provably zero, which is the key 2516 // point if we want to use this value. 2517 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 2518 2519 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 2520 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 2521 return false; 2522 2523 // The bits being inserted must only set those bits that are known to be zero. 2524 if ((OrImm & NotKnownZero) != 0) { 2525 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 2526 // currently handle this case. 2527 return false; 2528 } 2529 2530 // BFI/BFXIL dst, src, #lsb, #width. 2531 int LSB = countTrailingOnes(NotKnownZero); 2532 int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation(); 2533 2534 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 2535 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2536 unsigned ImmS = Width - 1; 2537 2538 // If we're creating a BFI instruction avoid cases where we need more 2539 // instructions to materialize the BFI constant as compared to the original 2540 // ORR. A BFXIL will use the same constant as the original ORR, so the code 2541 // should be no worse in this case. 2542 bool IsBFI = LSB != 0; 2543 uint64_t BFIImm = OrImm >> LSB; 2544 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 2545 // We have a BFI instruction and we know the constant can't be materialized 2546 // with a ORR-immediate with the zero register. 2547 unsigned OrChunks = 0, BFIChunks = 0; 2548 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 2549 if (((OrImm >> Shift) & 0xFFFF) != 0) 2550 ++OrChunks; 2551 if (((BFIImm >> Shift) & 0xFFFF) != 0) 2552 ++BFIChunks; 2553 } 2554 if (BFIChunks > OrChunks) 2555 return false; 2556 } 2557 2558 // Materialize the constant to be inserted. 2559 SDLoc DL(N); 2560 unsigned MOVIOpc = VT == MVT::i32 ? 
AArch64::MOVi32imm : AArch64::MOVi64imm; 2561 SDNode *MOVI = CurDAG->getMachineNode( 2562 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); 2563 2564 // Create the BFI/BFXIL instruction. 2565 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), 2566 CurDAG->getTargetConstant(ImmR, DL, VT), 2567 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2568 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2569 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2570 return true; 2571 } 2572 2573 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, 2574 SelectionDAG *CurDAG) { 2575 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2576 2577 EVT VT = N->getValueType(0); 2578 if (VT != MVT::i32 && VT != MVT::i64) 2579 return false; 2580 2581 unsigned BitWidth = VT.getSizeInBits(); 2582 2583 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 2584 // have the expected shape. Try to undo that. 2585 2586 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); 2587 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); 2588 2589 // Given a OR operation, check if we have the following pattern 2590 // ubfm c, b, imm, imm2 (or something that does the same jobs, see 2591 // isBitfieldExtractOp) 2592 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and 2593 // countTrailingZeros(mask2) == imm2 - imm + 1 2594 // f = d | c 2595 // if yes, replace the OR instruction with: 2596 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2 2597 2598 // OR is commutative, check all combinations of operand order and values of 2599 // BiggerPattern, i.e. 2600 // Opd0, Opd1, BiggerPattern=false 2601 // Opd1, Opd0, BiggerPattern=false 2602 // Opd0, Opd1, BiggerPattern=true 2603 // Opd1, Opd0, BiggerPattern=true 2604 // Several of these combinations may match, so check with BiggerPattern=false 2605 // first since that will produce better results by matching more instructions 2606 // and/or inserting fewer extra instructions. 2607 for (int I = 0; I < 4; ++I) { 2608 2609 SDValue Dst, Src; 2610 unsigned ImmR, ImmS; 2611 bool BiggerPattern = I / 2; 2612 SDValue OrOpd0Val = N->getOperand(I % 2); 2613 SDNode *OrOpd0 = OrOpd0Val.getNode(); 2614 SDValue OrOpd1Val = N->getOperand((I + 1) % 2); 2615 SDNode *OrOpd1 = OrOpd1Val.getNode(); 2616 2617 unsigned BFXOpc; 2618 int DstLSB, Width; 2619 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, 2620 NumberOfIgnoredLowBits, BiggerPattern)) { 2621 // Check that the returned opcode is compatible with the pattern, 2622 // i.e., same type and zero extended (U and not S) 2623 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || 2624 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) 2625 continue; 2626 2627 // Compute the width of the bitfield insertion 2628 DstLSB = 0; 2629 Width = ImmS - ImmR + 1; 2630 // FIXME: This constraint is to catch bitfield insertion we may 2631 // want to widen the pattern if we want to grab general bitfied 2632 // move case 2633 if (Width <= 0) 2634 continue; 2635 2636 // If the mask on the insertee is correct, we have a BFXIL operation. We 2637 // can share the ImmR and ImmS values from the already-computed UBFM. 
2638 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, 2639 BiggerPattern, 2640 Src, DstLSB, Width)) { 2641 ImmR = (BitWidth - DstLSB) % BitWidth; 2642 ImmS = Width - 1; 2643 } else 2644 continue; 2645 2646 // Check the second part of the pattern 2647 EVT VT = OrOpd1Val.getValueType(); 2648 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); 2649 2650 // Compute the Known Zero for the candidate of the first operand. 2651 // This allows to catch more general case than just looking for 2652 // AND with imm. Indeed, simplify-demanded-bits may have removed 2653 // the AND instruction because it proves it was useless. 2654 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 2655 2656 // Check if there is enough room for the second operand to appear 2657 // in the first one 2658 APInt BitsToBeInserted = 2659 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 2660 2661 if ((BitsToBeInserted & ~Known.Zero) != 0) 2662 continue; 2663 2664 // Set the first operand 2665 uint64_t Imm; 2666 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 2667 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 2668 // In that case, we can eliminate the AND 2669 Dst = OrOpd1->getOperand(0); 2670 else 2671 // Maybe the AND has been removed by simplify-demanded-bits 2672 // or is useful because it discards more bits 2673 Dst = OrOpd1Val; 2674 2675 // both parts match 2676 SDLoc DL(N); 2677 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 2678 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2679 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2680 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2681 return true; 2682 } 2683 2684 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 2685 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 2686 // mask (e.g., 0x000ffff0). 2687 uint64_t Mask0Imm, Mask1Imm; 2688 SDValue And0 = N->getOperand(0); 2689 SDValue And1 = N->getOperand(1); 2690 if (And0.hasOneUse() && And1.hasOneUse() && 2691 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 2692 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 2693 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 2694 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 2695 2696 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 2697 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 2698 // bits to be inserted. 2699 if (isShiftedMask(Mask0Imm, VT)) { 2700 std::swap(And0, And1); 2701 std::swap(Mask0Imm, Mask1Imm); 2702 } 2703 2704 SDValue Src = And1->getOperand(0); 2705 SDValue Dst = And0->getOperand(0); 2706 unsigned LSB = countTrailingZeros(Mask1Imm); 2707 int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation(); 2708 2709 // The BFXIL inserts the low-order bits from a source register, so right 2710 // shift the needed bits into place. 2711 SDLoc DL(N); 2712 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2713 SDNode *LSR = CurDAG->getMachineNode( 2714 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT), 2715 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 2716 2717 // BFXIL is an alias of BFM, so translate to BFM operands. 2718 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2719 unsigned ImmS = Width - 1; 2720 2721 // Create the BFXIL instruction. 
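    // (Worked example for i32, assuming Mask1Imm == 0x000ffff0: LSB == 4 and
    // Width == 16, so the UBFM above shifts Y right by four and the BFM below
    // copies bits [4, 19] of Y over bits [4, 19] of X.)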
2722 SDValue Ops[] = {Dst, SDValue(LSR, 0), 2723 CurDAG->getTargetConstant(ImmR, DL, VT), 2724 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2725 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2726 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2727 return true; 2728 } 2729 2730 return false; 2731 } 2732 2733 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 2734 if (N->getOpcode() != ISD::OR) 2735 return false; 2736 2737 APInt NUsefulBits; 2738 getUsefulBits(SDValue(N, 0), NUsefulBits); 2739 2740 // If all bits are not useful, just return UNDEF. 2741 if (!NUsefulBits) { 2742 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 2743 return true; 2744 } 2745 2746 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 2747 return true; 2748 2749 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 2750 } 2751 2752 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 2753 /// equivalent of a left shift by a constant amount followed by an and masking 2754 /// out a contiguous set of bits. 2755 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 2756 if (N->getOpcode() != ISD::AND) 2757 return false; 2758 2759 EVT VT = N->getValueType(0); 2760 if (VT != MVT::i32 && VT != MVT::i64) 2761 return false; 2762 2763 SDValue Op0; 2764 int DstLSB, Width; 2765 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 2766 Op0, DstLSB, Width)) 2767 return false; 2768 2769 // ImmR is the rotate right amount. 2770 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 2771 // ImmS is the most significant bit of the source to be moved. 2772 unsigned ImmS = Width - 1; 2773 2774 SDLoc DL(N); 2775 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 2776 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2777 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2778 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2779 return true; 2780 } 2781 2782 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 2783 /// variable shift/rotate instructions. 2784 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 2785 EVT VT = N->getValueType(0); 2786 2787 unsigned Opc; 2788 switch (N->getOpcode()) { 2789 case ISD::ROTR: 2790 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 2791 break; 2792 case ISD::SHL: 2793 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 2794 break; 2795 case ISD::SRL: 2796 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 2797 break; 2798 case ISD::SRA: 2799 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 2800 break; 2801 default: 2802 return false; 2803 } 2804 2805 uint64_t Size; 2806 uint64_t Bits; 2807 if (VT == MVT::i32) { 2808 Bits = 5; 2809 Size = 32; 2810 } else if (VT == MVT::i64) { 2811 Bits = 6; 2812 Size = 64; 2813 } else 2814 return false; 2815 2816 SDValue ShiftAmt = N->getOperand(1); 2817 SDLoc DL(N); 2818 SDValue NewShiftAmt; 2819 2820 // Skip over an extend of the shift amount. 2821 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 2822 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 2823 ShiftAmt = ShiftAmt->getOperand(0); 2824 2825 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 2826 SDValue Add0 = ShiftAmt->getOperand(0); 2827 SDValue Add1 = ShiftAmt->getOperand(1); 2828 uint64_t Add0Imm; 2829 uint64_t Add1Imm; 2830 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 2831 // to avoid the ADD/SUB. 
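    // (E.g. for i64, where Size == 64: a shift by "y + 64" only depends on
    // the low 6 bits of the amount, so the variable-shift instruction can
    // take "y" directly and the ADD is dropped.)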
2832 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) 2833 NewShiftAmt = Add0; 2834 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2835 // generate a NEG instead of a SUB of a constant. 2836 else if (ShiftAmt->getOpcode() == ISD::SUB && 2837 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 2838 (Add0Imm % Size == 0)) { 2839 unsigned NegOpc; 2840 unsigned ZeroReg; 2841 EVT SubVT = ShiftAmt->getValueType(0); 2842 if (SubVT == MVT::i32) { 2843 NegOpc = AArch64::SUBWrr; 2844 ZeroReg = AArch64::WZR; 2845 } else { 2846 assert(SubVT == MVT::i64); 2847 NegOpc = AArch64::SUBXrr; 2848 ZeroReg = AArch64::XZR; 2849 } 2850 SDValue Zero = 2851 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 2852 MachineSDNode *Neg = 2853 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 2854 NewShiftAmt = SDValue(Neg, 0); 2855 } else 2856 return false; 2857 } else { 2858 // If the shift amount is masked with an AND, check that the mask covers the 2859 // bits that are implicitly ANDed off by the above opcodes and if so, skip 2860 // the AND. 2861 uint64_t MaskImm; 2862 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && 2863 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) 2864 return false; 2865 2866 if (countTrailingOnes(MaskImm) < Bits) 2867 return false; 2868 2869 NewShiftAmt = ShiftAmt->getOperand(0); 2870 } 2871 2872 // Narrow/widen the shift amount to match the size of the shift operation. 2873 if (VT == MVT::i32) 2874 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 2875 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 2876 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 2877 MachineSDNode *Ext = CurDAG->getMachineNode( 2878 AArch64::SUBREG_TO_REG, DL, VT, 2879 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 2880 NewShiftAmt = SDValue(Ext, 0); 2881 } 2882 2883 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 2884 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2885 return true; 2886 } 2887 2888 bool 2889 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 2890 unsigned RegWidth) { 2891 APFloat FVal(0.0); 2892 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 2893 FVal = CN->getValueAPF(); 2894 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 2895 // Some otherwise illegal constants are allowed in this case. 2896 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || 2897 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) 2898 return false; 2899 2900 ConstantPoolSDNode *CN = 2901 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); 2902 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); 2903 } else 2904 return false; 2905 2906 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits 2907 // is between 1 and 32 for a destination w-register, or 1 and 64 for an 2908 // x-register. 2909 // 2910 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we 2911 // want THIS_NODE to be 2^fbits. This is much easier to deal with using 2912 // integers. 2913 bool IsExact; 2914 2915 // fbits is between 1 and 64 in the worst-case, which means the fmul 2916 // could have 2^64 as an actual operand. Need 65 bits of precision. 2917 APSInt IntVal(65, true); 2918 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); 2919 2920 // N.b. isPowerOf2 also checks for > 0. 
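  // (For instance, an fmul by 256.0 feeding the fp_to_[su]int yields
  // IntVal == 2^8, so FBits == 8 below and the #8 fixed-point conversion is
  // selected.)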
  if (!IsExact || !IntVal.isPowerOf2()) return false;
  unsigned FBits = IntVal.logBase2();

  // Checks above should have guaranteed that we haven't lost information in
  // finding FBits, but it must still be in range.
  if (FBits == 0 || FBits > RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
  return true;
}

// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
// fields of the string, obtains the integer values from them, and combines
// these into a single value to be used in the MRS/MSR instruction.
static int getIntOperandFromRegisterString(StringRef RegString) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() == 1)
    return -1;

  assert(Fields.size() == 5
            && "Invalid number of fields in read register string");

  SmallVector<int, 5> Ops;
  bool AllIntFields = true;

  for (StringRef Field : Fields) {
    unsigned IntField;
    AllIntFields &= !Field.getAsInteger(10, IntField);
    Ops.push_back(IntField);
  }

  assert(AllIntFields &&
          "Unexpected non-integer value in special register string.");
  (void)AllIntFields;

  // Need to combine the integer fields of the string into a single value
  // based on the bit encoding of MRS/MSR instruction.
  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
         (Ops[3] << 3) | (Ops[4]);
}

// Lower the read_register intrinsic to an MRS instruction node if the special
// register string argument is either of the form detailed in the ALCE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  int Reg = getIntOperandFromRegisterString(RegString->getString());
  if (Reg != -1) {
    ReplaceNode(N, CurDAG->getMachineNode(
                       AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                       N->getOperand(0)));
    return true;
  }

  // Use the sysreg mapper to map the remaining possible strings to the
  // value for the register to be used for the instruction operand.
  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
  if (TheReg && TheReg->Readable &&
      TheReg->haveFeatures(Subtarget->getFeatureBits()))
    Reg = TheReg->Encoding;
  else
    Reg = AArch64SysReg::parseGenericRegister(RegString->getString());

  if (Reg != -1) {
    ReplaceNode(N, CurDAG->getMachineNode(
                       AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                       N->getOperand(0)));
    return true;
  }

  if (RegString->getString() == "pc") {
    ReplaceNode(N, CurDAG->getMachineNode(
                       AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
                       CurDAG->getTargetConstant(0, DL, MVT::i32),
                       N->getOperand(0)));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ALCE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  int Reg = getIntOperandFromRegisterString(RegString->getString());
  if (Reg != -1) {
    ReplaceNode(
        N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
                                  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                                  N->getOperand(2), N->getOperand(0)));
    return true;
  }

  // Check if the register was one of those allowed as the pstatefield value in
  // the MSR (immediate) instruction. To accept the values allowed in the
  // pstatefield for the MSR (immediate) instruction, we also require that an
  // immediate value has been provided as an argument; we know that this is
  // the case, as it has been ensured by semantic checking.
  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
  if (PMapper) {
    assert (isa<ConstantSDNode>(N->getOperand(2))
              && "Expected a constant integer expression.");
    unsigned Reg = PMapper->Encoding;
    uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
    unsigned State;
    if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO ||
        Reg == AArch64PState::SSBS) {
      assert(Immed < 2 && "Bad imm");
      State = AArch64::MSRpstateImm1;
    } else {
      assert(Immed < 16 && "Bad imm");
      State = AArch64::MSRpstateImm4;
    }
    ReplaceNode(N, CurDAG->getMachineNode(
                       State, DL, MVT::Other,
                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                       CurDAG->getTargetConstant(Immed, DL, MVT::i16),
                       N->getOperand(0)));
    return true;
  }

  // Use the sysreg mapper to attempt to map the remaining possible strings
  // to the value for the register to be used for the MSR (register)
  // instruction operand.
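  // (For example, a named register string such as "TPIDR_EL0" is expected to
  // resolve through the SysReg table lookup below, while a generic encoding
  // written as "s3_4_c15_c0_1" would be handled by parseGenericRegister; the
  // exact accepted spellings are defined by those helpers, not here.)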
3058 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 3059 if (TheReg && TheReg->Writeable && 3060 TheReg->haveFeatures(Subtarget->getFeatureBits())) 3061 Reg = TheReg->Encoding; 3062 else 3063 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 3064 if (Reg != -1) { 3065 ReplaceNode(N, CurDAG->getMachineNode( 3066 AArch64::MSR, DL, MVT::Other, 3067 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3068 N->getOperand(2), N->getOperand(0))); 3069 return true; 3070 } 3071 3072 return false; 3073 } 3074 3075 /// We've got special pseudo-instructions for these 3076 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3077 unsigned Opcode; 3078 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3079 3080 // Leave IR for LSE if subtarget supports it. 3081 if (Subtarget->hasLSE()) return false; 3082 3083 if (MemTy == MVT::i8) 3084 Opcode = AArch64::CMP_SWAP_8; 3085 else if (MemTy == MVT::i16) 3086 Opcode = AArch64::CMP_SWAP_16; 3087 else if (MemTy == MVT::i32) 3088 Opcode = AArch64::CMP_SWAP_32; 3089 else if (MemTy == MVT::i64) 3090 Opcode = AArch64::CMP_SWAP_64; 3091 else 3092 llvm_unreachable("Unknown AtomicCmpSwap type"); 3093 3094 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32; 3095 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3096 N->getOperand(0)}; 3097 SDNode *CmpSwap = CurDAG->getMachineNode( 3098 Opcode, SDLoc(N), 3099 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 3100 3101 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3102 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3103 3104 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3105 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3106 CurDAG->RemoveDeadNode(N); 3107 3108 return true; 3109 } 3110 3111 bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base, 3112 SDValue &Offset) { 3113 auto C = dyn_cast<ConstantSDNode>(N); 3114 if (!C) 3115 return false; 3116 3117 auto Ty = N->getValueType(0); 3118 3119 int64_t Imm = C->getSExtValue(); 3120 SDLoc DL(N); 3121 3122 if ((Imm >= -128) && (Imm <= 127)) { 3123 Base = CurDAG->getTargetConstant(Imm, DL, Ty); 3124 Offset = CurDAG->getTargetConstant(0, DL, Ty); 3125 return true; 3126 } 3127 3128 if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) { 3129 Base = CurDAG->getTargetConstant(Imm/256, DL, Ty); 3130 Offset = CurDAG->getTargetConstant(8, DL, Ty); 3131 return true; 3132 } 3133 3134 return false; 3135 } 3136 3137 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) { 3138 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3139 const int64_t ImmVal = CNode->getSExtValue(); 3140 SDLoc DL(N); 3141 3142 switch (VT.SimpleTy) { 3143 case MVT::i8: 3144 // Can always select i8s, no shift, mask the immediate value to 3145 // deal with sign-extended value from lowering. 3146 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3147 Imm = CurDAG->getTargetConstant(ImmVal & 0xFF, DL, MVT::i32); 3148 return true; 3149 case MVT::i16: 3150 // i16 values get sign-extended to 32-bits during lowering. 
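      // For example, 0x7F00 is accepted below as (0x7F, LSL #8), while a
      // value needing more than 8 significant bits beyond the optional byte
      // shift falls through and is rejected.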
3151 if ((ImmVal & 0xFF) == ImmVal) { 3152 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3153 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3154 return true; 3155 } else if ((ImmVal & 0xFF) == 0) { 3156 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3157 Imm = CurDAG->getTargetConstant((ImmVal >> 8) & 0xFF, DL, MVT::i32); 3158 return true; 3159 } 3160 break; 3161 case MVT::i32: 3162 case MVT::i64: 3163 // Range of immediate won't trigger signedness problems for 32/64b. 3164 if ((ImmVal & 0xFF) == ImmVal) { 3165 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3166 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3167 return true; 3168 } else if ((ImmVal & 0xFF00) == ImmVal) { 3169 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3170 Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32); 3171 return true; 3172 } 3173 break; 3174 default: 3175 break; 3176 } 3177 } 3178 3179 return false; 3180 } 3181 3182 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { 3183 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3184 int64_t ImmVal = CNode->getSExtValue(); 3185 SDLoc DL(N); 3186 if (ImmVal >= -128 && ImmVal < 128) { 3187 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3188 return true; 3189 } 3190 } 3191 return false; 3192 } 3193 3194 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { 3195 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3196 uint64_t ImmVal = CNode->getZExtValue(); 3197 3198 switch (VT.SimpleTy) { 3199 case MVT::i8: 3200 ImmVal &= 0xFF; 3201 break; 3202 case MVT::i16: 3203 ImmVal &= 0xFFFF; 3204 break; 3205 case MVT::i32: 3206 ImmVal &= 0xFFFFFFFF; 3207 break; 3208 case MVT::i64: 3209 break; 3210 default: 3211 llvm_unreachable("Unexpected type"); 3212 } 3213 3214 if (ImmVal < 256) { 3215 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3216 return true; 3217 } 3218 } 3219 return false; 3220 } 3221 3222 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, 3223 bool Invert) { 3224 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3225 uint64_t ImmVal = CNode->getZExtValue(); 3226 SDLoc DL(N); 3227 3228 if (Invert) 3229 ImmVal = ~ImmVal; 3230 3231 // Shift mask depending on type size. 3232 switch (VT.SimpleTy) { 3233 case MVT::i8: 3234 ImmVal &= 0xFF; 3235 ImmVal |= ImmVal << 8; 3236 ImmVal |= ImmVal << 16; 3237 ImmVal |= ImmVal << 32; 3238 break; 3239 case MVT::i16: 3240 ImmVal &= 0xFFFF; 3241 ImmVal |= ImmVal << 16; 3242 ImmVal |= ImmVal << 32; 3243 break; 3244 case MVT::i32: 3245 ImmVal &= 0xFFFFFFFF; 3246 ImmVal |= ImmVal << 32; 3247 break; 3248 case MVT::i64: 3249 break; 3250 default: 3251 llvm_unreachable("Unexpected type"); 3252 } 3253 3254 uint64_t encoding; 3255 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { 3256 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); 3257 return true; 3258 } 3259 } 3260 return false; 3261 } 3262 3263 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. 3264 // Rather than attempt to normalise everything we can sometimes saturate the 3265 // shift amount during selection. This function also allows for consistent 3266 // isel patterns by ensuring the resulting "Imm" node is of the i32 type 3267 // required by the instructions. 
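// For example (illustrative, assuming the [1, 16] range used for right
// shifts on 16-bit elements): a shift amount of 27 is rejected unless
// AllowSaturation is set, in which case it is clamped to High (16).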
3268 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, 3269 uint64_t High, bool AllowSaturation, 3270 SDValue &Imm) { 3271 if (auto *CN = dyn_cast<ConstantSDNode>(N)) { 3272 uint64_t ImmVal = CN->getZExtValue(); 3273 3274 // Reject shift amounts that are too small. 3275 if (ImmVal < Low) 3276 return false; 3277 3278 // Reject or saturate shift amounts that are too big. 3279 if (ImmVal > High) { 3280 if (!AllowSaturation) 3281 return false; 3282 ImmVal = High; 3283 } 3284 3285 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3286 return true; 3287 } 3288 3289 return false; 3290 } 3291 3292 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 3293 // tagp(FrameIndex, IRGstack, tag_offset): 3294 // since the offset between FrameIndex and IRGstack is a compile-time 3295 // constant, this can be lowered to a single ADDG instruction. 3296 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 3297 return false; 3298 } 3299 3300 SDValue IRG_SP = N->getOperand(2); 3301 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 3302 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != 3303 Intrinsic::aarch64_irg_sp) { 3304 return false; 3305 } 3306 3307 const TargetLowering *TLI = getTargetLowering(); 3308 SDLoc DL(N); 3309 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 3310 SDValue FiOp = CurDAG->getTargetFrameIndex( 3311 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3312 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3313 3314 SDNode *Out = CurDAG->getMachineNode( 3315 AArch64::TAGPstack, DL, MVT::i64, 3316 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 3317 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3318 ReplaceNode(N, Out); 3319 return true; 3320 } 3321 3322 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 3323 assert(isa<ConstantSDNode>(N->getOperand(3)) && 3324 "llvm.aarch64.tagp third argument must be an immediate"); 3325 if (trySelectStackSlotTagP(N)) 3326 return; 3327 // FIXME: above applies in any case when offset between Op1 and Op2 is a 3328 // compile-time constant, not just for stack allocations. 3329 3330 // General case for unrelated pointers in Op1 and Op2. 3331 SDLoc DL(N); 3332 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3333 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 3334 {N->getOperand(1), N->getOperand(2)}); 3335 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 3336 {SDValue(N1, 0), N->getOperand(2)}); 3337 SDNode *N3 = CurDAG->getMachineNode( 3338 AArch64::ADDG, DL, MVT::i64, 3339 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 3340 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3341 ReplaceNode(N, N3); 3342 } 3343 3344 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length 3345 // vector types larger than NEON don't have a matching SubRegIndex. 
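// For example (illustrative): 64-bit and 128-bit fixed vectors such as
// v2i64 are extracted via the dsub/zsub sub-register indices below, whereas
// a 256-bit fixed vector has no matching index and is coerced with
// COPY_TO_REGCLASS instead.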
3346 static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
3347 assert(V.getValueType().isScalableVector() &&
3348 V.getValueType().getSizeInBits().getKnownMinSize() ==
3349 AArch64::SVEBitsPerBlock &&
3350 "Expected to extract from a packed scalable vector!");
3351 assert(VT.isFixedLengthVector() &&
3352 "Expected to extract a fixed length vector!");
3353
3354 SDLoc DL(V);
3355 switch (VT.getSizeInBits()) {
3356 case 64: {
3357 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
3358 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
3359 }
3360 case 128: {
3361 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
3362 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
3363 }
3364 default: {
3365 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
3366 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3367 }
3368 }
3369 }
3370
3371 // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
3372 // vector types larger than NEON don't have a matching SubRegIndex.
3373 static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
3374 assert(VT.isScalableVector() &&
3375 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
3376 "Expected to insert into a packed scalable vector!");
3377 assert(V.getValueType().isFixedLengthVector() &&
3378 "Expected to insert a fixed length vector!");
3379
3380 SDLoc DL(V);
3381 switch (V.getValueType().getSizeInBits()) {
3382 case 64: {
3383 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
3384 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
3385 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
3386 SDValue(Container, 0), V, SubReg);
3387 }
3388 case 128: {
3389 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
3390 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
3391 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
3392 SDValue(Container, 0), V, SubReg);
3393 }
3394 default: {
3395 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
3396 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3397 }
3398 }
3399 }
3400
3401 void AArch64DAGToDAGISel::Select(SDNode *Node) {
3402 // If we have a custom node, we already have selected!
3403 if (Node->isMachineOpcode()) {
3404 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
3405 Node->setNodeId(-1);
3406 return;
3407 }
3408
3409 // A few custom selection cases.
3410 EVT VT = Node->getValueType(0);
3411
3412 switch (Node->getOpcode()) {
3413 default:
3414 break;
3415
3416 case ISD::ATOMIC_CMP_SWAP:
3417 if (SelectCMP_SWAP(Node))
3418 return;
3419 break;
3420
3421 case ISD::READ_REGISTER:
3422 if (tryReadRegister(Node))
3423 return;
3424 break;
3425
3426 case ISD::WRITE_REGISTER:
3427 if (tryWriteRegister(Node))
3428 return;
3429 break;
3430
3431 case ISD::ADD:
3432 if (tryMLAV64LaneV128(Node))
3433 return;
3434 break;
3435
3436 case ISD::LOAD: {
3437 // Try to select as an indexed load. Fall through to normal processing
3438 // if we can't.
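// For example (illustrative): a load whose base pointer is post-incremented
// can be matched to a writeback form such as LDRXpost; anything that does
// not fit an indexed addressing mode falls through to the generated
// patterns.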
3439 if (tryIndexedLoad(Node)) 3440 return; 3441 break; 3442 } 3443 3444 case ISD::SRL: 3445 case ISD::AND: 3446 case ISD::SRA: 3447 case ISD::SIGN_EXTEND_INREG: 3448 if (tryBitfieldExtractOp(Node)) 3449 return; 3450 if (tryBitfieldInsertInZeroOp(Node)) 3451 return; 3452 LLVM_FALLTHROUGH; 3453 case ISD::ROTR: 3454 case ISD::SHL: 3455 if (tryShiftAmountMod(Node)) 3456 return; 3457 break; 3458 3459 case ISD::SIGN_EXTEND: 3460 if (tryBitfieldExtractOpFromSExt(Node)) 3461 return; 3462 break; 3463 3464 case ISD::FP_EXTEND: 3465 if (tryHighFPExt(Node)) 3466 return; 3467 break; 3468 3469 case ISD::OR: 3470 if (tryBitfieldInsertOp(Node)) 3471 return; 3472 break; 3473 3474 case ISD::EXTRACT_SUBVECTOR: { 3475 // Bail when not a "cast" like extract_subvector. 3476 if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0) 3477 break; 3478 3479 // Bail when normal isel can do the job. 3480 EVT InVT = Node->getOperand(0).getValueType(); 3481 if (VT.isScalableVector() || InVT.isFixedLengthVector()) 3482 break; 3483 3484 // NOTE: We can only get here when doing fixed length SVE code generation. 3485 // We do manual selection because the types involved are not linked to real 3486 // registers (despite being legal) and must be coerced into SVE registers. 3487 // 3488 // NOTE: If the above changes, be aware that selection will still not work 3489 // because the td definition of extract_vector does not support extracting 3490 // a fixed length vector from a scalable vector. 3491 3492 ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0))); 3493 return; 3494 } 3495 3496 case ISD::INSERT_SUBVECTOR: { 3497 // Bail when not a "cast" like insert_subvector. 3498 if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0) 3499 break; 3500 if (!Node->getOperand(0).isUndef()) 3501 break; 3502 3503 // Bail when normal isel should do the job. 3504 EVT InVT = Node->getOperand(1).getValueType(); 3505 if (VT.isFixedLengthVector() || InVT.isScalableVector()) 3506 break; 3507 3508 // NOTE: We can only get here when doing fixed length SVE code generation. 3509 // We do manual selection because the types involved are not linked to real 3510 // registers (despite being legal) and must be coerced into SVE registers. 3511 // 3512 // NOTE: If the above changes, be aware that selection will still not work 3513 // because the td definition of insert_vector does not support inserting a 3514 // fixed length vector into a scalable vector. 3515 3516 ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1))); 3517 return; 3518 } 3519 3520 case ISD::Constant: { 3521 // Materialize zero constants as copies from WZR/XZR. This allows 3522 // the coalescer to propagate these into other instructions. 3523 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 3524 if (ConstNode->isZero()) { 3525 if (VT == MVT::i32) { 3526 SDValue New = CurDAG->getCopyFromReg( 3527 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 3528 ReplaceNode(Node, New.getNode()); 3529 return; 3530 } else if (VT == MVT::i64) { 3531 SDValue New = CurDAG->getCopyFromReg( 3532 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 3533 ReplaceNode(Node, New.getNode()); 3534 return; 3535 } 3536 } 3537 break; 3538 } 3539 3540 case ISD::FrameIndex: { 3541 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 
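// For example (illustrative): after frame index elimination this typically
// materialises as "add x<n>, sp, #offset".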
3542 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 3543 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 3544 const TargetLowering *TLI = getTargetLowering(); 3545 SDValue TFI = CurDAG->getTargetFrameIndex( 3546 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3547 SDLoc DL(Node); 3548 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 3549 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 3550 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 3551 return; 3552 } 3553 case ISD::INTRINSIC_W_CHAIN: { 3554 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 3555 switch (IntNo) { 3556 default: 3557 break; 3558 case Intrinsic::aarch64_ldaxp: 3559 case Intrinsic::aarch64_ldxp: { 3560 unsigned Op = 3561 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 3562 SDValue MemAddr = Node->getOperand(2); 3563 SDLoc DL(Node); 3564 SDValue Chain = Node->getOperand(0); 3565 3566 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 3567 MVT::Other, MemAddr, Chain); 3568 3569 // Transfer memoperands. 3570 MachineMemOperand *MemOp = 3571 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3572 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 3573 ReplaceNode(Node, Ld); 3574 return; 3575 } 3576 case Intrinsic::aarch64_stlxp: 3577 case Intrinsic::aarch64_stxp: { 3578 unsigned Op = 3579 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; 3580 SDLoc DL(Node); 3581 SDValue Chain = Node->getOperand(0); 3582 SDValue ValLo = Node->getOperand(2); 3583 SDValue ValHi = Node->getOperand(3); 3584 SDValue MemAddr = Node->getOperand(4); 3585 3586 // Place arguments in the right order. 3587 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 3588 3589 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 3590 // Transfer memoperands. 
3591 MachineMemOperand *MemOp = 3592 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3593 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 3594 3595 ReplaceNode(Node, St); 3596 return; 3597 } 3598 case Intrinsic::aarch64_neon_ld1x2: 3599 if (VT == MVT::v8i8) { 3600 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 3601 return; 3602 } else if (VT == MVT::v16i8) { 3603 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 3604 return; 3605 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3606 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 3607 return; 3608 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3609 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 3610 return; 3611 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3612 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 3613 return; 3614 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3615 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 3616 return; 3617 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3618 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3619 return; 3620 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3621 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 3622 return; 3623 } 3624 break; 3625 case Intrinsic::aarch64_neon_ld1x3: 3626 if (VT == MVT::v8i8) { 3627 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 3628 return; 3629 } else if (VT == MVT::v16i8) { 3630 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 3631 return; 3632 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3633 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 3634 return; 3635 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3636 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 3637 return; 3638 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3639 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 3640 return; 3641 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3642 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 3643 return; 3644 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3645 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3646 return; 3647 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3648 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 3649 return; 3650 } 3651 break; 3652 case Intrinsic::aarch64_neon_ld1x4: 3653 if (VT == MVT::v8i8) { 3654 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 3655 return; 3656 } else if (VT == MVT::v16i8) { 3657 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 3658 return; 3659 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3660 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 3661 return; 3662 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3663 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 3664 return; 3665 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3666 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 3667 return; 3668 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3669 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 3670 return; 3671 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3672 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3673 return; 3674 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3675 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 3676 return; 3677 } 3678 break; 3679 case 
Intrinsic::aarch64_neon_ld2: 3680 if (VT == MVT::v8i8) { 3681 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 3682 return; 3683 } else if (VT == MVT::v16i8) { 3684 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 3685 return; 3686 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3687 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 3688 return; 3689 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3690 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 3691 return; 3692 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3693 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 3694 return; 3695 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3696 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 3697 return; 3698 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3699 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3700 return; 3701 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3702 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 3703 return; 3704 } 3705 break; 3706 case Intrinsic::aarch64_neon_ld3: 3707 if (VT == MVT::v8i8) { 3708 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 3709 return; 3710 } else if (VT == MVT::v16i8) { 3711 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 3712 return; 3713 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3714 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 3715 return; 3716 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3717 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 3718 return; 3719 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3720 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 3721 return; 3722 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3723 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 3724 return; 3725 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3726 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3727 return; 3728 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3729 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 3730 return; 3731 } 3732 break; 3733 case Intrinsic::aarch64_neon_ld4: 3734 if (VT == MVT::v8i8) { 3735 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 3736 return; 3737 } else if (VT == MVT::v16i8) { 3738 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 3739 return; 3740 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3741 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 3742 return; 3743 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3744 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 3745 return; 3746 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3747 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 3748 return; 3749 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3750 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 3751 return; 3752 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3753 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3754 return; 3755 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3756 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 3757 return; 3758 } 3759 break; 3760 case Intrinsic::aarch64_neon_ld2r: 3761 if (VT == MVT::v8i8) { 3762 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 3763 return; 3764 } else if (VT == MVT::v16i8) { 3765 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 
3766 return; 3767 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3768 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 3769 return; 3770 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3771 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 3772 return; 3773 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3774 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 3775 return; 3776 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3777 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 3778 return; 3779 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3780 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 3781 return; 3782 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3783 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 3784 return; 3785 } 3786 break; 3787 case Intrinsic::aarch64_neon_ld3r: 3788 if (VT == MVT::v8i8) { 3789 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 3790 return; 3791 } else if (VT == MVT::v16i8) { 3792 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 3793 return; 3794 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3795 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 3796 return; 3797 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3798 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 3799 return; 3800 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3801 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 3802 return; 3803 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3804 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 3805 return; 3806 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3807 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 3808 return; 3809 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3810 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 3811 return; 3812 } 3813 break; 3814 case Intrinsic::aarch64_neon_ld4r: 3815 if (VT == MVT::v8i8) { 3816 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 3817 return; 3818 } else if (VT == MVT::v16i8) { 3819 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 3820 return; 3821 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3822 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 3823 return; 3824 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3825 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 3826 return; 3827 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3828 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 3829 return; 3830 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3831 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 3832 return; 3833 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3834 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 3835 return; 3836 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3837 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 3838 return; 3839 } 3840 break; 3841 case Intrinsic::aarch64_neon_ld2lane: 3842 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3843 SelectLoadLane(Node, 2, AArch64::LD2i8); 3844 return; 3845 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3846 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3847 SelectLoadLane(Node, 2, AArch64::LD2i16); 3848 return; 3849 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3850 VT == MVT::v2f32) { 3851 SelectLoadLane(Node, 2, AArch64::LD2i32); 3852 return; 3853 } else if (VT 
== MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3854 VT == MVT::v1f64) { 3855 SelectLoadLane(Node, 2, AArch64::LD2i64); 3856 return; 3857 } 3858 break; 3859 case Intrinsic::aarch64_neon_ld3lane: 3860 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3861 SelectLoadLane(Node, 3, AArch64::LD3i8); 3862 return; 3863 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3864 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3865 SelectLoadLane(Node, 3, AArch64::LD3i16); 3866 return; 3867 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3868 VT == MVT::v2f32) { 3869 SelectLoadLane(Node, 3, AArch64::LD3i32); 3870 return; 3871 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3872 VT == MVT::v1f64) { 3873 SelectLoadLane(Node, 3, AArch64::LD3i64); 3874 return; 3875 } 3876 break; 3877 case Intrinsic::aarch64_neon_ld4lane: 3878 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3879 SelectLoadLane(Node, 4, AArch64::LD4i8); 3880 return; 3881 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3882 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3883 SelectLoadLane(Node, 4, AArch64::LD4i16); 3884 return; 3885 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3886 VT == MVT::v2f32) { 3887 SelectLoadLane(Node, 4, AArch64::LD4i32); 3888 return; 3889 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3890 VT == MVT::v1f64) { 3891 SelectLoadLane(Node, 4, AArch64::LD4i64); 3892 return; 3893 } 3894 break; 3895 case Intrinsic::aarch64_ld64b: 3896 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); 3897 return; 3898 case Intrinsic::aarch64_sve_ld2_sret: { 3899 if (VT == MVT::nxv16i8) { 3900 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, 3901 true); 3902 return; 3903 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 3904 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 3905 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, 3906 true); 3907 return; 3908 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 3909 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W, 3910 true); 3911 return; 3912 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 3913 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D, 3914 true); 3915 return; 3916 } 3917 break; 3918 } 3919 case Intrinsic::aarch64_sve_ld3_sret: { 3920 if (VT == MVT::nxv16i8) { 3921 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, 3922 true); 3923 return; 3924 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 3925 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 3926 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H, 3927 true); 3928 return; 3929 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 3930 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W, 3931 true); 3932 return; 3933 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 3934 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D, 3935 true); 3936 return; 3937 } 3938 break; 3939 } 3940 case Intrinsic::aarch64_sve_ld4_sret: { 3941 if (VT == MVT::nxv16i8) { 3942 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B, 3943 true); 3944 return; 3945 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 3946 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 3947 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, 3948 true); 3949 return; 3950 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 3951 
SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W, 3952 true); 3953 return; 3954 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 3955 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D, 3956 true); 3957 return; 3958 } 3959 break; 3960 } 3961 } 3962 } break; 3963 case ISD::INTRINSIC_WO_CHAIN: { 3964 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 3965 switch (IntNo) { 3966 default: 3967 break; 3968 case Intrinsic::aarch64_tagp: 3969 SelectTagP(Node); 3970 return; 3971 case Intrinsic::aarch64_neon_tbl2: 3972 SelectTable(Node, 2, 3973 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, 3974 false); 3975 return; 3976 case Intrinsic::aarch64_neon_tbl3: 3977 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 3978 : AArch64::TBLv16i8Three, 3979 false); 3980 return; 3981 case Intrinsic::aarch64_neon_tbl4: 3982 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 3983 : AArch64::TBLv16i8Four, 3984 false); 3985 return; 3986 case Intrinsic::aarch64_neon_tbx2: 3987 SelectTable(Node, 2, 3988 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, 3989 true); 3990 return; 3991 case Intrinsic::aarch64_neon_tbx3: 3992 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 3993 : AArch64::TBXv16i8Three, 3994 true); 3995 return; 3996 case Intrinsic::aarch64_neon_tbx4: 3997 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four 3998 : AArch64::TBXv16i8Four, 3999 true); 4000 return; 4001 case Intrinsic::aarch64_neon_smull: 4002 case Intrinsic::aarch64_neon_umull: 4003 if (tryMULLV64LaneV128(IntNo, Node)) 4004 return; 4005 break; 4006 case Intrinsic::swift_async_context_addr: { 4007 SDLoc DL(Node); 4008 CurDAG->SelectNodeTo(Node, AArch64::SUBXri, MVT::i64, 4009 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, 4010 AArch64::FP, MVT::i64), 4011 CurDAG->getTargetConstant(8, DL, MVT::i32), 4012 CurDAG->getTargetConstant(0, DL, MVT::i32)); 4013 auto &MF = CurDAG->getMachineFunction(); 4014 MF.getFrameInfo().setFrameAddressIsTaken(true); 4015 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); 4016 return; 4017 } 4018 } 4019 break; 4020 } 4021 case ISD::INTRINSIC_VOID: { 4022 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 4023 if (Node->getNumOperands() >= 3) 4024 VT = Node->getOperand(2)->getValueType(0); 4025 switch (IntNo) { 4026 default: 4027 break; 4028 case Intrinsic::aarch64_neon_st1x2: { 4029 if (VT == MVT::v8i8) { 4030 SelectStore(Node, 2, AArch64::ST1Twov8b); 4031 return; 4032 } else if (VT == MVT::v16i8) { 4033 SelectStore(Node, 2, AArch64::ST1Twov16b); 4034 return; 4035 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4036 VT == MVT::v4bf16) { 4037 SelectStore(Node, 2, AArch64::ST1Twov4h); 4038 return; 4039 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4040 VT == MVT::v8bf16) { 4041 SelectStore(Node, 2, AArch64::ST1Twov8h); 4042 return; 4043 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4044 SelectStore(Node, 2, AArch64::ST1Twov2s); 4045 return; 4046 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4047 SelectStore(Node, 2, AArch64::ST1Twov4s); 4048 return; 4049 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4050 SelectStore(Node, 2, AArch64::ST1Twov2d); 4051 return; 4052 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4053 SelectStore(Node, 2, AArch64::ST1Twov1d); 4054 return; 4055 } 4056 break; 4057 } 4058 case Intrinsic::aarch64_neon_st1x3: { 4059 if (VT == MVT::v8i8) { 4060 SelectStore(Node, 3, AArch64::ST1Threev8b); 4061 
return; 4062 } else if (VT == MVT::v16i8) { 4063 SelectStore(Node, 3, AArch64::ST1Threev16b); 4064 return; 4065 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4066 VT == MVT::v4bf16) { 4067 SelectStore(Node, 3, AArch64::ST1Threev4h); 4068 return; 4069 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4070 VT == MVT::v8bf16) { 4071 SelectStore(Node, 3, AArch64::ST1Threev8h); 4072 return; 4073 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4074 SelectStore(Node, 3, AArch64::ST1Threev2s); 4075 return; 4076 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4077 SelectStore(Node, 3, AArch64::ST1Threev4s); 4078 return; 4079 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4080 SelectStore(Node, 3, AArch64::ST1Threev2d); 4081 return; 4082 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4083 SelectStore(Node, 3, AArch64::ST1Threev1d); 4084 return; 4085 } 4086 break; 4087 } 4088 case Intrinsic::aarch64_neon_st1x4: { 4089 if (VT == MVT::v8i8) { 4090 SelectStore(Node, 4, AArch64::ST1Fourv8b); 4091 return; 4092 } else if (VT == MVT::v16i8) { 4093 SelectStore(Node, 4, AArch64::ST1Fourv16b); 4094 return; 4095 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4096 VT == MVT::v4bf16) { 4097 SelectStore(Node, 4, AArch64::ST1Fourv4h); 4098 return; 4099 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4100 VT == MVT::v8bf16) { 4101 SelectStore(Node, 4, AArch64::ST1Fourv8h); 4102 return; 4103 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4104 SelectStore(Node, 4, AArch64::ST1Fourv2s); 4105 return; 4106 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4107 SelectStore(Node, 4, AArch64::ST1Fourv4s); 4108 return; 4109 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4110 SelectStore(Node, 4, AArch64::ST1Fourv2d); 4111 return; 4112 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4113 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4114 return; 4115 } 4116 break; 4117 } 4118 case Intrinsic::aarch64_neon_st2: { 4119 if (VT == MVT::v8i8) { 4120 SelectStore(Node, 2, AArch64::ST2Twov8b); 4121 return; 4122 } else if (VT == MVT::v16i8) { 4123 SelectStore(Node, 2, AArch64::ST2Twov16b); 4124 return; 4125 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4126 VT == MVT::v4bf16) { 4127 SelectStore(Node, 2, AArch64::ST2Twov4h); 4128 return; 4129 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4130 VT == MVT::v8bf16) { 4131 SelectStore(Node, 2, AArch64::ST2Twov8h); 4132 return; 4133 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4134 SelectStore(Node, 2, AArch64::ST2Twov2s); 4135 return; 4136 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4137 SelectStore(Node, 2, AArch64::ST2Twov4s); 4138 return; 4139 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4140 SelectStore(Node, 2, AArch64::ST2Twov2d); 4141 return; 4142 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4143 SelectStore(Node, 2, AArch64::ST1Twov1d); 4144 return; 4145 } 4146 break; 4147 } 4148 case Intrinsic::aarch64_neon_st3: { 4149 if (VT == MVT::v8i8) { 4150 SelectStore(Node, 3, AArch64::ST3Threev8b); 4151 return; 4152 } else if (VT == MVT::v16i8) { 4153 SelectStore(Node, 3, AArch64::ST3Threev16b); 4154 return; 4155 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4156 VT == MVT::v4bf16) { 4157 SelectStore(Node, 3, AArch64::ST3Threev4h); 4158 return; 4159 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4160 VT == MVT::v8bf16) { 4161 SelectStore(Node, 3, AArch64::ST3Threev8h); 4162 return; 4163 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4164 SelectStore(Node, 3, AArch64::ST3Threev2s); 4165 return; 4166 } else if 
(VT == MVT::v4i32 || VT == MVT::v4f32) { 4167 SelectStore(Node, 3, AArch64::ST3Threev4s); 4168 return; 4169 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4170 SelectStore(Node, 3, AArch64::ST3Threev2d); 4171 return; 4172 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4173 SelectStore(Node, 3, AArch64::ST1Threev1d); 4174 return; 4175 } 4176 break; 4177 } 4178 case Intrinsic::aarch64_neon_st4: { 4179 if (VT == MVT::v8i8) { 4180 SelectStore(Node, 4, AArch64::ST4Fourv8b); 4181 return; 4182 } else if (VT == MVT::v16i8) { 4183 SelectStore(Node, 4, AArch64::ST4Fourv16b); 4184 return; 4185 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4186 VT == MVT::v4bf16) { 4187 SelectStore(Node, 4, AArch64::ST4Fourv4h); 4188 return; 4189 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4190 VT == MVT::v8bf16) { 4191 SelectStore(Node, 4, AArch64::ST4Fourv8h); 4192 return; 4193 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4194 SelectStore(Node, 4, AArch64::ST4Fourv2s); 4195 return; 4196 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4197 SelectStore(Node, 4, AArch64::ST4Fourv4s); 4198 return; 4199 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4200 SelectStore(Node, 4, AArch64::ST4Fourv2d); 4201 return; 4202 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4203 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4204 return; 4205 } 4206 break; 4207 } 4208 case Intrinsic::aarch64_neon_st2lane: { 4209 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4210 SelectStoreLane(Node, 2, AArch64::ST2i8); 4211 return; 4212 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4213 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4214 SelectStoreLane(Node, 2, AArch64::ST2i16); 4215 return; 4216 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4217 VT == MVT::v2f32) { 4218 SelectStoreLane(Node, 2, AArch64::ST2i32); 4219 return; 4220 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4221 VT == MVT::v1f64) { 4222 SelectStoreLane(Node, 2, AArch64::ST2i64); 4223 return; 4224 } 4225 break; 4226 } 4227 case Intrinsic::aarch64_neon_st3lane: { 4228 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4229 SelectStoreLane(Node, 3, AArch64::ST3i8); 4230 return; 4231 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4232 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4233 SelectStoreLane(Node, 3, AArch64::ST3i16); 4234 return; 4235 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4236 VT == MVT::v2f32) { 4237 SelectStoreLane(Node, 3, AArch64::ST3i32); 4238 return; 4239 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4240 VT == MVT::v1f64) { 4241 SelectStoreLane(Node, 3, AArch64::ST3i64); 4242 return; 4243 } 4244 break; 4245 } 4246 case Intrinsic::aarch64_neon_st4lane: { 4247 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4248 SelectStoreLane(Node, 4, AArch64::ST4i8); 4249 return; 4250 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4251 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4252 SelectStoreLane(Node, 4, AArch64::ST4i16); 4253 return; 4254 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4255 VT == MVT::v2f32) { 4256 SelectStoreLane(Node, 4, AArch64::ST4i32); 4257 return; 4258 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4259 VT == MVT::v1f64) { 4260 SelectStoreLane(Node, 4, AArch64::ST4i64); 4261 return; 4262 } 4263 break; 4264 } 4265 case Intrinsic::aarch64_sve_st2: { 4266 if (VT == 
MVT::nxv16i8) { 4267 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); 4268 return; 4269 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4270 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4271 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); 4272 return; 4273 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4274 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); 4275 return; 4276 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4277 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); 4278 return; 4279 } 4280 break; 4281 } 4282 case Intrinsic::aarch64_sve_st3: { 4283 if (VT == MVT::nxv16i8) { 4284 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); 4285 return; 4286 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4287 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4288 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); 4289 return; 4290 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4291 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); 4292 return; 4293 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4294 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM); 4295 return; 4296 } 4297 break; 4298 } 4299 case Intrinsic::aarch64_sve_st4: { 4300 if (VT == MVT::nxv16i8) { 4301 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); 4302 return; 4303 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4304 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4305 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); 4306 return; 4307 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4308 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); 4309 return; 4310 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4311 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM); 4312 return; 4313 } 4314 break; 4315 } 4316 } 4317 break; 4318 } 4319 case AArch64ISD::LD2post: { 4320 if (VT == MVT::v8i8) { 4321 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 4322 return; 4323 } else if (VT == MVT::v16i8) { 4324 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 4325 return; 4326 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4327 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 4328 return; 4329 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4330 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 4331 return; 4332 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4333 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 4334 return; 4335 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4336 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 4337 return; 4338 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4339 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4340 return; 4341 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4342 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 4343 return; 4344 } 4345 break; 4346 } 4347 case AArch64ISD::LD3post: { 4348 if (VT == MVT::v8i8) { 4349 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 4350 return; 4351 } else if (VT == MVT::v16i8) { 4352 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 4353 return; 4354 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4355 SelectPostLoad(Node, 
3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 4356 return; 4357 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4358 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 4359 return; 4360 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4361 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 4362 return; 4363 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4364 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 4365 return; 4366 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4367 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4368 return; 4369 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4370 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 4371 return; 4372 } 4373 break; 4374 } 4375 case AArch64ISD::LD4post: { 4376 if (VT == MVT::v8i8) { 4377 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 4378 return; 4379 } else if (VT == MVT::v16i8) { 4380 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 4381 return; 4382 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4383 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 4384 return; 4385 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4386 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 4387 return; 4388 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4389 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 4390 return; 4391 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4392 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 4393 return; 4394 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4395 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4396 return; 4397 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4398 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 4399 return; 4400 } 4401 break; 4402 } 4403 case AArch64ISD::LD1x2post: { 4404 if (VT == MVT::v8i8) { 4405 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 4406 return; 4407 } else if (VT == MVT::v16i8) { 4408 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 4409 return; 4410 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4411 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 4412 return; 4413 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4414 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 4415 return; 4416 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4417 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 4418 return; 4419 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4420 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 4421 return; 4422 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4423 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4424 return; 4425 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4426 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 4427 return; 4428 } 4429 break; 4430 } 4431 case AArch64ISD::LD1x3post: { 4432 if (VT == MVT::v8i8) { 4433 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 4434 return; 4435 } else if (VT == MVT::v16i8) { 4436 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 4437 return; 4438 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4439 SelectPostLoad(Node, 3, 
AArch64::LD1Threev4h_POST, AArch64::dsub0); 4440 return; 4441 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4442 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 4443 return; 4444 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4445 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 4446 return; 4447 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4448 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 4449 return; 4450 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4451 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4452 return; 4453 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4454 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 4455 return; 4456 } 4457 break; 4458 } 4459 case AArch64ISD::LD1x4post: { 4460 if (VT == MVT::v8i8) { 4461 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 4462 return; 4463 } else if (VT == MVT::v16i8) { 4464 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); 4465 return; 4466 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4467 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 4468 return; 4469 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4470 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 4471 return; 4472 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4473 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 4474 return; 4475 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4476 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 4477 return; 4478 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4479 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4480 return; 4481 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4482 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 4483 return; 4484 } 4485 break; 4486 } 4487 case AArch64ISD::LD1DUPpost: { 4488 if (VT == MVT::v8i8) { 4489 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 4490 return; 4491 } else if (VT == MVT::v16i8) { 4492 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 4493 return; 4494 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4495 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 4496 return; 4497 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4498 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 4499 return; 4500 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4501 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 4502 return; 4503 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4504 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 4505 return; 4506 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4507 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 4508 return; 4509 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4510 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 4511 return; 4512 } 4513 break; 4514 } 4515 case AArch64ISD::LD2DUPpost: { 4516 if (VT == MVT::v8i8) { 4517 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 4518 return; 4519 } else if (VT == MVT::v16i8) { 4520 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 4521 return; 4522 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4523 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, 
AArch64::dsub0); 4524 return; 4525 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4526 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 4527 return; 4528 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4529 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 4530 return; 4531 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4532 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 4533 return; 4534 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4535 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 4536 return; 4537 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4538 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 4539 return; 4540 } 4541 break; 4542 } 4543 case AArch64ISD::LD3DUPpost: { 4544 if (VT == MVT::v8i8) { 4545 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 4546 return; 4547 } else if (VT == MVT::v16i8) { 4548 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 4549 return; 4550 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4551 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); 4552 return; 4553 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4554 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 4555 return; 4556 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4557 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 4558 return; 4559 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4560 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 4561 return; 4562 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4563 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 4564 return; 4565 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4566 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 4567 return; 4568 } 4569 break; 4570 } 4571 case AArch64ISD::LD4DUPpost: { 4572 if (VT == MVT::v8i8) { 4573 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 4574 return; 4575 } else if (VT == MVT::v16i8) { 4576 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 4577 return; 4578 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4579 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 4580 return; 4581 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4582 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 4583 return; 4584 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4585 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 4586 return; 4587 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4588 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 4589 return; 4590 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4591 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 4592 return; 4593 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4594 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 4595 return; 4596 } 4597 break; 4598 } 4599 case AArch64ISD::LD1LANEpost: { 4600 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4601 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 4602 return; 4603 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4604 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4605 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 4606 return; 4607 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4608 VT == MVT::v2f32) { 
4609 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 4610 return; 4611 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4612 VT == MVT::v1f64) { 4613 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); 4614 return; 4615 } 4616 break; 4617 } 4618 case AArch64ISD::LD2LANEpost: { 4619 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4620 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); 4621 return; 4622 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4623 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4624 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); 4625 return; 4626 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4627 VT == MVT::v2f32) { 4628 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); 4629 return; 4630 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4631 VT == MVT::v1f64) { 4632 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); 4633 return; 4634 } 4635 break; 4636 } 4637 case AArch64ISD::LD3LANEpost: { 4638 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4639 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); 4640 return; 4641 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4642 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4643 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); 4644 return; 4645 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4646 VT == MVT::v2f32) { 4647 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); 4648 return; 4649 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4650 VT == MVT::v1f64) { 4651 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); 4652 return; 4653 } 4654 break; 4655 } 4656 case AArch64ISD::LD4LANEpost: { 4657 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4658 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); 4659 return; 4660 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4661 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4662 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); 4663 return; 4664 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4665 VT == MVT::v2f32) { 4666 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); 4667 return; 4668 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4669 VT == MVT::v1f64) { 4670 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); 4671 return; 4672 } 4673 break; 4674 } 4675 case AArch64ISD::ST2post: { 4676 VT = Node->getOperand(1).getValueType(); 4677 if (VT == MVT::v8i8) { 4678 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); 4679 return; 4680 } else if (VT == MVT::v16i8) { 4681 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); 4682 return; 4683 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4684 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); 4685 return; 4686 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4687 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); 4688 return; 4689 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4690 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); 4691 return; 4692 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4693 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); 4694 return; 4695 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4696 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); 4697 return; 4698 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4699 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4700 return; 4701 } 4702 break; 
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    }
    break;
  }
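  // The ST1x<n>post cases store <n> consecutive registers with ST1 semantics
  // (no element interleaving), in contrast to the ST2/ST3/ST4 cases above.
  // Since ST1 has per-register .1d forms, the v1i64/v1f64 types are handled
  // directly rather than via a fallback.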
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
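  // Post-incremented single-lane structure stores. As with the lane loads,
  // selection is keyed only on the lane element size, so e.g. both v2i32 and
  // v4i32 use ST2i32_POST.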
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
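  // The SVE_LD<n>_MERGE_ZERO cases select SVE structured loads whose
  // inactive lanes are zeroed. SelectPredicatedLoad is passed the number of
  // vectors, the log2 of the element size in bytes (0 = B, 1 = H, 2 = W,
  // 3 = D), and the reg+imm (_IMM) and reg+reg forms of the instruction;
  // nxv8bf16 additionally requires the +bf16 subtarget feature.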
  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
      SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
      SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}
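
// A few concrete mappings, for illustration (derived from the rules above):
//   nxv16i1, NumVec = 1  ->  nxv16i8  (16 x 8  = 128)
//   nxv8i1,  NumVec = 1  ->  nxv8i16  ( 8 x 16 = 128)
//   nxv4i1,  NumVec = 2  ->  nxv8i32  (element count scaled by NumVec)
//   nxv2i1,  NumVec = 4  ->  nxv8i64
// Any other predicate type yields an invalid EVT.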

/// Return the EVT of the data associated with a memory operation in \p
/// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID)
    return EVT();

  const unsigned IntNo =
      cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
  if (IntNo != Intrinsic::aarch64_sve_prf)
    return EVT();

  // We are using an SVE prefetch intrinsic. Type must be inferred
  // from the width of the predicate.
  return getPackedVectorTypeFromPredicateType(
      Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8-bit data does not come with the SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
    if (ImmOff % Size)
      return false;

    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(*CurDAG, N);
}
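
// Illustrative summary (not exhaustive) of the SVE addressing-mode selectors
// defined above, assuming the usual SVE contiguous load/store patterns use
// them:
//
//   SelectAddrModeIndexedSVE:  add(Base, vscale * (Imm * sizeof(MemVT)))
//     with Min <= Imm <= Max is folded to a reg+imm form such as
//       ld1w { z0.s }, p0/z, [x0, #Imm, mul vl]
//
//   SelectSVERegRegAddrMode:   add(Base, shl(Index, Scale))
//     with Scale == log2(element size in bytes) is folded to a scaled
//     reg+reg form such as
//       ld1w { z0.s }, p0/z, [x0, x1, lsl #2]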