//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}

  StringRef getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  bool tryMLAV64LaneV128(SDNode *N);
  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }
  template <unsigned Size, unsigned Max>
  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
    // Test if there is an appropriate addressing mode and check if the
    // immediate fits.
    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (Found) {
      if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
        int64_t C = CI->getSExtValue();
        if (C <= Max)
          return true;
      }
    }

    // Otherwise, base only, materialize address in register.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  bool SelectDupZeroOrUndef(SDValue N) {
    switch(N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
        if (CN->isNullValue())
          return true;
      if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
        if (CN->isZero())
          return true;
      break;
    }
    default:
      break;
    }

    return false;
  }

  bool SelectDupZero(SDValue N) {
    switch(N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
        if (CN->isNullValue())
          return true;
      if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
        if (CN->isZero())
          return true;
      break;
    }
    }

    return false;
  }

  template<MVT::SimpleValueType VT>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Invert = false>
  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    return SelectSVELogicalImm(N, VT, Imm, Invert);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    return SelectSVEArithImm(N, VT, Imm);
  }

  template <unsigned Low, unsigned High, bool AllowSaturation = false>
  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  }

  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
  template<signed Min, signed Max, signed Scale, bool Shift>
  bool SelectCntImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    if (Shift)
      MulImm = 1LL << MulImm;

    if ((MulImm % std::abs(Scale)) != 0)
      return false;

    MulImm /= Scale;
    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  template <signed Max, signed Scale>
  bool SelectEXTImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();

    if (MulImm >= 0 && MulImm <= Max) {
      MulImm *= Scale;
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that is
  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);
  // Form a sequence of SVE registers for instructions using a list of vectors,
  // e.g. structured loads and stores (ldN, stN).
  SDValue createZTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri);

  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  /// SVE Reg+Imm addressing mode.
  template <int64_t Min, int64_t Max>
  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
                                SDValue &OffImm);
  /// SVE Reg+Reg address mode.
  template <unsigned Scale>
  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  }

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
                           const SDValue &OldBase, const SDValue &OldOffset,
                           unsigned Scale);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);
  bool tryHighFPExt(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);

  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);

  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);

  bool SelectAllActivePredicate(SDValue N);
};
} // end anonymous namespace

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so, Imm will receive the 32-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if the value is a constant
// operand. If so, Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand. If so, Imm will
// receive the 32-bit value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
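/// For example, an address computed as (add x0, (shl x1, #3)) can be folded
/// into the register-offset form "ldr x2, [x0, x1, lsl #3]" when the shifted
/// value is only used by memory operations.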
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding a logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}

/// Determine whether it is worth it to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;
  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the addressing mode and save a cycle.
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
      isWorthFoldingSHL(V))
    return true;
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to the default of "LSL 0". The
/// logical instructions allow the shifted register to be rotated, but the
/// arithmetic instructions do not. The AllowROR parameter specifies whether
/// ROR is supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFolding(N);
  }

  return false;
}

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
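/// For example, a sign-extend from i16 maps to SXTH, a zero-extend from i8
/// maps to UXTB, and (and x, 0xffffffff) maps to UXTW, mirroring the cases
/// handled below.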
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}

// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
      DL->getOpcode() != AArch64ISD::DUPLANE32)
    return false;

  SDValue SV = DL->getOperand(0);
  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
    return false;

  SDValue EV = SV.getOperand(1);
  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
  LaneOp = EV.getOperand(0);

  return true;
}

// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is
// a high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
                             SDValue &LaneOp, int &LaneIdx) {

  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    std::swap(Op0, Op1);
    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
      return false;
  }
  StdOp = Op1;
  return true;
}

/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
/// is a lane in the upper half of a 128-bit vector. Recognize and select this
/// so that we don't emit unnecessary lane extracts.
bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
  int LaneIdx = -1; // Will hold the lane index.
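
  // The multiply may appear as either operand of the ADD being matched, so
  // check Op1 first and fall back to the swapped order below.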
  if (Op1.getOpcode() != ISD::MUL ||
      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                        LaneIdx)) {
    std::swap(Op0, Op1);
    if (Op1.getOpcode() != ISD::MUL ||
        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                          LaneIdx))
      return false;
  }

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };

  unsigned MLAOpc = ~0U;

  switch (N->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unrecognized MLA.");
  case MVT::v4i16:
    MLAOpc = AArch64::MLAv4i16_indexed;
    break;
  case MVT::v8i16:
    MLAOpc = AArch64::MLAv8i16_indexed;
    break;
  case MVT::v2i32:
    MLAOpc = AArch64::MLAv2i32_indexed;
    break;
  case MVT::v4i32:
    MLAOpc = AArch64::MLAv4i32_indexed;
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
  return true;
}

bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
  SDLoc dl(N);
  SDValue SMULLOp0;
  SDValue SMULLOp1;
  int LaneIdx;

  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
                        LaneIdx))
    return false;

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };

  unsigned SMULLOpc = ~0U;

  if (IntNo == Intrinsic::aarch64_neon_smull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::SMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::SMULLv2i32_indexed;
      break;
    }
  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::UMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::UMULLv2i32_indexed;
      break;
    }
  } else
    llvm_unreachable("Unrecognized intrinsic.");

  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
  return true;
}

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                               dl, MVT::i32, N, SubReg);
  return SDValue(Node, 0);
}

// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template<signed Low, signed High, signed Scale>
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((MulImm % std::abs(Scale)) == 0) {
    int64_t RDVLImm = MulImm / Scale;
    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if free 32-bit -> 64-bit zext can be used instead.
    if (Ext == AArch64_AM::UXTW &&
        Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point
/// in folding it into the mem op. Theoretically, it shouldn't matter, but
/// there's a single pseudo-instruction for an ADRP/ADD pair so over-aggressive
/// folding leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
      return false;
  }

  return true;
}

/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed
/// BW-bit immediate" address. The "Size" argument is the size in bytes of the
/// memory reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
  // signed addressing mode selected here doesn't support labels/immediates,
  // only base+offset.
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      if (IsSignedImm) {
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      } else {
        // unsigned Immediate
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Size);
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base, SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    if (GAN->getOffset() % Size == 0 &&
        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
      return true;
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address. This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode. The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    // If the offset is valid as a scaled immediate, don't match here.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
        RHSC < (0x1000 << Log2_32(Size)))
      return false;
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
      return true;
    }
  }
  return false;
}

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  MachineSDNode *Node = CurDAG->getMachineNode(
      TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
  return SDValue(Node, 0);
}

/// Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFolding(N);
}

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD of LSL #12", ignore such
    // constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out if RHS is a wide immediate: it can not be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB.
  // In that case a [BaseReg + 0] address mode would be used, generating
  // instructions like:
  //  MOV  X0, WideImmediate
  //  ADD  X1, BaseReg, X0
  //  LDR  X2, [X1, 0]
  // For such situations, using the [BaseReg, XReg] addressing mode saves one
  // ADD/SUB:
  //  MOV  X0, WideImmediate
  //  LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
    unsigned Scale = Log2_32(Size);
    // Skip the immediate if it can be selected by the load/store addressing
    // mode. Also skip it if it can be encoded by a single ADD (SUB is also
    // checked by using -ImmOff).
    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
                                         AArch64::ZPR3RegClassID,
                                         AArch64::ZPR4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}

bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDLoc dl(N);
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});

  // Either way, we're replacing the node, so tell the caller that.
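  // Results of the new machine node: 0 is the written-back base register, 1 is
  // the loaded value, and 2 is the chain; they are mapped back onto the
  // corresponding results of the original indexed load below.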
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                                     unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(2), // Mem operand;
                   Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

  // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
  // because it's too simple to have needed special treatment during lowering.
  if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
    MachineMemOperand *MemOp = MemIntr->getMemOperand();
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
  }

  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(1), // Mem operand
                   N->getOperand(2), // Incremental
                   Chain};

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1)
    ReplaceUses(SDValue(N, 0), SuperReg);
  else
    for (unsigned i = 0; i < NumVecs; ++i)
      ReplaceUses(SDValue(N, i),
                  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT,
                                                 SuperReg));

  // Update the chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Optimize \param OldBase and \param OldOffset selecting the best addressing
/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
/// new Base and an SDValue representing the new offset.
std::tuple<unsigned, SDValue, SDValue>
AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
                                              unsigned Opc_ri,
                                              const SDValue &OldBase,
                                              const SDValue &OldOffset,
                                              unsigned Scale) {
  SDValue NewBase = OldBase;
  SDValue NewOffset = OldOffset;
  // Detect a possible Reg+Imm addressing mode.
  const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
      N, OldBase, NewBase, NewOffset);

  // Detect a possible reg+reg addressing mode, but only if we haven't already
  // detected a Reg+Imm one.
  const bool IsRegReg =
      !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);

  // Select the instruction.
  return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
}

void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
                                               unsigned Scale, unsigned Opc_ri,
                                               unsigned Opc_rr) {
  assert(Scale < 4 && "Invalid scaling value.");
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  // Optimize addressing mode.
  SDValue Base, Offset;
  unsigned Opc;
  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, N->getOperand(2),
      CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);

  SDValue Ops[] = {N->getOperand(1), // Predicate
                   Base,             // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                      unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
                                                unsigned Scale, unsigned Opc_rr,
                                                unsigned Opc_ri) {
  SDLoc dl(N);

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = createZTuple(Regs);

  // Optimize addressing mode.
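  // findAddrModeSVELoadStore picks between the reg+imm and reg+reg forms and
  // returns the matching opcode together with the rewritten base and offset.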
1551 unsigned Opc; 1552 SDValue Offset, Base; 1553 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1554 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), 1555 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); 1556 1557 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate 1558 Base, // address 1559 Offset, // offset 1560 N->getOperand(0)}; // chain 1561 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1562 1563 ReplaceNode(N, St); 1564 } 1565 1566 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, 1567 SDValue &OffImm) { 1568 SDLoc dl(N); 1569 const DataLayout &DL = CurDAG->getDataLayout(); 1570 const TargetLowering *TLI = getTargetLowering(); 1571 1572 // Try to match it for the frame address 1573 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) { 1574 int FI = FINode->getIndex(); 1575 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1576 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1577 return true; 1578 } 1579 1580 return false; 1581 } 1582 1583 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 1584 unsigned Opc) { 1585 SDLoc dl(N); 1586 EVT VT = N->getOperand(2)->getValueType(0); 1587 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1588 MVT::Other}; // Type for the Chain 1589 1590 // Form a REG_SEQUENCE to force register allocation. 1591 bool Is128Bit = VT.getSizeInBits() == 128; 1592 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1593 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1594 1595 SDValue Ops[] = {RegSeq, 1596 N->getOperand(NumVecs + 1), // base register 1597 N->getOperand(NumVecs + 2), // Incremental 1598 N->getOperand(0)}; // Chain 1599 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1600 1601 ReplaceNode(N, St); 1602 } 1603 1604 namespace { 1605 /// WidenVector - Given a value in the V64 register class, produce the 1606 /// equivalent value in the V128 register class. 1607 class WidenVector { 1608 SelectionDAG &DAG; 1609 1610 public: 1611 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 1612 1613 SDValue operator()(SDValue V64Reg) { 1614 EVT VT = V64Reg.getValueType(); 1615 unsigned NarrowSize = VT.getVectorNumElements(); 1616 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1617 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 1618 SDLoc DL(V64Reg); 1619 1620 SDValue Undef = 1621 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 1622 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 1623 } 1624 }; 1625 } // namespace 1626 1627 /// NarrowVector - Given a value in the V128 register class, produce the 1628 /// equivalent value in the V64 register class. 1629 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 1630 EVT VT = V128Reg.getValueType(); 1631 unsigned WideSize = VT.getVectorNumElements(); 1632 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1633 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 1634 1635 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 1636 V128Reg); 1637 } 1638 1639 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 1640 unsigned Opc) { 1641 SDLoc dl(N); 1642 EVT VT = N->getValueType(0); 1643 bool Narrow = VT.getSizeInBits() == 64; 1644 1645 // Form a REG_SEQUENCE to force register allocation. 
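// The lane loads read a list of consecutively numbered vector registers, which
// the Q-register tuple built by REG_SEQUENCE guarantees, e.g. an ld3 lane form
// uses a register list such as { v4.s, v5.s, v6.s }[lane].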
1646 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1647 1648 if (Narrow) 1649 transform(Regs, Regs.begin(), 1650 WidenVector(*CurDAG)); 1651 1652 SDValue RegSeq = createQTuple(Regs); 1653 1654 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1655 1656 unsigned LaneNo = 1657 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1658 1659 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1660 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1661 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1662 SDValue SuperReg = SDValue(Ld, 0); 1663 1664 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1665 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1666 AArch64::qsub2, AArch64::qsub3 }; 1667 for (unsigned i = 0; i < NumVecs; ++i) { 1668 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 1669 if (Narrow) 1670 NV = NarrowVector(NV, *CurDAG); 1671 ReplaceUses(SDValue(N, i), NV); 1672 } 1673 1674 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1675 CurDAG->RemoveDeadNode(N); 1676 } 1677 1678 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 1679 unsigned Opc) { 1680 SDLoc dl(N); 1681 EVT VT = N->getValueType(0); 1682 bool Narrow = VT.getSizeInBits() == 64; 1683 1684 // Form a REG_SEQUENCE to force register allocation. 1685 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1686 1687 if (Narrow) 1688 transform(Regs, Regs.begin(), 1689 WidenVector(*CurDAG)); 1690 1691 SDValue RegSeq = createQTuple(Regs); 1692 1693 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1694 RegSeq->getValueType(0), MVT::Other}; 1695 1696 unsigned LaneNo = 1697 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1698 1699 SDValue Ops[] = {RegSeq, 1700 CurDAG->getTargetConstant(LaneNo, dl, 1701 MVT::i64), // Lane Number 1702 N->getOperand(NumVecs + 2), // Base register 1703 N->getOperand(NumVecs + 3), // Incremental 1704 N->getOperand(0)}; 1705 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1706 1707 // Update uses of the write back register 1708 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1709 1710 // Update uses of the vector list 1711 SDValue SuperReg = SDValue(Ld, 1); 1712 if (NumVecs == 1) { 1713 ReplaceUses(SDValue(N, 0), 1714 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 1715 } else { 1716 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1717 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1718 AArch64::qsub2, AArch64::qsub3 }; 1719 for (unsigned i = 0; i < NumVecs; ++i) { 1720 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 1721 SuperReg); 1722 if (Narrow) 1723 NV = NarrowVector(NV, *CurDAG); 1724 ReplaceUses(SDValue(N, i), NV); 1725 } 1726 } 1727 1728 // Update the Chain 1729 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1730 CurDAG->RemoveDeadNode(N); 1731 } 1732 1733 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 1734 unsigned Opc) { 1735 SDLoc dl(N); 1736 EVT VT = N->getOperand(2)->getValueType(0); 1737 bool Narrow = VT.getSizeInBits() == 64; 1738 1739 // Form a REG_SEQUENCE to force register allocation. 
1740 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1741 1742 if (Narrow) 1743 transform(Regs, Regs.begin(), 1744 WidenVector(*CurDAG)); 1745 1746 SDValue RegSeq = createQTuple(Regs); 1747 1748 unsigned LaneNo = 1749 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1750 1751 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1752 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1753 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 1754 1755 // Transfer memoperands. 1756 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1757 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1758 1759 ReplaceNode(N, St); 1760 } 1761 1762 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 1763 unsigned Opc) { 1764 SDLoc dl(N); 1765 EVT VT = N->getOperand(2)->getValueType(0); 1766 bool Narrow = VT.getSizeInBits() == 64; 1767 1768 // Form a REG_SEQUENCE to force register allocation. 1769 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1770 1771 if (Narrow) 1772 transform(Regs, Regs.begin(), 1773 WidenVector(*CurDAG)); 1774 1775 SDValue RegSeq = createQTuple(Regs); 1776 1777 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1778 MVT::Other}; 1779 1780 unsigned LaneNo = 1781 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1782 1783 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1784 N->getOperand(NumVecs + 2), // Base Register 1785 N->getOperand(NumVecs + 3), // Incremental 1786 N->getOperand(0)}; 1787 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1788 1789 // Transfer memoperands. 1790 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1791 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1792 1793 ReplaceNode(N, St); 1794 } 1795 1796 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 1797 unsigned &Opc, SDValue &Opd0, 1798 unsigned &LSB, unsigned &MSB, 1799 unsigned NumberOfIgnoredLowBits, 1800 bool BiggerPattern) { 1801 assert(N->getOpcode() == ISD::AND && 1802 "N must be a AND operation to call this function"); 1803 1804 EVT VT = N->getValueType(0); 1805 1806 // Here we can test the type of VT and return false when the type does not 1807 // match, but since it is done prior to that call in the current context 1808 // we turned that into an assert to avoid redundant code. 1809 assert((VT == MVT::i32 || VT == MVT::i64) && 1810 "Type checking must have been done before calling this function"); 1811 1812 // FIXME: simplify-demanded-bits in DAGCombine will probably have 1813 // changed the AND node to a 32-bit mask operation. We'll have to 1814 // undo that as part of the transform here if we want to catch all 1815 // the opportunities. 1816 // Currently the NumberOfIgnoredLowBits argument helps to recover 1817 // form these situations when matching bigger pattern (bitfield insert). 1818 1819 // For unsigned extracts, check for a shift right and mask 1820 uint64_t AndImm = 0; 1821 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) 1822 return false; 1823 1824 const SDNode *Op0 = N->getOperand(0).getNode(); 1825 1826 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 1827 // simplified. 
Try to undo that 1828 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 1829 1830 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 1831 if (AndImm & (AndImm + 1)) 1832 return false; 1833 1834 bool ClampMSB = false; 1835 uint64_t SrlImm = 0; 1836 // Handle the SRL + ANY_EXTEND case. 1837 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 1838 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 1839 // Extend the incoming operand of the SRL to 64-bit. 1840 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 1841 // Make sure to clamp the MSB so that we preserve the semantics of the 1842 // original operations. 1843 ClampMSB = true; 1844 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 1845 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 1846 SrlImm)) { 1847 // If the shift result was truncated, we can still combine them. 1848 Opd0 = Op0->getOperand(0).getOperand(0); 1849 1850 // Use the type of SRL node. 1851 VT = Opd0->getValueType(0); 1852 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 1853 Opd0 = Op0->getOperand(0); 1854 } else if (BiggerPattern) { 1855 // Let's pretend a 0 shift right has been performed. 1856 // The resulting code will be at least as good as the original one 1857 // plus it may expose more opportunities for bitfield insert pattern. 1858 // FIXME: Currently we limit this to the bigger pattern, because 1859 // some optimizations expect AND and not UBFM. 1860 Opd0 = N->getOperand(0); 1861 } else 1862 return false; 1863 1864 // Bail out on large immediates. This happens when no proper 1865 // combining/constant folding was performed. 1866 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { 1867 LLVM_DEBUG( 1868 (dbgs() << N 1869 << ": Found large shift immediate, this should not happen\n")); 1870 return false; 1871 } 1872 1873 LSB = SrlImm; 1874 MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm) 1875 : countTrailingOnes<uint64_t>(AndImm)) - 1876 1; 1877 if (ClampMSB) 1878 // Since we're moving the extend before the right shift operation, we need 1879 // to clamp the MSB to make sure we don't shift in undefined bits instead of 1880 // the zeros which would get shifted in with the original right shift 1881 // operation. 1882 MSB = MSB > 31 ? 31 : MSB; 1883 1884 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 1885 return true; 1886 } 1887 1888 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, 1889 SDValue &Opd0, unsigned &Immr, 1890 unsigned &Imms) { 1891 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); 1892 1893 EVT VT = N->getValueType(0); 1894 unsigned BitWidth = VT.getSizeInBits(); 1895 assert((VT == MVT::i32 || VT == MVT::i64) && 1896 "Type checking must have been done before calling this function"); 1897 1898 SDValue Op = N->getOperand(0); 1899 if (Op->getOpcode() == ISD::TRUNCATE) { 1900 Op = Op->getOperand(0); 1901 VT = Op->getValueType(0); 1902 BitWidth = VT.getSizeInBits(); 1903 } 1904 1905 uint64_t ShiftImm; 1906 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && 1907 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 1908 return false; 1909 1910 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1911 if (ShiftImm + Width > BitWidth) 1912 return false; 1913 1914 Opc = (VT == MVT::i32) ? 
AArch64::SBFMWri : AArch64::SBFMXri; 1915 Opd0 = Op.getOperand(0); 1916 Immr = ShiftImm; 1917 Imms = ShiftImm + Width - 1; 1918 return true; 1919 } 1920 1921 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 1922 SDValue &Opd0, unsigned &LSB, 1923 unsigned &MSB) { 1924 // We are looking for the following pattern which basically extracts several 1925 // continuous bits from the source value and places it from the LSB of the 1926 // destination value, all other bits of the destination value or set to zero: 1927 // 1928 // Value2 = AND Value, MaskImm 1929 // SRL Value2, ShiftImm 1930 // 1931 // with MaskImm >> ShiftImm to search for the bit width. 1932 // 1933 // This gets selected into a single UBFM: 1934 // 1935 // UBFM Value, ShiftImm, BitWide + SrlImm -1 1936 // 1937 1938 if (N->getOpcode() != ISD::SRL) 1939 return false; 1940 1941 uint64_t AndMask = 0; 1942 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) 1943 return false; 1944 1945 Opd0 = N->getOperand(0).getOperand(0); 1946 1947 uint64_t SrlImm = 0; 1948 if (!isIntImmediate(N->getOperand(1), SrlImm)) 1949 return false; 1950 1951 // Check whether we really have several bits extract here. 1952 unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm)); 1953 if (BitWide && isMask_64(AndMask >> SrlImm)) { 1954 if (N->getValueType(0) == MVT::i32) 1955 Opc = AArch64::UBFMWri; 1956 else 1957 Opc = AArch64::UBFMXri; 1958 1959 LSB = SrlImm; 1960 MSB = BitWide + SrlImm - 1; 1961 return true; 1962 } 1963 1964 return false; 1965 } 1966 1967 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 1968 unsigned &Immr, unsigned &Imms, 1969 bool BiggerPattern) { 1970 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 1971 "N must be a SHR/SRA operation to call this function"); 1972 1973 EVT VT = N->getValueType(0); 1974 1975 // Here we can test the type of VT and return false when the type does not 1976 // match, but since it is done prior to that call in the current context 1977 // we turned that into an assert to avoid redundant code. 1978 assert((VT == MVT::i32 || VT == MVT::i64) && 1979 "Type checking must have been done before calling this function"); 1980 1981 // Check for AND + SRL doing several bits extract. 1982 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 1983 return true; 1984 1985 // We're looking for a shift of a shift. 1986 uint64_t ShlImm = 0; 1987 uint64_t TruncBits = 0; 1988 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 1989 Opd0 = N->getOperand(0).getOperand(0); 1990 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 1991 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 1992 // We are looking for a shift of truncate. Truncate from i64 to i32 could 1993 // be considered as setting high 32 bits as zero. Our strategy here is to 1994 // always generate 64bit UBFM. This consistency will help the CSE pass 1995 // later find more redundancy. 1996 Opd0 = N->getOperand(0).getOperand(0); 1997 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 1998 VT = Opd0.getValueType(); 1999 assert(VT == MVT::i64 && "the promoted type should be i64"); 2000 } else if (BiggerPattern) { 2001 // Let's pretend a 0 shift left has been performed. 
2002 // FIXME: Currently we limit this to the bigger pattern case, 2003 // because some optimizations expect AND and not UBFM 2004 Opd0 = N->getOperand(0); 2005 } else 2006 return false; 2007 2008 // Missing combines/constant folding may have left us with strange 2009 // constants. 2010 if (ShlImm >= VT.getSizeInBits()) { 2011 LLVM_DEBUG( 2012 (dbgs() << N 2013 << ": Found large shift immediate, this should not happen\n")); 2014 return false; 2015 } 2016 2017 uint64_t SrlImm = 0; 2018 if (!isIntImmediate(N->getOperand(1), SrlImm)) 2019 return false; 2020 2021 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 2022 "bad amount in shift node!"); 2023 int immr = SrlImm - ShlImm; 2024 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 2025 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 2026 // SRA requires a signed extraction 2027 if (VT == MVT::i32) 2028 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 2029 else 2030 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; 2031 return true; 2032 } 2033 2034 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 2035 assert(N->getOpcode() == ISD::SIGN_EXTEND); 2036 2037 EVT VT = N->getValueType(0); 2038 EVT NarrowVT = N->getOperand(0)->getValueType(0); 2039 if (VT != MVT::i64 || NarrowVT != MVT::i32) 2040 return false; 2041 2042 uint64_t ShiftImm; 2043 SDValue Op = N->getOperand(0); 2044 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2045 return false; 2046 2047 SDLoc dl(N); 2048 // Extend the incoming operand of the shift to 64-bits. 2049 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 2050 unsigned Immr = ShiftImm; 2051 unsigned Imms = NarrowVT.getSizeInBits() - 1; 2052 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2053 CurDAG->getTargetConstant(Imms, dl, VT)}; 2054 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 2055 return true; 2056 } 2057 2058 /// Try to form fcvtl2 instructions from a floating-point extend of a high-half 2059 /// extract of a subvector. 2060 bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) { 2061 assert(N->getOpcode() == ISD::FP_EXTEND); 2062 2063 // There are 2 forms of fcvtl2 - extend to double or extend to float. 2064 SDValue Extract = N->getOperand(0); 2065 EVT VT = N->getValueType(0); 2066 EVT NarrowVT = Extract.getValueType(); 2067 if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) && 2068 (VT != MVT::v4f32 || NarrowVT != MVT::v4f16)) 2069 return false; 2070 2071 // Optionally look past a bitcast. 2072 Extract = peekThroughBitcasts(Extract); 2073 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) 2074 return false; 2075 2076 // Match extract from start of high half index. 2077 // Example: v8i16 -> v4i16 means the extract must begin at index 4. 2078 unsigned ExtractIndex = Extract.getConstantOperandVal(1); 2079 if (ExtractIndex != Extract.getValueType().getVectorNumElements()) 2080 return false; 2081 2082 auto Opcode = VT == MVT::v2f64 ? 
AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16; 2083 CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0)); 2084 return true; 2085 } 2086 2087 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 2088 SDValue &Opd0, unsigned &Immr, unsigned &Imms, 2089 unsigned NumberOfIgnoredLowBits = 0, 2090 bool BiggerPattern = false) { 2091 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 2092 return false; 2093 2094 switch (N->getOpcode()) { 2095 default: 2096 if (!N->isMachineOpcode()) 2097 return false; 2098 break; 2099 case ISD::AND: 2100 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, 2101 NumberOfIgnoredLowBits, BiggerPattern); 2102 case ISD::SRL: 2103 case ISD::SRA: 2104 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); 2105 2106 case ISD::SIGN_EXTEND_INREG: 2107 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); 2108 } 2109 2110 unsigned NOpc = N->getMachineOpcode(); 2111 switch (NOpc) { 2112 default: 2113 return false; 2114 case AArch64::SBFMWri: 2115 case AArch64::UBFMWri: 2116 case AArch64::SBFMXri: 2117 case AArch64::UBFMXri: 2118 Opc = NOpc; 2119 Opd0 = N->getOperand(0); 2120 Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 2121 Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 2122 return true; 2123 } 2124 // Unreachable 2125 return false; 2126 } 2127 2128 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { 2129 unsigned Opc, Immr, Imms; 2130 SDValue Opd0; 2131 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) 2132 return false; 2133 2134 EVT VT = N->getValueType(0); 2135 SDLoc dl(N); 2136 2137 // If the bit extract operation is 64bit but the original type is 32bit, we 2138 // need to add one EXTRACT_SUBREG. 2139 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { 2140 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), 2141 CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; 2142 2143 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); 2144 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 2145 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, 2146 MVT::i32, SDValue(BFM, 0), SubReg)); 2147 return true; 2148 } 2149 2150 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2151 CurDAG->getTargetConstant(Imms, dl, VT)}; 2152 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2153 return true; 2154 } 2155 2156 /// Does DstMask form a complementary pair with the mask provided by 2157 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, 2158 /// this asks whether DstMask zeroes precisely those bits that will be set by 2159 /// the other half. 2160 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, 2161 unsigned NumberOfIgnoredHighBits, EVT VT) { 2162 assert((VT == MVT::i32 || VT == MVT::i64) && 2163 "i32 or i64 mask type expected!"); 2164 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; 2165 2166 APInt SignificantDstMask = APInt(BitWidth, DstMask); 2167 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); 2168 2169 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && 2170 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue(); 2171 } 2172 2173 // Look for bits that will be useful for later uses. 2174 // A bit is consider useless as soon as it is dropped and never used 2175 // before it as been dropped. 
2176 // E.g., looking for useful bits of x
2177 // 1. y = x & 0x7
2178 // 2. z = y >> 2
2179 // After #1, the useful bits of x are 0x7; these useful bits then live through
2180 // y.
2181 // After #2, the useful bits of x are 0x4.
2182 // However, if x is used by an unpredictable instruction, then all its bits
2183 // are useful.
2184 // E.g.
2185 // 1. y = x & 0x7
2186 // 2. z = y >> 2
2187 // 3. str x, [@x]
2188 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2189
2190 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2191 unsigned Depth) {
2192 uint64_t Imm =
2193 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2194 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2195 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2196 getUsefulBits(Op, UsefulBits, Depth + 1);
2197 }
2198
2199 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2200 uint64_t Imm, uint64_t MSB,
2201 unsigned Depth) {
2202 // Inherit the bitwidth value.
2203 APInt OpUsefulBits(UsefulBits);
2204 OpUsefulBits = 1;
2205
2206 if (MSB >= Imm) {
2207 OpUsefulBits <<= MSB - Imm + 1;
2208 --OpUsefulBits;
2209 // The interesting part will be in the lower part of the result
2210 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2211 // The interesting part was starting at Imm in the argument
2212 OpUsefulBits <<= Imm;
2213 } else {
2214 OpUsefulBits <<= MSB + 1;
2215 --OpUsefulBits;
2216 // The interesting part will be shifted in the result
2217 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2218 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2219 // The interesting part was at zero in the argument
2220 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2221 }
2222
2223 UsefulBits &= OpUsefulBits;
2224 }
2225
2226 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2227 unsigned Depth) {
2228 uint64_t Imm =
2229 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2230 uint64_t MSB =
2231 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2232
2233 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2234 }
2235
2236 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2237 unsigned Depth) {
2238 uint64_t ShiftTypeAndValue =
2239 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2240 APInt Mask(UsefulBits);
2241 Mask.clearAllBits();
2242 Mask.flipAllBits();
2243
2244 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2245 // Shift Left
2246 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2247 Mask <<= ShiftAmt;
2248 getUsefulBits(Op, Mask, Depth + 1);
2249 Mask.lshrInPlace(ShiftAmt);
2250 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2251 // Shift Right
2252 // We do not handle AArch64_AM::ASR, because the sign will change the
2253 // number of useful bits
2254 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2255 Mask.lshrInPlace(ShiftAmt);
2256 getUsefulBits(Op, Mask, Depth + 1);
2257 Mask <<= ShiftAmt;
2258 } else
2259 return;
2260
2261 UsefulBits &= Mask;
2262 }
2263
2264 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2265 unsigned Depth) {
2266 uint64_t Imm =
2267 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2268 uint64_t MSB =
2269 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2270
2271 APInt
OpUsefulBits(UsefulBits); 2272 OpUsefulBits = 1; 2273 2274 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 2275 ResultUsefulBits.flipAllBits(); 2276 APInt Mask(UsefulBits.getBitWidth(), 0); 2277 2278 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 2279 2280 if (MSB >= Imm) { 2281 // The instruction is a BFXIL. 2282 uint64_t Width = MSB - Imm + 1; 2283 uint64_t LSB = Imm; 2284 2285 OpUsefulBits <<= Width; 2286 --OpUsefulBits; 2287 2288 if (Op.getOperand(1) == Orig) { 2289 // Copy the low bits from the result to bits starting from LSB. 2290 Mask = ResultUsefulBits & OpUsefulBits; 2291 Mask <<= LSB; 2292 } 2293 2294 if (Op.getOperand(0) == Orig) 2295 // Bits starting from LSB in the input contribute to the result. 2296 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2297 } else { 2298 // The instruction is a BFI. 2299 uint64_t Width = MSB + 1; 2300 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 2301 2302 OpUsefulBits <<= Width; 2303 --OpUsefulBits; 2304 OpUsefulBits <<= LSB; 2305 2306 if (Op.getOperand(1) == Orig) { 2307 // Copy the bits from the result to the zero bits. 2308 Mask = ResultUsefulBits & OpUsefulBits; 2309 Mask.lshrInPlace(LSB); 2310 } 2311 2312 if (Op.getOperand(0) == Orig) 2313 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2314 } 2315 2316 UsefulBits &= Mask; 2317 } 2318 2319 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2320 SDValue Orig, unsigned Depth) { 2321 2322 // Users of this node should have already been instruction selected 2323 // FIXME: Can we turn that into an assert? 2324 if (!UserNode->isMachineOpcode()) 2325 return; 2326 2327 switch (UserNode->getMachineOpcode()) { 2328 default: 2329 return; 2330 case AArch64::ANDSWri: 2331 case AArch64::ANDSXri: 2332 case AArch64::ANDWri: 2333 case AArch64::ANDXri: 2334 // We increment Depth only when we call the getUsefulBits 2335 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2336 Depth); 2337 case AArch64::UBFMWri: 2338 case AArch64::UBFMXri: 2339 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2340 2341 case AArch64::ORRWrs: 2342 case AArch64::ORRXrs: 2343 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig) 2344 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2345 Depth); 2346 return; 2347 case AArch64::BFMWri: 2348 case AArch64::BFMXri: 2349 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2350 2351 case AArch64::STRBBui: 2352 case AArch64::STURBBi: 2353 if (UserNode->getOperand(0) != Orig) 2354 return; 2355 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); 2356 return; 2357 2358 case AArch64::STRHHui: 2359 case AArch64::STURHHi: 2360 if (UserNode->getOperand(0) != Orig) 2361 return; 2362 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2363 return; 2364 } 2365 } 2366 2367 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2368 if (Depth >= SelectionDAG::MaxRecursionDepth) 2369 return; 2370 // Initialize UsefulBits 2371 if (!Depth) { 2372 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2373 // At the beginning, assume every produced bits is useful 2374 UsefulBits = APInt(Bitwidth, 0); 2375 UsefulBits.flipAllBits(); 2376 } 2377 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2378 2379 for (SDNode *Node : Op.getNode()->uses()) { 2380 // A use cannot produce useful bits 2381 APInt UsefulBitsForUse = APInt(UsefulBits); 2382 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 2383 UsersUsefulBits |= UsefulBitsForUse; 2384 } 2385 // UsefulBits 
contains the produced bits that are meaningful for the 2386 // current definition, thus a user cannot make a bit meaningful at 2387 // this point 2388 UsefulBits &= UsersUsefulBits; 2389 } 2390 2391 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 2392 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 2393 /// 0, return Op unchanged. 2394 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 2395 if (ShlAmount == 0) 2396 return Op; 2397 2398 EVT VT = Op.getValueType(); 2399 SDLoc dl(Op); 2400 unsigned BitWidth = VT.getSizeInBits(); 2401 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2402 2403 SDNode *ShiftNode; 2404 if (ShlAmount > 0) { 2405 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 2406 ShiftNode = CurDAG->getMachineNode( 2407 UBFMOpc, dl, VT, Op, 2408 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 2409 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 2410 } else { 2411 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 2412 assert(ShlAmount < 0 && "expected right shift"); 2413 int ShrAmount = -ShlAmount; 2414 ShiftNode = CurDAG->getMachineNode( 2415 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 2416 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 2417 } 2418 2419 return SDValue(ShiftNode, 0); 2420 } 2421 2422 /// Does this tree qualify as an attempt to move a bitfield into position, 2423 /// essentially "(and (shl VAL, N), Mask)". 2424 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 2425 bool BiggerPattern, 2426 SDValue &Src, int &ShiftAmount, 2427 int &MaskWidth) { 2428 EVT VT = Op.getValueType(); 2429 unsigned BitWidth = VT.getSizeInBits(); 2430 (void)BitWidth; 2431 assert(BitWidth == 32 || BitWidth == 64); 2432 2433 KnownBits Known = CurDAG->computeKnownBits(Op); 2434 2435 // Non-zero in the sense that they're not provably zero, which is the key 2436 // point if we want to use this value 2437 uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 2438 2439 // Discard a constant AND mask if present. It's safe because the node will 2440 // already have been factored into the computeKnownBits calculation above. 2441 uint64_t AndImm; 2442 if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { 2443 assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0); 2444 Op = Op.getOperand(0); 2445 } 2446 2447 // Don't match if the SHL has more than one use, since then we'll end up 2448 // generating SHL+UBFIZ instead of just keeping SHL+AND. 2449 if (!BiggerPattern && !Op.hasOneUse()) 2450 return false; 2451 2452 uint64_t ShlImm; 2453 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 2454 return false; 2455 Op = Op.getOperand(0); 2456 2457 if (!isShiftedMask_64(NonZeroBits)) 2458 return false; 2459 2460 ShiftAmount = countTrailingZeros(NonZeroBits); 2461 MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); 2462 2463 // BFI encompasses sufficiently many nodes that it's worth inserting an extra 2464 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL 2465 // amount. BiggerPattern is true when this pattern is being matched for BFI, 2466 // BiggerPattern is false when this pattern is being matched for UBFIZ, in 2467 // which case it is not profitable to insert an extra shift. 
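// E.g., for (and (shl x, 3), 0xf0) the non-zero mask is 0xf0, so ShiftAmount is 4
// and MaskWidth is 4 while ShlImm is 3; that mismatch of one bit is only worth
// fixing up with an extra shift when matching the bigger BFI pattern.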
2468 if (ShlImm - ShiftAmount != 0 && !BiggerPattern) 2469 return false; 2470 Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); 2471 2472 return true; 2473 } 2474 2475 static bool isShiftedMask(uint64_t Mask, EVT VT) { 2476 assert(VT == MVT::i32 || VT == MVT::i64); 2477 if (VT == MVT::i32) 2478 return isShiftedMask_32(Mask); 2479 return isShiftedMask_64(Mask); 2480 } 2481 2482 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 2483 // inserted only sets known zero bits. 2484 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 2485 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2486 2487 EVT VT = N->getValueType(0); 2488 if (VT != MVT::i32 && VT != MVT::i64) 2489 return false; 2490 2491 unsigned BitWidth = VT.getSizeInBits(); 2492 2493 uint64_t OrImm; 2494 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 2495 return false; 2496 2497 // Skip this transformation if the ORR immediate can be encoded in the ORR. 2498 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 2499 // performance neutral. 2500 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 2501 return false; 2502 2503 uint64_t MaskImm; 2504 SDValue And = N->getOperand(0); 2505 // Must be a single use AND with an immediate operand. 2506 if (!And.hasOneUse() || 2507 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 2508 return false; 2509 2510 // Compute the Known Zero for the AND as this allows us to catch more general 2511 // cases than just looking for AND with imm. 2512 KnownBits Known = CurDAG->computeKnownBits(And); 2513 2514 // Non-zero in the sense that they're not provably zero, which is the key 2515 // point if we want to use this value. 2516 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 2517 2518 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 2519 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 2520 return false; 2521 2522 // The bits being inserted must only set those bits that are known to be zero. 2523 if ((OrImm & NotKnownZero) != 0) { 2524 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 2525 // currently handle this case. 2526 return false; 2527 } 2528 2529 // BFI/BFXIL dst, src, #lsb, #width. 2530 int LSB = countTrailingOnes(NotKnownZero); 2531 int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation(); 2532 2533 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 2534 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2535 unsigned ImmS = Width - 1; 2536 2537 // If we're creating a BFI instruction avoid cases where we need more 2538 // instructions to materialize the BFI constant as compared to the original 2539 // ORR. A BFXIL will use the same constant as the original ORR, so the code 2540 // should be no worse in this case. 2541 bool IsBFI = LSB != 0; 2542 uint64_t BFIImm = OrImm >> LSB; 2543 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 2544 // We have a BFI instruction and we know the constant can't be materialized 2545 // with a ORR-immediate with the zero register. 2546 unsigned OrChunks = 0, BFIChunks = 0; 2547 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 2548 if (((OrImm >> Shift) & 0xFFFF) != 0) 2549 ++OrChunks; 2550 if (((BFIImm >> Shift) & 0xFFFF) != 0) 2551 ++BFIChunks; 2552 } 2553 if (BFIChunks > OrChunks) 2554 return false; 2555 } 2556 2557 // Materialize the constant to be inserted. 2558 SDLoc DL(N); 2559 unsigned MOVIOpc = VT == MVT::i32 ? 
AArch64::MOVi32imm : AArch64::MOVi64imm; 2560 SDNode *MOVI = CurDAG->getMachineNode( 2561 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); 2562 2563 // Create the BFI/BFXIL instruction. 2564 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), 2565 CurDAG->getTargetConstant(ImmR, DL, VT), 2566 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2567 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2568 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2569 return true; 2570 } 2571 2572 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, 2573 SelectionDAG *CurDAG) { 2574 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2575 2576 EVT VT = N->getValueType(0); 2577 if (VT != MVT::i32 && VT != MVT::i64) 2578 return false; 2579 2580 unsigned BitWidth = VT.getSizeInBits(); 2581 2582 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 2583 // have the expected shape. Try to undo that. 2584 2585 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); 2586 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); 2587 2588 // Given a OR operation, check if we have the following pattern 2589 // ubfm c, b, imm, imm2 (or something that does the same jobs, see 2590 // isBitfieldExtractOp) 2591 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and 2592 // countTrailingZeros(mask2) == imm2 - imm + 1 2593 // f = d | c 2594 // if yes, replace the OR instruction with: 2595 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2 2596 2597 // OR is commutative, check all combinations of operand order and values of 2598 // BiggerPattern, i.e. 2599 // Opd0, Opd1, BiggerPattern=false 2600 // Opd1, Opd0, BiggerPattern=false 2601 // Opd0, Opd1, BiggerPattern=true 2602 // Opd1, Opd0, BiggerPattern=true 2603 // Several of these combinations may match, so check with BiggerPattern=false 2604 // first since that will produce better results by matching more instructions 2605 // and/or inserting fewer extra instructions. 2606 for (int I = 0; I < 4; ++I) { 2607 2608 SDValue Dst, Src; 2609 unsigned ImmR, ImmS; 2610 bool BiggerPattern = I / 2; 2611 SDValue OrOpd0Val = N->getOperand(I % 2); 2612 SDNode *OrOpd0 = OrOpd0Val.getNode(); 2613 SDValue OrOpd1Val = N->getOperand((I + 1) % 2); 2614 SDNode *OrOpd1 = OrOpd1Val.getNode(); 2615 2616 unsigned BFXOpc; 2617 int DstLSB, Width; 2618 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, 2619 NumberOfIgnoredLowBits, BiggerPattern)) { 2620 // Check that the returned opcode is compatible with the pattern, 2621 // i.e., same type and zero extended (U and not S) 2622 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || 2623 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) 2624 continue; 2625 2626 // Compute the width of the bitfield insertion 2627 DstLSB = 0; 2628 Width = ImmS - ImmR + 1; 2629 // FIXME: This constraint is to catch bitfield insertion we may 2630 // want to widen the pattern if we want to grab general bitfied 2631 // move case 2632 if (Width <= 0) 2633 continue; 2634 2635 // If the mask on the insertee is correct, we have a BFXIL operation. We 2636 // can share the ImmR and ImmS values from the already-computed UBFM. 
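// E.g., (or (and (srl x, 4), 0xff), (and y, ~0xff)) reuses ImmR = 4, ImmS = 11
// from the extract, producing BFM y, x, #4, #11, i.e. BFXIL y, x, #4, #8.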
2637 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, 2638 BiggerPattern, 2639 Src, DstLSB, Width)) { 2640 ImmR = (BitWidth - DstLSB) % BitWidth; 2641 ImmS = Width - 1; 2642 } else 2643 continue; 2644 2645 // Check the second part of the pattern 2646 EVT VT = OrOpd1Val.getValueType(); 2647 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); 2648 2649 // Compute the Known Zero for the candidate of the first operand. 2650 // This allows to catch more general case than just looking for 2651 // AND with imm. Indeed, simplify-demanded-bits may have removed 2652 // the AND instruction because it proves it was useless. 2653 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 2654 2655 // Check if there is enough room for the second operand to appear 2656 // in the first one 2657 APInt BitsToBeInserted = 2658 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 2659 2660 if ((BitsToBeInserted & ~Known.Zero) != 0) 2661 continue; 2662 2663 // Set the first operand 2664 uint64_t Imm; 2665 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 2666 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 2667 // In that case, we can eliminate the AND 2668 Dst = OrOpd1->getOperand(0); 2669 else 2670 // Maybe the AND has been removed by simplify-demanded-bits 2671 // or is useful because it discards more bits 2672 Dst = OrOpd1Val; 2673 2674 // both parts match 2675 SDLoc DL(N); 2676 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 2677 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2678 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2679 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2680 return true; 2681 } 2682 2683 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 2684 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 2685 // mask (e.g., 0x000ffff0). 2686 uint64_t Mask0Imm, Mask1Imm; 2687 SDValue And0 = N->getOperand(0); 2688 SDValue And1 = N->getOperand(1); 2689 if (And0.hasOneUse() && And1.hasOneUse() && 2690 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 2691 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 2692 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 2693 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 2694 2695 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 2696 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 2697 // bits to be inserted. 2698 if (isShiftedMask(Mask0Imm, VT)) { 2699 std::swap(And0, And1); 2700 std::swap(Mask0Imm, Mask1Imm); 2701 } 2702 2703 SDValue Src = And1->getOperand(0); 2704 SDValue Dst = And0->getOperand(0); 2705 unsigned LSB = countTrailingZeros(Mask1Imm); 2706 int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation(); 2707 2708 // The BFXIL inserts the low-order bits from a source register, so right 2709 // shift the needed bits into place. 2710 SDLoc DL(N); 2711 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2712 SDNode *LSR = CurDAG->getMachineNode( 2713 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT), 2714 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 2715 2716 // BFXIL is an alias of BFM, so translate to BFM operands. 2717 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2718 unsigned ImmS = Width - 1; 2719 2720 // Create the BFXIL instruction. 
2721 SDValue Ops[] = {Dst, SDValue(LSR, 0), 2722 CurDAG->getTargetConstant(ImmR, DL, VT), 2723 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2724 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2725 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2726 return true; 2727 } 2728 2729 return false; 2730 } 2731 2732 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 2733 if (N->getOpcode() != ISD::OR) 2734 return false; 2735 2736 APInt NUsefulBits; 2737 getUsefulBits(SDValue(N, 0), NUsefulBits); 2738 2739 // If all bits are not useful, just return UNDEF. 2740 if (!NUsefulBits) { 2741 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 2742 return true; 2743 } 2744 2745 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 2746 return true; 2747 2748 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 2749 } 2750 2751 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 2752 /// equivalent of a left shift by a constant amount followed by an and masking 2753 /// out a contiguous set of bits. 2754 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 2755 if (N->getOpcode() != ISD::AND) 2756 return false; 2757 2758 EVT VT = N->getValueType(0); 2759 if (VT != MVT::i32 && VT != MVT::i64) 2760 return false; 2761 2762 SDValue Op0; 2763 int DstLSB, Width; 2764 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 2765 Op0, DstLSB, Width)) 2766 return false; 2767 2768 // ImmR is the rotate right amount. 2769 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 2770 // ImmS is the most significant bit of the source to be moved. 2771 unsigned ImmS = Width - 1; 2772 2773 SDLoc DL(N); 2774 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 2775 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2776 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2777 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2778 return true; 2779 } 2780 2781 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 2782 /// variable shift/rotate instructions. 2783 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 2784 EVT VT = N->getValueType(0); 2785 2786 unsigned Opc; 2787 switch (N->getOpcode()) { 2788 case ISD::ROTR: 2789 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 2790 break; 2791 case ISD::SHL: 2792 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 2793 break; 2794 case ISD::SRL: 2795 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 2796 break; 2797 case ISD::SRA: 2798 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 2799 break; 2800 default: 2801 return false; 2802 } 2803 2804 uint64_t Size; 2805 uint64_t Bits; 2806 if (VT == MVT::i32) { 2807 Bits = 5; 2808 Size = 32; 2809 } else if (VT == MVT::i64) { 2810 Bits = 6; 2811 Size = 64; 2812 } else 2813 return false; 2814 2815 SDValue ShiftAmt = N->getOperand(1); 2816 SDLoc DL(N); 2817 SDValue NewShiftAmt; 2818 2819 // Skip over an extend of the shift amount. 2820 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 2821 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 2822 ShiftAmt = ShiftAmt->getOperand(0); 2823 2824 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 2825 SDValue Add0 = ShiftAmt->getOperand(0); 2826 SDValue Add1 = ShiftAmt->getOperand(1); 2827 uint64_t Add0Imm; 2828 uint64_t Add1Imm; 2829 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 2830 // to avoid the ADD/SUB. 
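// E.g., (srl x, (add y, 64)) on i64 depends only on the low 6 bits of y, because
// the hardware shift already takes the amount modulo 64, so we shift by y directly.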
2831 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) 2832 NewShiftAmt = Add0; 2833 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2834 // generate a NEG instead of a SUB of a constant. 2835 else if (ShiftAmt->getOpcode() == ISD::SUB && 2836 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 2837 (Add0Imm % Size == 0)) { 2838 unsigned NegOpc; 2839 unsigned ZeroReg; 2840 EVT SubVT = ShiftAmt->getValueType(0); 2841 if (SubVT == MVT::i32) { 2842 NegOpc = AArch64::SUBWrr; 2843 ZeroReg = AArch64::WZR; 2844 } else { 2845 assert(SubVT == MVT::i64); 2846 NegOpc = AArch64::SUBXrr; 2847 ZeroReg = AArch64::XZR; 2848 } 2849 SDValue Zero = 2850 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 2851 MachineSDNode *Neg = 2852 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 2853 NewShiftAmt = SDValue(Neg, 0); 2854 } else 2855 return false; 2856 } else { 2857 // If the shift amount is masked with an AND, check that the mask covers the 2858 // bits that are implicitly ANDed off by the above opcodes and if so, skip 2859 // the AND. 2860 uint64_t MaskImm; 2861 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && 2862 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) 2863 return false; 2864 2865 if (countTrailingOnes(MaskImm) < Bits) 2866 return false; 2867 2868 NewShiftAmt = ShiftAmt->getOperand(0); 2869 } 2870 2871 // Narrow/widen the shift amount to match the size of the shift operation. 2872 if (VT == MVT::i32) 2873 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 2874 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 2875 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 2876 MachineSDNode *Ext = CurDAG->getMachineNode( 2877 AArch64::SUBREG_TO_REG, DL, VT, 2878 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 2879 NewShiftAmt = SDValue(Ext, 0); 2880 } 2881 2882 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 2883 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2884 return true; 2885 } 2886 2887 bool 2888 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 2889 unsigned RegWidth) { 2890 APFloat FVal(0.0); 2891 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 2892 FVal = CN->getValueAPF(); 2893 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 2894 // Some otherwise illegal constants are allowed in this case. 2895 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || 2896 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) 2897 return false; 2898 2899 ConstantPoolSDNode *CN = 2900 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); 2901 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); 2902 } else 2903 return false; 2904 2905 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits 2906 // is between 1 and 32 for a destination w-register, or 1 and 64 for an 2907 // x-register. 2908 // 2909 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we 2910 // want THIS_NODE to be 2^fbits. This is much easier to deal with using 2911 // integers. 2912 bool IsExact; 2913 2914 // fbits is between 1 and 64 in the worst-case, which means the fmul 2915 // could have 2^64 as an actual operand. Need 65 bits of precision. 2916 APSInt IntVal(65, true); 2917 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); 2918 2919 // N.b. isPowerOf2 also checks for > 0. 
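// E.g., a multiplier of 256.0 gives IntVal == 2^8 exactly, so FBits becomes 8,
// i.e. a conversion with 8 fractional bits.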
2920 if (!IsExact || !IntVal.isPowerOf2()) return false;
2921 unsigned FBits = IntVal.logBase2();
2922
2923 // Checks above should have guaranteed that we haven't lost information in
2924 // finding FBits, but it must still be in range.
2925 if (FBits == 0 || FBits > RegWidth) return false;
2926
2927 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2928 return true;
2929 }
2930
2931 // Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
2932 // of the string, obtains the integer values from them, and combines these
2933 // into a single value to be used in the MRS/MSR instruction.
2934 static int getIntOperandFromRegisterString(StringRef RegString) {
2935 SmallVector<StringRef, 5> Fields;
2936 RegString.split(Fields, ':');
2937
2938 if (Fields.size() == 1)
2939 return -1;
2940
2941 assert(Fields.size() == 5
2942 && "Invalid number of fields in read register string");
2943
2944 SmallVector<int, 5> Ops;
2945 bool AllIntFields = true;
2946
2947 for (StringRef Field : Fields) {
2948 unsigned IntField;
2949 AllIntFields &= !Field.getAsInteger(10, IntField);
2950 Ops.push_back(IntField);
2951 }
2952
2953 assert(AllIntFields &&
2954 "Unexpected non-integer value in special register string.");
2955 (void)AllIntFields;
2956
2957 // Need to combine the integer fields of the string into a single value
2958 // based on the bit encoding of the MRS/MSR instruction.
2959 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2960 (Ops[3] << 3) | (Ops[4]);
2961 }
2962
2963 // Lower the read_register intrinsic to an MRS instruction node if the special
2964 // register string argument is either of the form detailed in the ACLE (the
2965 // form described in getIntOperandFromRegisterString) or is a named register
2966 // known by the MRS SysReg mapper.
2967 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2968 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2969 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2970 SDLoc DL(N);
2971
2972 int Reg = getIntOperandFromRegisterString(RegString->getString());
2973 if (Reg != -1) {
2974 ReplaceNode(N, CurDAG->getMachineNode(
2975 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2976 CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2977 N->getOperand(0)));
2978 return true;
2979 }
2980
2981 // Use the sysreg mapper to map the remaining possible strings to the
2982 // value for the register to be used for the instruction operand.
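// E.g., a readable register such as "CNTVCT_EL0" resolves through the mapper,
// while generic S<op0>_<op1>_C<n>_C<m>_<op2> style names are handled by
// parseGenericRegister below.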
2983 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2984 if (TheReg && TheReg->Readable &&
2985 TheReg->haveFeatures(Subtarget->getFeatureBits()))
2986 Reg = TheReg->Encoding;
2987 else
2988 Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2989
2990 if (Reg != -1) {
2991 ReplaceNode(N, CurDAG->getMachineNode(
2992 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2993 CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2994 N->getOperand(0)));
2995 return true;
2996 }
2997
2998 if (RegString->getString() == "pc") {
2999 ReplaceNode(N, CurDAG->getMachineNode(
3000 AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
3001 CurDAG->getTargetConstant(0, DL, MVT::i32),
3002 N->getOperand(0)));
3003 return true;
3004 }
3005
3006 return false;
3007 }
3008
3009 // Lower the write_register intrinsic to an MSR instruction node if the special
3010 // register string argument is either of the form detailed in the ACLE (the
3011 // form described in getIntOperandFromRegisterString) or is a named register
3012 // known by the MSR SysReg mapper.
3013 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3014 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3015 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3016 SDLoc DL(N);
3017
3018 int Reg = getIntOperandFromRegisterString(RegString->getString());
3019 if (Reg != -1) {
3020 ReplaceNode(
3021 N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
3022 CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3023 N->getOperand(2), N->getOperand(0)));
3024 return true;
3025 }
3026
3027 // Check if the register was one of those allowed as the pstatefield value in
3028 // the MSR (immediate) instruction. To accept the values allowed in the
3029 // pstatefield for the MSR (immediate) instruction, we also require that an
3030 // immediate value has been provided as an argument; we know that this is
3031 // the case as it has been ensured by semantic checking.
3032 auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
3033 if (PMapper) {
3034 assert (isa<ConstantSDNode>(N->getOperand(2))
3035 && "Expected a constant integer expression.");
3036 unsigned Reg = PMapper->Encoding;
3037 uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
3038 unsigned State;
3039 if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
3040 assert(Immed < 2 && "Bad imm");
3041 State = AArch64::MSRpstateImm1;
3042 } else {
3043 assert(Immed < 16 && "Bad imm");
3044 State = AArch64::MSRpstateImm4;
3045 }
3046 ReplaceNode(N, CurDAG->getMachineNode(
3047 State, DL, MVT::Other,
3048 CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3049 CurDAG->getTargetConstant(Immed, DL, MVT::i16),
3050 N->getOperand(0)));
3051 return true;
3052 }
3053
3054 // Use the sysreg mapper to attempt to map the remaining possible strings
3055 // to the value for the register to be used for the MSR (register)
3056 // instruction operand.
3057 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 3058 if (TheReg && TheReg->Writeable && 3059 TheReg->haveFeatures(Subtarget->getFeatureBits())) 3060 Reg = TheReg->Encoding; 3061 else 3062 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 3063 if (Reg != -1) { 3064 ReplaceNode(N, CurDAG->getMachineNode( 3065 AArch64::MSR, DL, MVT::Other, 3066 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3067 N->getOperand(2), N->getOperand(0))); 3068 return true; 3069 } 3070 3071 return false; 3072 } 3073 3074 /// We've got special pseudo-instructions for these 3075 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3076 unsigned Opcode; 3077 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3078 3079 // Leave IR for LSE if subtarget supports it. 3080 if (Subtarget->hasLSE()) return false; 3081 3082 if (MemTy == MVT::i8) 3083 Opcode = AArch64::CMP_SWAP_8; 3084 else if (MemTy == MVT::i16) 3085 Opcode = AArch64::CMP_SWAP_16; 3086 else if (MemTy == MVT::i32) 3087 Opcode = AArch64::CMP_SWAP_32; 3088 else if (MemTy == MVT::i64) 3089 Opcode = AArch64::CMP_SWAP_64; 3090 else 3091 llvm_unreachable("Unknown AtomicCmpSwap type"); 3092 3093 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32; 3094 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3095 N->getOperand(0)}; 3096 SDNode *CmpSwap = CurDAG->getMachineNode( 3097 Opcode, SDLoc(N), 3098 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 3099 3100 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3101 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3102 3103 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3104 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3105 CurDAG->RemoveDeadNode(N); 3106 3107 return true; 3108 } 3109 3110 bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base, 3111 SDValue &Offset) { 3112 auto C = dyn_cast<ConstantSDNode>(N); 3113 if (!C) 3114 return false; 3115 3116 auto Ty = N->getValueType(0); 3117 3118 int64_t Imm = C->getSExtValue(); 3119 SDLoc DL(N); 3120 3121 if ((Imm >= -128) && (Imm <= 127)) { 3122 Base = CurDAG->getTargetConstant(Imm, DL, Ty); 3123 Offset = CurDAG->getTargetConstant(0, DL, Ty); 3124 return true; 3125 } 3126 3127 if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) { 3128 Base = CurDAG->getTargetConstant(Imm/256, DL, Ty); 3129 Offset = CurDAG->getTargetConstant(8, DL, Ty); 3130 return true; 3131 } 3132 3133 return false; 3134 } 3135 3136 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) { 3137 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3138 const int64_t ImmVal = CNode->getSExtValue(); 3139 SDLoc DL(N); 3140 3141 switch (VT.SimpleTy) { 3142 case MVT::i8: 3143 // Can always select i8s, no shift, mask the immediate value to 3144 // deal with sign-extended value from lowering. 3145 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3146 Imm = CurDAG->getTargetConstant(ImmVal & 0xFF, DL, MVT::i32); 3147 return true; 3148 case MVT::i16: 3149 // i16 values get sign-extended to 32-bits during lowering. 
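// E.g., #0x2000 does not fit in the low byte, but it is equivalent to #0x20 with
// LSL #8, which the shifted form below encodes.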
3150 if ((ImmVal & 0xFF) == ImmVal) { 3151 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3152 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3153 return true; 3154 } else if ((ImmVal & 0xFF) == 0) { 3155 assert((ImmVal >= -32768) && (ImmVal <= 32512)); 3156 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3157 Imm = CurDAG->getTargetConstant((ImmVal >> 8) & 0xFF, DL, MVT::i32); 3158 return true; 3159 } 3160 break; 3161 case MVT::i32: 3162 case MVT::i64: 3163 // Range of immediate won't trigger signedness problems for 32/64b. 3164 if ((ImmVal & 0xFF) == ImmVal) { 3165 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3166 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3167 return true; 3168 } else if ((ImmVal & 0xFF00) == ImmVal) { 3169 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3170 Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32); 3171 return true; 3172 } 3173 break; 3174 default: 3175 break; 3176 } 3177 } 3178 3179 return false; 3180 } 3181 3182 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { 3183 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3184 int64_t ImmVal = CNode->getSExtValue(); 3185 SDLoc DL(N); 3186 if (ImmVal >= -128 && ImmVal < 128) { 3187 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3188 return true; 3189 } 3190 } 3191 return false; 3192 } 3193 3194 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { 3195 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3196 uint64_t ImmVal = CNode->getZExtValue(); 3197 3198 switch (VT.SimpleTy) { 3199 case MVT::i8: 3200 ImmVal &= 0xFF; 3201 break; 3202 case MVT::i16: 3203 ImmVal &= 0xFFFF; 3204 break; 3205 case MVT::i32: 3206 ImmVal &= 0xFFFFFFFF; 3207 break; 3208 case MVT::i64: 3209 break; 3210 default: 3211 llvm_unreachable("Unexpected type"); 3212 } 3213 3214 if (ImmVal < 256) { 3215 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3216 return true; 3217 } 3218 } 3219 return false; 3220 } 3221 3222 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, 3223 bool Invert) { 3224 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3225 uint64_t ImmVal = CNode->getZExtValue(); 3226 SDLoc DL(N); 3227 3228 if (Invert) 3229 ImmVal = ~ImmVal; 3230 3231 // Shift mask depending on type size. 3232 switch (VT.SimpleTy) { 3233 case MVT::i8: 3234 ImmVal &= 0xFF; 3235 ImmVal |= ImmVal << 8; 3236 ImmVal |= ImmVal << 16; 3237 ImmVal |= ImmVal << 32; 3238 break; 3239 case MVT::i16: 3240 ImmVal &= 0xFFFF; 3241 ImmVal |= ImmVal << 16; 3242 ImmVal |= ImmVal << 32; 3243 break; 3244 case MVT::i32: 3245 ImmVal &= 0xFFFFFFFF; 3246 ImmVal |= ImmVal << 32; 3247 break; 3248 case MVT::i64: 3249 break; 3250 default: 3251 llvm_unreachable("Unexpected type"); 3252 } 3253 3254 uint64_t encoding; 3255 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { 3256 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); 3257 return true; 3258 } 3259 } 3260 return false; 3261 } 3262 3263 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. 3264 // Rather than attempt to normalise everything we can sometimes saturate the 3265 // shift amount during selection. This function also allows for consistent 3266 // isel patterns by ensuring the resulting "Imm" node is of the i32 type 3267 // required by the instructions. 
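// For example, assuming a shift range of [Low=1, High=8] with AllowSaturation
// set, an over-wide amount such as 200 is clamped to 8 instead of being
// rejected.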
3268 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, 3269 uint64_t High, bool AllowSaturation, 3270 SDValue &Imm) { 3271 if (auto *CN = dyn_cast<ConstantSDNode>(N)) { 3272 uint64_t ImmVal = CN->getZExtValue(); 3273 3274 // Reject shift amounts that are too small. 3275 if (ImmVal < Low) 3276 return false; 3277 3278 // Reject or saturate shift amounts that are too big. 3279 if (ImmVal > High) { 3280 if (!AllowSaturation) 3281 return false; 3282 ImmVal = High; 3283 } 3284 3285 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3286 return true; 3287 } 3288 3289 return false; 3290 } 3291 3292 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 3293 // tagp(FrameIndex, IRGstack, tag_offset): 3294 // since the offset between FrameIndex and IRGstack is a compile-time 3295 // constant, this can be lowered to a single ADDG instruction. 3296 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 3297 return false; 3298 } 3299 3300 SDValue IRG_SP = N->getOperand(2); 3301 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 3302 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != 3303 Intrinsic::aarch64_irg_sp) { 3304 return false; 3305 } 3306 3307 const TargetLowering *TLI = getTargetLowering(); 3308 SDLoc DL(N); 3309 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 3310 SDValue FiOp = CurDAG->getTargetFrameIndex( 3311 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3312 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3313 3314 SDNode *Out = CurDAG->getMachineNode( 3315 AArch64::TAGPstack, DL, MVT::i64, 3316 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 3317 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3318 ReplaceNode(N, Out); 3319 return true; 3320 } 3321 3322 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 3323 assert(isa<ConstantSDNode>(N->getOperand(3)) && 3324 "llvm.aarch64.tagp third argument must be an immediate"); 3325 if (trySelectStackSlotTagP(N)) 3326 return; 3327 // FIXME: above applies in any case when offset between Op1 and Op2 is a 3328 // compile-time constant, not just for stack allocations. 3329 3330 // General case for unrelated pointers in Op1 and Op2. 3331 SDLoc DL(N); 3332 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3333 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 3334 {N->getOperand(1), N->getOperand(2)}); 3335 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 3336 {SDValue(N1, 0), N->getOperand(2)}); 3337 SDNode *N3 = CurDAG->getMachineNode( 3338 AArch64::ADDG, DL, MVT::i64, 3339 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 3340 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3341 ReplaceNode(N, N3); 3342 } 3343 3344 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length 3345 // vector types larger than NEON don't have a matching SubRegIndex. 
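// For 64-bit and 128-bit fixed length results the dsub/zsub sub-registers can
// be named directly; anything wider falls back to COPY_TO_REGCLASS onto ZPR.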
static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
  assert(V.getValueType().isScalableVector() &&
         V.getValueType().getSizeInBits().getKnownMinSize() ==
             AArch64::SVEBitsPerBlock &&
         "Expected to extract from a packed scalable vector!");
  assert(VT.isFixedLengthVector() &&
         "Expected to extract a fixed length vector!");

  SDLoc DL(V);
  switch (VT.getSizeInBits()) {
  case 64: {
    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
  }
  case 128: {
    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
  }
  default: {
    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
  }
  }
}

// NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
// vector types larger than NEON don't have a matching SubRegIndex.
static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
  assert(VT.isScalableVector() &&
         VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
         "Expected to insert into a packed scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected to insert a fixed length vector!");

  SDLoc DL(V);
  switch (V.getValueType().getSizeInBits()) {
  case 64: {
    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
                               SDValue(Container, 0), V, SubReg);
  }
  case 128: {
    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
                               SDValue(Container, 0), V, SubReg);
  }
  default: {
    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
  }
  }
}

void AArch64DAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we already have selected!
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // A few cases need custom selection.
  EVT VT = Node->getValueType(0);

  switch (Node->getOpcode()) {
  default:
    break;

  case ISD::ATOMIC_CMP_SWAP:
    if (SelectCMP_SWAP(Node))
      return;
    break;

  case ISD::READ_REGISTER:
    if (tryReadRegister(Node))
      return;
    break;

  case ISD::WRITE_REGISTER:
    if (tryWriteRegister(Node))
      return;
    break;

  case ISD::ADD:
    if (tryMLAV64LaneV128(Node))
      return;
    break;

  case ISD::LOAD: {
    // Try to select as an indexed load. Fall through to normal processing
    // if we can't.
3439 if (tryIndexedLoad(Node)) 3440 return; 3441 break; 3442 } 3443 3444 case ISD::SRL: 3445 case ISD::AND: 3446 case ISD::SRA: 3447 case ISD::SIGN_EXTEND_INREG: 3448 if (tryBitfieldExtractOp(Node)) 3449 return; 3450 if (tryBitfieldInsertInZeroOp(Node)) 3451 return; 3452 LLVM_FALLTHROUGH; 3453 case ISD::ROTR: 3454 case ISD::SHL: 3455 if (tryShiftAmountMod(Node)) 3456 return; 3457 break; 3458 3459 case ISD::SIGN_EXTEND: 3460 if (tryBitfieldExtractOpFromSExt(Node)) 3461 return; 3462 break; 3463 3464 case ISD::FP_EXTEND: 3465 if (tryHighFPExt(Node)) 3466 return; 3467 break; 3468 3469 case ISD::OR: 3470 if (tryBitfieldInsertOp(Node)) 3471 return; 3472 break; 3473 3474 case ISD::EXTRACT_SUBVECTOR: { 3475 // Bail when not a "cast" like extract_subvector. 3476 if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0) 3477 break; 3478 3479 // Bail when normal isel can do the job. 3480 EVT InVT = Node->getOperand(0).getValueType(); 3481 if (VT.isScalableVector() || InVT.isFixedLengthVector()) 3482 break; 3483 3484 // NOTE: We can only get here when doing fixed length SVE code generation. 3485 // We do manual selection because the types involved are not linked to real 3486 // registers (despite being legal) and must be coerced into SVE registers. 3487 // 3488 // NOTE: If the above changes, be aware that selection will still not work 3489 // because the td definition of extract_vector does not support extracting 3490 // a fixed length vector from a scalable vector. 3491 3492 ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0))); 3493 return; 3494 } 3495 3496 case ISD::INSERT_SUBVECTOR: { 3497 // Bail when not a "cast" like insert_subvector. 3498 if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0) 3499 break; 3500 if (!Node->getOperand(0).isUndef()) 3501 break; 3502 3503 // Bail when normal isel should do the job. 3504 EVT InVT = Node->getOperand(1).getValueType(); 3505 if (VT.isFixedLengthVector() || InVT.isScalableVector()) 3506 break; 3507 3508 // NOTE: We can only get here when doing fixed length SVE code generation. 3509 // We do manual selection because the types involved are not linked to real 3510 // registers (despite being legal) and must be coerced into SVE registers. 3511 // 3512 // NOTE: If the above changes, be aware that selection will still not work 3513 // because the td definition of insert_vector does not support inserting a 3514 // fixed length vector into a scalable vector. 3515 3516 ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1))); 3517 return; 3518 } 3519 3520 case ISD::Constant: { 3521 // Materialize zero constants as copies from WZR/XZR. This allows 3522 // the coalescer to propagate these into other instructions. 3523 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 3524 if (ConstNode->isNullValue()) { 3525 if (VT == MVT::i32) { 3526 SDValue New = CurDAG->getCopyFromReg( 3527 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 3528 ReplaceNode(Node, New.getNode()); 3529 return; 3530 } else if (VT == MVT::i64) { 3531 SDValue New = CurDAG->getCopyFromReg( 3532 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 3533 ReplaceNode(Node, New.getNode()); 3534 return; 3535 } 3536 } 3537 break; 3538 } 3539 3540 case ISD::FrameIndex: { 3541 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 
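    // The operands below supply the zero offset and an LSL #0 shifter expected
    // by the ADDXri immediate form.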
3542 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 3543 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 3544 const TargetLowering *TLI = getTargetLowering(); 3545 SDValue TFI = CurDAG->getTargetFrameIndex( 3546 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3547 SDLoc DL(Node); 3548 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 3549 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 3550 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 3551 return; 3552 } 3553 case ISD::INTRINSIC_W_CHAIN: { 3554 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 3555 switch (IntNo) { 3556 default: 3557 break; 3558 case Intrinsic::aarch64_ldaxp: 3559 case Intrinsic::aarch64_ldxp: { 3560 unsigned Op = 3561 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 3562 SDValue MemAddr = Node->getOperand(2); 3563 SDLoc DL(Node); 3564 SDValue Chain = Node->getOperand(0); 3565 3566 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 3567 MVT::Other, MemAddr, Chain); 3568 3569 // Transfer memoperands. 3570 MachineMemOperand *MemOp = 3571 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3572 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 3573 ReplaceNode(Node, Ld); 3574 return; 3575 } 3576 case Intrinsic::aarch64_stlxp: 3577 case Intrinsic::aarch64_stxp: { 3578 unsigned Op = 3579 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; 3580 SDLoc DL(Node); 3581 SDValue Chain = Node->getOperand(0); 3582 SDValue ValLo = Node->getOperand(2); 3583 SDValue ValHi = Node->getOperand(3); 3584 SDValue MemAddr = Node->getOperand(4); 3585 3586 // Place arguments in the right order. 3587 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 3588 3589 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 3590 // Transfer memoperands. 
3591 MachineMemOperand *MemOp = 3592 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3593 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 3594 3595 ReplaceNode(Node, St); 3596 return; 3597 } 3598 case Intrinsic::aarch64_neon_ld1x2: 3599 if (VT == MVT::v8i8) { 3600 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 3601 return; 3602 } else if (VT == MVT::v16i8) { 3603 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 3604 return; 3605 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3606 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 3607 return; 3608 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3609 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 3610 return; 3611 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3612 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 3613 return; 3614 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3615 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 3616 return; 3617 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3618 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3619 return; 3620 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3621 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 3622 return; 3623 } 3624 break; 3625 case Intrinsic::aarch64_neon_ld1x3: 3626 if (VT == MVT::v8i8) { 3627 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 3628 return; 3629 } else if (VT == MVT::v16i8) { 3630 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 3631 return; 3632 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3633 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 3634 return; 3635 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3636 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 3637 return; 3638 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3639 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 3640 return; 3641 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3642 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 3643 return; 3644 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3645 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3646 return; 3647 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3648 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 3649 return; 3650 } 3651 break; 3652 case Intrinsic::aarch64_neon_ld1x4: 3653 if (VT == MVT::v8i8) { 3654 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 3655 return; 3656 } else if (VT == MVT::v16i8) { 3657 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 3658 return; 3659 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3660 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 3661 return; 3662 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3663 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 3664 return; 3665 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3666 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 3667 return; 3668 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3669 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 3670 return; 3671 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3672 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3673 return; 3674 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3675 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 3676 return; 3677 } 3678 break; 3679 case 
Intrinsic::aarch64_neon_ld2: 3680 if (VT == MVT::v8i8) { 3681 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 3682 return; 3683 } else if (VT == MVT::v16i8) { 3684 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 3685 return; 3686 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3687 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 3688 return; 3689 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3690 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 3691 return; 3692 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3693 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 3694 return; 3695 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3696 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 3697 return; 3698 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3699 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3700 return; 3701 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3702 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 3703 return; 3704 } 3705 break; 3706 case Intrinsic::aarch64_neon_ld3: 3707 if (VT == MVT::v8i8) { 3708 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 3709 return; 3710 } else if (VT == MVT::v16i8) { 3711 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 3712 return; 3713 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3714 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 3715 return; 3716 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3717 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 3718 return; 3719 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3720 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 3721 return; 3722 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3723 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 3724 return; 3725 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3726 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3727 return; 3728 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3729 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 3730 return; 3731 } 3732 break; 3733 case Intrinsic::aarch64_neon_ld4: 3734 if (VT == MVT::v8i8) { 3735 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 3736 return; 3737 } else if (VT == MVT::v16i8) { 3738 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 3739 return; 3740 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3741 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 3742 return; 3743 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3744 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 3745 return; 3746 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3747 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 3748 return; 3749 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3750 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 3751 return; 3752 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3753 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3754 return; 3755 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3756 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 3757 return; 3758 } 3759 break; 3760 case Intrinsic::aarch64_neon_ld2r: 3761 if (VT == MVT::v8i8) { 3762 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 3763 return; 3764 } else if (VT == MVT::v16i8) { 3765 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 
3766 return; 3767 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3768 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 3769 return; 3770 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3771 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 3772 return; 3773 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3774 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 3775 return; 3776 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3777 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 3778 return; 3779 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3780 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 3781 return; 3782 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3783 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 3784 return; 3785 } 3786 break; 3787 case Intrinsic::aarch64_neon_ld3r: 3788 if (VT == MVT::v8i8) { 3789 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 3790 return; 3791 } else if (VT == MVT::v16i8) { 3792 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 3793 return; 3794 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3795 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 3796 return; 3797 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3798 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 3799 return; 3800 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3801 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 3802 return; 3803 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3804 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 3805 return; 3806 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3807 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 3808 return; 3809 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3810 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 3811 return; 3812 } 3813 break; 3814 case Intrinsic::aarch64_neon_ld4r: 3815 if (VT == MVT::v8i8) { 3816 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 3817 return; 3818 } else if (VT == MVT::v16i8) { 3819 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 3820 return; 3821 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3822 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 3823 return; 3824 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3825 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 3826 return; 3827 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3828 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 3829 return; 3830 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3831 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 3832 return; 3833 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3834 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 3835 return; 3836 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3837 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 3838 return; 3839 } 3840 break; 3841 case Intrinsic::aarch64_neon_ld2lane: 3842 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3843 SelectLoadLane(Node, 2, AArch64::LD2i8); 3844 return; 3845 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3846 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3847 SelectLoadLane(Node, 2, AArch64::LD2i16); 3848 return; 3849 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3850 VT == MVT::v2f32) { 3851 SelectLoadLane(Node, 2, AArch64::LD2i32); 3852 return; 3853 } else if (VT 
== MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3854 VT == MVT::v1f64) { 3855 SelectLoadLane(Node, 2, AArch64::LD2i64); 3856 return; 3857 } 3858 break; 3859 case Intrinsic::aarch64_neon_ld3lane: 3860 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3861 SelectLoadLane(Node, 3, AArch64::LD3i8); 3862 return; 3863 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3864 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3865 SelectLoadLane(Node, 3, AArch64::LD3i16); 3866 return; 3867 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3868 VT == MVT::v2f32) { 3869 SelectLoadLane(Node, 3, AArch64::LD3i32); 3870 return; 3871 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3872 VT == MVT::v1f64) { 3873 SelectLoadLane(Node, 3, AArch64::LD3i64); 3874 return; 3875 } 3876 break; 3877 case Intrinsic::aarch64_neon_ld4lane: 3878 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3879 SelectLoadLane(Node, 4, AArch64::LD4i8); 3880 return; 3881 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3882 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3883 SelectLoadLane(Node, 4, AArch64::LD4i16); 3884 return; 3885 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3886 VT == MVT::v2f32) { 3887 SelectLoadLane(Node, 4, AArch64::LD4i32); 3888 return; 3889 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3890 VT == MVT::v1f64) { 3891 SelectLoadLane(Node, 4, AArch64::LD4i64); 3892 return; 3893 } 3894 break; 3895 case Intrinsic::aarch64_ld64b: 3896 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); 3897 return; 3898 } 3899 } break; 3900 case ISD::INTRINSIC_WO_CHAIN: { 3901 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 3902 switch (IntNo) { 3903 default: 3904 break; 3905 case Intrinsic::aarch64_tagp: 3906 SelectTagP(Node); 3907 return; 3908 case Intrinsic::aarch64_neon_tbl2: 3909 SelectTable(Node, 2, 3910 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, 3911 false); 3912 return; 3913 case Intrinsic::aarch64_neon_tbl3: 3914 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 3915 : AArch64::TBLv16i8Three, 3916 false); 3917 return; 3918 case Intrinsic::aarch64_neon_tbl4: 3919 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 3920 : AArch64::TBLv16i8Four, 3921 false); 3922 return; 3923 case Intrinsic::aarch64_neon_tbx2: 3924 SelectTable(Node, 2, 3925 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, 3926 true); 3927 return; 3928 case Intrinsic::aarch64_neon_tbx3: 3929 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 3930 : AArch64::TBXv16i8Three, 3931 true); 3932 return; 3933 case Intrinsic::aarch64_neon_tbx4: 3934 SelectTable(Node, 4, VT == MVT::v8i8 ? 
AArch64::TBXv8i8Four 3935 : AArch64::TBXv16i8Four, 3936 true); 3937 return; 3938 case Intrinsic::aarch64_neon_smull: 3939 case Intrinsic::aarch64_neon_umull: 3940 if (tryMULLV64LaneV128(IntNo, Node)) 3941 return; 3942 break; 3943 case Intrinsic::swift_async_context_addr: { 3944 SDLoc DL(Node); 3945 CurDAG->SelectNodeTo(Node, AArch64::SUBXri, MVT::i64, 3946 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, 3947 AArch64::FP, MVT::i64), 3948 CurDAG->getTargetConstant(8, DL, MVT::i32), 3949 CurDAG->getTargetConstant(0, DL, MVT::i32)); 3950 auto &MF = CurDAG->getMachineFunction(); 3951 MF.getFrameInfo().setFrameAddressIsTaken(true); 3952 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); 3953 return; 3954 } 3955 } 3956 break; 3957 } 3958 case ISD::INTRINSIC_VOID: { 3959 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 3960 if (Node->getNumOperands() >= 3) 3961 VT = Node->getOperand(2)->getValueType(0); 3962 switch (IntNo) { 3963 default: 3964 break; 3965 case Intrinsic::aarch64_neon_st1x2: { 3966 if (VT == MVT::v8i8) { 3967 SelectStore(Node, 2, AArch64::ST1Twov8b); 3968 return; 3969 } else if (VT == MVT::v16i8) { 3970 SelectStore(Node, 2, AArch64::ST1Twov16b); 3971 return; 3972 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3973 VT == MVT::v4bf16) { 3974 SelectStore(Node, 2, AArch64::ST1Twov4h); 3975 return; 3976 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 3977 VT == MVT::v8bf16) { 3978 SelectStore(Node, 2, AArch64::ST1Twov8h); 3979 return; 3980 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3981 SelectStore(Node, 2, AArch64::ST1Twov2s); 3982 return; 3983 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3984 SelectStore(Node, 2, AArch64::ST1Twov4s); 3985 return; 3986 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3987 SelectStore(Node, 2, AArch64::ST1Twov2d); 3988 return; 3989 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3990 SelectStore(Node, 2, AArch64::ST1Twov1d); 3991 return; 3992 } 3993 break; 3994 } 3995 case Intrinsic::aarch64_neon_st1x3: { 3996 if (VT == MVT::v8i8) { 3997 SelectStore(Node, 3, AArch64::ST1Threev8b); 3998 return; 3999 } else if (VT == MVT::v16i8) { 4000 SelectStore(Node, 3, AArch64::ST1Threev16b); 4001 return; 4002 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4003 VT == MVT::v4bf16) { 4004 SelectStore(Node, 3, AArch64::ST1Threev4h); 4005 return; 4006 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4007 VT == MVT::v8bf16) { 4008 SelectStore(Node, 3, AArch64::ST1Threev8h); 4009 return; 4010 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4011 SelectStore(Node, 3, AArch64::ST1Threev2s); 4012 return; 4013 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4014 SelectStore(Node, 3, AArch64::ST1Threev4s); 4015 return; 4016 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4017 SelectStore(Node, 3, AArch64::ST1Threev2d); 4018 return; 4019 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4020 SelectStore(Node, 3, AArch64::ST1Threev1d); 4021 return; 4022 } 4023 break; 4024 } 4025 case Intrinsic::aarch64_neon_st1x4: { 4026 if (VT == MVT::v8i8) { 4027 SelectStore(Node, 4, AArch64::ST1Fourv8b); 4028 return; 4029 } else if (VT == MVT::v16i8) { 4030 SelectStore(Node, 4, AArch64::ST1Fourv16b); 4031 return; 4032 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4033 VT == MVT::v4bf16) { 4034 SelectStore(Node, 4, AArch64::ST1Fourv4h); 4035 return; 4036 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4037 VT == MVT::v8bf16) { 4038 SelectStore(Node, 4, AArch64::ST1Fourv8h); 4039 return; 4040 } else if 
(VT == MVT::v2i32 || VT == MVT::v2f32) { 4041 SelectStore(Node, 4, AArch64::ST1Fourv2s); 4042 return; 4043 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4044 SelectStore(Node, 4, AArch64::ST1Fourv4s); 4045 return; 4046 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4047 SelectStore(Node, 4, AArch64::ST1Fourv2d); 4048 return; 4049 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4050 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4051 return; 4052 } 4053 break; 4054 } 4055 case Intrinsic::aarch64_neon_st2: { 4056 if (VT == MVT::v8i8) { 4057 SelectStore(Node, 2, AArch64::ST2Twov8b); 4058 return; 4059 } else if (VT == MVT::v16i8) { 4060 SelectStore(Node, 2, AArch64::ST2Twov16b); 4061 return; 4062 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4063 VT == MVT::v4bf16) { 4064 SelectStore(Node, 2, AArch64::ST2Twov4h); 4065 return; 4066 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4067 VT == MVT::v8bf16) { 4068 SelectStore(Node, 2, AArch64::ST2Twov8h); 4069 return; 4070 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4071 SelectStore(Node, 2, AArch64::ST2Twov2s); 4072 return; 4073 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4074 SelectStore(Node, 2, AArch64::ST2Twov4s); 4075 return; 4076 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4077 SelectStore(Node, 2, AArch64::ST2Twov2d); 4078 return; 4079 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4080 SelectStore(Node, 2, AArch64::ST1Twov1d); 4081 return; 4082 } 4083 break; 4084 } 4085 case Intrinsic::aarch64_neon_st3: { 4086 if (VT == MVT::v8i8) { 4087 SelectStore(Node, 3, AArch64::ST3Threev8b); 4088 return; 4089 } else if (VT == MVT::v16i8) { 4090 SelectStore(Node, 3, AArch64::ST3Threev16b); 4091 return; 4092 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4093 VT == MVT::v4bf16) { 4094 SelectStore(Node, 3, AArch64::ST3Threev4h); 4095 return; 4096 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4097 VT == MVT::v8bf16) { 4098 SelectStore(Node, 3, AArch64::ST3Threev8h); 4099 return; 4100 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4101 SelectStore(Node, 3, AArch64::ST3Threev2s); 4102 return; 4103 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4104 SelectStore(Node, 3, AArch64::ST3Threev4s); 4105 return; 4106 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4107 SelectStore(Node, 3, AArch64::ST3Threev2d); 4108 return; 4109 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4110 SelectStore(Node, 3, AArch64::ST1Threev1d); 4111 return; 4112 } 4113 break; 4114 } 4115 case Intrinsic::aarch64_neon_st4: { 4116 if (VT == MVT::v8i8) { 4117 SelectStore(Node, 4, AArch64::ST4Fourv8b); 4118 return; 4119 } else if (VT == MVT::v16i8) { 4120 SelectStore(Node, 4, AArch64::ST4Fourv16b); 4121 return; 4122 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4123 VT == MVT::v4bf16) { 4124 SelectStore(Node, 4, AArch64::ST4Fourv4h); 4125 return; 4126 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4127 VT == MVT::v8bf16) { 4128 SelectStore(Node, 4, AArch64::ST4Fourv8h); 4129 return; 4130 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4131 SelectStore(Node, 4, AArch64::ST4Fourv2s); 4132 return; 4133 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4134 SelectStore(Node, 4, AArch64::ST4Fourv4s); 4135 return; 4136 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4137 SelectStore(Node, 4, AArch64::ST4Fourv2d); 4138 return; 4139 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4140 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4141 return; 4142 } 4143 break; 4144 } 4145 case Intrinsic::aarch64_neon_st2lane: { 4146 
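      // Lane stores depend only on the element size, so each branch below
      // covers both the 64-bit and 128-bit vector forms.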
if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4147 SelectStoreLane(Node, 2, AArch64::ST2i8); 4148 return; 4149 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4150 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4151 SelectStoreLane(Node, 2, AArch64::ST2i16); 4152 return; 4153 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4154 VT == MVT::v2f32) { 4155 SelectStoreLane(Node, 2, AArch64::ST2i32); 4156 return; 4157 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4158 VT == MVT::v1f64) { 4159 SelectStoreLane(Node, 2, AArch64::ST2i64); 4160 return; 4161 } 4162 break; 4163 } 4164 case Intrinsic::aarch64_neon_st3lane: { 4165 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4166 SelectStoreLane(Node, 3, AArch64::ST3i8); 4167 return; 4168 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4169 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4170 SelectStoreLane(Node, 3, AArch64::ST3i16); 4171 return; 4172 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4173 VT == MVT::v2f32) { 4174 SelectStoreLane(Node, 3, AArch64::ST3i32); 4175 return; 4176 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4177 VT == MVT::v1f64) { 4178 SelectStoreLane(Node, 3, AArch64::ST3i64); 4179 return; 4180 } 4181 break; 4182 } 4183 case Intrinsic::aarch64_neon_st4lane: { 4184 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4185 SelectStoreLane(Node, 4, AArch64::ST4i8); 4186 return; 4187 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4188 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4189 SelectStoreLane(Node, 4, AArch64::ST4i16); 4190 return; 4191 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4192 VT == MVT::v2f32) { 4193 SelectStoreLane(Node, 4, AArch64::ST4i32); 4194 return; 4195 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4196 VT == MVT::v1f64) { 4197 SelectStoreLane(Node, 4, AArch64::ST4i64); 4198 return; 4199 } 4200 break; 4201 } 4202 case Intrinsic::aarch64_sve_st2: { 4203 if (VT == MVT::nxv16i8) { 4204 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); 4205 return; 4206 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4207 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4208 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); 4209 return; 4210 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4211 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); 4212 return; 4213 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4214 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); 4215 return; 4216 } 4217 break; 4218 } 4219 case Intrinsic::aarch64_sve_st3: { 4220 if (VT == MVT::nxv16i8) { 4221 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); 4222 return; 4223 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4224 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4225 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); 4226 return; 4227 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4228 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); 4229 return; 4230 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4231 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM); 4232 return; 4233 } 4234 break; 4235 } 4236 case Intrinsic::aarch64_sve_st4: { 4237 if (VT == MVT::nxv16i8) { 4238 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, 
AArch64::ST4B_IMM); 4239 return; 4240 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4241 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4242 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); 4243 return; 4244 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4245 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); 4246 return; 4247 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4248 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM); 4249 return; 4250 } 4251 break; 4252 } 4253 } 4254 break; 4255 } 4256 case AArch64ISD::LD2post: { 4257 if (VT == MVT::v8i8) { 4258 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 4259 return; 4260 } else if (VT == MVT::v16i8) { 4261 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 4262 return; 4263 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4264 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 4265 return; 4266 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4267 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 4268 return; 4269 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4270 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 4271 return; 4272 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4273 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 4274 return; 4275 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4276 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4277 return; 4278 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4279 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 4280 return; 4281 } 4282 break; 4283 } 4284 case AArch64ISD::LD3post: { 4285 if (VT == MVT::v8i8) { 4286 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 4287 return; 4288 } else if (VT == MVT::v16i8) { 4289 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 4290 return; 4291 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4292 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 4293 return; 4294 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4295 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 4296 return; 4297 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4298 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 4299 return; 4300 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4301 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 4302 return; 4303 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4304 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4305 return; 4306 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4307 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 4308 return; 4309 } 4310 break; 4311 } 4312 case AArch64ISD::LD4post: { 4313 if (VT == MVT::v8i8) { 4314 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 4315 return; 4316 } else if (VT == MVT::v16i8) { 4317 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 4318 return; 4319 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4320 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 4321 return; 4322 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4323 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 4324 return; 4325 } else if (VT 
== MVT::v2i32 || VT == MVT::v2f32) { 4326 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 4327 return; 4328 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4329 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 4330 return; 4331 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4332 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4333 return; 4334 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4335 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 4336 return; 4337 } 4338 break; 4339 } 4340 case AArch64ISD::LD1x2post: { 4341 if (VT == MVT::v8i8) { 4342 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 4343 return; 4344 } else if (VT == MVT::v16i8) { 4345 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 4346 return; 4347 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4348 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 4349 return; 4350 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4351 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 4352 return; 4353 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4354 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 4355 return; 4356 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4357 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 4358 return; 4359 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4360 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4361 return; 4362 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4363 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 4364 return; 4365 } 4366 break; 4367 } 4368 case AArch64ISD::LD1x3post: { 4369 if (VT == MVT::v8i8) { 4370 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 4371 return; 4372 } else if (VT == MVT::v16i8) { 4373 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 4374 return; 4375 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4376 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 4377 return; 4378 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4379 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 4380 return; 4381 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4382 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 4383 return; 4384 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4385 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 4386 return; 4387 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4388 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4389 return; 4390 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4391 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 4392 return; 4393 } 4394 break; 4395 } 4396 case AArch64ISD::LD1x4post: { 4397 if (VT == MVT::v8i8) { 4398 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 4399 return; 4400 } else if (VT == MVT::v16i8) { 4401 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); 4402 return; 4403 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4404 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 4405 return; 4406 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4407 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 4408 return; 4409 } else if (VT == 
MVT::v2i32 || VT == MVT::v2f32) { 4410 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 4411 return; 4412 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4413 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 4414 return; 4415 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4416 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4417 return; 4418 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4419 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 4420 return; 4421 } 4422 break; 4423 } 4424 case AArch64ISD::LD1DUPpost: { 4425 if (VT == MVT::v8i8) { 4426 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 4427 return; 4428 } else if (VT == MVT::v16i8) { 4429 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 4430 return; 4431 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4432 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 4433 return; 4434 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4435 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 4436 return; 4437 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4438 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 4439 return; 4440 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4441 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 4442 return; 4443 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4444 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 4445 return; 4446 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4447 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 4448 return; 4449 } 4450 break; 4451 } 4452 case AArch64ISD::LD2DUPpost: { 4453 if (VT == MVT::v8i8) { 4454 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 4455 return; 4456 } else if (VT == MVT::v16i8) { 4457 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 4458 return; 4459 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4460 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 4461 return; 4462 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4463 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 4464 return; 4465 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4466 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 4467 return; 4468 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4469 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 4470 return; 4471 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4472 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 4473 return; 4474 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4475 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 4476 return; 4477 } 4478 break; 4479 } 4480 case AArch64ISD::LD3DUPpost: { 4481 if (VT == MVT::v8i8) { 4482 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 4483 return; 4484 } else if (VT == MVT::v16i8) { 4485 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 4486 return; 4487 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4488 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); 4489 return; 4490 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4491 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 4492 return; 4493 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4494 SelectPostLoad(Node, 
3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 4495 return; 4496 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4497 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 4498 return; 4499 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4500 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 4501 return; 4502 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4503 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 4504 return; 4505 } 4506 break; 4507 } 4508 case AArch64ISD::LD4DUPpost: { 4509 if (VT == MVT::v8i8) { 4510 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 4511 return; 4512 } else if (VT == MVT::v16i8) { 4513 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 4514 return; 4515 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4516 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 4517 return; 4518 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4519 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 4520 return; 4521 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4522 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 4523 return; 4524 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4525 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 4526 return; 4527 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4528 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 4529 return; 4530 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4531 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 4532 return; 4533 } 4534 break; 4535 } 4536 case AArch64ISD::LD1LANEpost: { 4537 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4538 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 4539 return; 4540 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4541 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4542 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 4543 return; 4544 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4545 VT == MVT::v2f32) { 4546 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 4547 return; 4548 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4549 VT == MVT::v1f64) { 4550 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); 4551 return; 4552 } 4553 break; 4554 } 4555 case AArch64ISD::LD2LANEpost: { 4556 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4557 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); 4558 return; 4559 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4560 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4561 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); 4562 return; 4563 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4564 VT == MVT::v2f32) { 4565 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); 4566 return; 4567 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4568 VT == MVT::v1f64) { 4569 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); 4570 return; 4571 } 4572 break; 4573 } 4574 case AArch64ISD::LD3LANEpost: { 4575 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4576 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); 4577 return; 4578 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4579 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4580 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); 4581 return; 4582 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == 
MVT::v4f32 || 4583 VT == MVT::v2f32) { 4584 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); 4585 return; 4586 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4587 VT == MVT::v1f64) { 4588 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); 4589 return; 4590 } 4591 break; 4592 } 4593 case AArch64ISD::LD4LANEpost: { 4594 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4595 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); 4596 return; 4597 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4598 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4599 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); 4600 return; 4601 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4602 VT == MVT::v2f32) { 4603 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); 4604 return; 4605 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4606 VT == MVT::v1f64) { 4607 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); 4608 return; 4609 } 4610 break; 4611 } 4612 case AArch64ISD::ST2post: { 4613 VT = Node->getOperand(1).getValueType(); 4614 if (VT == MVT::v8i8) { 4615 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); 4616 return; 4617 } else if (VT == MVT::v16i8) { 4618 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); 4619 return; 4620 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4621 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); 4622 return; 4623 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4624 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); 4625 return; 4626 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4627 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); 4628 return; 4629 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4630 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); 4631 return; 4632 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4633 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); 4634 return; 4635 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4636 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4637 return; 4638 } 4639 break; 4640 } 4641 case AArch64ISD::ST3post: { 4642 VT = Node->getOperand(1).getValueType(); 4643 if (VT == MVT::v8i8) { 4644 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); 4645 return; 4646 } else if (VT == MVT::v16i8) { 4647 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); 4648 return; 4649 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4650 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); 4651 return; 4652 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4653 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); 4654 return; 4655 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4656 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); 4657 return; 4658 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4659 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); 4660 return; 4661 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4662 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); 4663 return; 4664 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4665 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4666 return; 4667 } 4668 break; 4669 } 4670 case AArch64ISD::ST4post: { 4671 VT = Node->getOperand(1).getValueType(); 4672 if (VT == MVT::v8i8) { 4673 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); 4674 return; 4675 } else if (VT == MVT::v16i8) { 4676 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); 4677 return; 4678 } else if (VT 
== MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4679 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); 4680 return; 4681 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4682 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); 4683 return; 4684 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4685 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); 4686 return; 4687 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4688 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); 4689 return; 4690 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4691 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); 4692 return; 4693 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4694 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 4695 return; 4696 } 4697 break; 4698 } 4699 case AArch64ISD::ST1x2post: { 4700 VT = Node->getOperand(1).getValueType(); 4701 if (VT == MVT::v8i8) { 4702 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); 4703 return; 4704 } else if (VT == MVT::v16i8) { 4705 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); 4706 return; 4707 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4708 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); 4709 return; 4710 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4711 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); 4712 return; 4713 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4714 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); 4715 return; 4716 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4717 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); 4718 return; 4719 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4720 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4721 return; 4722 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4723 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); 4724 return; 4725 } 4726 break; 4727 } 4728 case AArch64ISD::ST1x3post: { 4729 VT = Node->getOperand(1).getValueType(); 4730 if (VT == MVT::v8i8) { 4731 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); 4732 return; 4733 } else if (VT == MVT::v16i8) { 4734 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); 4735 return; 4736 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4737 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); 4738 return; 4739 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) { 4740 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); 4741 return; 4742 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4743 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); 4744 return; 4745 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4746 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); 4747 return; 4748 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4749 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4750 return; 4751 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4752 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); 4753 return; 4754 } 4755 break; 4756 } 4757 case AArch64ISD::ST1x4post: { 4758 VT = Node->getOperand(1).getValueType(); 4759 if (VT == MVT::v8i8) { 4760 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); 4761 return; 4762 } else if (VT == MVT::v16i8) { 4763 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); 4764 return; 4765 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4766 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); 4767 return; 4768 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4769 
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
      SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 2, 3,
                           AArch64::LD2D_IMM, AArch64::LD2D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
      SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into a
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated with a memory operation in \p
/// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
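  // For example, AArch64ISD::LD1_MERGE_ZERO carries the memory VT as a
  // VTSDNode operand, whereas the SVE structured loads
  // (SVE_LD2/3/4_MERGE_ZERO) only carry their governing predicate, so the
  // memory VT is rebuilt from the predicate's width via
  // getPackedVectorTypeFromPredicateType.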
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID)
    return EVT();

  const unsigned IntNo =
      cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
  if (IntNo != Intrinsic::aarch64_sve_prf)
    return EVT();

  // We are using an SVE prefetch intrinsic. Type must be inferred
  // from the width of the predicate.
  return getPackedVectorTypeFromPredicateType(
      Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8 bit data does not come with the SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
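    // For example, with Scale == 2 (4-byte elements) an immediate offset of
    // 12 is accepted and materialized below as the register operand
    // 12 >> 2 == 3, whereas an offset of 6 is rejected and left for other
    // addressing modes to handle.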
    if (ImmOff % Size)
      return false;

    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(N);
}