1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the AArch64 target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64MachineFunctionInfo.h" 14 #include "AArch64TargetMachine.h" 15 #include "MCTargetDesc/AArch64AddressingModes.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/CodeGen/ISDOpcodes.h" 18 #include "llvm/CodeGen/SelectionDAGISel.h" 19 #include "llvm/IR/Function.h" // To access function attributes. 20 #include "llvm/IR/GlobalValue.h" 21 #include "llvm/IR/Intrinsics.h" 22 #include "llvm/IR/IntrinsicsAArch64.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/KnownBits.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/raw_ostream.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "aarch64-isel" 32 #define PASS_NAME "AArch64 Instruction Selection" 33 34 //===--------------------------------------------------------------------===// 35 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine 36 /// instructions for SelectionDAG operations. 37 /// 38 namespace { 39 40 class AArch64DAGToDAGISel : public SelectionDAGISel { 41 42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 43 /// make the right decision when generating code for different targets. 44 const AArch64Subtarget *Subtarget; 45 46 public: 47 static char ID; 48 49 AArch64DAGToDAGISel() = delete; 50 51 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, 52 CodeGenOptLevel OptLevel) 53 : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {} 54 55 bool runOnMachineFunction(MachineFunction &MF) override { 56 Subtarget = &MF.getSubtarget<AArch64Subtarget>(); 57 return SelectionDAGISel::runOnMachineFunction(MF); 58 } 59 60 void Select(SDNode *Node) override; 61 62 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 63 /// inline asm expressions. 
64 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 65 InlineAsm::ConstraintCode ConstraintID, 66 std::vector<SDValue> &OutOps) override; 67 68 template <signed Low, signed High, signed Scale> 69 bool SelectRDVLImm(SDValue N, SDValue &Imm); 70 71 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); 72 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift); 73 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 74 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 75 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 76 return SelectShiftedRegister(N, false, Reg, Shift); 77 } 78 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 79 return SelectShiftedRegister(N, true, Reg, Shift); 80 } 81 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { 82 return SelectAddrModeIndexed7S(N, 1, Base, OffImm); 83 } 84 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { 85 return SelectAddrModeIndexed7S(N, 2, Base, OffImm); 86 } 87 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { 88 return SelectAddrModeIndexed7S(N, 4, Base, OffImm); 89 } 90 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { 91 return SelectAddrModeIndexed7S(N, 8, Base, OffImm); 92 } 93 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { 94 return SelectAddrModeIndexed7S(N, 16, Base, OffImm); 95 } 96 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) { 97 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm); 98 } 99 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) { 100 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm); 101 } 102 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { 103 return SelectAddrModeIndexed(N, 1, Base, OffImm); 104 } 105 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { 106 return SelectAddrModeIndexed(N, 2, Base, OffImm); 107 } 108 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { 109 return SelectAddrModeIndexed(N, 4, Base, OffImm); 110 } 111 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { 112 return SelectAddrModeIndexed(N, 8, Base, OffImm); 113 } 114 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { 115 return SelectAddrModeIndexed(N, 16, Base, OffImm); 116 } 117 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { 118 return SelectAddrModeUnscaled(N, 1, Base, OffImm); 119 } 120 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { 121 return SelectAddrModeUnscaled(N, 2, Base, OffImm); 122 } 123 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { 124 return SelectAddrModeUnscaled(N, 4, Base, OffImm); 125 } 126 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { 127 return SelectAddrModeUnscaled(N, 8, Base, OffImm); 128 } 129 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { 130 return SelectAddrModeUnscaled(N, 16, Base, OffImm); 131 } 132 template <unsigned Size, unsigned Max> 133 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) { 134 // Test if there is an appropriate addressing mode and check if the 135 // immediate fits. 
136 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm); 137 if (Found) { 138 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) { 139 int64_t C = CI->getSExtValue(); 140 if (C <= Max) 141 return true; 142 } 143 } 144 145 // Otherwise, base only, materialize address in register. 146 Base = N; 147 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); 148 return true; 149 } 150 151 template<int Width> 152 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, 153 SDValue &SignExtend, SDValue &DoShift) { 154 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 155 } 156 157 template<int Width> 158 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, 159 SDValue &SignExtend, SDValue &DoShift) { 160 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 161 } 162 163 bool SelectExtractHigh(SDValue N, SDValue &Res) { 164 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST) 165 N = N->getOperand(0); 166 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR || 167 !isa<ConstantSDNode>(N->getOperand(1))) 168 return false; 169 EVT VT = N->getValueType(0); 170 EVT LVT = N->getOperand(0).getValueType(); 171 unsigned Index = N->getConstantOperandVal(1); 172 if (!VT.is64BitVector() || !LVT.is128BitVector() || 173 Index != VT.getVectorNumElements()) 174 return false; 175 Res = N->getOperand(0); 176 return true; 177 } 178 179 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) { 180 if (N.getOpcode() != AArch64ISD::VLSHR) 181 return false; 182 SDValue Op = N->getOperand(0); 183 EVT VT = Op.getValueType(); 184 unsigned ShtAmt = N->getConstantOperandVal(1); 185 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD) 186 return false; 187 188 APInt Imm; 189 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift) 190 Imm = APInt(VT.getScalarSizeInBits(), 191 Op.getOperand(1).getConstantOperandVal(0) 192 << Op.getOperand(1).getConstantOperandVal(1)); 193 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP && 194 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0))) 195 Imm = APInt(VT.getScalarSizeInBits(), 196 Op.getOperand(1).getConstantOperandVal(0)); 197 else 198 return false; 199 200 if (Imm != 1ULL << (ShtAmt - 1)) 201 return false; 202 203 Res1 = Op.getOperand(0); 204 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32); 205 return true; 206 } 207 208 bool SelectDupZeroOrUndef(SDValue N) { 209 switch(N->getOpcode()) { 210 case ISD::UNDEF: 211 return true; 212 case AArch64ISD::DUP: 213 case ISD::SPLAT_VECTOR: { 214 auto Opnd0 = N->getOperand(0); 215 if (isNullConstant(Opnd0)) 216 return true; 217 if (isNullFPConstant(Opnd0)) 218 return true; 219 break; 220 } 221 default: 222 break; 223 } 224 225 return false; 226 } 227 228 bool SelectDupZero(SDValue N) { 229 switch(N->getOpcode()) { 230 case AArch64ISD::DUP: 231 case ISD::SPLAT_VECTOR: { 232 auto Opnd0 = N->getOperand(0); 233 if (isNullConstant(Opnd0)) 234 return true; 235 if (isNullFPConstant(Opnd0)) 236 return true; 237 break; 238 } 239 } 240 241 return false; 242 } 243 244 bool SelectDupNegativeZero(SDValue N) { 245 switch(N->getOpcode()) { 246 case AArch64ISD::DUP: 247 case ISD::SPLAT_VECTOR: { 248 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0)); 249 return Const && Const->isZero() && Const->isNegative(); 250 } 251 } 252 253 return false; 254 } 255 256 template<MVT::SimpleValueType VT> 257 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) { 258 return SelectSVEAddSubImm(N, VT, Imm, Shift); 
259 } 260 261 template <MVT::SimpleValueType VT> 262 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) { 263 return SelectSVECpyDupImm(N, VT, Imm, Shift); 264 } 265 266 template <MVT::SimpleValueType VT, bool Invert = false> 267 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) { 268 return SelectSVELogicalImm(N, VT, Imm, Invert); 269 } 270 271 template <MVT::SimpleValueType VT> 272 bool SelectSVEArithImm(SDValue N, SDValue &Imm) { 273 return SelectSVEArithImm(N, VT, Imm); 274 } 275 276 template <unsigned Low, unsigned High, bool AllowSaturation = false> 277 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { 278 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); 279 } 280 281 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) { 282 if (N->getOpcode() != ISD::SPLAT_VECTOR) 283 return false; 284 285 EVT EltVT = N->getValueType(0).getVectorElementType(); 286 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1, 287 /* High */ EltVT.getFixedSizeInBits(), 288 /* AllowSaturation */ true, Imm); 289 } 290 291 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 292 template<signed Min, signed Max, signed Scale, bool Shift> 293 bool SelectCntImm(SDValue N, SDValue &Imm) { 294 if (!isa<ConstantSDNode>(N)) 295 return false; 296 297 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 298 if (Shift) 299 MulImm = 1LL << MulImm; 300 301 if ((MulImm % std::abs(Scale)) != 0) 302 return false; 303 304 MulImm /= Scale; 305 if ((MulImm >= Min) && (MulImm <= Max)) { 306 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); 307 return true; 308 } 309 310 return false; 311 } 312 313 template <signed Max, signed Scale> 314 bool SelectEXTImm(SDValue N, SDValue &Imm) { 315 if (!isa<ConstantSDNode>(N)) 316 return false; 317 318 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 319 320 if (MulImm >= 0 && MulImm <= Max) { 321 MulImm *= Scale; 322 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); 323 return true; 324 } 325 326 return false; 327 } 328 329 template <unsigned BaseReg, unsigned Max> 330 bool ImmToReg(SDValue N, SDValue &Imm) { 331 if (auto *CI = dyn_cast<ConstantSDNode>(N)) { 332 uint64_t C = CI->getZExtValue(); 333 334 if (C > Max) 335 return false; 336 337 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other); 338 return true; 339 } 340 return false; 341 } 342 343 /// Form sequences of consecutive 64/128-bit registers for use in NEON 344 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have 345 /// between 1 and 4 elements. If it contains a single element that is returned 346 /// unchanged; otherwise a REG_SEQUENCE value is returned. 347 SDValue createDTuple(ArrayRef<SDValue> Vecs); 348 SDValue createQTuple(ArrayRef<SDValue> Vecs); 349 // Form a sequence of SVE registers for instructions using list of vectors, 350 // e.g. structured loads and stores (ldN, stN). 351 SDValue createZTuple(ArrayRef<SDValue> Vecs); 352 353 // Similar to above, except the register must start at a multiple of the 354 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple. 355 SDValue createZMulTuple(ArrayRef<SDValue> Regs); 356 357 /// Generic helper for the createDTuple/createQTuple 358 /// functions. Those should almost always be called instead. 
359 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[], 360 const unsigned SubRegs[]); 361 362 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); 363 364 bool tryIndexedLoad(SDNode *N); 365 366 bool trySelectStackSlotTagP(SDNode *N); 367 void SelectTagP(SDNode *N); 368 369 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 370 unsigned SubRegIdx); 371 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 372 unsigned SubRegIdx); 373 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 374 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 375 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, 376 unsigned Opc_rr, unsigned Opc_ri, 377 bool IsIntr = false); 378 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs, 379 unsigned Scale, unsigned Opc_ri, 380 unsigned Opc_rr); 381 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs, 382 bool IsZmMulti, unsigned Opcode, 383 bool HasPred = false); 384 void SelectPExtPair(SDNode *N, unsigned Opc); 385 void SelectWhilePair(SDNode *N, unsigned Opc); 386 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode); 387 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode); 388 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs, 389 bool IsTupleInput, unsigned Opc); 390 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode); 391 392 template <unsigned MaxIdx, unsigned Scale> 393 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg, 394 unsigned Op); 395 396 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); 397 /// SVE Reg+Imm addressing mode. 398 template <int64_t Min, int64_t Max> 399 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base, 400 SDValue &OffImm); 401 /// SVE Reg+Reg address mode. 402 template <unsigned Scale> 403 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) { 404 return SelectSVERegRegAddrMode(N, Scale, Base, Offset); 405 } 406 407 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc, 408 uint32_t MaxImm); 409 410 template <unsigned MaxIdx, unsigned Scale> 411 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { 412 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale); 413 } 414 415 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); 416 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); 417 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 418 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 419 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale, 420 unsigned Opc_rr, unsigned Opc_ri); 421 std::tuple<unsigned, SDValue, SDValue> 422 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri, 423 const SDValue &OldBase, const SDValue &OldOffset, 424 unsigned Scale); 425 426 bool tryBitfieldExtractOp(SDNode *N); 427 bool tryBitfieldExtractOpFromSExt(SDNode *N); 428 bool tryBitfieldInsertOp(SDNode *N); 429 bool tryBitfieldInsertInZeroOp(SDNode *N); 430 bool tryShiftAmountMod(SDNode *N); 431 432 bool tryReadRegister(SDNode *N); 433 bool tryWriteRegister(SDNode *N); 434 435 bool trySelectCastFixedLengthToScalableVector(SDNode *N); 436 bool trySelectCastScalableToFixedLengthVector(SDNode *N); 437 438 bool trySelectXAR(SDNode *N); 439 440 // Include the pieces autogenerated from the target description. 
441 #include "AArch64GenDAGISel.inc" 442 443 private: 444 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, 445 SDValue &Shift); 446 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift); 447 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, 448 SDValue &OffImm) { 449 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm); 450 } 451 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW, 452 unsigned Size, SDValue &Base, 453 SDValue &OffImm); 454 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, 455 SDValue &OffImm); 456 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, 457 SDValue &OffImm); 458 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, 459 SDValue &Offset, SDValue &SignExtend, 460 SDValue &DoShift); 461 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, 462 SDValue &Offset, SDValue &SignExtend, 463 SDValue &DoShift); 464 bool isWorthFoldingALU(SDValue V, bool LSL = false) const; 465 bool isWorthFoldingAddr(SDValue V) const; 466 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, 467 SDValue &Offset, SDValue &SignExtend); 468 469 template<unsigned RegWidth> 470 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { 471 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); 472 } 473 474 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); 475 476 template<unsigned RegWidth> 477 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) { 478 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth); 479 } 480 481 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos, 482 unsigned Width); 483 484 bool SelectCMP_SWAP(SDNode *N); 485 486 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); 487 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); 488 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert); 489 490 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); 491 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High, 492 bool AllowSaturation, SDValue &Imm); 493 494 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm); 495 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, 496 SDValue &Offset); 497 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector, 498 SDValue &Offset, unsigned Scale = 1); 499 500 bool SelectAllActivePredicate(SDValue N); 501 bool SelectAnyPredicate(SDValue N); 502 }; 503 } // end anonymous namespace 504 505 char AArch64DAGToDAGISel::ID = 0; 506 507 INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) 508 509 /// isIntImmediate - This method tests to see if the node is a constant 510 /// operand. If so Imm will receive the 32-bit value. 511 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { 512 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { 513 Imm = C->getZExtValue(); 514 return true; 515 } 516 return false; 517 } 518 519 // isIntImmediate - This method tests to see if a constant operand. 520 // If so Imm will receive the value. 521 static bool isIntImmediate(SDValue N, uint64_t &Imm) { 522 return isIntImmediate(N.getNode(), Imm); 523 } 524 525 // isOpcWithIntImmediate - This method tests to see if the node is a specific 526 // opcode and that it has a immediate integer right operand. 527 // If so Imm will receive the 32 bit value. 
528 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, 529 uint64_t &Imm) { 530 return N->getOpcode() == Opc && 531 isIntImmediate(N->getOperand(1).getNode(), Imm); 532 } 533 534 // isIntImmediateEq - This method tests to see if N is a constant operand that 535 // is equivalent to 'ImmExpected'. 536 #ifndef NDEBUG 537 static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) { 538 uint64_t Imm; 539 if (!isIntImmediate(N.getNode(), Imm)) 540 return false; 541 return Imm == ImmExpected; 542 } 543 #endif 544 545 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( 546 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID, 547 std::vector<SDValue> &OutOps) { 548 switch(ConstraintID) { 549 default: 550 llvm_unreachable("Unexpected asm memory constraint"); 551 case InlineAsm::ConstraintCode::m: 552 case InlineAsm::ConstraintCode::o: 553 case InlineAsm::ConstraintCode::Q: 554 // We need to make sure that this one operand does not end up in XZR, thus 555 // require the address to be in a PointerRegClass register. 556 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); 557 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); 558 SDLoc dl(Op); 559 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); 560 SDValue NewOp = 561 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 562 dl, Op.getValueType(), 563 Op, RC), 0); 564 OutOps.push_back(NewOp); 565 return false; 566 } 567 return true; 568 } 569 570 /// SelectArithImmed - Select an immediate value that can be represented as 571 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 572 /// Val set to the 12-bit value and Shift set to the shifter operand. 573 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, 574 SDValue &Shift) { 575 // This function is called from the addsub_shifted_imm ComplexPattern, 576 // which lists [imm] as the list of opcode it's interested in, however 577 // we still need to check whether the operand is actually an immediate 578 // here because the ComplexPattern opcode list is only used in 579 // root-level opcode matching. 580 if (!isa<ConstantSDNode>(N.getNode())) 581 return false; 582 583 uint64_t Immed = N.getNode()->getAsZExtVal(); 584 unsigned ShiftAmt; 585 586 if (Immed >> 12 == 0) { 587 ShiftAmt = 0; 588 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 589 ShiftAmt = 12; 590 Immed = Immed >> 12; 591 } else 592 return false; 593 594 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 595 SDLoc dl(N); 596 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); 597 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); 598 return true; 599 } 600 601 /// SelectNegArithImmed - As above, but negates the value before trying to 602 /// select it. 603 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, 604 SDValue &Shift) { 605 // This function is called from the addsub_shifted_imm ComplexPattern, 606 // which lists [imm] as the list of opcode it's interested in, however 607 // we still need to check whether the operand is actually an immediate 608 // here because the ComplexPattern opcode list is only used in 609 // root-level opcode matching. 610 if (!isa<ConstantSDNode>(N.getNode())) 611 return false; 612 613 // The immediate operand must be a 24-bit zero-extended immediate. 
614 uint64_t Immed = N.getNode()->getAsZExtVal(); 615 616 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 617 // have the opposite effect on the C flag, so this pattern mustn't match under 618 // those circumstances. 619 if (Immed == 0) 620 return false; 621 622 if (N.getValueType() == MVT::i32) 623 Immed = ~((uint32_t)Immed) + 1; 624 else 625 Immed = ~Immed + 1ULL; 626 if (Immed & 0xFFFFFFFFFF000000ULL) 627 return false; 628 629 Immed &= 0xFFFFFFULL; 630 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, 631 Shift); 632 } 633 634 /// getShiftTypeForNode - Translate a shift node to the corresponding 635 /// ShiftType value. 636 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { 637 switch (N.getOpcode()) { 638 default: 639 return AArch64_AM::InvalidShiftExtend; 640 case ISD::SHL: 641 return AArch64_AM::LSL; 642 case ISD::SRL: 643 return AArch64_AM::LSR; 644 case ISD::SRA: 645 return AArch64_AM::ASR; 646 case ISD::ROTR: 647 return AArch64_AM::ROR; 648 } 649 } 650 651 /// Determine whether it is worth it to fold SHL into the addressing 652 /// mode. 653 static bool isWorthFoldingSHL(SDValue V) { 654 assert(V.getOpcode() == ISD::SHL && "invalid opcode"); 655 // It is worth folding logical shift of up to three places. 656 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1)); 657 if (!CSD) 658 return false; 659 unsigned ShiftVal = CSD->getZExtValue(); 660 if (ShiftVal > 3) 661 return false; 662 663 // Check if this particular node is reused in any non-memory related 664 // operation. If yes, do not try to fold this node into the address 665 // computation, since the computation will be kept. 666 const SDNode *Node = V.getNode(); 667 for (SDNode *UI : Node->uses()) 668 if (!isa<MemSDNode>(*UI)) 669 for (SDNode *UII : UI->uses()) 670 if (!isa<MemSDNode>(*UII)) 671 return false; 672 return true; 673 } 674 675 /// Determine whether it is worth to fold V into an extended register addressing 676 /// mode. 677 bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V) const { 678 // Trivial if we are optimizing for code size or if there is only 679 // one use of the value. 680 if (CurDAG->shouldOptForSize() || V.hasOneUse()) 681 return true; 682 // If a subtarget has a fastpath LSL we can fold a logical shift into 683 // the addressing mode and save a cycle. 684 if (Subtarget->hasAddrLSLFast() && V.getOpcode() == ISD::SHL && 685 isWorthFoldingSHL(V)) 686 return true; 687 if (Subtarget->hasAddrLSLFast() && V.getOpcode() == ISD::ADD) { 688 const SDValue LHS = V.getOperand(0); 689 const SDValue RHS = V.getOperand(1); 690 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) 691 return true; 692 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) 693 return true; 694 } 695 696 // It hurts otherwise, since the value will be reused. 
697 return false; 698 } 699 700 /// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2 701 /// to select more shifted register 702 bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, 703 SDValue &Shift) { 704 EVT VT = N.getValueType(); 705 if (VT != MVT::i32 && VT != MVT::i64) 706 return false; 707 708 if (N->getOpcode() != ISD::AND || !N->hasOneUse()) 709 return false; 710 SDValue LHS = N.getOperand(0); 711 if (!LHS->hasOneUse()) 712 return false; 713 714 unsigned LHSOpcode = LHS->getOpcode(); 715 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA) 716 return false; 717 718 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1)); 719 if (!ShiftAmtNode) 720 return false; 721 722 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue(); 723 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1)); 724 if (!RHSC) 725 return false; 726 727 APInt AndMask = RHSC->getAPIntValue(); 728 unsigned LowZBits, MaskLen; 729 if (!AndMask.isShiftedMask(LowZBits, MaskLen)) 730 return false; 731 732 unsigned BitWidth = N.getValueSizeInBits(); 733 SDLoc DL(LHS); 734 uint64_t NewShiftC; 735 unsigned NewShiftOp; 736 if (LHSOpcode == ISD::SHL) { 737 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp 738 // BitWidth != LowZBits + MaskLen doesn't match the pattern 739 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen)) 740 return false; 741 742 NewShiftC = LowZBits - ShiftAmtC; 743 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri; 744 } else { 745 if (LowZBits == 0) 746 return false; 747 748 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp 749 NewShiftC = LowZBits + ShiftAmtC; 750 if (NewShiftC >= BitWidth) 751 return false; 752 753 // SRA need all high bits 754 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen))) 755 return false; 756 757 // SRL high bits can be 0 or 1 758 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen))) 759 return false; 760 761 if (LHSOpcode == ISD::SRL) 762 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri; 763 else 764 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri; 765 } 766 767 assert(NewShiftC < BitWidth && "Invalid shift amount"); 768 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT); 769 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT); 770 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0), 771 NewShiftAmt, BitWidthMinus1), 772 0); 773 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits); 774 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32); 775 return true; 776 } 777 778 /// getExtendTypeForNode - Translate an extend node to the corresponding 779 /// ExtendType value. 
780 static AArch64_AM::ShiftExtendType 781 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { 782 if (N.getOpcode() == ISD::SIGN_EXTEND || 783 N.getOpcode() == ISD::SIGN_EXTEND_INREG) { 784 EVT SrcVT; 785 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) 786 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); 787 else 788 SrcVT = N.getOperand(0).getValueType(); 789 790 if (!IsLoadStore && SrcVT == MVT::i8) 791 return AArch64_AM::SXTB; 792 else if (!IsLoadStore && SrcVT == MVT::i16) 793 return AArch64_AM::SXTH; 794 else if (SrcVT == MVT::i32) 795 return AArch64_AM::SXTW; 796 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 797 798 return AArch64_AM::InvalidShiftExtend; 799 } else if (N.getOpcode() == ISD::ZERO_EXTEND || 800 N.getOpcode() == ISD::ANY_EXTEND) { 801 EVT SrcVT = N.getOperand(0).getValueType(); 802 if (!IsLoadStore && SrcVT == MVT::i8) 803 return AArch64_AM::UXTB; 804 else if (!IsLoadStore && SrcVT == MVT::i16) 805 return AArch64_AM::UXTH; 806 else if (SrcVT == MVT::i32) 807 return AArch64_AM::UXTW; 808 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 809 810 return AArch64_AM::InvalidShiftExtend; 811 } else if (N.getOpcode() == ISD::AND) { 812 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 813 if (!CSD) 814 return AArch64_AM::InvalidShiftExtend; 815 uint64_t AndMask = CSD->getZExtValue(); 816 817 switch (AndMask) { 818 default: 819 return AArch64_AM::InvalidShiftExtend; 820 case 0xFF: 821 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 822 case 0xFFFF: 823 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 824 case 0xFFFFFFFF: 825 return AArch64_AM::UXTW; 826 } 827 } 828 829 return AArch64_AM::InvalidShiftExtend; 830 } 831 832 /// Determine whether it is worth to fold V into an extended register of an 833 /// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N` 834 /// instruction, and the shift should be treated as worth folding even if has 835 /// multiple uses. 836 bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const { 837 // Trivial if we are optimizing for code size or if there is only 838 // one use of the value. 839 if (CurDAG->shouldOptForSize() || V.hasOneUse()) 840 return true; 841 842 // If a subtarget has a fastpath LSL we can fold a logical shift into 843 // the add/sub and save a cycle. 844 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL && 845 V.getConstantOperandVal(1) <= 4 && 846 getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend) 847 return true; 848 849 // It hurts otherwise, since the value will be reused. 850 return false; 851 } 852 853 /// SelectShiftedRegister - Select a "shifted register" operand. If the value 854 /// is not shifted, set the Shift operand to default of "LSL 0". The logical 855 /// instructions allow the shifted register to be rotated, but the arithmetic 856 /// instructions do not. The AllowROR parameter specifies whether ROR is 857 /// supported. 
858 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, 859 SDValue &Reg, SDValue &Shift) { 860 if (SelectShiftedRegisterFromAnd(N, Reg, Shift)) 861 return true; 862 863 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); 864 if (ShType == AArch64_AM::InvalidShiftExtend) 865 return false; 866 if (!AllowROR && ShType == AArch64_AM::ROR) 867 return false; 868 869 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 870 unsigned BitSize = N.getValueSizeInBits(); 871 unsigned Val = RHS->getZExtValue() & (BitSize - 1); 872 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); 873 874 Reg = N.getOperand(0); 875 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); 876 return isWorthFoldingALU(N, true); 877 } 878 879 return false; 880 } 881 882 /// Instructions that accept extend modifiers like UXTW expect the register 883 /// being extended to be a GPR32, but the incoming DAG might be acting on a 884 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if 885 /// this is the case. 886 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { 887 if (N.getValueType() == MVT::i32) 888 return N; 889 890 SDLoc dl(N); 891 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N); 892 } 893 894 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 895 template<signed Low, signed High, signed Scale> 896 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) { 897 if (!isa<ConstantSDNode>(N)) 898 return false; 899 900 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 901 if ((MulImm % std::abs(Scale)) == 0) { 902 int64_t RDVLImm = MulImm / Scale; 903 if ((RDVLImm >= Low) && (RDVLImm <= High)) { 904 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32); 905 return true; 906 } 907 } 908 909 return false; 910 } 911 912 /// SelectArithExtendedRegister - Select a "extended register" operand. This 913 /// operand folds in an extend followed by an optional left shift. 914 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, 915 SDValue &Shift) { 916 unsigned ShiftVal = 0; 917 AArch64_AM::ShiftExtendType Ext; 918 919 if (N.getOpcode() == ISD::SHL) { 920 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 921 if (!CSD) 922 return false; 923 ShiftVal = CSD->getZExtValue(); 924 if (ShiftVal > 4) 925 return false; 926 927 Ext = getExtendTypeForNode(N.getOperand(0)); 928 if (Ext == AArch64_AM::InvalidShiftExtend) 929 return false; 930 931 Reg = N.getOperand(0).getOperand(0); 932 } else { 933 Ext = getExtendTypeForNode(N); 934 if (Ext == AArch64_AM::InvalidShiftExtend) 935 return false; 936 937 Reg = N.getOperand(0); 938 939 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the 940 // isDef32 as a heuristic for when the operand is likely to be a 32bit def. 941 auto isDef32 = [](SDValue N) { 942 unsigned Opc = N.getOpcode(); 943 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG && 944 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext && 945 Opc != ISD::AssertZext && Opc != ISD::AssertAlign && 946 Opc != ISD::FREEZE; 947 }; 948 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 && 949 isDef32(Reg)) 950 return false; 951 } 952 953 // AArch64 mandates that the RHS of the operation must use the smallest 954 // register class that could contain the size being extended from. 
Thus, 955 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though 956 // there might not be an actual 32-bit value in the program. We can 957 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. 958 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); 959 Reg = narrowIfNeeded(CurDAG, Reg); 960 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), 961 MVT::i32); 962 return isWorthFoldingALU(N); 963 } 964 965 /// SelectArithUXTXRegister - Select a "UXTX register" operand. This 966 /// operand is refered by the instructions have SP operand 967 bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg, 968 SDValue &Shift) { 969 unsigned ShiftVal = 0; 970 AArch64_AM::ShiftExtendType Ext; 971 972 if (N.getOpcode() != ISD::SHL) 973 return false; 974 975 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 976 if (!CSD) 977 return false; 978 ShiftVal = CSD->getZExtValue(); 979 if (ShiftVal > 4) 980 return false; 981 982 Ext = AArch64_AM::UXTX; 983 Reg = N.getOperand(0); 984 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), 985 MVT::i32); 986 return isWorthFoldingALU(N); 987 } 988 989 /// If there's a use of this ADDlow that's not itself a load/store then we'll 990 /// need to create a real ADD instruction from it anyway and there's no point in 991 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's 992 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding 993 /// leads to duplicated ADRP instructions. 994 static bool isWorthFoldingADDlow(SDValue N) { 995 for (auto *Use : N->uses()) { 996 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && 997 Use->getOpcode() != ISD::ATOMIC_LOAD && 998 Use->getOpcode() != ISD::ATOMIC_STORE) 999 return false; 1000 1001 // ldar and stlr have much more restrictive addressing modes (just a 1002 // register). 1003 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering())) 1004 return false; 1005 } 1006 1007 return true; 1008 } 1009 1010 /// Check if the immediate offset is valid as a scaled immediate. 1011 static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, 1012 unsigned Size) { 1013 if ((Offset & (Size - 1)) == 0 && Offset >= 0 && 1014 Offset < (Range << Log2_32(Size))) 1015 return true; 1016 return false; 1017 } 1018 1019 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit 1020 /// immediate" address. The "Size" argument is the size in bytes of the memory 1021 /// reference, which determines the scale. 1022 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, 1023 unsigned BW, unsigned Size, 1024 SDValue &Base, 1025 SDValue &OffImm) { 1026 SDLoc dl(N); 1027 const DataLayout &DL = CurDAG->getDataLayout(); 1028 const TargetLowering *TLI = getTargetLowering(); 1029 if (N.getOpcode() == ISD::FrameIndex) { 1030 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1031 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1032 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1033 return true; 1034 } 1035 1036 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed 1037 // selected here doesn't support labels/immediates, only base+offset. 
1038 if (CurDAG->isBaseWithConstantOffset(N)) { 1039 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1040 if (IsSignedImm) { 1041 int64_t RHSC = RHS->getSExtValue(); 1042 unsigned Scale = Log2_32(Size); 1043 int64_t Range = 0x1LL << (BW - 1); 1044 1045 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && 1046 RHSC < (Range << Scale)) { 1047 Base = N.getOperand(0); 1048 if (Base.getOpcode() == ISD::FrameIndex) { 1049 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1050 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1051 } 1052 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 1053 return true; 1054 } 1055 } else { 1056 // unsigned Immediate 1057 uint64_t RHSC = RHS->getZExtValue(); 1058 unsigned Scale = Log2_32(Size); 1059 uint64_t Range = 0x1ULL << BW; 1060 1061 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { 1062 Base = N.getOperand(0); 1063 if (Base.getOpcode() == ISD::FrameIndex) { 1064 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1065 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1066 } 1067 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 1068 return true; 1069 } 1070 } 1071 } 1072 } 1073 // Base only. The address will be materialized into a register before 1074 // the memory is accessed. 1075 // add x0, Xbase, #offset 1076 // stp x1, x2, [x0] 1077 Base = N; 1078 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1079 return true; 1080 } 1081 1082 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit 1083 /// immediate" address. The "Size" argument is the size in bytes of the memory 1084 /// reference, which determines the scale. 1085 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, 1086 SDValue &Base, SDValue &OffImm) { 1087 SDLoc dl(N); 1088 const DataLayout &DL = CurDAG->getDataLayout(); 1089 const TargetLowering *TLI = getTargetLowering(); 1090 if (N.getOpcode() == ISD::FrameIndex) { 1091 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1092 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1093 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1094 return true; 1095 } 1096 1097 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { 1098 GlobalAddressSDNode *GAN = 1099 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); 1100 Base = N.getOperand(0); 1101 OffImm = N.getOperand(1); 1102 if (!GAN) 1103 return true; 1104 1105 if (GAN->getOffset() % Size == 0 && 1106 GAN->getGlobal()->getPointerAlignment(DL) >= Size) 1107 return true; 1108 } 1109 1110 if (CurDAG->isBaseWithConstantOffset(N)) { 1111 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1112 int64_t RHSC = (int64_t)RHS->getZExtValue(); 1113 unsigned Scale = Log2_32(Size); 1114 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) { 1115 Base = N.getOperand(0); 1116 if (Base.getOpcode() == ISD::FrameIndex) { 1117 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1118 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1119 } 1120 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 1121 return true; 1122 } 1123 } 1124 } 1125 1126 // Before falling back to our general case, check if the unscaled 1127 // instructions can handle this. If so, that's preferable. 1128 if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) 1129 return false; 1130 1131 // Base only. The address will be materialized into a register before 1132 // the memory is accessed. 
1133 // add x0, Xbase, #offset 1134 // ldr x0, [x0] 1135 Base = N; 1136 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1137 return true; 1138 } 1139 1140 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit 1141 /// immediate" address. This should only match when there is an offset that 1142 /// is not valid for a scaled immediate addressing mode. The "Size" argument 1143 /// is the size in bytes of the memory reference, which is needed here to know 1144 /// what is valid for a scaled immediate. 1145 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, 1146 SDValue &Base, 1147 SDValue &OffImm) { 1148 if (!CurDAG->isBaseWithConstantOffset(N)) 1149 return false; 1150 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1151 int64_t RHSC = RHS->getSExtValue(); 1152 if (RHSC >= -256 && RHSC < 256) { 1153 Base = N.getOperand(0); 1154 if (Base.getOpcode() == ISD::FrameIndex) { 1155 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1156 const TargetLowering *TLI = getTargetLowering(); 1157 Base = CurDAG->getTargetFrameIndex( 1158 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1159 } 1160 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); 1161 return true; 1162 } 1163 } 1164 return false; 1165 } 1166 1167 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { 1168 SDLoc dl(N); 1169 SDValue ImpDef = SDValue( 1170 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); 1171 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef, 1172 N); 1173 } 1174 1175 /// Check if the given SHL node (\p N), can be used to form an 1176 /// extended register for an addressing mode. 1177 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, 1178 bool WantExtend, SDValue &Offset, 1179 SDValue &SignExtend) { 1180 assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); 1181 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1182 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) 1183 return false; 1184 1185 SDLoc dl(N); 1186 if (WantExtend) { 1187 AArch64_AM::ShiftExtendType Ext = 1188 getExtendTypeForNode(N.getOperand(0), true); 1189 if (Ext == AArch64_AM::InvalidShiftExtend) 1190 return false; 1191 1192 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); 1193 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1194 MVT::i32); 1195 } else { 1196 Offset = N.getOperand(0); 1197 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); 1198 } 1199 1200 unsigned LegalShiftVal = Log2_32(Size); 1201 unsigned ShiftVal = CSD->getZExtValue(); 1202 1203 if (ShiftVal != 0 && ShiftVal != LegalShiftVal) 1204 return false; 1205 1206 return isWorthFoldingAddr(N); 1207 } 1208 1209 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, 1210 SDValue &Base, SDValue &Offset, 1211 SDValue &SignExtend, 1212 SDValue &DoShift) { 1213 if (N.getOpcode() != ISD::ADD) 1214 return false; 1215 SDValue LHS = N.getOperand(0); 1216 SDValue RHS = N.getOperand(1); 1217 SDLoc dl(N); 1218 1219 // We don't want to match immediate adds here, because they are better lowered 1220 // to the register-immediate addressing modes. 1221 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) 1222 return false; 1223 1224 // Check if this particular node is reused in any non-memory related 1225 // operation. If yes, do not try to fold this node into the address 1226 // computation, since the computation will be kept. 
1227 const SDNode *Node = N.getNode(); 1228 for (SDNode *UI : Node->uses()) { 1229 if (!isa<MemSDNode>(*UI)) 1230 return false; 1231 } 1232 1233 // Remember if it is worth folding N when it produces extended register. 1234 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N); 1235 1236 // Try to match a shifted extend on the RHS. 1237 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1238 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { 1239 Base = LHS; 1240 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1241 return true; 1242 } 1243 1244 // Try to match a shifted extend on the LHS. 1245 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1246 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { 1247 Base = RHS; 1248 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1249 return true; 1250 } 1251 1252 // There was no shift, whatever else we find. 1253 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); 1254 1255 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; 1256 // Try to match an unshifted extend on the LHS. 1257 if (IsExtendedRegisterWorthFolding && 1258 (Ext = getExtendTypeForNode(LHS, true)) != 1259 AArch64_AM::InvalidShiftExtend) { 1260 Base = RHS; 1261 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); 1262 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1263 MVT::i32); 1264 if (isWorthFoldingAddr(LHS)) 1265 return true; 1266 } 1267 1268 // Try to match an unshifted extend on the RHS. 1269 if (IsExtendedRegisterWorthFolding && 1270 (Ext = getExtendTypeForNode(RHS, true)) != 1271 AArch64_AM::InvalidShiftExtend) { 1272 Base = LHS; 1273 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); 1274 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1275 MVT::i32); 1276 if (isWorthFoldingAddr(RHS)) 1277 return true; 1278 } 1279 1280 return false; 1281 } 1282 1283 // Check if the given immediate is preferred by ADD. If an immediate can be 1284 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be 1285 // encoded by one MOVZ, return true. 1286 static bool isPreferredADD(int64_t ImmOff) { 1287 // Constant in [0x0, 0xfff] can be encoded in ADD. 1288 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) 1289 return true; 1290 // Check if it can be encoded in an "ADD LSL #12". 1291 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) 1292 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant. 1293 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && 1294 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; 1295 return false; 1296 } 1297 1298 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, 1299 SDValue &Base, SDValue &Offset, 1300 SDValue &SignExtend, 1301 SDValue &DoShift) { 1302 if (N.getOpcode() != ISD::ADD) 1303 return false; 1304 SDValue LHS = N.getOperand(0); 1305 SDValue RHS = N.getOperand(1); 1306 SDLoc DL(N); 1307 1308 // Check if this particular node is reused in any non-memory related 1309 // operation. If yes, do not try to fold this node into the address 1310 // computation, since the computation will be kept. 1311 const SDNode *Node = N.getNode(); 1312 for (SDNode *UI : Node->uses()) { 1313 if (!isa<MemSDNode>(*UI)) 1314 return false; 1315 } 1316 1317 // Watch out if RHS is a wide immediate, it can not be selected into 1318 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into 1319 // ADD/SUB. 
Instead it will use [BaseReg + 0] address mode and generate 1320 // instructions like: 1321 // MOV X0, WideImmediate 1322 // ADD X1, BaseReg, X0 1323 // LDR X2, [X1, 0] 1324 // For such situation, using [BaseReg, XReg] addressing mode can save one 1325 // ADD/SUB: 1326 // MOV X0, WideImmediate 1327 // LDR X2, [BaseReg, X0] 1328 if (isa<ConstantSDNode>(RHS)) { 1329 int64_t ImmOff = (int64_t)RHS->getAsZExtVal(); 1330 // Skip the immediate can be selected by load/store addressing mode. 1331 // Also skip the immediate can be encoded by a single ADD (SUB is also 1332 // checked by using -ImmOff). 1333 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) || 1334 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) 1335 return false; 1336 1337 SDValue Ops[] = { RHS }; 1338 SDNode *MOVI = 1339 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); 1340 SDValue MOVIV = SDValue(MOVI, 0); 1341 // This ADD of two X register will be selected into [Reg+Reg] mode. 1342 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); 1343 } 1344 1345 // Remember if it is worth folding N when it produces extended register. 1346 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N); 1347 1348 // Try to match a shifted extend on the RHS. 1349 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1350 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { 1351 Base = LHS; 1352 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1353 return true; 1354 } 1355 1356 // Try to match a shifted extend on the LHS. 1357 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1358 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { 1359 Base = RHS; 1360 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1361 return true; 1362 } 1363 1364 // Match any non-shifted, non-extend, non-immediate add expression. 1365 Base = LHS; 1366 Offset = RHS; 1367 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); 1368 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); 1369 // Reg1 + Reg2 is free: no check needed. 1370 return true; 1371 } 1372 1373 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { 1374 static const unsigned RegClassIDs[] = { 1375 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; 1376 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, 1377 AArch64::dsub2, AArch64::dsub3}; 1378 1379 return createTuple(Regs, RegClassIDs, SubRegs); 1380 } 1381 1382 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { 1383 static const unsigned RegClassIDs[] = { 1384 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; 1385 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, 1386 AArch64::qsub2, AArch64::qsub3}; 1387 1388 return createTuple(Regs, RegClassIDs, SubRegs); 1389 } 1390 1391 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) { 1392 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID, 1393 AArch64::ZPR3RegClassID, 1394 AArch64::ZPR4RegClassID}; 1395 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, 1396 AArch64::zsub2, AArch64::zsub3}; 1397 1398 return createTuple(Regs, RegClassIDs, SubRegs); 1399 } 1400 1401 SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) { 1402 assert(Regs.size() == 2 || Regs.size() == 4); 1403 1404 // The createTuple interface requires 3 RegClassIDs for each possible 1405 // tuple type even though we only have them for ZPR2 and ZPR4. 
1406 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0, 1407 AArch64::ZPR4Mul4RegClassID}; 1408 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, 1409 AArch64::zsub2, AArch64::zsub3}; 1410 return createTuple(Regs, RegClassIDs, SubRegs); 1411 } 1412 1413 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, 1414 const unsigned RegClassIDs[], 1415 const unsigned SubRegs[]) { 1416 // There's no special register-class for a vector-list of 1 element: it's just 1417 // a vector. 1418 if (Regs.size() == 1) 1419 return Regs[0]; 1420 1421 assert(Regs.size() >= 2 && Regs.size() <= 4); 1422 1423 SDLoc DL(Regs[0]); 1424 1425 SmallVector<SDValue, 4> Ops; 1426 1427 // First operand of REG_SEQUENCE is the desired RegClass. 1428 Ops.push_back( 1429 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); 1430 1431 // Then we get pairs of source & subregister-position for the components. 1432 for (unsigned i = 0; i < Regs.size(); ++i) { 1433 Ops.push_back(Regs[i]); 1434 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); 1435 } 1436 1437 SDNode *N = 1438 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 1439 return SDValue(N, 0); 1440 } 1441 1442 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, 1443 bool isExt) { 1444 SDLoc dl(N); 1445 EVT VT = N->getValueType(0); 1446 1447 unsigned ExtOff = isExt; 1448 1449 // Form a REG_SEQUENCE to force register allocation. 1450 unsigned Vec0Off = ExtOff + 1; 1451 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, 1452 N->op_begin() + Vec0Off + NumVecs); 1453 SDValue RegSeq = createQTuple(Regs); 1454 1455 SmallVector<SDValue, 6> Ops; 1456 if (isExt) 1457 Ops.push_back(N->getOperand(1)); 1458 Ops.push_back(RegSeq); 1459 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); 1460 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 1461 } 1462 1463 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { 1464 LoadSDNode *LD = cast<LoadSDNode>(N); 1465 if (LD->isUnindexed()) 1466 return false; 1467 EVT VT = LD->getMemoryVT(); 1468 EVT DstVT = N->getValueType(0); 1469 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1470 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; 1471 1472 // We're not doing validity checking here. That was done when checking 1473 // if we should mark the load as indexed or not. We're just selecting 1474 // the right instruction. 1475 unsigned Opcode = 0; 1476 1477 ISD::LoadExtType ExtType = LD->getExtensionType(); 1478 bool InsertTo64 = false; 1479 if (VT == MVT::i64) 1480 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; 1481 else if (VT == MVT::i32) { 1482 if (ExtType == ISD::NON_EXTLOAD) 1483 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1484 else if (ExtType == ISD::SEXTLOAD) 1485 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; 1486 else { 1487 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1488 InsertTo64 = true; 1489 // The result of the load is only i32. It's the subreg_to_reg that makes 1490 // it into an i64. 1491 DstVT = MVT::i32; 1492 } 1493 } else if (VT == MVT::i16) { 1494 if (ExtType == ISD::SEXTLOAD) { 1495 if (DstVT == MVT::i64) 1496 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; 1497 else 1498 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; 1499 } else { 1500 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; 1501 InsertTo64 = DstVT == MVT::i64; 1502 // The result of the load is only i32. 
It's the subreg_to_reg that makes 1503 // it into an i64. 1504 DstVT = MVT::i32; 1505 } 1506 } else if (VT == MVT::i8) { 1507 if (ExtType == ISD::SEXTLOAD) { 1508 if (DstVT == MVT::i64) 1509 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; 1510 else 1511 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; 1512 } else { 1513 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; 1514 InsertTo64 = DstVT == MVT::i64; 1515 // The result of the load is only i32. It's the subreg_to_reg that makes 1516 // it into an i64. 1517 DstVT = MVT::i32; 1518 } 1519 } else if (VT == MVT::f16) { 1520 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1521 } else if (VT == MVT::bf16) { 1522 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1523 } else if (VT == MVT::f32) { 1524 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; 1525 } else if (VT == MVT::f64 || VT.is64BitVector()) { 1526 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; 1527 } else if (VT.is128BitVector()) { 1528 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; 1529 } else 1530 return false; 1531 SDValue Chain = LD->getChain(); 1532 SDValue Base = LD->getBasePtr(); 1533 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); 1534 int OffsetVal = (int)OffsetOp->getZExtValue(); 1535 SDLoc dl(N); 1536 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); 1537 SDValue Ops[] = { Base, Offset, Chain }; 1538 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, 1539 MVT::Other, Ops); 1540 1541 // Transfer memoperands. 1542 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1543 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp}); 1544 1545 // Either way, we're replacing the node, so tell the caller that. 1546 SDValue LoadedVal = SDValue(Res, 1); 1547 if (InsertTo64) { 1548 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 1549 LoadedVal = 1550 SDValue(CurDAG->getMachineNode( 1551 AArch64::SUBREG_TO_REG, dl, MVT::i64, 1552 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, 1553 SubReg), 1554 0); 1555 } 1556 1557 ReplaceUses(SDValue(N, 0), LoadedVal); 1558 ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); 1559 ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); 1560 CurDAG->RemoveDeadNode(N); 1561 return true; 1562 } 1563 1564 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 1565 unsigned SubRegIdx) { 1566 SDLoc dl(N); 1567 EVT VT = N->getValueType(0); 1568 SDValue Chain = N->getOperand(0); 1569 1570 SDValue Ops[] = {N->getOperand(2), // Mem operand; 1571 Chain}; 1572 1573 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1574 1575 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1576 SDValue SuperReg = SDValue(Ld, 0); 1577 for (unsigned i = 0; i < NumVecs; ++i) 1578 ReplaceUses(SDValue(N, i), 1579 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1580 1581 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1582 1583 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one, 1584 // because it's too simple to have needed special treatment during lowering. 
1585 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) { 1586 MachineMemOperand *MemOp = MemIntr->getMemOperand(); 1587 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 1588 } 1589 1590 CurDAG->RemoveDeadNode(N); 1591 } 1592 1593 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, 1594 unsigned Opc, unsigned SubRegIdx) { 1595 SDLoc dl(N); 1596 EVT VT = N->getValueType(0); 1597 SDValue Chain = N->getOperand(0); 1598 1599 SDValue Ops[] = {N->getOperand(1), // Mem operand 1600 N->getOperand(2), // Incremental 1601 Chain}; 1602 1603 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1604 MVT::Untyped, MVT::Other}; 1605 1606 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1607 1608 // Update uses of write back register 1609 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1610 1611 // Update uses of vector list 1612 SDValue SuperReg = SDValue(Ld, 1); 1613 if (NumVecs == 1) 1614 ReplaceUses(SDValue(N, 0), SuperReg); 1615 else 1616 for (unsigned i = 0; i < NumVecs; ++i) 1617 ReplaceUses(SDValue(N, i), 1618 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1619 1620 // Update the chain 1621 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1622 CurDAG->RemoveDeadNode(N); 1623 } 1624 1625 /// Optimize \param OldBase and \param OldOffset selecting the best addressing 1626 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the 1627 /// new Base and an SDValue representing the new offset. 1628 std::tuple<unsigned, SDValue, SDValue> 1629 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, 1630 unsigned Opc_ri, 1631 const SDValue &OldBase, 1632 const SDValue &OldOffset, 1633 unsigned Scale) { 1634 SDValue NewBase = OldBase; 1635 SDValue NewOffset = OldOffset; 1636 // Detect a possible Reg+Imm addressing mode. 1637 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>( 1638 N, OldBase, NewBase, NewOffset); 1639 1640 // Detect a possible reg+reg addressing mode, but only if we haven't already 1641 // detected a Reg+Imm one. 1642 const bool IsRegReg = 1643 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); 1644 1645 // Select the instruction. 1646 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); 1647 } 1648 1649 enum class SelectTypeKind { 1650 Int1 = 0, 1651 Int = 1, 1652 FP = 2, 1653 AnyType = 3, 1654 }; 1655 1656 /// This function selects an opcode from a list of opcodes, which is 1657 /// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit } 1658 /// element types, in this order. 
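/// For example (illustrative), a scalable vector with a minimum of 8 elements,
/// such as nxv8i16, selects Opcodes[1], while nxv2i64 selects Opcodes[3]; a
/// return value of 0 means no suitable opcode was found.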
1659 template <SelectTypeKind Kind> 1660 static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) { 1661 // Only match scalable vector VTs 1662 if (!VT.isScalableVector()) 1663 return 0; 1664 1665 EVT EltVT = VT.getVectorElementType(); 1666 switch (Kind) { 1667 case SelectTypeKind::AnyType: 1668 break; 1669 case SelectTypeKind::Int: 1670 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 && 1671 EltVT != MVT::i64) 1672 return 0; 1673 break; 1674 case SelectTypeKind::Int1: 1675 if (EltVT != MVT::i1) 1676 return 0; 1677 break; 1678 case SelectTypeKind::FP: 1679 if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64) 1680 return 0; 1681 break; 1682 } 1683 1684 unsigned Offset; 1685 switch (VT.getVectorMinNumElements()) { 1686 case 16: // 8-bit 1687 Offset = 0; 1688 break; 1689 case 8: // 16-bit 1690 Offset = 1; 1691 break; 1692 case 4: // 32-bit 1693 Offset = 2; 1694 break; 1695 case 2: // 64-bit 1696 Offset = 3; 1697 break; 1698 default: 1699 return 0; 1700 } 1701 1702 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset]; 1703 } 1704 1705 // This function is almost identical to SelectWhilePair, but has an 1706 // extra check on the range of the immediate operand. 1707 // TODO: Merge these two functions together at some point? 1708 void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) { 1709 // Immediate can be either 0 or 1. 1710 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2))) 1711 if (Imm->getZExtValue() > 1) 1712 return; 1713 1714 SDLoc DL(N); 1715 EVT VT = N->getValueType(0); 1716 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; 1717 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 1718 SDValue SuperReg = SDValue(WhilePair, 0); 1719 1720 for (unsigned I = 0; I < 2; ++I) 1721 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 1722 AArch64::psub0 + I, DL, VT, SuperReg)); 1723 1724 CurDAG->RemoveDeadNode(N); 1725 } 1726 1727 void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) { 1728 SDLoc DL(N); 1729 EVT VT = N->getValueType(0); 1730 1731 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; 1732 1733 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 1734 SDValue SuperReg = SDValue(WhilePair, 0); 1735 1736 for (unsigned I = 0; I < 2; ++I) 1737 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 1738 AArch64::psub0 + I, DL, VT, SuperReg)); 1739 1740 CurDAG->RemoveDeadNode(N); 1741 } 1742 1743 void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, 1744 unsigned Opcode) { 1745 EVT VT = N->getValueType(0); 1746 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1747 SDValue Ops = createZTuple(Regs); 1748 SDLoc DL(N); 1749 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops); 1750 SDValue SuperReg = SDValue(Intrinsic, 0); 1751 for (unsigned i = 0; i < NumVecs; ++i) 1752 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1753 AArch64::zsub0 + i, DL, VT, SuperReg)); 1754 1755 CurDAG->RemoveDeadNode(N); 1756 } 1757 1758 void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N, 1759 unsigned NumVecs, 1760 bool IsZmMulti, 1761 unsigned Opcode, 1762 bool HasPred) { 1763 assert(Opcode != 0 && "Unexpected opcode"); 1764 1765 SDLoc DL(N); 1766 EVT VT = N->getValueType(0); 1767 unsigned FirstVecIdx = HasPred ? 
2 : 1; 1768 1769 auto GetMultiVecOperand = [=](unsigned StartIdx) { 1770 SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx, 1771 N->op_begin() + StartIdx + NumVecs); 1772 return createZMulTuple(Regs); 1773 }; 1774 1775 SDValue Zdn = GetMultiVecOperand(FirstVecIdx); 1776 1777 SDValue Zm; 1778 if (IsZmMulti) 1779 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx); 1780 else 1781 Zm = N->getOperand(NumVecs + FirstVecIdx); 1782 1783 SDNode *Intrinsic; 1784 if (HasPred) 1785 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, 1786 N->getOperand(1), Zdn, Zm); 1787 else 1788 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm); 1789 SDValue SuperReg = SDValue(Intrinsic, 0); 1790 for (unsigned i = 0; i < NumVecs; ++i) 1791 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1792 AArch64::zsub0 + i, DL, VT, SuperReg)); 1793 1794 CurDAG->RemoveDeadNode(N); 1795 } 1796 1797 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, 1798 unsigned Scale, unsigned Opc_ri, 1799 unsigned Opc_rr, bool IsIntr) { 1800 assert(Scale < 5 && "Invalid scaling value."); 1801 SDLoc DL(N); 1802 EVT VT = N->getValueType(0); 1803 SDValue Chain = N->getOperand(0); 1804 1805 // Optimize addressing mode. 1806 SDValue Base, Offset; 1807 unsigned Opc; 1808 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1809 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2), 1810 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); 1811 1812 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate 1813 Base, // Memory operand 1814 Offset, Chain}; 1815 1816 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1817 1818 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1819 SDValue SuperReg = SDValue(Load, 0); 1820 for (unsigned i = 0; i < NumVecs; ++i) 1821 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1822 AArch64::zsub0 + i, DL, VT, SuperReg)); 1823 1824 // Copy chain 1825 unsigned ChainIdx = NumVecs; 1826 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1827 CurDAG->RemoveDeadNode(N); 1828 } 1829 1830 void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N, 1831 unsigned NumVecs, 1832 unsigned Scale, 1833 unsigned Opc_ri, 1834 unsigned Opc_rr) { 1835 assert(Scale < 4 && "Invalid scaling value."); 1836 SDLoc DL(N); 1837 EVT VT = N->getValueType(0); 1838 SDValue Chain = N->getOperand(0); 1839 1840 SDValue PNg = N->getOperand(2); 1841 SDValue Base = N->getOperand(3); 1842 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); 1843 unsigned Opc; 1844 std::tie(Opc, Base, Offset) = 1845 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale); 1846 1847 SDValue Ops[] = {PNg, // Predicate-as-counter 1848 Base, // Memory operand 1849 Offset, Chain}; 1850 1851 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1852 1853 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1854 SDValue SuperReg = SDValue(Load, 0); 1855 for (unsigned i = 0; i < NumVecs; ++i) 1856 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1857 AArch64::zsub0 + i, DL, VT, SuperReg)); 1858 1859 // Copy chain 1860 unsigned ChainIdx = NumVecs; 1861 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1862 CurDAG->RemoveDeadNode(N); 1863 } 1864 1865 void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, 1866 unsigned Opcode) { 1867 if (N->getValueType(0) != MVT::nxv4f32) 1868 return; 1869 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode); 1870 } 1871 1872 void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, 1873 unsigned 
NumOutVecs, 1874 unsigned Opc, uint32_t MaxImm) { 1875 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4))) 1876 if (Imm->getZExtValue() > MaxImm) 1877 return; 1878 1879 SDValue ZtValue; 1880 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue)) 1881 return; 1882 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)}; 1883 SDLoc DL(Node); 1884 EVT VT = Node->getValueType(0); 1885 1886 SDNode *Instruction = 1887 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops); 1888 SDValue SuperReg = SDValue(Instruction, 0); 1889 1890 for (unsigned I = 0; I < NumOutVecs; ++I) 1891 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg( 1892 AArch64::zsub0 + I, DL, VT, SuperReg)); 1893 1894 // Copy chain 1895 unsigned ChainIdx = NumOutVecs; 1896 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1)); 1897 CurDAG->RemoveDeadNode(Node); 1898 } 1899 1900 void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs, 1901 unsigned Op) { 1902 SDLoc DL(N); 1903 EVT VT = N->getValueType(0); 1904 1905 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1906 SDValue Zd = createZMulTuple(Regs); 1907 SDValue Zn = N->getOperand(1 + NumVecs); 1908 SDValue Zm = N->getOperand(2 + NumVecs); 1909 1910 SDValue Ops[] = {Zd, Zn, Zm}; 1911 1912 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops); 1913 SDValue SuperReg = SDValue(Intrinsic, 0); 1914 for (unsigned i = 0; i < NumVecs; ++i) 1915 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1916 AArch64::zsub0 + i, DL, VT, SuperReg)); 1917 1918 CurDAG->RemoveDeadNode(N); 1919 } 1920 1921 bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) { 1922 switch (BaseReg) { 1923 default: 1924 return false; 1925 case AArch64::ZA: 1926 case AArch64::ZAB0: 1927 if (TileNum == 0) 1928 break; 1929 return false; 1930 case AArch64::ZAH0: 1931 if (TileNum <= 1) 1932 break; 1933 return false; 1934 case AArch64::ZAS0: 1935 if (TileNum <= 3) 1936 break; 1937 return false; 1938 case AArch64::ZAD0: 1939 if (TileNum <= 7) 1940 break; 1941 return false; 1942 } 1943 1944 BaseReg += TileNum; 1945 return true; 1946 } 1947 1948 template <unsigned MaxIdx, unsigned Scale> 1949 void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, 1950 unsigned BaseReg, unsigned Op) { 1951 unsigned TileNum = 0; 1952 if (BaseReg != AArch64::ZA) 1953 TileNum = N->getConstantOperandVal(2); 1954 1955 if (!SelectSMETile(BaseReg, TileNum)) 1956 return; 1957 1958 SDValue SliceBase, Base, Offset; 1959 if (BaseReg == AArch64::ZA) 1960 SliceBase = N->getOperand(2); 1961 else 1962 SliceBase = N->getOperand(3); 1963 1964 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale)) 1965 return; 1966 1967 SDLoc DL(N); 1968 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other); 1969 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)}; 1970 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops); 1971 1972 EVT VT = N->getValueType(0); 1973 for (unsigned I = 0; I < NumVecs; ++I) 1974 ReplaceUses(SDValue(N, I), 1975 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT, 1976 SDValue(Mov, 0))); 1977 // Copy chain 1978 unsigned ChainIdx = NumVecs; 1979 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1)); 1980 CurDAG->RemoveDeadNode(N); 1981 } 1982 1983 void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N, 1984 unsigned NumOutVecs, 1985 bool IsTupleInput, 1986 unsigned Opc) { 1987 SDLoc DL(N); 1988 EVT VT = N->getValueType(0); 1989 unsigned 
NumInVecs = N->getNumOperands() - 1; 1990 1991 SmallVector<SDValue, 6> Ops; 1992 if (IsTupleInput) { 1993 assert((NumInVecs == 2 || NumInVecs == 4) && 1994 "Don't know how to handle multi-register input!"); 1995 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, 1996 N->op_begin() + 1 + NumInVecs); 1997 Ops.push_back(createZMulTuple(Regs)); 1998 } else { 1999 // All intrinsic nodes have the ID as the first operand, hence the "1 + I". 2000 for (unsigned I = 0; I < NumInVecs; I++) 2001 Ops.push_back(N->getOperand(1 + I)); 2002 } 2003 2004 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 2005 SDValue SuperReg = SDValue(Res, 0); 2006 2007 for (unsigned I = 0; I < NumOutVecs; I++) 2008 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 2009 AArch64::zsub0 + I, DL, VT, SuperReg)); 2010 CurDAG->RemoveDeadNode(N); 2011 } 2012 2013 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, 2014 unsigned Opc) { 2015 SDLoc dl(N); 2016 EVT VT = N->getOperand(2)->getValueType(0); 2017 2018 // Form a REG_SEQUENCE to force register allocation. 2019 bool Is128Bit = VT.getSizeInBits() == 128; 2020 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2021 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 2022 2023 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; 2024 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 2025 2026 // Transfer memoperands. 2027 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2028 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2029 2030 ReplaceNode(N, St); 2031 } 2032 2033 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, 2034 unsigned Scale, unsigned Opc_rr, 2035 unsigned Opc_ri) { 2036 SDLoc dl(N); 2037 2038 // Form a REG_SEQUENCE to force register allocation. 2039 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2040 SDValue RegSeq = createZTuple(Regs); 2041 2042 // Optimize addressing mode. 2043 unsigned Opc; 2044 SDValue Offset, Base; 2045 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 2046 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), 2047 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); 2048 2049 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate 2050 Base, // address 2051 Offset, // offset 2052 N->getOperand(0)}; // chain 2053 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 2054 2055 ReplaceNode(N, St); 2056 } 2057 2058 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, 2059 SDValue &OffImm) { 2060 SDLoc dl(N); 2061 const DataLayout &DL = CurDAG->getDataLayout(); 2062 const TargetLowering *TLI = getTargetLowering(); 2063 2064 // Try to match it for the frame address 2065 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) { 2066 int FI = FINode->getIndex(); 2067 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 2068 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 2069 return true; 2070 } 2071 2072 return false; 2073 } 2074 2075 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 2076 unsigned Opc) { 2077 SDLoc dl(N); 2078 EVT VT = N->getOperand(2)->getValueType(0); 2079 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2080 MVT::Other}; // Type for the Chain 2081 2082 // Form a REG_SEQUENCE to force register allocation. 
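  // The vector-list operand of the post-incremented ST1/ST2/ST3/ST4 forms must
  // live in consecutive registers; the D- and Q-tuple register classes built
  // by createDTuple/createQTuple express that constraint to the register
  // allocator.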
2083 bool Is128Bit = VT.getSizeInBits() == 128; 2084 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2085 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 2086 2087 SDValue Ops[] = {RegSeq, 2088 N->getOperand(NumVecs + 1), // base register 2089 N->getOperand(NumVecs + 2), // Incremental 2090 N->getOperand(0)}; // Chain 2091 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2092 2093 ReplaceNode(N, St); 2094 } 2095 2096 namespace { 2097 /// WidenVector - Given a value in the V64 register class, produce the 2098 /// equivalent value in the V128 register class. 2099 class WidenVector { 2100 SelectionDAG &DAG; 2101 2102 public: 2103 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 2104 2105 SDValue operator()(SDValue V64Reg) { 2106 EVT VT = V64Reg.getValueType(); 2107 unsigned NarrowSize = VT.getVectorNumElements(); 2108 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 2109 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 2110 SDLoc DL(V64Reg); 2111 2112 SDValue Undef = 2113 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 2114 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 2115 } 2116 }; 2117 } // namespace 2118 2119 /// NarrowVector - Given a value in the V128 register class, produce the 2120 /// equivalent value in the V64 register class. 2121 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 2122 EVT VT = V128Reg.getValueType(); 2123 unsigned WideSize = VT.getVectorNumElements(); 2124 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 2125 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 2126 2127 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 2128 V128Reg); 2129 } 2130 2131 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 2132 unsigned Opc) { 2133 SDLoc dl(N); 2134 EVT VT = N->getValueType(0); 2135 bool Narrow = VT.getSizeInBits() == 64; 2136 2137 // Form a REG_SEQUENCE to force register allocation. 2138 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2139 2140 if (Narrow) 2141 transform(Regs, Regs.begin(), 2142 WidenVector(*CurDAG)); 2143 2144 SDValue RegSeq = createQTuple(Regs); 2145 2146 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 2147 2148 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2); 2149 2150 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2151 N->getOperand(NumVecs + 3), N->getOperand(0)}; 2152 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2153 SDValue SuperReg = SDValue(Ld, 0); 2154 2155 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 2156 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 2157 AArch64::qsub2, AArch64::qsub3 }; 2158 for (unsigned i = 0; i < NumVecs; ++i) { 2159 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 2160 if (Narrow) 2161 NV = NarrowVector(NV, *CurDAG); 2162 ReplaceUses(SDValue(N, i), NV); 2163 } 2164 2165 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 2166 CurDAG->RemoveDeadNode(N); 2167 } 2168 2169 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 2170 unsigned Opc) { 2171 SDLoc dl(N); 2172 EVT VT = N->getValueType(0); 2173 bool Narrow = VT.getSizeInBits() == 64; 2174 2175 // Form a REG_SEQUENCE to force register allocation. 
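  // The lane-indexed LD1/LD2/LD3/LD4 instructions are defined here on the
  // 128-bit (Q) register lists, so 64-bit vectors are widened into Q registers
  // first (WidenVector) and the results are narrowed back with NarrowVector
  // below.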
2176 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2177 2178 if (Narrow) 2179 transform(Regs, Regs.begin(), 2180 WidenVector(*CurDAG)); 2181 2182 SDValue RegSeq = createQTuple(Regs); 2183 2184 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2185 RegSeq->getValueType(0), MVT::Other}; 2186 2187 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1); 2188 2189 SDValue Ops[] = {RegSeq, 2190 CurDAG->getTargetConstant(LaneNo, dl, 2191 MVT::i64), // Lane Number 2192 N->getOperand(NumVecs + 2), // Base register 2193 N->getOperand(NumVecs + 3), // Incremental 2194 N->getOperand(0)}; 2195 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2196 2197 // Update uses of the write back register 2198 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 2199 2200 // Update uses of the vector list 2201 SDValue SuperReg = SDValue(Ld, 1); 2202 if (NumVecs == 1) { 2203 ReplaceUses(SDValue(N, 0), 2204 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 2205 } else { 2206 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 2207 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 2208 AArch64::qsub2, AArch64::qsub3 }; 2209 for (unsigned i = 0; i < NumVecs; ++i) { 2210 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 2211 SuperReg); 2212 if (Narrow) 2213 NV = NarrowVector(NV, *CurDAG); 2214 ReplaceUses(SDValue(N, i), NV); 2215 } 2216 } 2217 2218 // Update the Chain 2219 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 2220 CurDAG->RemoveDeadNode(N); 2221 } 2222 2223 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 2224 unsigned Opc) { 2225 SDLoc dl(N); 2226 EVT VT = N->getOperand(2)->getValueType(0); 2227 bool Narrow = VT.getSizeInBits() == 64; 2228 2229 // Form a REG_SEQUENCE to force register allocation. 2230 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2231 2232 if (Narrow) 2233 transform(Regs, Regs.begin(), 2234 WidenVector(*CurDAG)); 2235 2236 SDValue RegSeq = createQTuple(Regs); 2237 2238 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2); 2239 2240 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2241 N->getOperand(NumVecs + 3), N->getOperand(0)}; 2242 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 2243 2244 // Transfer memoperands. 2245 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2246 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2247 2248 ReplaceNode(N, St); 2249 } 2250 2251 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 2252 unsigned Opc) { 2253 SDLoc dl(N); 2254 EVT VT = N->getOperand(2)->getValueType(0); 2255 bool Narrow = VT.getSizeInBits() == 64; 2256 2257 // Form a REG_SEQUENCE to force register allocation. 2258 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2259 2260 if (Narrow) 2261 transform(Regs, Regs.begin(), 2262 WidenVector(*CurDAG)); 2263 2264 SDValue RegSeq = createQTuple(Regs); 2265 2266 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2267 MVT::Other}; 2268 2269 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1); 2270 2271 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2272 N->getOperand(NumVecs + 2), // Base Register 2273 N->getOperand(NumVecs + 3), // Incremental 2274 N->getOperand(0)}; 2275 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2276 2277 // Transfer memoperands. 
2278 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2279 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2280 2281 ReplaceNode(N, St); 2282 } 2283 2284 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 2285 unsigned &Opc, SDValue &Opd0, 2286 unsigned &LSB, unsigned &MSB, 2287 unsigned NumberOfIgnoredLowBits, 2288 bool BiggerPattern) { 2289 assert(N->getOpcode() == ISD::AND && 2290 "N must be a AND operation to call this function"); 2291 2292 EVT VT = N->getValueType(0); 2293 2294 // Here we can test the type of VT and return false when the type does not 2295 // match, but since it is done prior to that call in the current context 2296 // we turned that into an assert to avoid redundant code. 2297 assert((VT == MVT::i32 || VT == MVT::i64) && 2298 "Type checking must have been done before calling this function"); 2299 2300 // FIXME: simplify-demanded-bits in DAGCombine will probably have 2301 // changed the AND node to a 32-bit mask operation. We'll have to 2302 // undo that as part of the transform here if we want to catch all 2303 // the opportunities. 2304 // Currently the NumberOfIgnoredLowBits argument helps to recover 2305 // from these situations when matching bigger pattern (bitfield insert). 2306 2307 // For unsigned extracts, check for a shift right and mask 2308 uint64_t AndImm = 0; 2309 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) 2310 return false; 2311 2312 const SDNode *Op0 = N->getOperand(0).getNode(); 2313 2314 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 2315 // simplified. Try to undo that 2316 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 2317 2318 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 2319 if (AndImm & (AndImm + 1)) 2320 return false; 2321 2322 bool ClampMSB = false; 2323 uint64_t SrlImm = 0; 2324 // Handle the SRL + ANY_EXTEND case. 2325 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 2326 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 2327 // Extend the incoming operand of the SRL to 64-bit. 2328 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 2329 // Make sure to clamp the MSB so that we preserve the semantics of the 2330 // original operations. 2331 ClampMSB = true; 2332 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 2333 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 2334 SrlImm)) { 2335 // If the shift result was truncated, we can still combine them. 2336 Opd0 = Op0->getOperand(0).getOperand(0); 2337 2338 // Use the type of SRL node. 2339 VT = Opd0->getValueType(0); 2340 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 2341 Opd0 = Op0->getOperand(0); 2342 ClampMSB = (VT == MVT::i32); 2343 } else if (BiggerPattern) { 2344 // Let's pretend a 0 shift right has been performed. 2345 // The resulting code will be at least as good as the original one 2346 // plus it may expose more opportunities for bitfield insert pattern. 2347 // FIXME: Currently we limit this to the bigger pattern, because 2348 // some optimizations expect AND and not UBFM. 2349 Opd0 = N->getOperand(0); 2350 } else 2351 return false; 2352 2353 // Bail out on large immediates. This happens when no proper 2354 // combining/constant folding was performed. 
  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  LSB = SrlImm;
  MSB = SrlImm +
        (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
                        : llvm::countr_one<uint64_t>(AndImm)) -
        1;
  if (ClampMSB)
    // Since we're moving the extend before the right shift operation, we need
    // to clamp the MSB to make sure we don't shift in undefined bits instead
    // of the zeros which would get shifted in with the original right shift
    // operation.
    MSB = MSB > 31 ? 31 : MSB;

  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  return true;
}

static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
                                             SDValue &Opd0, unsigned &Immr,
                                             unsigned &Imms) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  EVT VT = N->getValueType(0);
  unsigned BitWidth = VT.getSizeInBits();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  SDValue Op = N->getOperand(0);
  if (Op->getOpcode() == ISD::TRUNCATE) {
    Op = Op->getOperand(0);
    VT = Op->getValueType(0);
    BitWidth = VT.getSizeInBits();
  }

  uint64_t ShiftImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
      !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    return false;

  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
  if (ShiftImm + Width > BitWidth)
    return false;

  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
  Opd0 = Op.getOperand(0);
  Immr = ShiftImm;
  Imms = ShiftImm + Width - 1;
  return true;
}

static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
                                          SDValue &Opd0, unsigned &LSB,
                                          unsigned &MSB) {
  // We are looking for the following pattern, which basically extracts several
  // contiguous bits from the source value and places them at the LSB of the
  // destination value; all other bits of the destination value are set to
  // zero:
  //
  // Value2 = AND Value, MaskImm
  // SRL Value2, ShiftImm
  //
  // where MaskImm >> ShiftImm determines the width of the extracted bitfield.
  //
  // This gets selected into a single UBFM:
  //
  // UBFM Value, ShiftImm, Log2_64(MaskImm)
  //

  if (N->getOpcode() != ISD::SRL)
    return false;

  uint64_t AndMask = 0;
  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
    return false;

  Opd0 = N->getOperand(0).getOperand(0);

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N->getOperand(1), SrlImm))
    return false;

  // Check whether we really have an extract of several contiguous bits here.
  if (!isMask_64(AndMask >> SrlImm))
    return false;

  Opc = N->getValueType(0) == MVT::i32 ?
AArch64::UBFMWri : AArch64::UBFMXri; 2446 LSB = SrlImm; 2447 MSB = llvm::Log2_64(AndMask); 2448 return true; 2449 } 2450 2451 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 2452 unsigned &Immr, unsigned &Imms, 2453 bool BiggerPattern) { 2454 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 2455 "N must be a SHR/SRA operation to call this function"); 2456 2457 EVT VT = N->getValueType(0); 2458 2459 // Here we can test the type of VT and return false when the type does not 2460 // match, but since it is done prior to that call in the current context 2461 // we turned that into an assert to avoid redundant code. 2462 assert((VT == MVT::i32 || VT == MVT::i64) && 2463 "Type checking must have been done before calling this function"); 2464 2465 // Check for AND + SRL doing several bits extract. 2466 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 2467 return true; 2468 2469 // We're looking for a shift of a shift. 2470 uint64_t ShlImm = 0; 2471 uint64_t TruncBits = 0; 2472 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 2473 Opd0 = N->getOperand(0).getOperand(0); 2474 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 2475 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 2476 // We are looking for a shift of truncate. Truncate from i64 to i32 could 2477 // be considered as setting high 32 bits as zero. Our strategy here is to 2478 // always generate 64bit UBFM. This consistency will help the CSE pass 2479 // later find more redundancy. 2480 Opd0 = N->getOperand(0).getOperand(0); 2481 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 2482 VT = Opd0.getValueType(); 2483 assert(VT == MVT::i64 && "the promoted type should be i64"); 2484 } else if (BiggerPattern) { 2485 // Let's pretend a 0 shift left has been performed. 2486 // FIXME: Currently we limit this to the bigger pattern case, 2487 // because some optimizations expect AND and not UBFM 2488 Opd0 = N->getOperand(0); 2489 } else 2490 return false; 2491 2492 // Missing combines/constant folding may have left us with strange 2493 // constants. 2494 if (ShlImm >= VT.getSizeInBits()) { 2495 LLVM_DEBUG( 2496 (dbgs() << N 2497 << ": Found large shift immediate, this should not happen\n")); 2498 return false; 2499 } 2500 2501 uint64_t SrlImm = 0; 2502 if (!isIntImmediate(N->getOperand(1), SrlImm)) 2503 return false; 2504 2505 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 2506 "bad amount in shift node!"); 2507 int immr = SrlImm - ShlImm; 2508 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 2509 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 2510 // SRA requires a signed extraction 2511 if (VT == MVT::i32) 2512 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 2513 else 2514 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; 2515 return true; 2516 } 2517 2518 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 2519 assert(N->getOpcode() == ISD::SIGN_EXTEND); 2520 2521 EVT VT = N->getValueType(0); 2522 EVT NarrowVT = N->getOperand(0)->getValueType(0); 2523 if (VT != MVT::i64 || NarrowVT != MVT::i32) 2524 return false; 2525 2526 uint64_t ShiftImm; 2527 SDValue Op = N->getOperand(0); 2528 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2529 return false; 2530 2531 SDLoc dl(N); 2532 // Extend the incoming operand of the shift to 64-bits. 
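  // For example (illustrative values): (i64 (sext (i32 (sra x, 7)))) becomes
  // SBFMXri(widen(x), 7, 31), i.e. a signed bitfield extract of bits [31:7]
  // that is then sign-extended to 64 bits, matching the original sra + sext.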
2533 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 2534 unsigned Immr = ShiftImm; 2535 unsigned Imms = NarrowVT.getSizeInBits() - 1; 2536 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2537 CurDAG->getTargetConstant(Imms, dl, VT)}; 2538 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 2539 return true; 2540 } 2541 2542 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 2543 SDValue &Opd0, unsigned &Immr, unsigned &Imms, 2544 unsigned NumberOfIgnoredLowBits = 0, 2545 bool BiggerPattern = false) { 2546 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 2547 return false; 2548 2549 switch (N->getOpcode()) { 2550 default: 2551 if (!N->isMachineOpcode()) 2552 return false; 2553 break; 2554 case ISD::AND: 2555 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, 2556 NumberOfIgnoredLowBits, BiggerPattern); 2557 case ISD::SRL: 2558 case ISD::SRA: 2559 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); 2560 2561 case ISD::SIGN_EXTEND_INREG: 2562 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); 2563 } 2564 2565 unsigned NOpc = N->getMachineOpcode(); 2566 switch (NOpc) { 2567 default: 2568 return false; 2569 case AArch64::SBFMWri: 2570 case AArch64::UBFMWri: 2571 case AArch64::SBFMXri: 2572 case AArch64::UBFMXri: 2573 Opc = NOpc; 2574 Opd0 = N->getOperand(0); 2575 Immr = N->getConstantOperandVal(1); 2576 Imms = N->getConstantOperandVal(2); 2577 return true; 2578 } 2579 // Unreachable 2580 return false; 2581 } 2582 2583 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { 2584 unsigned Opc, Immr, Imms; 2585 SDValue Opd0; 2586 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) 2587 return false; 2588 2589 EVT VT = N->getValueType(0); 2590 SDLoc dl(N); 2591 2592 // If the bit extract operation is 64bit but the original type is 32bit, we 2593 // need to add one EXTRACT_SUBREG. 2594 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { 2595 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), 2596 CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; 2597 2598 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); 2599 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, 2600 MVT::i32, SDValue(BFM, 0)); 2601 ReplaceNode(N, Inner.getNode()); 2602 return true; 2603 } 2604 2605 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2606 CurDAG->getTargetConstant(Imms, dl, VT)}; 2607 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2608 return true; 2609 } 2610 2611 /// Does DstMask form a complementary pair with the mask provided by 2612 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, 2613 /// this asks whether DstMask zeroes precisely those bits that will be set by 2614 /// the other half. 2615 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, 2616 unsigned NumberOfIgnoredHighBits, EVT VT) { 2617 assert((VT == MVT::i32 || VT == MVT::i64) && 2618 "i32 or i64 mask type expected!"); 2619 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; 2620 2621 APInt SignificantDstMask = APInt(BitWidth, DstMask); 2622 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); 2623 2624 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && 2625 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes(); 2626 } 2627 2628 // Look for bits that will be useful for later uses. 
// A bit is considered useless as soon as it is dropped and is never used
// before it has been dropped.
// E.g., looking for the useful bits of x:
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7; they live on through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used by an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);

static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
  getUsefulBits(Op, UsefulBits, Depth + 1);
}

static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
                                             uint64_t Imm, uint64_t MSB,
                                             unsigned Depth) {
  // Inherit the bit width from UsefulBits.
  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  if (MSB >= Imm) {
    OpUsefulBits <<= MSB - Imm + 1;
    --OpUsefulBits;
    // The interesting part will be in the lower part of the result
    getUsefulBits(Op, OpUsefulBits, Depth + 1);
    // The interesting part was starting at Imm in the argument
    OpUsefulBits <<= Imm;
  } else {
    OpUsefulBits <<= MSB + 1;
    --OpUsefulBits;
    // The interesting part will be shifted in the result
    OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
    getUsefulBits(Op, OpUsefulBits, Depth + 1);
    // The interesting part was at zero in the argument
    OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
  }

  UsefulBits &= OpUsefulBits;
}

static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
                                  unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();

  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
}

static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t ShiftTypeAndValue =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  APInt Mask(UsefulBits);
  Mask.clearAllBits();
  Mask.flipAllBits();

  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
    // Shift Left
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask <<= ShiftAmt;
    getUsefulBits(Op, Mask, Depth + 1);
    Mask.lshrInPlace(ShiftAmt);
  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
    // Shift Right
    // We do not handle AArch64_AM::ASR, because the sign will change the
    // number of useful bits
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask.lshrInPlace(ShiftAmt);
    getUsefulBits(Op, Mask, Depth + 1);
    Mask <<= ShiftAmt;
  } else
    return;

  UsefulBits &= Mask;
}

static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
                                 unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  uint64_t MSB =
cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 2725 2726 APInt OpUsefulBits(UsefulBits); 2727 OpUsefulBits = 1; 2728 2729 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 2730 ResultUsefulBits.flipAllBits(); 2731 APInt Mask(UsefulBits.getBitWidth(), 0); 2732 2733 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 2734 2735 if (MSB >= Imm) { 2736 // The instruction is a BFXIL. 2737 uint64_t Width = MSB - Imm + 1; 2738 uint64_t LSB = Imm; 2739 2740 OpUsefulBits <<= Width; 2741 --OpUsefulBits; 2742 2743 if (Op.getOperand(1) == Orig) { 2744 // Copy the low bits from the result to bits starting from LSB. 2745 Mask = ResultUsefulBits & OpUsefulBits; 2746 Mask <<= LSB; 2747 } 2748 2749 if (Op.getOperand(0) == Orig) 2750 // Bits starting from LSB in the input contribute to the result. 2751 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2752 } else { 2753 // The instruction is a BFI. 2754 uint64_t Width = MSB + 1; 2755 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 2756 2757 OpUsefulBits <<= Width; 2758 --OpUsefulBits; 2759 OpUsefulBits <<= LSB; 2760 2761 if (Op.getOperand(1) == Orig) { 2762 // Copy the bits from the result to the zero bits. 2763 Mask = ResultUsefulBits & OpUsefulBits; 2764 Mask.lshrInPlace(LSB); 2765 } 2766 2767 if (Op.getOperand(0) == Orig) 2768 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2769 } 2770 2771 UsefulBits &= Mask; 2772 } 2773 2774 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2775 SDValue Orig, unsigned Depth) { 2776 2777 // Users of this node should have already been instruction selected 2778 // FIXME: Can we turn that into an assert? 2779 if (!UserNode->isMachineOpcode()) 2780 return; 2781 2782 switch (UserNode->getMachineOpcode()) { 2783 default: 2784 return; 2785 case AArch64::ANDSWri: 2786 case AArch64::ANDSXri: 2787 case AArch64::ANDWri: 2788 case AArch64::ANDXri: 2789 // We increment Depth only when we call the getUsefulBits 2790 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2791 Depth); 2792 case AArch64::UBFMWri: 2793 case AArch64::UBFMXri: 2794 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2795 2796 case AArch64::ORRWrs: 2797 case AArch64::ORRXrs: 2798 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig) 2799 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2800 Depth); 2801 return; 2802 case AArch64::BFMWri: 2803 case AArch64::BFMXri: 2804 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2805 2806 case AArch64::STRBBui: 2807 case AArch64::STURBBi: 2808 if (UserNode->getOperand(0) != Orig) 2809 return; 2810 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); 2811 return; 2812 2813 case AArch64::STRHHui: 2814 case AArch64::STURHHi: 2815 if (UserNode->getOperand(0) != Orig) 2816 return; 2817 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2818 return; 2819 } 2820 } 2821 2822 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2823 if (Depth >= SelectionDAG::MaxRecursionDepth) 2824 return; 2825 // Initialize UsefulBits 2826 if (!Depth) { 2827 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2828 // At the beginning, assume every produced bits is useful 2829 UsefulBits = APInt(Bitwidth, 0); 2830 UsefulBits.flipAllBits(); 2831 } 2832 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2833 2834 for (SDNode *Node : Op.getNode()->uses()) { 2835 // A use cannot produce useful bits 2836 APInt UsefulBitsForUse = APInt(UsefulBits); 2837 getUsefulBitsForUse(Node, 
UsefulBitsForUse, Op, Depth); 2838 UsersUsefulBits |= UsefulBitsForUse; 2839 } 2840 // UsefulBits contains the produced bits that are meaningful for the 2841 // current definition, thus a user cannot make a bit meaningful at 2842 // this point 2843 UsefulBits &= UsersUsefulBits; 2844 } 2845 2846 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 2847 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 2848 /// 0, return Op unchanged. 2849 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 2850 if (ShlAmount == 0) 2851 return Op; 2852 2853 EVT VT = Op.getValueType(); 2854 SDLoc dl(Op); 2855 unsigned BitWidth = VT.getSizeInBits(); 2856 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2857 2858 SDNode *ShiftNode; 2859 if (ShlAmount > 0) { 2860 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 2861 ShiftNode = CurDAG->getMachineNode( 2862 UBFMOpc, dl, VT, Op, 2863 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 2864 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 2865 } else { 2866 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 2867 assert(ShlAmount < 0 && "expected right shift"); 2868 int ShrAmount = -ShlAmount; 2869 ShiftNode = CurDAG->getMachineNode( 2870 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 2871 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 2872 } 2873 2874 return SDValue(ShiftNode, 0); 2875 } 2876 2877 // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)". 2878 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, 2879 bool BiggerPattern, 2880 const uint64_t NonZeroBits, 2881 SDValue &Src, int &DstLSB, 2882 int &Width); 2883 2884 // For bit-field-positioning pattern "shl VAL, N)". 2885 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, 2886 bool BiggerPattern, 2887 const uint64_t NonZeroBits, 2888 SDValue &Src, int &DstLSB, 2889 int &Width); 2890 2891 /// Does this tree qualify as an attempt to move a bitfield into position, 2892 /// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N). 
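/// For example (illustrative), "(and (shl x, 3), 0x1f8)" places the low 6 bits
/// of x at bit 3 of the result, so Src = x, DstLSB = 3 and Width = 6, which is
/// exactly the form UBFIZ (or the insertion half of a BFI) expects.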
2893 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 2894 bool BiggerPattern, SDValue &Src, 2895 int &DstLSB, int &Width) { 2896 EVT VT = Op.getValueType(); 2897 unsigned BitWidth = VT.getSizeInBits(); 2898 (void)BitWidth; 2899 assert(BitWidth == 32 || BitWidth == 64); 2900 2901 KnownBits Known = CurDAG->computeKnownBits(Op); 2902 2903 // Non-zero in the sense that they're not provably zero, which is the key 2904 // point if we want to use this value 2905 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 2906 if (!isShiftedMask_64(NonZeroBits)) 2907 return false; 2908 2909 switch (Op.getOpcode()) { 2910 default: 2911 break; 2912 case ISD::AND: 2913 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern, 2914 NonZeroBits, Src, DstLSB, Width); 2915 case ISD::SHL: 2916 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern, 2917 NonZeroBits, Src, DstLSB, Width); 2918 } 2919 2920 return false; 2921 } 2922 2923 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, 2924 bool BiggerPattern, 2925 const uint64_t NonZeroBits, 2926 SDValue &Src, int &DstLSB, 2927 int &Width) { 2928 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); 2929 2930 EVT VT = Op.getValueType(); 2931 assert((VT == MVT::i32 || VT == MVT::i64) && 2932 "Caller guarantees VT is one of i32 or i64"); 2933 (void)VT; 2934 2935 uint64_t AndImm; 2936 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) 2937 return false; 2938 2939 // If (~AndImm & NonZeroBits) is not zero at POS, we know that 2940 // 1) (AndImm & (1 << POS) == 0) 2941 // 2) the result of AND is not zero at POS bit (according to NonZeroBits) 2942 // 2943 // 1) and 2) don't agree so something must be wrong (e.g., in 2944 // 'SelectionDAG::computeKnownBits') 2945 assert((~AndImm & NonZeroBits) == 0 && 2946 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)"); 2947 2948 SDValue AndOp0 = Op.getOperand(0); 2949 2950 uint64_t ShlImm; 2951 SDValue ShlOp0; 2952 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) { 2953 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'. 2954 ShlOp0 = AndOp0.getOperand(0); 2955 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND && 2956 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL, 2957 ShlImm)) { 2958 // For pattern "and(any_extend(shl(val, N)), shifted-mask)" 2959 2960 // ShlVal == shl(val, N), which is a left shift on a smaller type. 2961 SDValue ShlVal = AndOp0.getOperand(0); 2962 2963 // Since this is after type legalization and ShlVal is extended to MVT::i64, 2964 // expect VT to be MVT::i32. 2965 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32."); 2966 2967 // Widens 'val' to MVT::i64 as the source of bit field positioning. 2968 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0)); 2969 } else 2970 return false; 2971 2972 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since 2973 // then we'll end up generating AndOp0+UBFIZ instead of just keeping 2974 // AndOp0+AND. 2975 if (!BiggerPattern && !AndOp0.hasOneUse()) 2976 return false; 2977 2978 DstLSB = llvm::countr_zero(NonZeroBits); 2979 Width = llvm::countr_one(NonZeroBits >> DstLSB); 2980 2981 // Bail out on large Width. This happens when no proper combining / constant 2982 // folding was performed. 
  if (Width >= (int)VT.getSizeInBits()) {
    // If VT is i64, Width > 64 is not sensible, since NonZeroBits is uint64_t,
    // and Width == 64 indicates a missed dag-combine from "(and val, AllOnes)"
    // to "val".
    // If VT is i32, Width >= 32 means:
    // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
    //   demands at least 'Width' bits (after dag-combiner). This, together
    //   with the `any_extend` Op (undefined higher bits), indicates a missed
    //   combine when lowering the 'and' IR instruction to a machine IR
    //   instruction.
    LLVM_DEBUG(
        dbgs()
        << "Found large Width in bit-field-positioning -- this indicates no "
           "proper combining / constant folding was performed\n");
    return false;
  }

  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
  // amount. BiggerPattern is true when this pattern is being matched for BFI,
  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
  // which case it is not profitable to insert an extra shift.
  if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
    return false;

  Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
  return true;
}

// For a node (shl (and val, mask), N), returns true if the node is equivalent
// to UBFIZ.
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
                                              SDValue &Src, int &DstLSB,
                                              int &Width) {
  // The caller should have verified that Op is a left shift with a constant
  // shift amount; the asserts below check that.
  assert(Op.getOpcode() == ISD::SHL &&
         "Op.getNode() should be a SHL node to call this function");
  assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
         "Op.getNode() should shift ShlImm to call this function");

  uint64_t AndImm = 0;
  SDValue Op0 = Op.getOperand(0);
  if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
    return false;

  const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
  if (isMask_64(ShiftedAndImm)) {
    // Ignoring its top ShlImm bits (which the left shift discards), AndImm
    // must be a mask of contiguous low ones; the discarded top bits may be
    // arbitrary, since they are shifted out anyway.
    //
    // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
    // the bits of the AND result that correspond to x, y and z are shifted
    // out, so it's fine not to extract them.
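    // Illustrative example: ShlImm = 8 and AndImm = 0x3f give
    // ShiftedAndImm = 0x3f, so (shl (and val, 0x3f), 8) is reported with
    // DstLSB = 8 and Width = 6, which the caller can turn into
    // UBFIZ dst, val, #8, #6.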
3037 Width = llvm::countr_one(ShiftedAndImm); 3038 DstLSB = ShlImm; 3039 Src = Op0.getOperand(0); 3040 return true; 3041 } 3042 return false; 3043 } 3044 3045 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, 3046 bool BiggerPattern, 3047 const uint64_t NonZeroBits, 3048 SDValue &Src, int &DstLSB, 3049 int &Width) { 3050 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); 3051 3052 EVT VT = Op.getValueType(); 3053 assert((VT == MVT::i32 || VT == MVT::i64) && 3054 "Caller guarantees that type is i32 or i64"); 3055 (void)VT; 3056 3057 uint64_t ShlImm; 3058 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 3059 return false; 3060 3061 if (!BiggerPattern && !Op.hasOneUse()) 3062 return false; 3063 3064 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width)) 3065 return true; 3066 3067 DstLSB = llvm::countr_zero(NonZeroBits); 3068 Width = llvm::countr_one(NonZeroBits >> DstLSB); 3069 3070 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern) 3071 return false; 3072 3073 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB); 3074 return true; 3075 } 3076 3077 static bool isShiftedMask(uint64_t Mask, EVT VT) { 3078 assert(VT == MVT::i32 || VT == MVT::i64); 3079 if (VT == MVT::i32) 3080 return isShiftedMask_32(Mask); 3081 return isShiftedMask_64(Mask); 3082 } 3083 3084 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 3085 // inserted only sets known zero bits. 3086 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 3087 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 3088 3089 EVT VT = N->getValueType(0); 3090 if (VT != MVT::i32 && VT != MVT::i64) 3091 return false; 3092 3093 unsigned BitWidth = VT.getSizeInBits(); 3094 3095 uint64_t OrImm; 3096 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 3097 return false; 3098 3099 // Skip this transformation if the ORR immediate can be encoded in the ORR. 3100 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 3101 // performance neutral. 3102 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 3103 return false; 3104 3105 uint64_t MaskImm; 3106 SDValue And = N->getOperand(0); 3107 // Must be a single use AND with an immediate operand. 3108 if (!And.hasOneUse() || 3109 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 3110 return false; 3111 3112 // Compute the Known Zero for the AND as this allows us to catch more general 3113 // cases than just looking for AND with imm. 3114 KnownBits Known = CurDAG->computeKnownBits(And); 3115 3116 // Non-zero in the sense that they're not provably zero, which is the key 3117 // point if we want to use this value. 3118 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 3119 3120 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 3121 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 3122 return false; 3123 3124 // The bits being inserted must only set those bits that are known to be zero. 3125 if ((OrImm & NotKnownZero) != 0) { 3126 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 3127 // currently handle this case. 3128 return false; 3129 } 3130 3131 // BFI/BFXIL dst, src, #lsb, #width. 3132 int LSB = llvm::countr_one(NotKnownZero); 3133 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount(); 3134 3135 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 
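  // The alias expansion used here is BFI Rd, Rn, #lsb, #width
  // == BFM Rd, Rn, #((BitWidth - lsb) % BitWidth), #(width - 1); for BFXIL
  // (lsb == 0) this degenerates to immr = 0, imms = width - 1.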
3136 unsigned ImmR = (BitWidth - LSB) % BitWidth; 3137 unsigned ImmS = Width - 1; 3138 3139 // If we're creating a BFI instruction avoid cases where we need more 3140 // instructions to materialize the BFI constant as compared to the original 3141 // ORR. A BFXIL will use the same constant as the original ORR, so the code 3142 // should be no worse in this case. 3143 bool IsBFI = LSB != 0; 3144 uint64_t BFIImm = OrImm >> LSB; 3145 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 3146 // We have a BFI instruction and we know the constant can't be materialized 3147 // with a ORR-immediate with the zero register. 3148 unsigned OrChunks = 0, BFIChunks = 0; 3149 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 3150 if (((OrImm >> Shift) & 0xFFFF) != 0) 3151 ++OrChunks; 3152 if (((BFIImm >> Shift) & 0xFFFF) != 0) 3153 ++BFIChunks; 3154 } 3155 if (BFIChunks > OrChunks) 3156 return false; 3157 } 3158 3159 // Materialize the constant to be inserted. 3160 SDLoc DL(N); 3161 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; 3162 SDNode *MOVI = CurDAG->getMachineNode( 3163 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); 3164 3165 // Create the BFI/BFXIL instruction. 3166 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), 3167 CurDAG->getTargetConstant(ImmR, DL, VT), 3168 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3169 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 3170 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3171 return true; 3172 } 3173 3174 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, 3175 SDValue &ShiftedOperand, 3176 uint64_t &EncodedShiftImm) { 3177 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR. 3178 if (!Dst.hasOneUse()) 3179 return false; 3180 3181 EVT VT = Dst.getValueType(); 3182 assert((VT == MVT::i32 || VT == MVT::i64) && 3183 "Caller should guarantee that VT is one of i32 or i64"); 3184 const unsigned SizeInBits = VT.getSizeInBits(); 3185 3186 SDLoc DL(Dst.getNode()); 3187 uint64_t AndImm, ShlImm; 3188 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) && 3189 isShiftedMask_64(AndImm)) { 3190 // Avoid transforming 'DstOp0' if it has other uses than the AND node. 3191 SDValue DstOp0 = Dst.getOperand(0); 3192 if (!DstOp0.hasOneUse()) 3193 return false; 3194 3195 // An example to illustrate the transformation 3196 // From: 3197 // lsr x8, x1, #1 3198 // and x8, x8, #0x3f80 3199 // bfxil x8, x1, #0, #7 3200 // To: 3201 // and x8, x23, #0x7f 3202 // ubfx x9, x23, #8, #7 3203 // orr x23, x8, x9, lsl #7 3204 // 3205 // The number of instructions remains the same, but ORR is faster than BFXIL 3206 // on many AArch64 processors (or as good as BFXIL if not faster). Besides, 3207 // the dependency chain is improved after the transformation. 3208 uint64_t SrlImm; 3209 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) { 3210 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm); 3211 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) { 3212 unsigned MaskWidth = 3213 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask); 3214 unsigned UBFMOpc = 3215 (VT == MVT::i32) ? 
AArch64::UBFMWri : AArch64::UBFMXri; 3216 SDNode *UBFMNode = CurDAG->getMachineNode( 3217 UBFMOpc, DL, VT, DstOp0.getOperand(0), 3218 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL, 3219 VT), 3220 CurDAG->getTargetConstant( 3221 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT)); 3222 ShiftedOperand = SDValue(UBFMNode, 0); 3223 EncodedShiftImm = AArch64_AM::getShifterImm( 3224 AArch64_AM::LSL, NumTrailingZeroInShiftedMask); 3225 return true; 3226 } 3227 } 3228 return false; 3229 } 3230 3231 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) { 3232 ShiftedOperand = Dst.getOperand(0); 3233 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm); 3234 return true; 3235 } 3236 3237 uint64_t SrlImm; 3238 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) { 3239 ShiftedOperand = Dst.getOperand(0); 3240 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm); 3241 return true; 3242 } 3243 return false; 3244 } 3245 3246 // Given an 'ISD::OR' node that is going to be selected as BFM, analyze 3247 // the operands and select it to AArch64::ORR with shifted registers if 3248 // that's more efficient. Returns true iff selection to AArch64::ORR happens. 3249 static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, 3250 SDValue Src, SDValue Dst, SelectionDAG *CurDAG, 3251 const bool BiggerPattern) { 3252 EVT VT = N->getValueType(0); 3253 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node"); 3254 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) || 3255 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) && 3256 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR"); 3257 assert((VT == MVT::i32 || VT == MVT::i64) && 3258 "Expect result type to be i32 or i64 since N is combinable to BFM"); 3259 SDLoc DL(N); 3260 3261 // Bail out if BFM simplifies away one node in BFM Dst. 3262 if (OrOpd1 != Dst) 3263 return false; 3264 3265 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs; 3266 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer 3267 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true. 3268 if (BiggerPattern) { 3269 uint64_t SrcAndImm; 3270 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) && 3271 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) { 3272 // OrOpd0 = AND Src, #Mask 3273 // So BFM simplifies away one AND node from Src and doesn't simplify away 3274 // nodes from Dst. If ORR with left-shifted operand also simplifies away 3275 // one node (from Rd), ORR is better since it has higher throughput and 3276 // smaller latency than BFM on many AArch64 processors (and for the rest 3277 // ORR is at least as good as BFM). 
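      //
      // For instance, with the numbers from the example shown in
      // isWorthFoldingIntoOrrWithShift above: Dst = (and (srl X, 1), 0x3f80)
      // yields a UBFM with immr = 8 and imms = 14 (i.e. "ubfx #8" of width 7)
      // plus an LSL #7 shifted-operand encoding, which is exactly the
      // "ubfx ...; orr ..., lsl #7" sequence in that example.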
3278 SDValue ShiftedOperand; 3279 uint64_t EncodedShiftImm; 3280 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand, 3281 EncodedShiftImm)) { 3282 SDValue Ops[] = {OrOpd0, ShiftedOperand, 3283 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)}; 3284 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3285 return true; 3286 } 3287 } 3288 return false; 3289 } 3290 3291 assert((!BiggerPattern) && "BiggerPattern should be handled above"); 3292 3293 uint64_t ShlImm; 3294 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) { 3295 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) { 3296 SDValue Ops[] = { 3297 Dst, Src, 3298 CurDAG->getTargetConstant( 3299 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; 3300 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3301 return true; 3302 } 3303 3304 // Select the following pattern to left-shifted operand rather than BFI. 3305 // %val1 = op .. 3306 // %val2 = shl %val1, #imm 3307 // %res = or %val1, %val2 3308 // 3309 // If N is selected to be BFI, we know that 3310 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into 3311 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved) 3312 // 3313 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly. 3314 if (OrOpd0.getOperand(0) == OrOpd1) { 3315 SDValue Ops[] = { 3316 OrOpd1, OrOpd1, 3317 CurDAG->getTargetConstant( 3318 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; 3319 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3320 return true; 3321 } 3322 } 3323 3324 uint64_t SrlImm; 3325 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) { 3326 // Select the following pattern to right-shifted operand rather than BFXIL. 3327 // %val1 = op .. 3328 // %val2 = lshr %val1, #imm 3329 // %res = or %val1, %val2 3330 // 3331 // If N is selected to be BFXIL, we know that 3332 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into 3333 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved) 3334 // 3335 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly. 3336 if (OrOpd0.getOperand(0) == OrOpd1) { 3337 SDValue Ops[] = { 3338 OrOpd1, OrOpd1, 3339 CurDAG->getTargetConstant( 3340 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)}; 3341 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3342 return true; 3343 } 3344 } 3345 3346 return false; 3347 } 3348 3349 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, 3350 SelectionDAG *CurDAG) { 3351 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 3352 3353 EVT VT = N->getValueType(0); 3354 if (VT != MVT::i32 && VT != MVT::i64) 3355 return false; 3356 3357 unsigned BitWidth = VT.getSizeInBits(); 3358 3359 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 3360 // have the expected shape. Try to undo that. 
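  //
  // For example (hypothetical values): if UsefulBits == 0x00ffff00 for an
  // i32 node, then NumberOfIgnoredLowBits == 8 and
  // NumberOfIgnoredHighBits == 8 in the computation below.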

  unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();

  // Given an OR operation, check if we have the following pattern
  // ubfm c, b, imm, imm2 (or something that does the same job, see
  // isBitfieldExtractOp)
  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
  //                 countTrailingZeros(mask2) == imm2 - imm + 1
  // f = d | c
  // if yes, replace the OR instruction with:
  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2

  // OR is commutative, check all combinations of operand order and values of
  // BiggerPattern, i.e.
  //     Opd0, Opd1, BiggerPattern=false
  //     Opd1, Opd0, BiggerPattern=false
  //     Opd0, Opd1, BiggerPattern=true
  //     Opd1, Opd0, BiggerPattern=true
  // Several of these combinations may match, so check with BiggerPattern=false
  // first since that will produce better results by matching more instructions
  // and/or inserting fewer extra instructions.
  for (int I = 0; I < 4; ++I) {

    SDValue Dst, Src;
    unsigned ImmR, ImmS;
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();

    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S).
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion.
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint only catches bitfield insertion; we may want
      // to widen the pattern if we want to grab the general bitfield move
      // case.
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, BiggerPattern, Src,
                                       DstLSB, Width)) {
      ImmR = (BitWidth - DstLSB) % BitWidth;
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern.
    EVT VT = OrOpd1Val.getValueType();
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the Known Zero for the candidate of the first operand. This
    // allows us to catch more general cases than just looking for an AND
    // with an immediate. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proved it was useless.
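    //
    // Illustration (hypothetical values): with DstLSB == 8 and Width == 8,
    // BitsToBeInserted below is 0x0000ff00; the insertion is only safe if
    // those bits of OrOpd1Val are provably zero, even when the AND that
    // originally cleared them has been optimised away.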
3430 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 3431 3432 // Check if there is enough room for the second operand to appear 3433 // in the first one 3434 APInt BitsToBeInserted = 3435 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 3436 3437 if ((BitsToBeInserted & ~Known.Zero) != 0) 3438 continue; 3439 3440 // Set the first operand 3441 uint64_t Imm; 3442 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 3443 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 3444 // In that case, we can eliminate the AND 3445 Dst = OrOpd1->getOperand(0); 3446 else 3447 // Maybe the AND has been removed by simplify-demanded-bits 3448 // or is useful because it discards more bits 3449 Dst = OrOpd1Val; 3450 3451 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR 3452 // with shifted operand is more efficient. 3453 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG, 3454 BiggerPattern)) 3455 return true; 3456 3457 // both parts match 3458 SDLoc DL(N); 3459 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 3460 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3461 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 3462 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3463 return true; 3464 } 3465 3466 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 3467 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 3468 // mask (e.g., 0x000ffff0). 3469 uint64_t Mask0Imm, Mask1Imm; 3470 SDValue And0 = N->getOperand(0); 3471 SDValue And1 = N->getOperand(1); 3472 if (And0.hasOneUse() && And1.hasOneUse() && 3473 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 3474 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 3475 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 3476 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 3477 3478 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 3479 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 3480 // bits to be inserted. 3481 if (isShiftedMask(Mask0Imm, VT)) { 3482 std::swap(And0, And1); 3483 std::swap(Mask0Imm, Mask1Imm); 3484 } 3485 3486 SDValue Src = And1->getOperand(0); 3487 SDValue Dst = And0->getOperand(0); 3488 unsigned LSB = llvm::countr_zero(Mask1Imm); 3489 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount(); 3490 3491 // The BFXIL inserts the low-order bits from a source register, so right 3492 // shift the needed bits into place. 3493 SDLoc DL(N); 3494 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 3495 uint64_t LsrImm = LSB; 3496 if (Src->hasOneUse() && 3497 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) && 3498 (LsrImm + LSB) < BitWidth) { 3499 Src = Src->getOperand(0); 3500 LsrImm += LSB; 3501 } 3502 3503 SDNode *LSR = CurDAG->getMachineNode( 3504 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT), 3505 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 3506 3507 // BFXIL is an alias of BFM, so translate to BFM operands. 3508 unsigned ImmR = (BitWidth - LSB) % BitWidth; 3509 unsigned ImmS = Width - 1; 3510 3511 // Create the BFXIL instruction. 3512 SDValue Ops[] = {Dst, SDValue(LSR, 0), 3513 CurDAG->getTargetConstant(ImmR, DL, VT), 3514 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3515 unsigned Opc = (VT == MVT::i32) ? 
AArch64::BFMWri : AArch64::BFMXri; 3516 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3517 return true; 3518 } 3519 3520 return false; 3521 } 3522 3523 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 3524 if (N->getOpcode() != ISD::OR) 3525 return false; 3526 3527 APInt NUsefulBits; 3528 getUsefulBits(SDValue(N, 0), NUsefulBits); 3529 3530 // If all bits are not useful, just return UNDEF. 3531 if (!NUsefulBits) { 3532 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 3533 return true; 3534 } 3535 3536 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 3537 return true; 3538 3539 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 3540 } 3541 3542 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 3543 /// equivalent of a left shift by a constant amount followed by an and masking 3544 /// out a contiguous set of bits. 3545 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 3546 if (N->getOpcode() != ISD::AND) 3547 return false; 3548 3549 EVT VT = N->getValueType(0); 3550 if (VT != MVT::i32 && VT != MVT::i64) 3551 return false; 3552 3553 SDValue Op0; 3554 int DstLSB, Width; 3555 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 3556 Op0, DstLSB, Width)) 3557 return false; 3558 3559 // ImmR is the rotate right amount. 3560 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 3561 // ImmS is the most significant bit of the source to be moved. 3562 unsigned ImmS = Width - 1; 3563 3564 SDLoc DL(N); 3565 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 3566 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3567 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 3568 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3569 return true; 3570 } 3571 3572 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 3573 /// variable shift/rotate instructions. 3574 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 3575 EVT VT = N->getValueType(0); 3576 3577 unsigned Opc; 3578 switch (N->getOpcode()) { 3579 case ISD::ROTR: 3580 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 3581 break; 3582 case ISD::SHL: 3583 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 3584 break; 3585 case ISD::SRL: 3586 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 3587 break; 3588 case ISD::SRA: 3589 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 3590 break; 3591 default: 3592 return false; 3593 } 3594 3595 uint64_t Size; 3596 uint64_t Bits; 3597 if (VT == MVT::i32) { 3598 Bits = 5; 3599 Size = 32; 3600 } else if (VT == MVT::i64) { 3601 Bits = 6; 3602 Size = 64; 3603 } else 3604 return false; 3605 3606 SDValue ShiftAmt = N->getOperand(1); 3607 SDLoc DL(N); 3608 SDValue NewShiftAmt; 3609 3610 // Skip over an extend of the shift amount. 3611 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 3612 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 3613 ShiftAmt = ShiftAmt->getOperand(0); 3614 3615 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 3616 SDValue Add0 = ShiftAmt->getOperand(0); 3617 SDValue Add1 = ShiftAmt->getOperand(1); 3618 uint64_t Add0Imm; 3619 uint64_t Add1Imm; 3620 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) { 3621 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 3622 // to avoid the ADD/SUB. 
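      // For example (illustrative): for an i64 shift, (srl X, (add Y, 64)) is
      // selected as a plain "lsrv" of X by Y, because the variable shift only
      // reads the low six bits of its amount operand, so adding a multiple of
      // 64 changes nothing.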
3623 NewShiftAmt = Add0; 3624 } else if (ShiftAmt->getOpcode() == ISD::SUB && 3625 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 3626 (Add0Imm % Size == 0)) { 3627 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X 3628 // to generate a NEG instead of a SUB from a constant. 3629 unsigned NegOpc; 3630 unsigned ZeroReg; 3631 EVT SubVT = ShiftAmt->getValueType(0); 3632 if (SubVT == MVT::i32) { 3633 NegOpc = AArch64::SUBWrr; 3634 ZeroReg = AArch64::WZR; 3635 } else { 3636 assert(SubVT == MVT::i64); 3637 NegOpc = AArch64::SUBXrr; 3638 ZeroReg = AArch64::XZR; 3639 } 3640 SDValue Zero = 3641 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 3642 MachineSDNode *Neg = 3643 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 3644 NewShiftAmt = SDValue(Neg, 0); 3645 } else if (ShiftAmt->getOpcode() == ISD::SUB && 3646 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) { 3647 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X 3648 // to generate a NOT instead of a SUB from a constant. 3649 unsigned NotOpc; 3650 unsigned ZeroReg; 3651 EVT SubVT = ShiftAmt->getValueType(0); 3652 if (SubVT == MVT::i32) { 3653 NotOpc = AArch64::ORNWrr; 3654 ZeroReg = AArch64::WZR; 3655 } else { 3656 assert(SubVT == MVT::i64); 3657 NotOpc = AArch64::ORNXrr; 3658 ZeroReg = AArch64::XZR; 3659 } 3660 SDValue Zero = 3661 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 3662 MachineSDNode *Not = 3663 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1); 3664 NewShiftAmt = SDValue(Not, 0); 3665 } else 3666 return false; 3667 } else { 3668 // If the shift amount is masked with an AND, check that the mask covers the 3669 // bits that are implicitly ANDed off by the above opcodes and if so, skip 3670 // the AND. 3671 uint64_t MaskImm; 3672 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && 3673 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) 3674 return false; 3675 3676 if ((unsigned)llvm::countr_one(MaskImm) < Bits) 3677 return false; 3678 3679 NewShiftAmt = ShiftAmt->getOperand(0); 3680 } 3681 3682 // Narrow/widen the shift amount to match the size of the shift operation. 3683 if (VT == MVT::i32) 3684 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 3685 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 3686 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 3687 MachineSDNode *Ext = CurDAG->getMachineNode( 3688 AArch64::SUBREG_TO_REG, DL, VT, 3689 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 3690 NewShiftAmt = SDValue(Ext, 0); 3691 } 3692 3693 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 3694 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3695 return true; 3696 } 3697 3698 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, 3699 SDValue &FixedPos, 3700 unsigned RegWidth, 3701 bool isReciprocal) { 3702 APFloat FVal(0.0); 3703 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 3704 FVal = CN->getValueAPF(); 3705 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 3706 // Some otherwise illegal constants are allowed in this case. 
    if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
      return false;

    ConstantPoolSDNode *CN =
        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
  } else
    return false;

  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
  // x-register.
  //
  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
  // integers.
  bool IsExact;

  if (isReciprocal)
    if (!FVal.getExactInverse(&FVal))
      return false;

  // fbits is between 1 and 64 in the worst case, which means the fmul
  // could have 2^64 as an actual operand. Need 65 bits of precision.
  APSInt IntVal(65, true);
  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);

  // N.b. isPowerOf2 also checks for > 0.
  if (!IsExact || !IntVal.isPowerOf2())
    return false;
  unsigned FBits = IntVal.logBase2();

  // Checks above should have guaranteed that we haven't lost information in
  // finding FBits, but it must still be in range.
  if (FBits == 0 || FBits > RegWidth)
    return false;

  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
  return true;
}

bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                                   unsigned RegWidth) {
  return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
                                            false);
}

bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
                                                        SDValue &FixedPos,
                                                        unsigned RegWidth) {
  return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
                                            true);
}

// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
// integer values from the fields of the string, and combines them into a
// single value to be used in the MRS/MSR instruction.
static int getIntOperandFromRegisterString(StringRef RegString) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() == 1)
    return -1;

  assert(Fields.size() == 5 &&
         "Invalid number of fields in read register string");

  SmallVector<int, 5> Ops;
  bool AllIntFields = true;

  for (StringRef Field : Fields) {
    unsigned IntField;
    AllIntFields &= !Field.getAsInteger(10, IntField);
    Ops.push_back(IntField);
  }

  assert(AllIntFields &&
         "Unexpected non-integer value in special register string.");
  (void)AllIntFields;

  // Combine the integer fields of the string into a single value based on
  // the bit encoding of the MRS/MSR instruction.
  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | (Ops[3] << 3) |
         (Ops[4]);
}

// Lower the read_register intrinsic to an MRS instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
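//
// For illustration: the generic-string path packs the fields exactly as
// getIntOperandFromRegisterString does above; e.g. "3:3:13:0:2" (TPIDR_EL0,
// i.e. S3_3_C13_C0_2) yields (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2
// == 0xde82, the system-register encoding used by the MRS instruction.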
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;

  unsigned Opcode64Bit = AArch64::MRS;
  int Imm = getIntOperandFromRegisterString(RegString->getString());
  if (Imm == -1) {
    // No match. Use the sysreg mapper to map the remaining possible strings
    // to the value for the register to be used for the instruction operand.
    const auto *TheReg =
        AArch64SysReg::lookupSysRegByName(RegString->getString());
    if (TheReg && TheReg->Readable &&
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());

    if (Imm == -1) {
      // Still no match, see if this is "pc" or give up.
      if (!ReadIs128Bit && RegString->getString() == "pc") {
        Opcode64Bit = AArch64::ADR;
        Imm = 0;
      } else {
        return false;
      }
    }
  }

  SDValue InChain = N->getOperand(0);
  SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
  if (!ReadIs128Bit) {
    CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
                         {SysRegImm, InChain});
  } else {
    SDNode *MRRS = CurDAG->getMachineNode(
        AArch64::MRRS, DL,
        {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
        {SysRegImm, InChain});

    // Sysregs are not endian. The even register always contains the low half
    // of the register.
    SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue OutChain = SDValue(MRRS, 1);

    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), OutChain);
  }
  return true;
}

// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;

  if (!WriteIs128Bit) {
    // Check if the register was one of those allowed as the pstatefield value
    // in the MSR (immediate) instruction. To accept the values allowed in the
    // pstatefield for the MSR (immediate) instruction, we also require that an
    // immediate value has been provided as an argument; we know that this is
    // the case as it has been ensured by semantic checking.
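    //
    // For example (illustrative): writing the constant 1 to the SPSel pstate
    // field takes the MSRpstateImm4 path below, i.e. the "msr SPSel, #1"
    // immediate form, rather than a general MSR (register).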
3871 auto trySelectPState = [&](auto PMapper, unsigned State) { 3872 if (PMapper) { 3873 assert(isa<ConstantSDNode>(N->getOperand(2)) && 3874 "Expected a constant integer expression."); 3875 unsigned Reg = PMapper->Encoding; 3876 uint64_t Immed = N->getConstantOperandVal(2); 3877 CurDAG->SelectNodeTo( 3878 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3879 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0)); 3880 return true; 3881 } 3882 return false; 3883 }; 3884 3885 if (trySelectPState( 3886 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()), 3887 AArch64::MSRpstateImm4)) 3888 return true; 3889 if (trySelectPState( 3890 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()), 3891 AArch64::MSRpstateImm1)) 3892 return true; 3893 } 3894 3895 int Imm = getIntOperandFromRegisterString(RegString->getString()); 3896 if (Imm == -1) { 3897 // Use the sysreg mapper to attempt to map the remaining possible strings 3898 // to the value for the register to be used for the MSR (register) 3899 // instruction operand. 3900 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 3901 if (TheReg && TheReg->Writeable && 3902 TheReg->haveFeatures(Subtarget->getFeatureBits())) 3903 Imm = TheReg->Encoding; 3904 else 3905 Imm = AArch64SysReg::parseGenericRegister(RegString->getString()); 3906 3907 if (Imm == -1) 3908 return false; 3909 } 3910 3911 SDValue InChain = N->getOperand(0); 3912 if (!WriteIs128Bit) { 3913 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other, 3914 CurDAG->getTargetConstant(Imm, DL, MVT::i32), 3915 N->getOperand(2), InChain); 3916 } else { 3917 // No endian swap. The lower half always goes into the even subreg, and the 3918 // higher half always into the odd supreg. 3919 SDNode *Pair = CurDAG->getMachineNode( 3920 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */, 3921 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL, 3922 MVT::i32), 3923 N->getOperand(2), 3924 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32), 3925 N->getOperand(3), 3926 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)}); 3927 3928 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other, 3929 CurDAG->getTargetConstant(Imm, DL, MVT::i32), 3930 SDValue(Pair, 0), InChain); 3931 } 3932 3933 return true; 3934 } 3935 3936 /// We've got special pseudo-instructions for these 3937 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3938 unsigned Opcode; 3939 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3940 3941 // Leave IR for LSE if subtarget supports it. 3942 if (Subtarget->hasLSE()) return false; 3943 3944 if (MemTy == MVT::i8) 3945 Opcode = AArch64::CMP_SWAP_8; 3946 else if (MemTy == MVT::i16) 3947 Opcode = AArch64::CMP_SWAP_16; 3948 else if (MemTy == MVT::i32) 3949 Opcode = AArch64::CMP_SWAP_32; 3950 else if (MemTy == MVT::i64) 3951 Opcode = AArch64::CMP_SWAP_64; 3952 else 3953 llvm_unreachable("Unknown AtomicCmpSwap type"); 3954 3955 MVT RegTy = MemTy == MVT::i64 ? 
MVT::i64 : MVT::i32; 3956 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3957 N->getOperand(0)}; 3958 SDNode *CmpSwap = CurDAG->getMachineNode( 3959 Opcode, SDLoc(N), 3960 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 3961 3962 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3963 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3964 3965 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3966 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3967 CurDAG->RemoveDeadNode(N); 3968 3969 return true; 3970 } 3971 3972 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, 3973 SDValue &Shift) { 3974 if (!isa<ConstantSDNode>(N)) 3975 return false; 3976 3977 SDLoc DL(N); 3978 uint64_t Val = cast<ConstantSDNode>(N) 3979 ->getAPIntValue() 3980 .trunc(VT.getFixedSizeInBits()) 3981 .getZExtValue(); 3982 3983 switch (VT.SimpleTy) { 3984 case MVT::i8: 3985 // All immediates are supported. 3986 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3987 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 3988 return true; 3989 case MVT::i16: 3990 case MVT::i32: 3991 case MVT::i64: 3992 // Support 8bit unsigned immediates. 3993 if (Val <= 255) { 3994 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3995 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 3996 return true; 3997 } 3998 // Support 16bit unsigned immediates that are a multiple of 256. 3999 if (Val <= 65280 && Val % 256 == 0) { 4000 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 4001 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); 4002 return true; 4003 } 4004 break; 4005 default: 4006 break; 4007 } 4008 4009 return false; 4010 } 4011 4012 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, 4013 SDValue &Shift) { 4014 if (!isa<ConstantSDNode>(N)) 4015 return false; 4016 4017 SDLoc DL(N); 4018 int64_t Val = cast<ConstantSDNode>(N) 4019 ->getAPIntValue() 4020 .trunc(VT.getFixedSizeInBits()) 4021 .getSExtValue(); 4022 4023 switch (VT.SimpleTy) { 4024 case MVT::i8: 4025 // All immediates are supported. 4026 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 4027 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 4028 return true; 4029 case MVT::i16: 4030 case MVT::i32: 4031 case MVT::i64: 4032 // Support 8bit signed immediates. 4033 if (Val >= -128 && Val <= 127) { 4034 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 4035 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 4036 return true; 4037 } 4038 // Support 16bit signed immediates that are a multiple of 256. 
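    // For example (illustrative): Val == 4608 (0x1200) is emitted below as
    // Imm == 0x12 with Shift == 8, matching the "#18, lsl #8" form of the
    // SVE CPY/DUP immediate.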
4039 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { 4040 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 4041 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32); 4042 return true; 4043 } 4044 break; 4045 default: 4046 break; 4047 } 4048 4049 return false; 4050 } 4051 4052 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { 4053 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 4054 int64_t ImmVal = CNode->getSExtValue(); 4055 SDLoc DL(N); 4056 if (ImmVal >= -128 && ImmVal < 128) { 4057 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 4058 return true; 4059 } 4060 } 4061 return false; 4062 } 4063 4064 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { 4065 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 4066 uint64_t ImmVal = CNode->getZExtValue(); 4067 4068 switch (VT.SimpleTy) { 4069 case MVT::i8: 4070 ImmVal &= 0xFF; 4071 break; 4072 case MVT::i16: 4073 ImmVal &= 0xFFFF; 4074 break; 4075 case MVT::i32: 4076 ImmVal &= 0xFFFFFFFF; 4077 break; 4078 case MVT::i64: 4079 break; 4080 default: 4081 llvm_unreachable("Unexpected type"); 4082 } 4083 4084 if (ImmVal < 256) { 4085 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 4086 return true; 4087 } 4088 } 4089 return false; 4090 } 4091 4092 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, 4093 bool Invert) { 4094 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 4095 uint64_t ImmVal = CNode->getZExtValue(); 4096 SDLoc DL(N); 4097 4098 if (Invert) 4099 ImmVal = ~ImmVal; 4100 4101 // Shift mask depending on type size. 4102 switch (VT.SimpleTy) { 4103 case MVT::i8: 4104 ImmVal &= 0xFF; 4105 ImmVal |= ImmVal << 8; 4106 ImmVal |= ImmVal << 16; 4107 ImmVal |= ImmVal << 32; 4108 break; 4109 case MVT::i16: 4110 ImmVal &= 0xFFFF; 4111 ImmVal |= ImmVal << 16; 4112 ImmVal |= ImmVal << 32; 4113 break; 4114 case MVT::i32: 4115 ImmVal &= 0xFFFFFFFF; 4116 ImmVal |= ImmVal << 32; 4117 break; 4118 case MVT::i64: 4119 break; 4120 default: 4121 llvm_unreachable("Unexpected type"); 4122 } 4123 4124 uint64_t encoding; 4125 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { 4126 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); 4127 return true; 4128 } 4129 } 4130 return false; 4131 } 4132 4133 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. 4134 // Rather than attempt to normalise everything we can sometimes saturate the 4135 // shift amount during selection. This function also allows for consistent 4136 // isel patterns by ensuring the resulting "Imm" node is of the i32 type 4137 // required by the instructions. 4138 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, 4139 uint64_t High, bool AllowSaturation, 4140 SDValue &Imm) { 4141 if (auto *CN = dyn_cast<ConstantSDNode>(N)) { 4142 uint64_t ImmVal = CN->getZExtValue(); 4143 4144 // Reject shift amounts that are too small. 4145 if (ImmVal < Low) 4146 return false; 4147 4148 // Reject or saturate shift amounts that are too big. 4149 if (ImmVal > High) { 4150 if (!AllowSaturation) 4151 return false; 4152 ImmVal = High; 4153 } 4154 4155 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 4156 return true; 4157 } 4158 4159 return false; 4160 } 4161 4162 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 4163 // tagp(FrameIndex, IRGstack, tag_offset): 4164 // since the offset between FrameIndex and IRGstack is a compile-time 4165 // constant, this can be lowered to a single ADDG instruction. 
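  //
  // For example (illustrative): @llvm.aarch64.tagp(%alloca, %irg.sp, i64 2)
  // on a stack slot is selected below to a single TAGPstack node carrying
  // the frame index and the tag offset 2.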
4166 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 4167 return false; 4168 } 4169 4170 SDValue IRG_SP = N->getOperand(2); 4171 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 4172 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) { 4173 return false; 4174 } 4175 4176 const TargetLowering *TLI = getTargetLowering(); 4177 SDLoc DL(N); 4178 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 4179 SDValue FiOp = CurDAG->getTargetFrameIndex( 4180 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 4181 int TagOffset = N->getConstantOperandVal(3); 4182 4183 SDNode *Out = CurDAG->getMachineNode( 4184 AArch64::TAGPstack, DL, MVT::i64, 4185 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 4186 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 4187 ReplaceNode(N, Out); 4188 return true; 4189 } 4190 4191 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 4192 assert(isa<ConstantSDNode>(N->getOperand(3)) && 4193 "llvm.aarch64.tagp third argument must be an immediate"); 4194 if (trySelectStackSlotTagP(N)) 4195 return; 4196 // FIXME: above applies in any case when offset between Op1 and Op2 is a 4197 // compile-time constant, not just for stack allocations. 4198 4199 // General case for unrelated pointers in Op1 and Op2. 4200 SDLoc DL(N); 4201 int TagOffset = N->getConstantOperandVal(3); 4202 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 4203 {N->getOperand(1), N->getOperand(2)}); 4204 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 4205 {SDValue(N1, 0), N->getOperand(2)}); 4206 SDNode *N3 = CurDAG->getMachineNode( 4207 AArch64::ADDG, DL, MVT::i64, 4208 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 4209 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 4210 ReplaceNode(N, N3); 4211 } 4212 4213 bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) { 4214 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!"); 4215 4216 // Bail when not a "cast" like insert_subvector. 4217 if (N->getConstantOperandVal(2) != 0) 4218 return false; 4219 if (!N->getOperand(0).isUndef()) 4220 return false; 4221 4222 // Bail when normal isel should do the job. 4223 EVT VT = N->getValueType(0); 4224 EVT InVT = N->getOperand(1).getValueType(); 4225 if (VT.isFixedLengthVector() || InVT.isScalableVector()) 4226 return false; 4227 if (InVT.getSizeInBits() <= 128) 4228 return false; 4229 4230 // NOTE: We can only get here when doing fixed length SVE code generation. 4231 // We do manual selection because the types involved are not linked to real 4232 // registers (despite being legal) and must be coerced into SVE registers. 4233 4234 assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && 4235 "Expected to insert into a packed scalable vector!"); 4236 4237 SDLoc DL(N); 4238 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 4239 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, 4240 N->getOperand(1), RC)); 4241 return true; 4242 } 4243 4244 bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) { 4245 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!"); 4246 4247 // Bail when not a "cast" like extract_subvector. 4248 if (N->getConstantOperandVal(1) != 0) 4249 return false; 4250 4251 // Bail when normal isel can do the job. 
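  //
  // For example (illustrative, assuming 256-bit SVE code generation): an
  // extract_subvector of fixed v8i32 at index 0 from an nxv4i32 value is not
  // a genuine subvector extract; it is selected below as a plain
  // COPY_TO_REGCLASS into the ZPR register class.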
4252 EVT VT = N->getValueType(0); 4253 EVT InVT = N->getOperand(0).getValueType(); 4254 if (VT.isScalableVector() || InVT.isFixedLengthVector()) 4255 return false; 4256 if (VT.getSizeInBits() <= 128) 4257 return false; 4258 4259 // NOTE: We can only get here when doing fixed length SVE code generation. 4260 // We do manual selection because the types involved are not linked to real 4261 // registers (despite being legal) and must be coerced into SVE registers. 4262 4263 assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && 4264 "Expected to extract from a packed scalable vector!"); 4265 4266 SDLoc DL(N); 4267 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 4268 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, 4269 N->getOperand(0), RC)); 4270 return true; 4271 } 4272 4273 bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) { 4274 assert(N->getOpcode() == ISD::OR && "Expected OR instruction"); 4275 4276 SDValue N0 = N->getOperand(0); 4277 SDValue N1 = N->getOperand(1); 4278 4279 if (N0->getOpcode() != AArch64ISD::VSHL || 4280 N1->getOpcode() != AArch64ISD::VLSHR) 4281 return false; 4282 4283 if (N0->getOperand(0) != N1->getOperand(0) || 4284 N1->getOperand(0)->getOpcode() != ISD::XOR) 4285 return false; 4286 4287 SDValue XOR = N0.getOperand(0); 4288 SDValue R1 = XOR.getOperand(0); 4289 SDValue R2 = XOR.getOperand(1); 4290 4291 unsigned HsAmt = N0.getConstantOperandVal(1); 4292 unsigned ShAmt = N1.getConstantOperandVal(1); 4293 4294 SDLoc DL = SDLoc(N0.getOperand(1)); 4295 SDValue Imm = CurDAG->getTargetConstant( 4296 ShAmt, DL, N0.getOperand(1).getValueType(), false); 4297 4298 if (ShAmt + HsAmt != 64) 4299 return false; 4300 4301 SDValue Ops[] = {R1, R2, Imm}; 4302 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops); 4303 4304 return true; 4305 } 4306 4307 void AArch64DAGToDAGISel::Select(SDNode *Node) { 4308 // If we have a custom node, we already have selected! 4309 if (Node->isMachineOpcode()) { 4310 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 4311 Node->setNodeId(-1); 4312 return; 4313 } 4314 4315 // Few custom selection stuff. 4316 EVT VT = Node->getValueType(0); 4317 4318 switch (Node->getOpcode()) { 4319 default: 4320 break; 4321 4322 case ISD::ATOMIC_CMP_SWAP: 4323 if (SelectCMP_SWAP(Node)) 4324 return; 4325 break; 4326 4327 case ISD::READ_REGISTER: 4328 case AArch64ISD::MRRS: 4329 if (tryReadRegister(Node)) 4330 return; 4331 break; 4332 4333 case ISD::WRITE_REGISTER: 4334 case AArch64ISD::MSRR: 4335 if (tryWriteRegister(Node)) 4336 return; 4337 break; 4338 4339 case ISD::LOAD: { 4340 // Try to select as an indexed load. Fall through to normal processing 4341 // if we can't. 
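    // For example (illustrative): an i64 load with a matching post-increment
    // of the base pointer can be folded into a single writeback load such as
    // LDRXpost, updating the base register as part of the load.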
4342 if (tryIndexedLoad(Node)) 4343 return; 4344 break; 4345 } 4346 4347 case ISD::SRL: 4348 case ISD::AND: 4349 case ISD::SRA: 4350 case ISD::SIGN_EXTEND_INREG: 4351 if (tryBitfieldExtractOp(Node)) 4352 return; 4353 if (tryBitfieldInsertInZeroOp(Node)) 4354 return; 4355 [[fallthrough]]; 4356 case ISD::ROTR: 4357 case ISD::SHL: 4358 if (tryShiftAmountMod(Node)) 4359 return; 4360 break; 4361 4362 case ISD::SIGN_EXTEND: 4363 if (tryBitfieldExtractOpFromSExt(Node)) 4364 return; 4365 break; 4366 4367 case ISD::OR: 4368 if (tryBitfieldInsertOp(Node)) 4369 return; 4370 if (Subtarget->hasSHA3() && trySelectXAR(Node)) 4371 return; 4372 break; 4373 4374 case ISD::EXTRACT_SUBVECTOR: { 4375 if (trySelectCastScalableToFixedLengthVector(Node)) 4376 return; 4377 break; 4378 } 4379 4380 case ISD::INSERT_SUBVECTOR: { 4381 if (trySelectCastFixedLengthToScalableVector(Node)) 4382 return; 4383 break; 4384 } 4385 4386 case ISD::Constant: { 4387 // Materialize zero constants as copies from WZR/XZR. This allows 4388 // the coalescer to propagate these into other instructions. 4389 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 4390 if (ConstNode->isZero()) { 4391 if (VT == MVT::i32) { 4392 SDValue New = CurDAG->getCopyFromReg( 4393 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 4394 ReplaceNode(Node, New.getNode()); 4395 return; 4396 } else if (VT == MVT::i64) { 4397 SDValue New = CurDAG->getCopyFromReg( 4398 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 4399 ReplaceNode(Node, New.getNode()); 4400 return; 4401 } 4402 } 4403 break; 4404 } 4405 4406 case ISD::FrameIndex: { 4407 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 4408 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 4409 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 4410 const TargetLowering *TLI = getTargetLowering(); 4411 SDValue TFI = CurDAG->getTargetFrameIndex( 4412 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 4413 SDLoc DL(Node); 4414 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 4415 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 4416 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 4417 return; 4418 } 4419 case ISD::INTRINSIC_W_CHAIN: { 4420 unsigned IntNo = Node->getConstantOperandVal(1); 4421 switch (IntNo) { 4422 default: 4423 break; 4424 case Intrinsic::aarch64_ldaxp: 4425 case Intrinsic::aarch64_ldxp: { 4426 unsigned Op = 4427 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 4428 SDValue MemAddr = Node->getOperand(2); 4429 SDLoc DL(Node); 4430 SDValue Chain = Node->getOperand(0); 4431 4432 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 4433 MVT::Other, MemAddr, Chain); 4434 4435 // Transfer memoperands. 4436 MachineMemOperand *MemOp = 4437 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 4438 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4439 ReplaceNode(Node, Ld); 4440 return; 4441 } 4442 case Intrinsic::aarch64_stlxp: 4443 case Intrinsic::aarch64_stxp: { 4444 unsigned Op = 4445 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; 4446 SDLoc DL(Node); 4447 SDValue Chain = Node->getOperand(0); 4448 SDValue ValLo = Node->getOperand(2); 4449 SDValue ValHi = Node->getOperand(3); 4450 SDValue MemAddr = Node->getOperand(4); 4451 4452 // Place arguments in the right order. 4453 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 4454 4455 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 4456 // Transfer memoperands. 
4457 MachineMemOperand *MemOp = 4458 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 4459 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4460 4461 ReplaceNode(Node, St); 4462 return; 4463 } 4464 case Intrinsic::aarch64_neon_ld1x2: 4465 if (VT == MVT::v8i8) { 4466 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 4467 return; 4468 } else if (VT == MVT::v16i8) { 4469 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 4470 return; 4471 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4472 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 4473 return; 4474 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4475 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 4476 return; 4477 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4478 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 4479 return; 4480 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4481 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 4482 return; 4483 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4484 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 4485 return; 4486 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4487 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 4488 return; 4489 } 4490 break; 4491 case Intrinsic::aarch64_neon_ld1x3: 4492 if (VT == MVT::v8i8) { 4493 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 4494 return; 4495 } else if (VT == MVT::v16i8) { 4496 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 4497 return; 4498 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4499 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 4500 return; 4501 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4502 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 4503 return; 4504 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4505 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 4506 return; 4507 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4508 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 4509 return; 4510 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4511 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 4512 return; 4513 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4514 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 4515 return; 4516 } 4517 break; 4518 case Intrinsic::aarch64_neon_ld1x4: 4519 if (VT == MVT::v8i8) { 4520 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 4521 return; 4522 } else if (VT == MVT::v16i8) { 4523 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 4524 return; 4525 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4526 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 4527 return; 4528 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4529 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 4530 return; 4531 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4532 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 4533 return; 4534 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4535 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 4536 return; 4537 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4538 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 4539 return; 4540 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4541 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 4542 return; 4543 } 4544 break; 4545 case 
Intrinsic::aarch64_neon_ld2: 4546 if (VT == MVT::v8i8) { 4547 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 4548 return; 4549 } else if (VT == MVT::v16i8) { 4550 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 4551 return; 4552 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4553 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 4554 return; 4555 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4556 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 4557 return; 4558 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4559 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 4560 return; 4561 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4562 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 4563 return; 4564 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4565 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 4566 return; 4567 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4568 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 4569 return; 4570 } 4571 break; 4572 case Intrinsic::aarch64_neon_ld3: 4573 if (VT == MVT::v8i8) { 4574 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 4575 return; 4576 } else if (VT == MVT::v16i8) { 4577 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 4578 return; 4579 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4580 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 4581 return; 4582 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4583 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 4584 return; 4585 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4586 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 4587 return; 4588 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4589 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 4590 return; 4591 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4592 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 4593 return; 4594 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4595 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 4596 return; 4597 } 4598 break; 4599 case Intrinsic::aarch64_neon_ld4: 4600 if (VT == MVT::v8i8) { 4601 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 4602 return; 4603 } else if (VT == MVT::v16i8) { 4604 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 4605 return; 4606 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4607 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 4608 return; 4609 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4610 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 4611 return; 4612 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4613 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 4614 return; 4615 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4616 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 4617 return; 4618 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4619 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 4620 return; 4621 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4622 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 4623 return; 4624 } 4625 break; 4626 case Intrinsic::aarch64_neon_ld2r: 4627 if (VT == MVT::v8i8) { 4628 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 4629 return; 4630 } else if (VT == MVT::v16i8) { 4631 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 
4632 return; 4633 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4634 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 4635 return; 4636 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4637 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 4638 return; 4639 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4640 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 4641 return; 4642 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4643 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 4644 return; 4645 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4646 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 4647 return; 4648 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4649 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 4650 return; 4651 } 4652 break; 4653 case Intrinsic::aarch64_neon_ld3r: 4654 if (VT == MVT::v8i8) { 4655 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 4656 return; 4657 } else if (VT == MVT::v16i8) { 4658 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 4659 return; 4660 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4661 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 4662 return; 4663 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4664 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 4665 return; 4666 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4667 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 4668 return; 4669 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4670 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 4671 return; 4672 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4673 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 4674 return; 4675 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4676 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 4677 return; 4678 } 4679 break; 4680 case Intrinsic::aarch64_neon_ld4r: 4681 if (VT == MVT::v8i8) { 4682 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 4683 return; 4684 } else if (VT == MVT::v16i8) { 4685 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 4686 return; 4687 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4688 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 4689 return; 4690 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4691 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 4692 return; 4693 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4694 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 4695 return; 4696 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4697 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 4698 return; 4699 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4700 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 4701 return; 4702 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4703 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 4704 return; 4705 } 4706 break; 4707 case Intrinsic::aarch64_neon_ld2lane: 4708 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4709 SelectLoadLane(Node, 2, AArch64::LD2i8); 4710 return; 4711 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4712 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4713 SelectLoadLane(Node, 2, AArch64::LD2i16); 4714 return; 4715 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4716 VT == MVT::v2f32) { 4717 SelectLoadLane(Node, 2, AArch64::LD2i32); 4718 return; 4719 } else if (VT 
== MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4720 VT == MVT::v1f64) { 4721 SelectLoadLane(Node, 2, AArch64::LD2i64); 4722 return; 4723 } 4724 break; 4725 case Intrinsic::aarch64_neon_ld3lane: 4726 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4727 SelectLoadLane(Node, 3, AArch64::LD3i8); 4728 return; 4729 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4730 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4731 SelectLoadLane(Node, 3, AArch64::LD3i16); 4732 return; 4733 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4734 VT == MVT::v2f32) { 4735 SelectLoadLane(Node, 3, AArch64::LD3i32); 4736 return; 4737 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4738 VT == MVT::v1f64) { 4739 SelectLoadLane(Node, 3, AArch64::LD3i64); 4740 return; 4741 } 4742 break; 4743 case Intrinsic::aarch64_neon_ld4lane: 4744 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4745 SelectLoadLane(Node, 4, AArch64::LD4i8); 4746 return; 4747 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4748 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4749 SelectLoadLane(Node, 4, AArch64::LD4i16); 4750 return; 4751 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4752 VT == MVT::v2f32) { 4753 SelectLoadLane(Node, 4, AArch64::LD4i32); 4754 return; 4755 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4756 VT == MVT::v1f64) { 4757 SelectLoadLane(Node, 4, AArch64::LD4i64); 4758 return; 4759 } 4760 break; 4761 case Intrinsic::aarch64_ld64b: 4762 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); 4763 return; 4764 case Intrinsic::aarch64_sve_ld2q_sret: { 4765 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true); 4766 return; 4767 } 4768 case Intrinsic::aarch64_sve_ld3q_sret: { 4769 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true); 4770 return; 4771 } 4772 case Intrinsic::aarch64_sve_ld4q_sret: { 4773 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true); 4774 return; 4775 } 4776 case Intrinsic::aarch64_sve_ld2_sret: { 4777 if (VT == MVT::nxv16i8) { 4778 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, 4779 true); 4780 return; 4781 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4782 VT == MVT::nxv8bf16) { 4783 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, 4784 true); 4785 return; 4786 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4787 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W, 4788 true); 4789 return; 4790 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4791 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D, 4792 true); 4793 return; 4794 } 4795 break; 4796 } 4797 case Intrinsic::aarch64_sve_ld1_pn_x2: { 4798 if (VT == MVT::nxv16i8) { 4799 if (Subtarget->hasSME2()) 4800 SelectContiguousMultiVectorLoad( 4801 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO); 4802 else if (Subtarget->hasSVE2p1()) 4803 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM, 4804 AArch64::LD1B_2Z); 4805 else 4806 break; 4807 return; 4808 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4809 VT == MVT::nxv8bf16) { 4810 if (Subtarget->hasSME2()) 4811 SelectContiguousMultiVectorLoad( 4812 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO); 4813 else if (Subtarget->hasSVE2p1()) 4814 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM, 4815 AArch64::LD1H_2Z); 4816 else 4817 
break; 4818 return; 4819 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4820 if (Subtarget->hasSME2()) 4821 SelectContiguousMultiVectorLoad( 4822 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO); 4823 else if (Subtarget->hasSVE2p1()) 4824 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM, 4825 AArch64::LD1W_2Z); 4826 else 4827 break; 4828 return; 4829 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4830 if (Subtarget->hasSME2()) 4831 SelectContiguousMultiVectorLoad( 4832 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO); 4833 else if (Subtarget->hasSVE2p1()) 4834 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM, 4835 AArch64::LD1D_2Z); 4836 else 4837 break; 4838 return; 4839 } 4840 break; 4841 } 4842 case Intrinsic::aarch64_sve_ld1_pn_x4: { 4843 if (VT == MVT::nxv16i8) { 4844 if (Subtarget->hasSME2()) 4845 SelectContiguousMultiVectorLoad( 4846 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO); 4847 else if (Subtarget->hasSVE2p1()) 4848 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM, 4849 AArch64::LD1B_4Z); 4850 else 4851 break; 4852 return; 4853 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4854 VT == MVT::nxv8bf16) { 4855 if (Subtarget->hasSME2()) 4856 SelectContiguousMultiVectorLoad( 4857 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO); 4858 else if (Subtarget->hasSVE2p1()) 4859 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM, 4860 AArch64::LD1H_4Z); 4861 else 4862 break; 4863 return; 4864 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4865 if (Subtarget->hasSME2()) 4866 SelectContiguousMultiVectorLoad( 4867 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO); 4868 else if (Subtarget->hasSVE2p1()) 4869 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM, 4870 AArch64::LD1W_4Z); 4871 else 4872 break; 4873 return; 4874 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4875 if (Subtarget->hasSME2()) 4876 SelectContiguousMultiVectorLoad( 4877 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO); 4878 else if (Subtarget->hasSVE2p1()) 4879 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM, 4880 AArch64::LD1D_4Z); 4881 else 4882 break; 4883 return; 4884 } 4885 break; 4886 } 4887 case Intrinsic::aarch64_sve_ldnt1_pn_x2: { 4888 if (VT == MVT::nxv16i8) { 4889 if (Subtarget->hasSME2()) 4890 SelectContiguousMultiVectorLoad(Node, 2, 0, 4891 AArch64::LDNT1B_2Z_IMM_PSEUDO, 4892 AArch64::LDNT1B_2Z_PSEUDO); 4893 else if (Subtarget->hasSVE2p1()) 4894 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM, 4895 AArch64::LDNT1B_2Z); 4896 else 4897 break; 4898 return; 4899 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4900 VT == MVT::nxv8bf16) { 4901 if (Subtarget->hasSME2()) 4902 SelectContiguousMultiVectorLoad(Node, 2, 1, 4903 AArch64::LDNT1H_2Z_IMM_PSEUDO, 4904 AArch64::LDNT1H_2Z_PSEUDO); 4905 else if (Subtarget->hasSVE2p1()) 4906 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM, 4907 AArch64::LDNT1H_2Z); 4908 else 4909 break; 4910 return; 4911 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4912 if (Subtarget->hasSME2()) 4913 SelectContiguousMultiVectorLoad(Node, 2, 2, 4914 AArch64::LDNT1W_2Z_IMM_PSEUDO, 4915 AArch64::LDNT1W_2Z_PSEUDO); 4916 else if (Subtarget->hasSVE2p1()) 4917 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM, 4918 AArch64::LDNT1W_2Z); 4919 else 4920 break; 4921 return; 4922 } else if (VT == 
MVT::nxv2i64 || VT == MVT::nxv2f64) { 4923 if (Subtarget->hasSME2()) 4924 SelectContiguousMultiVectorLoad(Node, 2, 3, 4925 AArch64::LDNT1D_2Z_IMM_PSEUDO, 4926 AArch64::LDNT1D_2Z_PSEUDO); 4927 else if (Subtarget->hasSVE2p1()) 4928 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM, 4929 AArch64::LDNT1D_2Z); 4930 else 4931 break; 4932 return; 4933 } 4934 break; 4935 } 4936 case Intrinsic::aarch64_sve_ldnt1_pn_x4: { 4937 if (VT == MVT::nxv16i8) { 4938 if (Subtarget->hasSME2()) 4939 SelectContiguousMultiVectorLoad(Node, 4, 0, 4940 AArch64::LDNT1B_4Z_IMM_PSEUDO, 4941 AArch64::LDNT1B_4Z_PSEUDO); 4942 else if (Subtarget->hasSVE2p1()) 4943 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM, 4944 AArch64::LDNT1B_4Z); 4945 else 4946 break; 4947 return; 4948 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4949 VT == MVT::nxv8bf16) { 4950 if (Subtarget->hasSME2()) 4951 SelectContiguousMultiVectorLoad(Node, 4, 1, 4952 AArch64::LDNT1H_4Z_IMM_PSEUDO, 4953 AArch64::LDNT1H_4Z_PSEUDO); 4954 else if (Subtarget->hasSVE2p1()) 4955 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM, 4956 AArch64::LDNT1H_4Z); 4957 else 4958 break; 4959 return; 4960 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4961 if (Subtarget->hasSME2()) 4962 SelectContiguousMultiVectorLoad(Node, 4, 2, 4963 AArch64::LDNT1W_4Z_IMM_PSEUDO, 4964 AArch64::LDNT1W_4Z_PSEUDO); 4965 else if (Subtarget->hasSVE2p1()) 4966 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM, 4967 AArch64::LDNT1W_4Z); 4968 else 4969 break; 4970 return; 4971 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4972 if (Subtarget->hasSME2()) 4973 SelectContiguousMultiVectorLoad(Node, 4, 3, 4974 AArch64::LDNT1D_4Z_IMM_PSEUDO, 4975 AArch64::LDNT1D_4Z_PSEUDO); 4976 else if (Subtarget->hasSVE2p1()) 4977 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM, 4978 AArch64::LDNT1D_4Z); 4979 else 4980 break; 4981 return; 4982 } 4983 break; 4984 } 4985 case Intrinsic::aarch64_sve_ld3_sret: { 4986 if (VT == MVT::nxv16i8) { 4987 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, 4988 true); 4989 return; 4990 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4991 VT == MVT::nxv8bf16) { 4992 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H, 4993 true); 4994 return; 4995 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4996 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W, 4997 true); 4998 return; 4999 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5000 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D, 5001 true); 5002 return; 5003 } 5004 break; 5005 } 5006 case Intrinsic::aarch64_sve_ld4_sret: { 5007 if (VT == MVT::nxv16i8) { 5008 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B, 5009 true); 5010 return; 5011 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5012 VT == MVT::nxv8bf16) { 5013 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, 5014 true); 5015 return; 5016 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5017 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W, 5018 true); 5019 return; 5020 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5021 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D, 5022 true); 5023 return; 5024 } 5025 break; 5026 } 5027 case Intrinsic::aarch64_sme_read_hor_vg2: { 5028 if (VT == MVT::nxv16i8) { 5029 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0, 5030 AArch64::MOVA_2ZMXI_H_B); 5031 
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
                                    AArch64::MOVA_2ZMXI_H_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
                                    AArch64::MOVA_2ZMXI_H_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
                                    AArch64::MOVA_2ZMXI_H_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_ver_vg2: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
                                     AArch64::MOVA_2ZMXI_V_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
                                    AArch64::MOVA_2ZMXI_V_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
                                    AArch64::MOVA_2ZMXI_V_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
                                    AArch64::MOVA_2ZMXI_V_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_hor_vg4: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
                                     AArch64::MOVA_4ZMXI_H_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
                                    AArch64::MOVA_4ZMXI_H_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
                                    AArch64::MOVA_4ZMXI_H_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
                                    AArch64::MOVA_4ZMXI_H_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_ver_vg4: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
                                     AArch64::MOVA_4ZMXI_V_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
                                    AArch64::MOVA_4ZMXI_V_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
                                    AArch64::MOVA_4ZMXI_V_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
                                    AArch64::MOVA_4ZMXI_V_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_vg1x2: {
      SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
                                  AArch64::MOVA_VG2_2ZMXI);
      return;
    }
    case Intrinsic::aarch64_sme_read_vg1x4: {
      SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
                                  AArch64::MOVA_VG4_4ZMXI);
      return;
    }
    case Intrinsic::swift_async_context_addr: {
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);
      SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
      SDValue Res = SDValue(
          CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
                                 CurDAG->getTargetConstant(8, DL, MVT::i32),
                                 CurDAG->getTargetConstant(0, DL, MVT::i32)),
          0);
      ReplaceUses(SDValue(Node, 0), Res);
      ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
      CurDAG->RemoveDeadNode(Node);

      auto &MF = CurDAG->getMachineFunction();
      MF.getFrameInfo().setFrameAddressIsTaken(true);
      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
      return;
    }
    case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
               AArch64::LUTI2_4ZTZI_S}))
        // Second Immediate must be <= 3:
        SelectMultiVectorLuti(Node, 4, Opc, 3);
      return;
    }
    case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
        // Second Immediate must be <= 1:
        SelectMultiVectorLuti(Node, 4, Opc, 1);
      return;
    }
    case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
               AArch64::LUTI2_2ZTZI_S}))
        // Second Immediate must be <= 7:
        SelectMultiVectorLuti(Node, 2, Opc, 7);
      return;
    }
    case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
               AArch64::LUTI4_2ZTZI_S}))
        // Second Immediate must be <= 3:
        SelectMultiVectorLuti(Node, 2, Opc, 3);
      return;
    }
    }
  } break;
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_tagp:
      SelectTagP(Node);
      return;
    case Intrinsic::aarch64_neon_tbl2:
      SelectTable(Node, 2,
                  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbl3:
      SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
                                           : AArch64::TBLv16i8Three,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbl4:
      SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
                                           : AArch64::TBLv16i8Four,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbx2:
      SelectTable(Node, 2,
                  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
                  true);
      return;
    case Intrinsic::aarch64_neon_tbx3:
      SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
                                           : AArch64::TBXv16i8Three,
                  true);
      return;
    case Intrinsic::aarch64_neon_tbx4:
      SelectTable(Node, 4, VT == MVT::v8i8 ?
AArch64::TBXv8i8Four 5211 : AArch64::TBXv16i8Four, 5212 true); 5213 return; 5214 case Intrinsic::aarch64_sve_srshl_single_x2: 5215 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5216 Node->getValueType(0), 5217 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H, 5218 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D})) 5219 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5220 return; 5221 case Intrinsic::aarch64_sve_srshl_single_x4: 5222 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5223 Node->getValueType(0), 5224 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H, 5225 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D})) 5226 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5227 return; 5228 case Intrinsic::aarch64_sve_urshl_single_x2: 5229 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5230 Node->getValueType(0), 5231 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H, 5232 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D})) 5233 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5234 return; 5235 case Intrinsic::aarch64_sve_urshl_single_x4: 5236 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5237 Node->getValueType(0), 5238 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H, 5239 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D})) 5240 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5241 return; 5242 case Intrinsic::aarch64_sve_srshl_x2: 5243 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5244 Node->getValueType(0), 5245 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H, 5246 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D})) 5247 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5248 return; 5249 case Intrinsic::aarch64_sve_srshl_x4: 5250 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5251 Node->getValueType(0), 5252 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H, 5253 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D})) 5254 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5255 return; 5256 case Intrinsic::aarch64_sve_urshl_x2: 5257 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5258 Node->getValueType(0), 5259 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H, 5260 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D})) 5261 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5262 return; 5263 case Intrinsic::aarch64_sve_urshl_x4: 5264 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5265 Node->getValueType(0), 5266 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H, 5267 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D})) 5268 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5269 return; 5270 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2: 5271 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5272 Node->getValueType(0), 5273 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H, 5274 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D})) 5275 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5276 return; 5277 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4: 5278 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5279 Node->getValueType(0), 5280 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H, 5281 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D})) 5282 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5283 return; 5284 case Intrinsic::aarch64_sve_sqdmulh_vgx2: 5285 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5286 Node->getValueType(0), 5287 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H, 5288 
AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D})) 5289 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5290 return; 5291 case Intrinsic::aarch64_sve_sqdmulh_vgx4: 5292 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5293 Node->getValueType(0), 5294 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H, 5295 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D})) 5296 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5297 return; 5298 case Intrinsic::aarch64_sve_whilege_x2: 5299 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5300 Node->getValueType(0), 5301 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H, 5302 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D})) 5303 SelectWhilePair(Node, Op); 5304 return; 5305 case Intrinsic::aarch64_sve_whilegt_x2: 5306 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5307 Node->getValueType(0), 5308 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H, 5309 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D})) 5310 SelectWhilePair(Node, Op); 5311 return; 5312 case Intrinsic::aarch64_sve_whilehi_x2: 5313 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5314 Node->getValueType(0), 5315 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H, 5316 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D})) 5317 SelectWhilePair(Node, Op); 5318 return; 5319 case Intrinsic::aarch64_sve_whilehs_x2: 5320 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5321 Node->getValueType(0), 5322 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H, 5323 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D})) 5324 SelectWhilePair(Node, Op); 5325 return; 5326 case Intrinsic::aarch64_sve_whilele_x2: 5327 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5328 Node->getValueType(0), 5329 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H, 5330 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D})) 5331 SelectWhilePair(Node, Op); 5332 return; 5333 case Intrinsic::aarch64_sve_whilelo_x2: 5334 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5335 Node->getValueType(0), 5336 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H, 5337 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D})) 5338 SelectWhilePair(Node, Op); 5339 return; 5340 case Intrinsic::aarch64_sve_whilels_x2: 5341 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5342 Node->getValueType(0), 5343 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H, 5344 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D})) 5345 SelectWhilePair(Node, Op); 5346 return; 5347 case Intrinsic::aarch64_sve_whilelt_x2: 5348 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5349 Node->getValueType(0), 5350 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H, 5351 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D})) 5352 SelectWhilePair(Node, Op); 5353 return; 5354 case Intrinsic::aarch64_sve_smax_single_x2: 5355 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5356 Node->getValueType(0), 5357 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H, 5358 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D})) 5359 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5360 return; 5361 case Intrinsic::aarch64_sve_umax_single_x2: 5362 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5363 Node->getValueType(0), 5364 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H, 5365 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D})) 5366 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5367 return; 5368 case Intrinsic::aarch64_sve_fmax_single_x2: 5369 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5370 
Node->getValueType(0), 5371 {0, AArch64::FMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_S, 5372 AArch64::FMAX_VG2_2ZZ_D})) 5373 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5374 return; 5375 case Intrinsic::aarch64_sve_smax_single_x4: 5376 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5377 Node->getValueType(0), 5378 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H, 5379 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D})) 5380 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5381 return; 5382 case Intrinsic::aarch64_sve_umax_single_x4: 5383 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5384 Node->getValueType(0), 5385 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H, 5386 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D})) 5387 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5388 return; 5389 case Intrinsic::aarch64_sve_fmax_single_x4: 5390 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5391 Node->getValueType(0), 5392 {0, AArch64::FMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_S, 5393 AArch64::FMAX_VG4_4ZZ_D})) 5394 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5395 return; 5396 case Intrinsic::aarch64_sve_smin_single_x2: 5397 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5398 Node->getValueType(0), 5399 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H, 5400 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D})) 5401 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5402 return; 5403 case Intrinsic::aarch64_sve_umin_single_x2: 5404 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5405 Node->getValueType(0), 5406 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H, 5407 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D})) 5408 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5409 return; 5410 case Intrinsic::aarch64_sve_fmin_single_x2: 5411 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5412 Node->getValueType(0), 5413 {0, AArch64::FMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_S, 5414 AArch64::FMIN_VG2_2ZZ_D})) 5415 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5416 return; 5417 case Intrinsic::aarch64_sve_smin_single_x4: 5418 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5419 Node->getValueType(0), 5420 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H, 5421 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D})) 5422 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5423 return; 5424 case Intrinsic::aarch64_sve_umin_single_x4: 5425 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5426 Node->getValueType(0), 5427 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H, 5428 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D})) 5429 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5430 return; 5431 case Intrinsic::aarch64_sve_fmin_single_x4: 5432 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5433 Node->getValueType(0), 5434 {0, AArch64::FMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_S, 5435 AArch64::FMIN_VG4_4ZZ_D})) 5436 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5437 return; 5438 case Intrinsic::aarch64_sve_smax_x2: 5439 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5440 Node->getValueType(0), 5441 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H, 5442 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D})) 5443 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5444 return; 5445 case Intrinsic::aarch64_sve_umax_x2: 5446 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5447 Node->getValueType(0), 5448 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H, 5449 AArch64::UMAX_VG2_2Z2Z_S, 
               AArch64::UMAX_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_S,
               AArch64::FMAX_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_smax_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
               AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_umax_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
               AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMAX_VG4_4Z4Z_H, AArch64::FMAX_VG4_4Z4Z_S,
               AArch64::FMAX_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_smin_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
               AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_umin_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
               AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_S,
               AArch64::FMIN_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_smin_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
               AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_umin_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
               AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMIN_VG4_4Z4Z_H, AArch64::FMIN_VG4_4Z4Z_S,
               AArch64::FMIN_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_S,
               AArch64::FMAXNM_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_single_x4:
      if (auto Op =
SelectOpcodeFromVT<SelectTypeKind::FP>( 5531 Node->getValueType(0), 5532 {0, AArch64::FMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_S, 5533 AArch64::FMAXNM_VG4_4ZZ_D})) 5534 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5535 return; 5536 case Intrinsic::aarch64_sve_fminnm_single_x2: 5537 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5538 Node->getValueType(0), 5539 {0, AArch64::FMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_S, 5540 AArch64::FMINNM_VG2_2ZZ_D})) 5541 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5542 return; 5543 case Intrinsic::aarch64_sve_fminnm_single_x4: 5544 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5545 Node->getValueType(0), 5546 {0, AArch64::FMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_S, 5547 AArch64::FMINNM_VG4_4ZZ_D})) 5548 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5549 return; 5550 case Intrinsic::aarch64_sve_fmaxnm_x2: 5551 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5552 Node->getValueType(0), 5553 {0, AArch64::FMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_S, 5554 AArch64::FMAXNM_VG2_2Z2Z_D})) 5555 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5556 return; 5557 case Intrinsic::aarch64_sve_fmaxnm_x4: 5558 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5559 Node->getValueType(0), 5560 {0, AArch64::FMAXNM_VG4_4Z4Z_H, AArch64::FMAXNM_VG4_4Z4Z_S, 5561 AArch64::FMAXNM_VG4_4Z4Z_D})) 5562 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5563 return; 5564 case Intrinsic::aarch64_sve_fminnm_x2: 5565 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5566 Node->getValueType(0), 5567 {0, AArch64::FMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_S, 5568 AArch64::FMINNM_VG2_2Z2Z_D})) 5569 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5570 return; 5571 case Intrinsic::aarch64_sve_fminnm_x4: 5572 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5573 Node->getValueType(0), 5574 {0, AArch64::FMINNM_VG4_4Z4Z_H, AArch64::FMINNM_VG4_4Z4Z_S, 5575 AArch64::FMINNM_VG4_4Z4Z_D})) 5576 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5577 return; 5578 case Intrinsic::aarch64_sve_fcvts_x2: 5579 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); 5580 return; 5581 case Intrinsic::aarch64_sve_scvtf_x2: 5582 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS); 5583 return; 5584 case Intrinsic::aarch64_sve_fcvtu_x2: 5585 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS); 5586 return; 5587 case Intrinsic::aarch64_sve_ucvtf_x2: 5588 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS); 5589 return; 5590 case Intrinsic::aarch64_sve_fcvts_x4: 5591 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS); 5592 return; 5593 case Intrinsic::aarch64_sve_scvtf_x4: 5594 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS); 5595 return; 5596 case Intrinsic::aarch64_sve_fcvtu_x4: 5597 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS); 5598 return; 5599 case Intrinsic::aarch64_sve_ucvtf_x4: 5600 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS); 5601 return; 5602 case Intrinsic::aarch64_sve_sclamp_single_x2: 5603 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5604 Node->getValueType(0), 5605 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H, 5606 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D})) 5607 SelectClamp(Node, 2, Op); 5608 return; 5609 case Intrinsic::aarch64_sve_uclamp_single_x2: 5610 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5611 Node->getValueType(0), 5612 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H, 5613 AArch64::UCLAMP_VG2_2Z2Z_S, 
AArch64::UCLAMP_VG2_2Z2Z_D})) 5614 SelectClamp(Node, 2, Op); 5615 return; 5616 case Intrinsic::aarch64_sve_fclamp_single_x2: 5617 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5618 Node->getValueType(0), 5619 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S, 5620 AArch64::FCLAMP_VG2_2Z2Z_D})) 5621 SelectClamp(Node, 2, Op); 5622 return; 5623 case Intrinsic::aarch64_sve_sclamp_single_x4: 5624 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5625 Node->getValueType(0), 5626 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H, 5627 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D})) 5628 SelectClamp(Node, 4, Op); 5629 return; 5630 case Intrinsic::aarch64_sve_uclamp_single_x4: 5631 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5632 Node->getValueType(0), 5633 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H, 5634 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D})) 5635 SelectClamp(Node, 4, Op); 5636 return; 5637 case Intrinsic::aarch64_sve_fclamp_single_x4: 5638 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5639 Node->getValueType(0), 5640 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S, 5641 AArch64::FCLAMP_VG4_4Z4Z_D})) 5642 SelectClamp(Node, 4, Op); 5643 return; 5644 case Intrinsic::aarch64_sve_add_single_x2: 5645 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5646 Node->getValueType(0), 5647 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H, 5648 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D})) 5649 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5650 return; 5651 case Intrinsic::aarch64_sve_add_single_x4: 5652 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5653 Node->getValueType(0), 5654 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H, 5655 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D})) 5656 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5657 return; 5658 case Intrinsic::aarch64_sve_zip_x2: 5659 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5660 Node->getValueType(0), 5661 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H, 5662 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D})) 5663 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 5664 return; 5665 case Intrinsic::aarch64_sve_zipq_x2: 5666 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, 5667 AArch64::ZIP_VG2_2ZZZ_Q); 5668 return; 5669 case Intrinsic::aarch64_sve_zip_x4: 5670 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5671 Node->getValueType(0), 5672 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H, 5673 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D})) 5674 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 5675 return; 5676 case Intrinsic::aarch64_sve_zipq_x4: 5677 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, 5678 AArch64::ZIP_VG4_4Z4Z_Q); 5679 return; 5680 case Intrinsic::aarch64_sve_uzp_x2: 5681 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5682 Node->getValueType(0), 5683 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H, 5684 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D})) 5685 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 5686 return; 5687 case Intrinsic::aarch64_sve_uzpq_x2: 5688 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, 5689 AArch64::UZP_VG2_2ZZZ_Q); 5690 return; 5691 case Intrinsic::aarch64_sve_uzp_x4: 5692 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5693 Node->getValueType(0), 5694 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H, 5695 AArch64::UZP_VG4_4Z4Z_S, 
AArch64::UZP_VG4_4Z4Z_D})) 5696 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 5697 return; 5698 case Intrinsic::aarch64_sve_uzpq_x4: 5699 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, 5700 AArch64::UZP_VG4_4Z4Z_Q); 5701 return; 5702 case Intrinsic::aarch64_sve_sel_x2: 5703 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5704 Node->getValueType(0), 5705 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H, 5706 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D})) 5707 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true); 5708 return; 5709 case Intrinsic::aarch64_sve_sel_x4: 5710 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5711 Node->getValueType(0), 5712 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H, 5713 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D})) 5714 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true); 5715 return; 5716 case Intrinsic::aarch64_sve_frinta_x2: 5717 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S); 5718 return; 5719 case Intrinsic::aarch64_sve_frinta_x4: 5720 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S); 5721 return; 5722 case Intrinsic::aarch64_sve_frintm_x2: 5723 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S); 5724 return; 5725 case Intrinsic::aarch64_sve_frintm_x4: 5726 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S); 5727 return; 5728 case Intrinsic::aarch64_sve_frintn_x2: 5729 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S); 5730 return; 5731 case Intrinsic::aarch64_sve_frintn_x4: 5732 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S); 5733 return; 5734 case Intrinsic::aarch64_sve_frintp_x2: 5735 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S); 5736 return; 5737 case Intrinsic::aarch64_sve_frintp_x4: 5738 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S); 5739 return; 5740 case Intrinsic::aarch64_sve_sunpk_x2: 5741 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5742 Node->getValueType(0), 5743 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S, 5744 AArch64::SUNPK_VG2_2ZZ_D})) 5745 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 5746 return; 5747 case Intrinsic::aarch64_sve_uunpk_x2: 5748 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5749 Node->getValueType(0), 5750 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S, 5751 AArch64::UUNPK_VG2_2ZZ_D})) 5752 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 5753 return; 5754 case Intrinsic::aarch64_sve_sunpk_x4: 5755 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5756 Node->getValueType(0), 5757 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S, 5758 AArch64::SUNPK_VG4_4Z2Z_D})) 5759 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 5760 return; 5761 case Intrinsic::aarch64_sve_uunpk_x4: 5762 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5763 Node->getValueType(0), 5764 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S, 5765 AArch64::UUNPK_VG4_4Z2Z_D})) 5766 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 5767 return; 5768 case Intrinsic::aarch64_sve_pext_x2: { 5769 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5770 Node->getValueType(0), 5771 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S, 5772 AArch64::PEXT_2PCI_D})) 5773 SelectPExtPair(Node, Op); 5774 return; 5775 } 5776 } 5777 break; 5778 } 5779 case ISD::INTRINSIC_VOID: { 5780 unsigned IntNo = Node->getConstantOperandVal(1); 5781 if (Node->getNumOperands() >= 3) 5782 VT = 
Node->getOperand(2)->getValueType(0); 5783 switch (IntNo) { 5784 default: 5785 break; 5786 case Intrinsic::aarch64_neon_st1x2: { 5787 if (VT == MVT::v8i8) { 5788 SelectStore(Node, 2, AArch64::ST1Twov8b); 5789 return; 5790 } else if (VT == MVT::v16i8) { 5791 SelectStore(Node, 2, AArch64::ST1Twov16b); 5792 return; 5793 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5794 VT == MVT::v4bf16) { 5795 SelectStore(Node, 2, AArch64::ST1Twov4h); 5796 return; 5797 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5798 VT == MVT::v8bf16) { 5799 SelectStore(Node, 2, AArch64::ST1Twov8h); 5800 return; 5801 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5802 SelectStore(Node, 2, AArch64::ST1Twov2s); 5803 return; 5804 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5805 SelectStore(Node, 2, AArch64::ST1Twov4s); 5806 return; 5807 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5808 SelectStore(Node, 2, AArch64::ST1Twov2d); 5809 return; 5810 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5811 SelectStore(Node, 2, AArch64::ST1Twov1d); 5812 return; 5813 } 5814 break; 5815 } 5816 case Intrinsic::aarch64_neon_st1x3: { 5817 if (VT == MVT::v8i8) { 5818 SelectStore(Node, 3, AArch64::ST1Threev8b); 5819 return; 5820 } else if (VT == MVT::v16i8) { 5821 SelectStore(Node, 3, AArch64::ST1Threev16b); 5822 return; 5823 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5824 VT == MVT::v4bf16) { 5825 SelectStore(Node, 3, AArch64::ST1Threev4h); 5826 return; 5827 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5828 VT == MVT::v8bf16) { 5829 SelectStore(Node, 3, AArch64::ST1Threev8h); 5830 return; 5831 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5832 SelectStore(Node, 3, AArch64::ST1Threev2s); 5833 return; 5834 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5835 SelectStore(Node, 3, AArch64::ST1Threev4s); 5836 return; 5837 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5838 SelectStore(Node, 3, AArch64::ST1Threev2d); 5839 return; 5840 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5841 SelectStore(Node, 3, AArch64::ST1Threev1d); 5842 return; 5843 } 5844 break; 5845 } 5846 case Intrinsic::aarch64_neon_st1x4: { 5847 if (VT == MVT::v8i8) { 5848 SelectStore(Node, 4, AArch64::ST1Fourv8b); 5849 return; 5850 } else if (VT == MVT::v16i8) { 5851 SelectStore(Node, 4, AArch64::ST1Fourv16b); 5852 return; 5853 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5854 VT == MVT::v4bf16) { 5855 SelectStore(Node, 4, AArch64::ST1Fourv4h); 5856 return; 5857 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5858 VT == MVT::v8bf16) { 5859 SelectStore(Node, 4, AArch64::ST1Fourv8h); 5860 return; 5861 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5862 SelectStore(Node, 4, AArch64::ST1Fourv2s); 5863 return; 5864 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5865 SelectStore(Node, 4, AArch64::ST1Fourv4s); 5866 return; 5867 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5868 SelectStore(Node, 4, AArch64::ST1Fourv2d); 5869 return; 5870 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5871 SelectStore(Node, 4, AArch64::ST1Fourv1d); 5872 return; 5873 } 5874 break; 5875 } 5876 case Intrinsic::aarch64_neon_st2: { 5877 if (VT == MVT::v8i8) { 5878 SelectStore(Node, 2, AArch64::ST2Twov8b); 5879 return; 5880 } else if (VT == MVT::v16i8) { 5881 SelectStore(Node, 2, AArch64::ST2Twov16b); 5882 return; 5883 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5884 VT == MVT::v4bf16) { 5885 SelectStore(Node, 2, AArch64::ST2Twov4h); 5886 return; 5887 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5888 VT == 
MVT::v8bf16) { 5889 SelectStore(Node, 2, AArch64::ST2Twov8h); 5890 return; 5891 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5892 SelectStore(Node, 2, AArch64::ST2Twov2s); 5893 return; 5894 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5895 SelectStore(Node, 2, AArch64::ST2Twov4s); 5896 return; 5897 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5898 SelectStore(Node, 2, AArch64::ST2Twov2d); 5899 return; 5900 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5901 SelectStore(Node, 2, AArch64::ST1Twov1d); 5902 return; 5903 } 5904 break; 5905 } 5906 case Intrinsic::aarch64_neon_st3: { 5907 if (VT == MVT::v8i8) { 5908 SelectStore(Node, 3, AArch64::ST3Threev8b); 5909 return; 5910 } else if (VT == MVT::v16i8) { 5911 SelectStore(Node, 3, AArch64::ST3Threev16b); 5912 return; 5913 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5914 VT == MVT::v4bf16) { 5915 SelectStore(Node, 3, AArch64::ST3Threev4h); 5916 return; 5917 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5918 VT == MVT::v8bf16) { 5919 SelectStore(Node, 3, AArch64::ST3Threev8h); 5920 return; 5921 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5922 SelectStore(Node, 3, AArch64::ST3Threev2s); 5923 return; 5924 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5925 SelectStore(Node, 3, AArch64::ST3Threev4s); 5926 return; 5927 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5928 SelectStore(Node, 3, AArch64::ST3Threev2d); 5929 return; 5930 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5931 SelectStore(Node, 3, AArch64::ST1Threev1d); 5932 return; 5933 } 5934 break; 5935 } 5936 case Intrinsic::aarch64_neon_st4: { 5937 if (VT == MVT::v8i8) { 5938 SelectStore(Node, 4, AArch64::ST4Fourv8b); 5939 return; 5940 } else if (VT == MVT::v16i8) { 5941 SelectStore(Node, 4, AArch64::ST4Fourv16b); 5942 return; 5943 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5944 VT == MVT::v4bf16) { 5945 SelectStore(Node, 4, AArch64::ST4Fourv4h); 5946 return; 5947 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5948 VT == MVT::v8bf16) { 5949 SelectStore(Node, 4, AArch64::ST4Fourv8h); 5950 return; 5951 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5952 SelectStore(Node, 4, AArch64::ST4Fourv2s); 5953 return; 5954 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5955 SelectStore(Node, 4, AArch64::ST4Fourv4s); 5956 return; 5957 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5958 SelectStore(Node, 4, AArch64::ST4Fourv2d); 5959 return; 5960 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5961 SelectStore(Node, 4, AArch64::ST1Fourv1d); 5962 return; 5963 } 5964 break; 5965 } 5966 case Intrinsic::aarch64_neon_st2lane: { 5967 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5968 SelectStoreLane(Node, 2, AArch64::ST2i8); 5969 return; 5970 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5971 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5972 SelectStoreLane(Node, 2, AArch64::ST2i16); 5973 return; 5974 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5975 VT == MVT::v2f32) { 5976 SelectStoreLane(Node, 2, AArch64::ST2i32); 5977 return; 5978 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5979 VT == MVT::v1f64) { 5980 SelectStoreLane(Node, 2, AArch64::ST2i64); 5981 return; 5982 } 5983 break; 5984 } 5985 case Intrinsic::aarch64_neon_st3lane: { 5986 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5987 SelectStoreLane(Node, 3, AArch64::ST3i8); 5988 return; 5989 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5990 VT == MVT::v8f16 || VT 
                 == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 3, AArch64::ST3i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 3, AArch64::ST3i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 3, AArch64::ST3i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st4lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 4, AArch64::ST4i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 4, AArch64::ST4i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 4, AArch64::ST4i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 4, AArch64::ST4i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st2q: {
      SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st3q: {
      SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st4q: {
      SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st2: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st3: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st4: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
        return;
      }
      break;
    }
    }
    break;
  }
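  // Post-incremented structure loads (LD2/LD3/LD4, LD1x2-x4, and the
  // replicating/lane variants below) pick the instruction from the vector
  // type: 64-bit vectors are returned in dsub0, 128-bit vectors in qsub0.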
case AArch64ISD::LD2post: { 6090 if (VT == MVT::v8i8) { 6091 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 6092 return; 6093 } else if (VT == MVT::v16i8) { 6094 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 6095 return; 6096 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6097 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 6098 return; 6099 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6100 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 6101 return; 6102 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6103 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 6104 return; 6105 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6106 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 6107 return; 6108 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6109 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 6110 return; 6111 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6112 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 6113 return; 6114 } 6115 break; 6116 } 6117 case AArch64ISD::LD3post: { 6118 if (VT == MVT::v8i8) { 6119 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 6120 return; 6121 } else if (VT == MVT::v16i8) { 6122 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 6123 return; 6124 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6125 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 6126 return; 6127 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6128 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 6129 return; 6130 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6131 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 6132 return; 6133 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6134 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 6135 return; 6136 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6137 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 6138 return; 6139 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6140 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 6141 return; 6142 } 6143 break; 6144 } 6145 case AArch64ISD::LD4post: { 6146 if (VT == MVT::v8i8) { 6147 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 6148 return; 6149 } else if (VT == MVT::v16i8) { 6150 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 6151 return; 6152 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6153 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 6154 return; 6155 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6156 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 6157 return; 6158 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6159 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 6160 return; 6161 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6162 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 6163 return; 6164 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6165 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 6166 return; 6167 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6168 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 6169 return; 6170 } 6171 break; 6172 } 6173 case 
AArch64ISD::LD1x2post: { 6174 if (VT == MVT::v8i8) { 6175 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 6176 return; 6177 } else if (VT == MVT::v16i8) { 6178 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 6179 return; 6180 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6181 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 6182 return; 6183 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6184 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 6185 return; 6186 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6187 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 6188 return; 6189 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6190 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 6191 return; 6192 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6193 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 6194 return; 6195 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6196 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 6197 return; 6198 } 6199 break; 6200 } 6201 case AArch64ISD::LD1x3post: { 6202 if (VT == MVT::v8i8) { 6203 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 6204 return; 6205 } else if (VT == MVT::v16i8) { 6206 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 6207 return; 6208 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6209 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 6210 return; 6211 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6212 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 6213 return; 6214 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6215 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 6216 return; 6217 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6218 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 6219 return; 6220 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6221 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 6222 return; 6223 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6224 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 6225 return; 6226 } 6227 break; 6228 } 6229 case AArch64ISD::LD1x4post: { 6230 if (VT == MVT::v8i8) { 6231 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 6232 return; 6233 } else if (VT == MVT::v16i8) { 6234 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); 6235 return; 6236 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6237 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 6238 return; 6239 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6240 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 6241 return; 6242 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6243 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 6244 return; 6245 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6246 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 6247 return; 6248 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6249 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 6250 return; 6251 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6252 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 6253 return; 6254 } 6255 break; 6256 } 6257 case 
AArch64ISD::LD1DUPpost: { 6258 if (VT == MVT::v8i8) { 6259 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 6260 return; 6261 } else if (VT == MVT::v16i8) { 6262 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 6263 return; 6264 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6265 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 6266 return; 6267 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6268 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 6269 return; 6270 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6271 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 6272 return; 6273 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6274 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 6275 return; 6276 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6277 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 6278 return; 6279 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6280 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 6281 return; 6282 } 6283 break; 6284 } 6285 case AArch64ISD::LD2DUPpost: { 6286 if (VT == MVT::v8i8) { 6287 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 6288 return; 6289 } else if (VT == MVT::v16i8) { 6290 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 6291 return; 6292 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6293 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 6294 return; 6295 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6296 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 6297 return; 6298 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6299 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 6300 return; 6301 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6302 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 6303 return; 6304 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6305 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 6306 return; 6307 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6308 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 6309 return; 6310 } 6311 break; 6312 } 6313 case AArch64ISD::LD3DUPpost: { 6314 if (VT == MVT::v8i8) { 6315 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 6316 return; 6317 } else if (VT == MVT::v16i8) { 6318 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 6319 return; 6320 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6321 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); 6322 return; 6323 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6324 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 6325 return; 6326 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6327 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 6328 return; 6329 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6330 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 6331 return; 6332 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6333 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 6334 return; 6335 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6336 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 6337 return; 6338 } 6339 break; 6340 } 6341 case AArch64ISD::LD4DUPpost: { 6342 if (VT == MVT::v8i8) { 6343 
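      // LD4R: post-incremented load of four lane-replicated 8-bit vectors
      // into consecutive D registers.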
      SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
      return;
    }
    break;
  }
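  // For the post-incremented store cases below, the value type is taken from
  // the stored value (operand 1) rather than from the node's own results.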
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
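  // The ST2LANEpost..ST4LANEpost cases below select post-incremented
  // single-lane stores; the lane width is derived from the vector element
  // type.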
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into a
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOptLevel OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated to a memory operation in \p
/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
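  // For example, the SVE LD1 variants below carry the memory VT as a VTSDNode
  // operand, whereas the structured SVE_LD2/3/4_MERGE_ZERO nodes derive it
  // from the width of their governing predicate (e.g. an nxv8i1 predicate
  // with NumVec == 2 yields nxv16i16).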
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
    return EVT();

  switch (Root->getConstantOperandVal(1)) {
  default:
    return EVT();
  case Intrinsic::aarch64_sme_ldr:
  case Intrinsic::aarch64_sme_str:
    return MVT::nxv16i8;
  case Intrinsic::aarch64_sve_prf:
    // We are using an SVE prefetch intrinsic. Type must be inferred from the
    // width of the predicate.
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_st2q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_st3q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_st4q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_st1udq:
    return EVT(MVT::nxv1i64);
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_st1uwq:
    return EVT(MVT::nxv1i32);
  }
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
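/// For example, with MemVT = nxv4i32 the memory width is 16 * vscale bytes,
/// so an address of the form (add %base, (vscale * 32)) folds to Base = %base
/// and OffImm = 2, provided 2 lies within [Min, Max].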
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
      return true;
    }

    return false;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8 bit data does not come with the SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
    if (ImmOff % Size)
      return false;

    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
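  // For example, with Scale == 2 an address of the form (add x0, (shl x1, 2))
  // is matched as Base = x0 and Offset = x1, i.e. the scaled form
  // [x0, x1, lsl #2].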
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(*CurDAG, N);
}

bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
  EVT VT = N.getValueType();
  return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
}

bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                             SDValue &Base, SDValue &Offset,
                                             unsigned Scale) {
  // Try to untangle an ADD node into a 'reg + offset'
  if (N.getOpcode() == ISD::ADD)
    if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t ImmOff = C->getSExtValue();
      if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
        Base = N.getOperand(0);
        Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
        return true;
      }
    }

  // By default, just match reg + 0.
  Base = N;
  Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
  return true;
}