1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the AArch64 target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64MachineFunctionInfo.h" 14 #include "AArch64TargetMachine.h" 15 #include "MCTargetDesc/AArch64AddressingModes.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/CodeGen/ISDOpcodes.h" 18 #include "llvm/CodeGen/SelectionDAGISel.h" 19 #include "llvm/IR/Function.h" // To access function attributes. 20 #include "llvm/IR/GlobalValue.h" 21 #include "llvm/IR/Intrinsics.h" 22 #include "llvm/IR/IntrinsicsAArch64.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/KnownBits.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/raw_ostream.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "aarch64-isel" 32 #define PASS_NAME "AArch64 Instruction Selection" 33 34 //===--------------------------------------------------------------------===// 35 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine 36 /// instructions for SelectionDAG operations. 37 /// 38 namespace { 39 40 class AArch64DAGToDAGISel : public SelectionDAGISel { 41 42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 43 /// make the right decision when generating code for different targets. 44 const AArch64Subtarget *Subtarget; 45 46 public: 47 static char ID; 48 49 AArch64DAGToDAGISel() = delete; 50 51 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, 52 CodeGenOptLevel OptLevel) 53 : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {} 54 55 bool runOnMachineFunction(MachineFunction &MF) override { 56 Subtarget = &MF.getSubtarget<AArch64Subtarget>(); 57 return SelectionDAGISel::runOnMachineFunction(MF); 58 } 59 60 void Select(SDNode *Node) override; 61 62 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 63 /// inline asm expressions. 
64 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 65 InlineAsm::ConstraintCode ConstraintID, 66 std::vector<SDValue> &OutOps) override; 67 68 template <signed Low, signed High, signed Scale> 69 bool SelectRDVLImm(SDValue N, SDValue &Imm); 70 71 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); 72 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift); 73 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 74 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 75 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 76 return SelectShiftedRegister(N, false, Reg, Shift); 77 } 78 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 79 return SelectShiftedRegister(N, true, Reg, Shift); 80 } 81 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { 82 return SelectAddrModeIndexed7S(N, 1, Base, OffImm); 83 } 84 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { 85 return SelectAddrModeIndexed7S(N, 2, Base, OffImm); 86 } 87 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { 88 return SelectAddrModeIndexed7S(N, 4, Base, OffImm); 89 } 90 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { 91 return SelectAddrModeIndexed7S(N, 8, Base, OffImm); 92 } 93 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { 94 return SelectAddrModeIndexed7S(N, 16, Base, OffImm); 95 } 96 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) { 97 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm); 98 } 99 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) { 100 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm); 101 } 102 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { 103 return SelectAddrModeIndexed(N, 1, Base, OffImm); 104 } 105 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { 106 return SelectAddrModeIndexed(N, 2, Base, OffImm); 107 } 108 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { 109 return SelectAddrModeIndexed(N, 4, Base, OffImm); 110 } 111 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { 112 return SelectAddrModeIndexed(N, 8, Base, OffImm); 113 } 114 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { 115 return SelectAddrModeIndexed(N, 16, Base, OffImm); 116 } 117 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { 118 return SelectAddrModeUnscaled(N, 1, Base, OffImm); 119 } 120 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { 121 return SelectAddrModeUnscaled(N, 2, Base, OffImm); 122 } 123 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { 124 return SelectAddrModeUnscaled(N, 4, Base, OffImm); 125 } 126 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { 127 return SelectAddrModeUnscaled(N, 8, Base, OffImm); 128 } 129 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { 130 return SelectAddrModeUnscaled(N, 16, Base, OffImm); 131 } 132 template <unsigned Size, unsigned Max> 133 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) { 134 // Test if there is an appropriate addressing mode and check if the 135 // immediate fits. 
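    // Illustrative example (not from the original source): with Size == 8, a
    // byte offset of 24 becomes a scaled index of 3, which is kept as long as
    // 3 <= Max; larger scaled offsets fall through to the base-only form
    // below.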
136 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm); 137 if (Found) { 138 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) { 139 int64_t C = CI->getSExtValue(); 140 if (C <= Max) 141 return true; 142 } 143 } 144 145 // Otherwise, base only, materialize address in register. 146 Base = N; 147 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); 148 return true; 149 } 150 151 template<int Width> 152 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, 153 SDValue &SignExtend, SDValue &DoShift) { 154 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 155 } 156 157 template<int Width> 158 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, 159 SDValue &SignExtend, SDValue &DoShift) { 160 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 161 } 162 163 bool SelectExtractHigh(SDValue N, SDValue &Res) { 164 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST) 165 N = N->getOperand(0); 166 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR || 167 !isa<ConstantSDNode>(N->getOperand(1))) 168 return false; 169 EVT VT = N->getValueType(0); 170 EVT LVT = N->getOperand(0).getValueType(); 171 unsigned Index = N->getConstantOperandVal(1); 172 if (!VT.is64BitVector() || !LVT.is128BitVector() || 173 Index != VT.getVectorNumElements()) 174 return false; 175 Res = N->getOperand(0); 176 return true; 177 } 178 179 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) { 180 if (N.getOpcode() != AArch64ISD::VLSHR) 181 return false; 182 SDValue Op = N->getOperand(0); 183 EVT VT = Op.getValueType(); 184 unsigned ShtAmt = N->getConstantOperandVal(1); 185 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD) 186 return false; 187 188 APInt Imm; 189 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift) 190 Imm = APInt(VT.getScalarSizeInBits(), 191 Op.getOperand(1).getConstantOperandVal(0) 192 << Op.getOperand(1).getConstantOperandVal(1)); 193 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP && 194 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0))) 195 Imm = APInt(VT.getScalarSizeInBits(), 196 Op.getOperand(1).getConstantOperandVal(0)); 197 else 198 return false; 199 200 if (Imm != 1ULL << (ShtAmt - 1)) 201 return false; 202 203 Res1 = Op.getOperand(0); 204 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32); 205 return true; 206 } 207 208 bool SelectDupZeroOrUndef(SDValue N) { 209 switch(N->getOpcode()) { 210 case ISD::UNDEF: 211 return true; 212 case AArch64ISD::DUP: 213 case ISD::SPLAT_VECTOR: { 214 auto Opnd0 = N->getOperand(0); 215 if (isNullConstant(Opnd0)) 216 return true; 217 if (isNullFPConstant(Opnd0)) 218 return true; 219 break; 220 } 221 default: 222 break; 223 } 224 225 return false; 226 } 227 228 bool SelectDupZero(SDValue N) { 229 switch(N->getOpcode()) { 230 case AArch64ISD::DUP: 231 case ISD::SPLAT_VECTOR: { 232 auto Opnd0 = N->getOperand(0); 233 if (isNullConstant(Opnd0)) 234 return true; 235 if (isNullFPConstant(Opnd0)) 236 return true; 237 break; 238 } 239 } 240 241 return false; 242 } 243 244 bool SelectDupNegativeZero(SDValue N) { 245 switch(N->getOpcode()) { 246 case AArch64ISD::DUP: 247 case ISD::SPLAT_VECTOR: { 248 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0)); 249 return Const && Const->isZero() && Const->isNegative(); 250 } 251 } 252 253 return false; 254 } 255 256 template<MVT::SimpleValueType VT> 257 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) { 258 return SelectSVEAddSubImm(N, VT, Imm, Shift); 
259 } 260 261 template <MVT::SimpleValueType VT> 262 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) { 263 return SelectSVECpyDupImm(N, VT, Imm, Shift); 264 } 265 266 template <MVT::SimpleValueType VT, bool Invert = false> 267 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) { 268 return SelectSVELogicalImm(N, VT, Imm, Invert); 269 } 270 271 template <MVT::SimpleValueType VT> 272 bool SelectSVEArithImm(SDValue N, SDValue &Imm) { 273 return SelectSVEArithImm(N, VT, Imm); 274 } 275 276 template <unsigned Low, unsigned High, bool AllowSaturation = false> 277 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { 278 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); 279 } 280 281 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) { 282 if (N->getOpcode() != ISD::SPLAT_VECTOR) 283 return false; 284 285 EVT EltVT = N->getValueType(0).getVectorElementType(); 286 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1, 287 /* High */ EltVT.getFixedSizeInBits(), 288 /* AllowSaturation */ true, Imm); 289 } 290 291 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 292 template<signed Min, signed Max, signed Scale, bool Shift> 293 bool SelectCntImm(SDValue N, SDValue &Imm) { 294 if (!isa<ConstantSDNode>(N)) 295 return false; 296 297 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 298 if (Shift) 299 MulImm = 1LL << MulImm; 300 301 if ((MulImm % std::abs(Scale)) != 0) 302 return false; 303 304 MulImm /= Scale; 305 if ((MulImm >= Min) && (MulImm <= Max)) { 306 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); 307 return true; 308 } 309 310 return false; 311 } 312 313 template <signed Max, signed Scale> 314 bool SelectEXTImm(SDValue N, SDValue &Imm) { 315 if (!isa<ConstantSDNode>(N)) 316 return false; 317 318 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 319 320 if (MulImm >= 0 && MulImm <= Max) { 321 MulImm *= Scale; 322 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); 323 return true; 324 } 325 326 return false; 327 } 328 329 template <unsigned BaseReg, unsigned Max> 330 bool ImmToReg(SDValue N, SDValue &Imm) { 331 if (auto *CI = dyn_cast<ConstantSDNode>(N)) { 332 uint64_t C = CI->getZExtValue(); 333 334 if (C > Max) 335 return false; 336 337 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other); 338 return true; 339 } 340 return false; 341 } 342 343 /// Form sequences of consecutive 64/128-bit registers for use in NEON 344 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have 345 /// between 1 and 4 elements. If it contains a single element that is returned 346 /// unchanged; otherwise a REG_SEQUENCE value is returned. 347 SDValue createDTuple(ArrayRef<SDValue> Vecs); 348 SDValue createQTuple(ArrayRef<SDValue> Vecs); 349 // Form a sequence of SVE registers for instructions using list of vectors, 350 // e.g. structured loads and stores (ldN, stN). 351 SDValue createZTuple(ArrayRef<SDValue> Vecs); 352 353 // Similar to above, except the register must start at a multiple of the 354 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple. 355 SDValue createZMulTuple(ArrayRef<SDValue> Regs); 356 357 /// Generic helper for the createDTuple/createQTuple 358 /// functions. Those should almost always be called instead. 
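  /// For illustration (assumed usage, not from the original source):
  /// createQTuple({A, B}) yields a REG_SEQUENCE placing A in qsub0 and B in
  /// qsub1 of a QQ register, which vector-list instructions then consume.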
359 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[], 360 const unsigned SubRegs[]); 361 362 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); 363 364 bool tryIndexedLoad(SDNode *N); 365 366 bool trySelectStackSlotTagP(SDNode *N); 367 void SelectTagP(SDNode *N); 368 369 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 370 unsigned SubRegIdx); 371 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 372 unsigned SubRegIdx); 373 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 374 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 375 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, 376 unsigned Opc_rr, unsigned Opc_ri, 377 bool IsIntr = false); 378 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs, 379 unsigned Scale, unsigned Opc_ri, 380 unsigned Opc_rr); 381 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs, 382 bool IsZmMulti, unsigned Opcode, 383 bool HasPred = false); 384 void SelectPExtPair(SDNode *N, unsigned Opc); 385 void SelectWhilePair(SDNode *N, unsigned Opc); 386 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode); 387 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode); 388 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs, 389 bool IsTupleInput, unsigned Opc); 390 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode); 391 392 template <unsigned MaxIdx, unsigned Scale> 393 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg, 394 unsigned Op); 395 396 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); 397 /// SVE Reg+Imm addressing mode. 398 template <int64_t Min, int64_t Max> 399 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base, 400 SDValue &OffImm); 401 /// SVE Reg+Reg address mode. 402 template <unsigned Scale> 403 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) { 404 return SelectSVERegRegAddrMode(N, Scale, Base, Offset); 405 } 406 407 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc, 408 uint32_t MaxImm); 409 410 template <unsigned MaxIdx, unsigned Scale> 411 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { 412 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale); 413 } 414 415 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); 416 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); 417 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 418 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 419 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale, 420 unsigned Opc_rr, unsigned Opc_ri); 421 std::tuple<unsigned, SDValue, SDValue> 422 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri, 423 const SDValue &OldBase, const SDValue &OldOffset, 424 unsigned Scale); 425 426 bool tryBitfieldExtractOp(SDNode *N); 427 bool tryBitfieldExtractOpFromSExt(SDNode *N); 428 bool tryBitfieldInsertOp(SDNode *N); 429 bool tryBitfieldInsertInZeroOp(SDNode *N); 430 bool tryShiftAmountMod(SDNode *N); 431 432 bool tryReadRegister(SDNode *N); 433 bool tryWriteRegister(SDNode *N); 434 435 bool trySelectCastFixedLengthToScalableVector(SDNode *N); 436 bool trySelectCastScalableToFixedLengthVector(SDNode *N); 437 438 bool trySelectXAR(SDNode *N); 439 440 // Include the pieces autogenerated from the target description. 
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
  bool isWorthFoldingAddr(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
                                     unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);

  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);
  bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
                          SDValue &Offset, unsigned Scale = 1);

  bool SelectAllActivePredicate(SDValue N);
  bool SelectAnyPredicate(SDValue N);
};
} // end anonymous namespace

char AArch64DAGToDAGISel::ID = 0;

INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so, Imm will receive the zero-extended value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if the operand is a constant.
// If so, Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node has the
// specified opcode and an immediate integer right operand.
// If so, Imm will receive the value.
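// For example (illustrative only): isOpcWithIntImmediate(N, ISD::AND, Imm)
// matches a node (and x, 0xfff) and sets Imm to 0xfff.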
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

// isIntImmediateEq - This method tests to see if N is a constant operand that
// is equivalent to 'ImmExpected'.
#ifndef NDEBUG
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
  uint64_t Imm;
  if (!isIntImmediate(N.getNode(), Imm))
    return false;
  return Imm == ImmExpected;
}
#endif

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it is interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = N.getNode()->getAsZExtVal();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it is interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
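  // Illustrative example (assumed values): for an i32 constant -20 the value
  // read below is 0xffffffec; the negation turns it into 20, which
  // SelectArithImmed can then encode as an ordinary 12-bit immediate.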
  uint64_t Immed = N.getNode()->getAsZExtVal();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding a logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}

/// Determine whether it is worthwhile to fold V into an extended register
/// addressing mode.
bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;
  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the addressing mode and save a cycle.
  if (Subtarget->hasAddrLSLFast() && V.getOpcode() == ISD::SHL &&
      isWorthFoldingSHL(V))
    return true;
  if (Subtarget->hasAddrLSLFast() && V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
697 return false; 698 } 699 700 /// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2 701 /// to select more shifted register 702 bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, 703 SDValue &Shift) { 704 EVT VT = N.getValueType(); 705 if (VT != MVT::i32 && VT != MVT::i64) 706 return false; 707 708 if (N->getOpcode() != ISD::AND || !N->hasOneUse()) 709 return false; 710 SDValue LHS = N.getOperand(0); 711 if (!LHS->hasOneUse()) 712 return false; 713 714 unsigned LHSOpcode = LHS->getOpcode(); 715 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA) 716 return false; 717 718 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1)); 719 if (!ShiftAmtNode) 720 return false; 721 722 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue(); 723 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1)); 724 if (!RHSC) 725 return false; 726 727 APInt AndMask = RHSC->getAPIntValue(); 728 unsigned LowZBits, MaskLen; 729 if (!AndMask.isShiftedMask(LowZBits, MaskLen)) 730 return false; 731 732 unsigned BitWidth = N.getValueSizeInBits(); 733 SDLoc DL(LHS); 734 uint64_t NewShiftC; 735 unsigned NewShiftOp; 736 if (LHSOpcode == ISD::SHL) { 737 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp 738 // BitWidth != LowZBits + MaskLen doesn't match the pattern 739 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen)) 740 return false; 741 742 NewShiftC = LowZBits - ShiftAmtC; 743 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri; 744 } else { 745 if (LowZBits == 0) 746 return false; 747 748 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp 749 NewShiftC = LowZBits + ShiftAmtC; 750 if (NewShiftC >= BitWidth) 751 return false; 752 753 // SRA need all high bits 754 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen))) 755 return false; 756 757 // SRL high bits can be 0 or 1 758 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen))) 759 return false; 760 761 if (LHSOpcode == ISD::SRL) 762 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri; 763 else 764 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri; 765 } 766 767 assert(NewShiftC < BitWidth && "Invalid shift amount"); 768 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT); 769 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT); 770 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0), 771 NewShiftAmt, BitWidthMinus1), 772 0); 773 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits); 774 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32); 775 return true; 776 } 777 778 /// getExtendTypeForNode - Translate an extend node to the corresponding 779 /// ExtendType value. 
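/// For example (illustrative): (sign_extend_inreg x, i16) maps to SXTH,
/// a zero/any-extend from an i32 source maps to UXTW, and (and x, 0xff) maps
/// to UXTB; with IsLoadStore set, the 8- and 16-bit forms are rejected.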
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}

/// Determine whether it is worthwhile to fold V into an extended register
/// operand of an Add/Sub. LSL means we are folding into an
/// `add w0, w1, w2, lsl #N` instruction, and the shift should be treated as
/// worth folding even if it has multiple uses.
bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;

  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the add/sub and save a cycle.
  if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
      V.getConstantOperandVal(1) <= 4 &&
      getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
    return true;

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to the default of "LSL 0". The
/// logical instructions allow the shifted register to be rotated, but the
/// arithmetic instructions do not. The AllowROR parameter specifies whether
/// ROR is supported.
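/// For illustration (assumed example): given N == (shl y, 3), this returns
/// Reg == y and a Shift operand encoding "LSL #3", allowing the parent
/// arithmetic or logical node to use the shifted-register form, e.g.
/// "add x0, x1, x2, lsl #3".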
858 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, 859 SDValue &Reg, SDValue &Shift) { 860 if (SelectShiftedRegisterFromAnd(N, Reg, Shift)) 861 return true; 862 863 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); 864 if (ShType == AArch64_AM::InvalidShiftExtend) 865 return false; 866 if (!AllowROR && ShType == AArch64_AM::ROR) 867 return false; 868 869 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 870 unsigned BitSize = N.getValueSizeInBits(); 871 unsigned Val = RHS->getZExtValue() & (BitSize - 1); 872 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); 873 874 Reg = N.getOperand(0); 875 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); 876 return isWorthFoldingALU(N, true); 877 } 878 879 return false; 880 } 881 882 /// Instructions that accept extend modifiers like UXTW expect the register 883 /// being extended to be a GPR32, but the incoming DAG might be acting on a 884 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if 885 /// this is the case. 886 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { 887 if (N.getValueType() == MVT::i32) 888 return N; 889 890 SDLoc dl(N); 891 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N); 892 } 893 894 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 895 template<signed Low, signed High, signed Scale> 896 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) { 897 if (!isa<ConstantSDNode>(N)) 898 return false; 899 900 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 901 if ((MulImm % std::abs(Scale)) == 0) { 902 int64_t RDVLImm = MulImm / Scale; 903 if ((RDVLImm >= Low) && (RDVLImm <= High)) { 904 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32); 905 return true; 906 } 907 } 908 909 return false; 910 } 911 912 /// SelectArithExtendedRegister - Select a "extended register" operand. This 913 /// operand folds in an extend followed by an optional left shift. 914 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, 915 SDValue &Shift) { 916 unsigned ShiftVal = 0; 917 AArch64_AM::ShiftExtendType Ext; 918 919 if (N.getOpcode() == ISD::SHL) { 920 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 921 if (!CSD) 922 return false; 923 ShiftVal = CSD->getZExtValue(); 924 if (ShiftVal > 4) 925 return false; 926 927 Ext = getExtendTypeForNode(N.getOperand(0)); 928 if (Ext == AArch64_AM::InvalidShiftExtend) 929 return false; 930 931 Reg = N.getOperand(0).getOperand(0); 932 } else { 933 Ext = getExtendTypeForNode(N); 934 if (Ext == AArch64_AM::InvalidShiftExtend) 935 return false; 936 937 Reg = N.getOperand(0); 938 939 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the 940 // isDef32 as a heuristic for when the operand is likely to be a 32bit def. 941 auto isDef32 = [](SDValue N) { 942 unsigned Opc = N.getOpcode(); 943 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG && 944 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext && 945 Opc != ISD::AssertZext && Opc != ISD::AssertAlign && 946 Opc != ISD::FREEZE; 947 }; 948 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 && 949 isDef32(Reg)) 950 return false; 951 } 952 953 // AArch64 mandates that the RHS of the operation must use the smallest 954 // register class that could contain the size being extended from. 
  // Thus, if we're folding a (sext i8), we need the RHS to be a GPR32, even
  // though there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}

/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
/// operand is used by instructions that have an SP operand.
bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
                                                  SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() != ISD::SHL)
    return false;

  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD)
    return false;
  ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 4)
    return false;

  Ext = AArch64_AM::UXTX;
  Reg = N.getOperand(0);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto *Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
      return false;
  }

  return true;
}

/// Check if the immediate offset is valid as a scaled immediate.
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
                                     unsigned Size) {
  if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
      Offset < (Range << Log2_32(Size)))
    return true;
  return false;
}

/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed
/// BW-bit immediate" address. The "Size" argument is the size in bytes of the
/// memory reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
  // signed addressing mode selected here doesn't support labels/immediates,
  // only base+offset.
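  // Illustrative example (assumed parameters): with BW == 7 and Size == 16,
  // the accepted byte offsets are multiples of 16 in [-1024, 1008], and the
  // value encoded below is the scaled offset RHSC >> 4.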
1038 if (CurDAG->isBaseWithConstantOffset(N)) { 1039 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1040 if (IsSignedImm) { 1041 int64_t RHSC = RHS->getSExtValue(); 1042 unsigned Scale = Log2_32(Size); 1043 int64_t Range = 0x1LL << (BW - 1); 1044 1045 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && 1046 RHSC < (Range << Scale)) { 1047 Base = N.getOperand(0); 1048 if (Base.getOpcode() == ISD::FrameIndex) { 1049 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1050 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1051 } 1052 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 1053 return true; 1054 } 1055 } else { 1056 // unsigned Immediate 1057 uint64_t RHSC = RHS->getZExtValue(); 1058 unsigned Scale = Log2_32(Size); 1059 uint64_t Range = 0x1ULL << BW; 1060 1061 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { 1062 Base = N.getOperand(0); 1063 if (Base.getOpcode() == ISD::FrameIndex) { 1064 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1065 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1066 } 1067 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 1068 return true; 1069 } 1070 } 1071 } 1072 } 1073 // Base only. The address will be materialized into a register before 1074 // the memory is accessed. 1075 // add x0, Xbase, #offset 1076 // stp x1, x2, [x0] 1077 Base = N; 1078 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1079 return true; 1080 } 1081 1082 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit 1083 /// immediate" address. The "Size" argument is the size in bytes of the memory 1084 /// reference, which determines the scale. 1085 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, 1086 SDValue &Base, SDValue &OffImm) { 1087 SDLoc dl(N); 1088 const DataLayout &DL = CurDAG->getDataLayout(); 1089 const TargetLowering *TLI = getTargetLowering(); 1090 if (N.getOpcode() == ISD::FrameIndex) { 1091 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1092 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1093 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1094 return true; 1095 } 1096 1097 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { 1098 GlobalAddressSDNode *GAN = 1099 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); 1100 Base = N.getOperand(0); 1101 OffImm = N.getOperand(1); 1102 if (!GAN) 1103 return true; 1104 1105 if (GAN->getOffset() % Size == 0 && 1106 GAN->getGlobal()->getPointerAlignment(DL) >= Size) 1107 return true; 1108 } 1109 1110 if (CurDAG->isBaseWithConstantOffset(N)) { 1111 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1112 int64_t RHSC = (int64_t)RHS->getZExtValue(); 1113 unsigned Scale = Log2_32(Size); 1114 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) { 1115 Base = N.getOperand(0); 1116 if (Base.getOpcode() == ISD::FrameIndex) { 1117 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1118 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1119 } 1120 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 1121 return true; 1122 } 1123 } 1124 } 1125 1126 // Before falling back to our general case, check if the unscaled 1127 // instructions can handle this. If so, that's preferable. 1128 if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) 1129 return false; 1130 1131 // Base only. The address will be materialized into a register before 1132 // the memory is accessed. 
1133 // add x0, Xbase, #offset 1134 // ldr x0, [x0] 1135 Base = N; 1136 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1137 return true; 1138 } 1139 1140 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit 1141 /// immediate" address. This should only match when there is an offset that 1142 /// is not valid for a scaled immediate addressing mode. The "Size" argument 1143 /// is the size in bytes of the memory reference, which is needed here to know 1144 /// what is valid for a scaled immediate. 1145 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, 1146 SDValue &Base, 1147 SDValue &OffImm) { 1148 if (!CurDAG->isBaseWithConstantOffset(N)) 1149 return false; 1150 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1151 int64_t RHSC = RHS->getSExtValue(); 1152 if (RHSC >= -256 && RHSC < 256) { 1153 Base = N.getOperand(0); 1154 if (Base.getOpcode() == ISD::FrameIndex) { 1155 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1156 const TargetLowering *TLI = getTargetLowering(); 1157 Base = CurDAG->getTargetFrameIndex( 1158 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1159 } 1160 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); 1161 return true; 1162 } 1163 } 1164 return false; 1165 } 1166 1167 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { 1168 SDLoc dl(N); 1169 SDValue ImpDef = SDValue( 1170 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); 1171 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef, 1172 N); 1173 } 1174 1175 /// Check if the given SHL node (\p N), can be used to form an 1176 /// extended register for an addressing mode. 1177 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, 1178 bool WantExtend, SDValue &Offset, 1179 SDValue &SignExtend) { 1180 assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); 1181 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1182 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) 1183 return false; 1184 1185 SDLoc dl(N); 1186 if (WantExtend) { 1187 AArch64_AM::ShiftExtendType Ext = 1188 getExtendTypeForNode(N.getOperand(0), true); 1189 if (Ext == AArch64_AM::InvalidShiftExtend) 1190 return false; 1191 1192 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); 1193 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1194 MVT::i32); 1195 } else { 1196 Offset = N.getOperand(0); 1197 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); 1198 } 1199 1200 unsigned LegalShiftVal = Log2_32(Size); 1201 unsigned ShiftVal = CSD->getZExtValue(); 1202 1203 if (ShiftVal != 0 && ShiftVal != LegalShiftVal) 1204 return false; 1205 1206 return isWorthFoldingAddr(N); 1207 } 1208 1209 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, 1210 SDValue &Base, SDValue &Offset, 1211 SDValue &SignExtend, 1212 SDValue &DoShift) { 1213 if (N.getOpcode() != ISD::ADD) 1214 return false; 1215 SDValue LHS = N.getOperand(0); 1216 SDValue RHS = N.getOperand(1); 1217 SDLoc dl(N); 1218 1219 // We don't want to match immediate adds here, because they are better lowered 1220 // to the register-immediate addressing modes. 1221 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) 1222 return false; 1223 1224 // Check if this particular node is reused in any non-memory related 1225 // operation. If yes, do not try to fold this node into the address 1226 // computation, since the computation will be kept. 
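  // (For illustration only: when folding does go ahead, a 4-byte load of
  // (add base, (shl (sext w), 2)) can be selected as
  // "ldr w0, [x0, w1, sxtw #2]", with both the extend and the shift folded
  // into the addressing mode.)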
1227 const SDNode *Node = N.getNode(); 1228 for (SDNode *UI : Node->uses()) { 1229 if (!isa<MemSDNode>(*UI)) 1230 return false; 1231 } 1232 1233 // Remember if it is worth folding N when it produces extended register. 1234 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N); 1235 1236 // Try to match a shifted extend on the RHS. 1237 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1238 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { 1239 Base = LHS; 1240 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1241 return true; 1242 } 1243 1244 // Try to match a shifted extend on the LHS. 1245 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1246 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { 1247 Base = RHS; 1248 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1249 return true; 1250 } 1251 1252 // There was no shift, whatever else we find. 1253 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); 1254 1255 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; 1256 // Try to match an unshifted extend on the LHS. 1257 if (IsExtendedRegisterWorthFolding && 1258 (Ext = getExtendTypeForNode(LHS, true)) != 1259 AArch64_AM::InvalidShiftExtend) { 1260 Base = RHS; 1261 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); 1262 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1263 MVT::i32); 1264 if (isWorthFoldingAddr(LHS)) 1265 return true; 1266 } 1267 1268 // Try to match an unshifted extend on the RHS. 1269 if (IsExtendedRegisterWorthFolding && 1270 (Ext = getExtendTypeForNode(RHS, true)) != 1271 AArch64_AM::InvalidShiftExtend) { 1272 Base = LHS; 1273 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); 1274 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1275 MVT::i32); 1276 if (isWorthFoldingAddr(RHS)) 1277 return true; 1278 } 1279 1280 return false; 1281 } 1282 1283 // Check if the given immediate is preferred by ADD. If an immediate can be 1284 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be 1285 // encoded by one MOVZ, return true. 1286 static bool isPreferredADD(int64_t ImmOff) { 1287 // Constant in [0x0, 0xfff] can be encoded in ADD. 1288 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) 1289 return true; 1290 // Check if it can be encoded in an "ADD LSL #12". 1291 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) 1292 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant. 1293 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && 1294 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; 1295 return false; 1296 } 1297 1298 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, 1299 SDValue &Base, SDValue &Offset, 1300 SDValue &SignExtend, 1301 SDValue &DoShift) { 1302 if (N.getOpcode() != ISD::ADD) 1303 return false; 1304 SDValue LHS = N.getOperand(0); 1305 SDValue RHS = N.getOperand(1); 1306 SDLoc DL(N); 1307 1308 // Check if this particular node is reused in any non-memory related 1309 // operation. If yes, do not try to fold this node into the address 1310 // computation, since the computation will be kept. 1311 const SDNode *Node = N.getNode(); 1312 for (SDNode *UI : Node->uses()) { 1313 if (!isa<MemSDNode>(*UI)) 1314 return false; 1315 } 1316 1317 // Watch out if RHS is a wide immediate, it can not be selected into 1318 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into 1319 // ADD/SUB. 
  // Instead, it will use the [BaseReg + 0] addressing mode and generate
  // instructions like:
  //     MOV  X0, WideImmediate
  //     ADD  X1, BaseReg, X0
  //     LDR  X2, [X1, 0]
  // In such a situation, using the [BaseReg, XReg] addressing mode saves one
  // ADD/SUB:
  //     MOV  X0, WideImmediate
  //     LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
    // Skip if the immediate can be selected by a load/store addressing mode.
    // Also skip if the immediate can be encoded by a single ADD (SUB is also
    // checked by using -ImmOff).
    if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
                                         AArch64::ZPR3RegClassID,
                                         AArch64::ZPR4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
  assert(Regs.size() == 2 || Regs.size() == 4);

  // The createTuple interface requires 3 RegClassIDs for each possible
  // tuple type even though we only have them for ZPR2 and ZPR4.
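  // The unused middle (3-tuple) entry below is therefore a placeholder;
  // callers must pass 2 or 4 registers, as asserted above.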
1406 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0, 1407 AArch64::ZPR4Mul4RegClassID}; 1408 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, 1409 AArch64::zsub2, AArch64::zsub3}; 1410 return createTuple(Regs, RegClassIDs, SubRegs); 1411 } 1412 1413 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, 1414 const unsigned RegClassIDs[], 1415 const unsigned SubRegs[]) { 1416 // There's no special register-class for a vector-list of 1 element: it's just 1417 // a vector. 1418 if (Regs.size() == 1) 1419 return Regs[0]; 1420 1421 assert(Regs.size() >= 2 && Regs.size() <= 4); 1422 1423 SDLoc DL(Regs[0]); 1424 1425 SmallVector<SDValue, 4> Ops; 1426 1427 // First operand of REG_SEQUENCE is the desired RegClass. 1428 Ops.push_back( 1429 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); 1430 1431 // Then we get pairs of source & subregister-position for the components. 1432 for (unsigned i = 0; i < Regs.size(); ++i) { 1433 Ops.push_back(Regs[i]); 1434 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); 1435 } 1436 1437 SDNode *N = 1438 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 1439 return SDValue(N, 0); 1440 } 1441 1442 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, 1443 bool isExt) { 1444 SDLoc dl(N); 1445 EVT VT = N->getValueType(0); 1446 1447 unsigned ExtOff = isExt; 1448 1449 // Form a REG_SEQUENCE to force register allocation. 1450 unsigned Vec0Off = ExtOff + 1; 1451 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, 1452 N->op_begin() + Vec0Off + NumVecs); 1453 SDValue RegSeq = createQTuple(Regs); 1454 1455 SmallVector<SDValue, 6> Ops; 1456 if (isExt) 1457 Ops.push_back(N->getOperand(1)); 1458 Ops.push_back(RegSeq); 1459 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); 1460 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 1461 } 1462 1463 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { 1464 LoadSDNode *LD = cast<LoadSDNode>(N); 1465 if (LD->isUnindexed()) 1466 return false; 1467 EVT VT = LD->getMemoryVT(); 1468 EVT DstVT = N->getValueType(0); 1469 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1470 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; 1471 1472 // We're not doing validity checking here. That was done when checking 1473 // if we should mark the load as indexed or not. We're just selecting 1474 // the right instruction. 1475 unsigned Opcode = 0; 1476 1477 ISD::LoadExtType ExtType = LD->getExtensionType(); 1478 bool InsertTo64 = false; 1479 if (VT == MVT::i64) 1480 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; 1481 else if (VT == MVT::i32) { 1482 if (ExtType == ISD::NON_EXTLOAD) 1483 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1484 else if (ExtType == ISD::SEXTLOAD) 1485 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; 1486 else { 1487 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1488 InsertTo64 = true; 1489 // The result of the load is only i32. It's the subreg_to_reg that makes 1490 // it into an i64. 1491 DstVT = MVT::i32; 1492 } 1493 } else if (VT == MVT::i16) { 1494 if (ExtType == ISD::SEXTLOAD) { 1495 if (DstVT == MVT::i64) 1496 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; 1497 else 1498 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; 1499 } else { 1500 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; 1501 InsertTo64 = DstVT == MVT::i64; 1502 // The result of the load is only i32. 
It's the subreg_to_reg that makes 1503 // it into an i64. 1504 DstVT = MVT::i32; 1505 } 1506 } else if (VT == MVT::i8) { 1507 if (ExtType == ISD::SEXTLOAD) { 1508 if (DstVT == MVT::i64) 1509 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; 1510 else 1511 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; 1512 } else { 1513 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; 1514 InsertTo64 = DstVT == MVT::i64; 1515 // The result of the load is only i32. It's the subreg_to_reg that makes 1516 // it into an i64. 1517 DstVT = MVT::i32; 1518 } 1519 } else if (VT == MVT::f16) { 1520 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1521 } else if (VT == MVT::bf16) { 1522 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1523 } else if (VT == MVT::f32) { 1524 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; 1525 } else if (VT == MVT::f64 || VT.is64BitVector()) { 1526 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; 1527 } else if (VT.is128BitVector()) { 1528 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; 1529 } else 1530 return false; 1531 SDValue Chain = LD->getChain(); 1532 SDValue Base = LD->getBasePtr(); 1533 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); 1534 int OffsetVal = (int)OffsetOp->getZExtValue(); 1535 SDLoc dl(N); 1536 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); 1537 SDValue Ops[] = { Base, Offset, Chain }; 1538 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, 1539 MVT::Other, Ops); 1540 1541 // Transfer memoperands. 1542 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1543 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp}); 1544 1545 // Either way, we're replacing the node, so tell the caller that. 1546 SDValue LoadedVal = SDValue(Res, 1); 1547 if (InsertTo64) { 1548 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 1549 LoadedVal = 1550 SDValue(CurDAG->getMachineNode( 1551 AArch64::SUBREG_TO_REG, dl, MVT::i64, 1552 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, 1553 SubReg), 1554 0); 1555 } 1556 1557 ReplaceUses(SDValue(N, 0), LoadedVal); 1558 ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); 1559 ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); 1560 CurDAG->RemoveDeadNode(N); 1561 return true; 1562 } 1563 1564 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 1565 unsigned SubRegIdx) { 1566 SDLoc dl(N); 1567 EVT VT = N->getValueType(0); 1568 SDValue Chain = N->getOperand(0); 1569 1570 SDValue Ops[] = {N->getOperand(2), // Mem operand; 1571 Chain}; 1572 1573 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1574 1575 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1576 SDValue SuperReg = SDValue(Ld, 0); 1577 for (unsigned i = 0; i < NumVecs; ++i) 1578 ReplaceUses(SDValue(N, i), 1579 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1580 1581 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1582 1583 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one, 1584 // because it's too simple to have needed special treatment during lowering. 
1585 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) { 1586 MachineMemOperand *MemOp = MemIntr->getMemOperand(); 1587 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 1588 } 1589 1590 CurDAG->RemoveDeadNode(N); 1591 } 1592 1593 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, 1594 unsigned Opc, unsigned SubRegIdx) { 1595 SDLoc dl(N); 1596 EVT VT = N->getValueType(0); 1597 SDValue Chain = N->getOperand(0); 1598 1599 SDValue Ops[] = {N->getOperand(1), // Mem operand 1600 N->getOperand(2), // Incremental 1601 Chain}; 1602 1603 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1604 MVT::Untyped, MVT::Other}; 1605 1606 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1607 1608 // Update uses of write back register 1609 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1610 1611 // Update uses of vector list 1612 SDValue SuperReg = SDValue(Ld, 1); 1613 if (NumVecs == 1) 1614 ReplaceUses(SDValue(N, 0), SuperReg); 1615 else 1616 for (unsigned i = 0; i < NumVecs; ++i) 1617 ReplaceUses(SDValue(N, i), 1618 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1619 1620 // Update the chain 1621 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1622 CurDAG->RemoveDeadNode(N); 1623 } 1624 1625 /// Optimize \param OldBase and \param OldOffset selecting the best addressing 1626 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the 1627 /// new Base and an SDValue representing the new offset. 1628 std::tuple<unsigned, SDValue, SDValue> 1629 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, 1630 unsigned Opc_ri, 1631 const SDValue &OldBase, 1632 const SDValue &OldOffset, 1633 unsigned Scale) { 1634 SDValue NewBase = OldBase; 1635 SDValue NewOffset = OldOffset; 1636 // Detect a possible Reg+Imm addressing mode. 1637 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>( 1638 N, OldBase, NewBase, NewOffset); 1639 1640 // Detect a possible reg+reg addressing mode, but only if we haven't already 1641 // detected a Reg+Imm one. 1642 const bool IsRegReg = 1643 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); 1644 1645 // Select the instruction. 1646 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); 1647 } 1648 1649 enum class SelectTypeKind { 1650 Int1 = 0, 1651 Int = 1, 1652 FP = 2, 1653 AnyType = 3, 1654 }; 1655 1656 /// This function selects an opcode from a list of opcodes, which is 1657 /// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit } 1658 /// element types, in this order. 
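/// For illustration (assuming a hypothetical opcode list {OpB, OpH, OpS, OpD}
/// ordered by element size), a call such as
///   SelectOpcodeFromVT<SelectTypeKind::Int>(MVT::nxv8i16, {OpB, OpH, OpS, OpD})
/// would return OpH: nxv8i16 is a scalable integer vector with a minimum of 8
/// elements, which maps to index 1. A VT that is not scalable, fails the Kind
/// check, or indexes past the end of the opcode list yields 0 ("no opcode").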
1659 template <SelectTypeKind Kind> 1660 static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) { 1661 // Only match scalable vector VTs 1662 if (!VT.isScalableVector()) 1663 return 0; 1664 1665 EVT EltVT = VT.getVectorElementType(); 1666 switch (Kind) { 1667 case SelectTypeKind::AnyType: 1668 break; 1669 case SelectTypeKind::Int: 1670 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 && 1671 EltVT != MVT::i64) 1672 return 0; 1673 break; 1674 case SelectTypeKind::Int1: 1675 if (EltVT != MVT::i1) 1676 return 0; 1677 break; 1678 case SelectTypeKind::FP: 1679 if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64) 1680 return 0; 1681 break; 1682 } 1683 1684 unsigned Offset; 1685 switch (VT.getVectorMinNumElements()) { 1686 case 16: // 8-bit 1687 Offset = 0; 1688 break; 1689 case 8: // 16-bit 1690 Offset = 1; 1691 break; 1692 case 4: // 32-bit 1693 Offset = 2; 1694 break; 1695 case 2: // 64-bit 1696 Offset = 3; 1697 break; 1698 default: 1699 return 0; 1700 } 1701 1702 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset]; 1703 } 1704 1705 // This function is almost identical to SelectWhilePair, but has an 1706 // extra check on the range of the immediate operand. 1707 // TODO: Merge these two functions together at some point? 1708 void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) { 1709 // Immediate can be either 0 or 1. 1710 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2))) 1711 if (Imm->getZExtValue() > 1) 1712 return; 1713 1714 SDLoc DL(N); 1715 EVT VT = N->getValueType(0); 1716 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; 1717 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 1718 SDValue SuperReg = SDValue(WhilePair, 0); 1719 1720 for (unsigned I = 0; I < 2; ++I) 1721 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 1722 AArch64::psub0 + I, DL, VT, SuperReg)); 1723 1724 CurDAG->RemoveDeadNode(N); 1725 } 1726 1727 void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) { 1728 SDLoc DL(N); 1729 EVT VT = N->getValueType(0); 1730 1731 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; 1732 1733 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 1734 SDValue SuperReg = SDValue(WhilePair, 0); 1735 1736 for (unsigned I = 0; I < 2; ++I) 1737 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 1738 AArch64::psub0 + I, DL, VT, SuperReg)); 1739 1740 CurDAG->RemoveDeadNode(N); 1741 } 1742 1743 void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, 1744 unsigned Opcode) { 1745 EVT VT = N->getValueType(0); 1746 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1747 SDValue Ops = createZTuple(Regs); 1748 SDLoc DL(N); 1749 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops); 1750 SDValue SuperReg = SDValue(Intrinsic, 0); 1751 for (unsigned i = 0; i < NumVecs; ++i) 1752 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1753 AArch64::zsub0 + i, DL, VT, SuperReg)); 1754 1755 CurDAG->RemoveDeadNode(N); 1756 } 1757 1758 void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N, 1759 unsigned NumVecs, 1760 bool IsZmMulti, 1761 unsigned Opcode, 1762 bool HasPred) { 1763 assert(Opcode != 0 && "Unexpected opcode"); 1764 1765 SDLoc DL(N); 1766 EVT VT = N->getValueType(0); 1767 unsigned FirstVecIdx = HasPred ? 
2 : 1; 1768 1769 auto GetMultiVecOperand = [=](unsigned StartIdx) { 1770 SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx, 1771 N->op_begin() + StartIdx + NumVecs); 1772 return createZMulTuple(Regs); 1773 }; 1774 1775 SDValue Zdn = GetMultiVecOperand(FirstVecIdx); 1776 1777 SDValue Zm; 1778 if (IsZmMulti) 1779 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx); 1780 else 1781 Zm = N->getOperand(NumVecs + FirstVecIdx); 1782 1783 SDNode *Intrinsic; 1784 if (HasPred) 1785 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, 1786 N->getOperand(1), Zdn, Zm); 1787 else 1788 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm); 1789 SDValue SuperReg = SDValue(Intrinsic, 0); 1790 for (unsigned i = 0; i < NumVecs; ++i) 1791 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1792 AArch64::zsub0 + i, DL, VT, SuperReg)); 1793 1794 CurDAG->RemoveDeadNode(N); 1795 } 1796 1797 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, 1798 unsigned Scale, unsigned Opc_ri, 1799 unsigned Opc_rr, bool IsIntr) { 1800 assert(Scale < 5 && "Invalid scaling value."); 1801 SDLoc DL(N); 1802 EVT VT = N->getValueType(0); 1803 SDValue Chain = N->getOperand(0); 1804 1805 // Optimize addressing mode. 1806 SDValue Base, Offset; 1807 unsigned Opc; 1808 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1809 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2), 1810 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); 1811 1812 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate 1813 Base, // Memory operand 1814 Offset, Chain}; 1815 1816 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1817 1818 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1819 SDValue SuperReg = SDValue(Load, 0); 1820 for (unsigned i = 0; i < NumVecs; ++i) 1821 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1822 AArch64::zsub0 + i, DL, VT, SuperReg)); 1823 1824 // Copy chain 1825 unsigned ChainIdx = NumVecs; 1826 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1827 CurDAG->RemoveDeadNode(N); 1828 } 1829 1830 void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N, 1831 unsigned NumVecs, 1832 unsigned Scale, 1833 unsigned Opc_ri, 1834 unsigned Opc_rr) { 1835 assert(Scale < 4 && "Invalid scaling value."); 1836 SDLoc DL(N); 1837 EVT VT = N->getValueType(0); 1838 SDValue Chain = N->getOperand(0); 1839 1840 SDValue PNg = N->getOperand(2); 1841 SDValue Base = N->getOperand(3); 1842 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); 1843 unsigned Opc; 1844 std::tie(Opc, Base, Offset) = 1845 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale); 1846 1847 SDValue Ops[] = {PNg, // Predicate-as-counter 1848 Base, // Memory operand 1849 Offset, Chain}; 1850 1851 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1852 1853 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1854 SDValue SuperReg = SDValue(Load, 0); 1855 for (unsigned i = 0; i < NumVecs; ++i) 1856 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1857 AArch64::zsub0 + i, DL, VT, SuperReg)); 1858 1859 // Copy chain 1860 unsigned ChainIdx = NumVecs; 1861 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1862 CurDAG->RemoveDeadNode(N); 1863 } 1864 1865 void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, 1866 unsigned Opcode) { 1867 if (N->getValueType(0) != MVT::nxv4f32) 1868 return; 1869 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode); 1870 } 1871 1872 void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, 1873 unsigned 
NumOutVecs, 1874 unsigned Opc, uint32_t MaxImm) { 1875 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4))) 1876 if (Imm->getZExtValue() > MaxImm) 1877 return; 1878 1879 SDValue ZtValue; 1880 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue)) 1881 return; 1882 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)}; 1883 SDLoc DL(Node); 1884 EVT VT = Node->getValueType(0); 1885 1886 SDNode *Instruction = 1887 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops); 1888 SDValue SuperReg = SDValue(Instruction, 0); 1889 1890 for (unsigned I = 0; I < NumOutVecs; ++I) 1891 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg( 1892 AArch64::zsub0 + I, DL, VT, SuperReg)); 1893 1894 // Copy chain 1895 unsigned ChainIdx = NumOutVecs; 1896 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1)); 1897 CurDAG->RemoveDeadNode(Node); 1898 } 1899 1900 void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs, 1901 unsigned Op) { 1902 SDLoc DL(N); 1903 EVT VT = N->getValueType(0); 1904 1905 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1906 SDValue Zd = createZMulTuple(Regs); 1907 SDValue Zn = N->getOperand(1 + NumVecs); 1908 SDValue Zm = N->getOperand(2 + NumVecs); 1909 1910 SDValue Ops[] = {Zd, Zn, Zm}; 1911 1912 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops); 1913 SDValue SuperReg = SDValue(Intrinsic, 0); 1914 for (unsigned i = 0; i < NumVecs; ++i) 1915 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1916 AArch64::zsub0 + i, DL, VT, SuperReg)); 1917 1918 CurDAG->RemoveDeadNode(N); 1919 } 1920 1921 bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) { 1922 switch (BaseReg) { 1923 default: 1924 return false; 1925 case AArch64::ZA: 1926 case AArch64::ZAB0: 1927 if (TileNum == 0) 1928 break; 1929 return false; 1930 case AArch64::ZAH0: 1931 if (TileNum <= 1) 1932 break; 1933 return false; 1934 case AArch64::ZAS0: 1935 if (TileNum <= 3) 1936 break; 1937 return false; 1938 case AArch64::ZAD0: 1939 if (TileNum <= 7) 1940 break; 1941 return false; 1942 } 1943 1944 BaseReg += TileNum; 1945 return true; 1946 } 1947 1948 template <unsigned MaxIdx, unsigned Scale> 1949 void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, 1950 unsigned BaseReg, unsigned Op) { 1951 unsigned TileNum = 0; 1952 if (BaseReg != AArch64::ZA) 1953 TileNum = N->getConstantOperandVal(2); 1954 1955 if (!SelectSMETile(BaseReg, TileNum)) 1956 return; 1957 1958 SDValue SliceBase, Base, Offset; 1959 if (BaseReg == AArch64::ZA) 1960 SliceBase = N->getOperand(2); 1961 else 1962 SliceBase = N->getOperand(3); 1963 1964 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale)) 1965 return; 1966 1967 SDLoc DL(N); 1968 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other); 1969 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)}; 1970 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops); 1971 1972 EVT VT = N->getValueType(0); 1973 for (unsigned I = 0; I < NumVecs; ++I) 1974 ReplaceUses(SDValue(N, I), 1975 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT, 1976 SDValue(Mov, 0))); 1977 // Copy chain 1978 unsigned ChainIdx = NumVecs; 1979 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1)); 1980 CurDAG->RemoveDeadNode(N); 1981 } 1982 1983 void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N, 1984 unsigned NumOutVecs, 1985 bool IsTupleInput, 1986 unsigned Opc) { 1987 SDLoc DL(N); 1988 EVT VT = N->getValueType(0); 1989 unsigned 
NumInVecs = N->getNumOperands() - 1; 1990 1991 SmallVector<SDValue, 6> Ops; 1992 if (IsTupleInput) { 1993 assert((NumInVecs == 2 || NumInVecs == 4) && 1994 "Don't know how to handle multi-register input!"); 1995 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, 1996 N->op_begin() + 1 + NumInVecs); 1997 Ops.push_back(createZMulTuple(Regs)); 1998 } else { 1999 // All intrinsic nodes have the ID as the first operand, hence the "1 + I". 2000 for (unsigned I = 0; I < NumInVecs; I++) 2001 Ops.push_back(N->getOperand(1 + I)); 2002 } 2003 2004 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 2005 SDValue SuperReg = SDValue(Res, 0); 2006 2007 for (unsigned I = 0; I < NumOutVecs; I++) 2008 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 2009 AArch64::zsub0 + I, DL, VT, SuperReg)); 2010 CurDAG->RemoveDeadNode(N); 2011 } 2012 2013 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, 2014 unsigned Opc) { 2015 SDLoc dl(N); 2016 EVT VT = N->getOperand(2)->getValueType(0); 2017 2018 // Form a REG_SEQUENCE to force register allocation. 2019 bool Is128Bit = VT.getSizeInBits() == 128; 2020 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2021 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 2022 2023 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; 2024 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 2025 2026 // Transfer memoperands. 2027 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2028 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2029 2030 ReplaceNode(N, St); 2031 } 2032 2033 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, 2034 unsigned Scale, unsigned Opc_rr, 2035 unsigned Opc_ri) { 2036 SDLoc dl(N); 2037 2038 // Form a REG_SEQUENCE to force register allocation. 2039 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2040 SDValue RegSeq = createZTuple(Regs); 2041 2042 // Optimize addressing mode. 2043 unsigned Opc; 2044 SDValue Offset, Base; 2045 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 2046 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), 2047 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); 2048 2049 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate 2050 Base, // address 2051 Offset, // offset 2052 N->getOperand(0)}; // chain 2053 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 2054 2055 ReplaceNode(N, St); 2056 } 2057 2058 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, 2059 SDValue &OffImm) { 2060 SDLoc dl(N); 2061 const DataLayout &DL = CurDAG->getDataLayout(); 2062 const TargetLowering *TLI = getTargetLowering(); 2063 2064 // Try to match it for the frame address 2065 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) { 2066 int FI = FINode->getIndex(); 2067 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 2068 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 2069 return true; 2070 } 2071 2072 return false; 2073 } 2074 2075 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 2076 unsigned Opc) { 2077 SDLoc dl(N); 2078 EVT VT = N->getOperand(2)->getValueType(0); 2079 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2080 MVT::Other}; // Type for the Chain 2081 2082 // Form a REG_SEQUENCE to force register allocation. 
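  // The source vectors are tied into a single tuple value so the register
  // allocator is forced to assign them consecutive D or Q registers, which the
  // multi-vector post-indexed store forms require. For example (illustrative),
  // two 128-bit source vectors are packed into one Q-pair REG_SEQUENCE.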
2083 bool Is128Bit = VT.getSizeInBits() == 128; 2084 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2085 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 2086 2087 SDValue Ops[] = {RegSeq, 2088 N->getOperand(NumVecs + 1), // base register 2089 N->getOperand(NumVecs + 2), // Incremental 2090 N->getOperand(0)}; // Chain 2091 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2092 2093 ReplaceNode(N, St); 2094 } 2095 2096 namespace { 2097 /// WidenVector - Given a value in the V64 register class, produce the 2098 /// equivalent value in the V128 register class. 2099 class WidenVector { 2100 SelectionDAG &DAG; 2101 2102 public: 2103 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 2104 2105 SDValue operator()(SDValue V64Reg) { 2106 EVT VT = V64Reg.getValueType(); 2107 unsigned NarrowSize = VT.getVectorNumElements(); 2108 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 2109 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 2110 SDLoc DL(V64Reg); 2111 2112 SDValue Undef = 2113 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 2114 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 2115 } 2116 }; 2117 } // namespace 2118 2119 /// NarrowVector - Given a value in the V128 register class, produce the 2120 /// equivalent value in the V64 register class. 2121 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 2122 EVT VT = V128Reg.getValueType(); 2123 unsigned WideSize = VT.getVectorNumElements(); 2124 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 2125 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 2126 2127 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 2128 V128Reg); 2129 } 2130 2131 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 2132 unsigned Opc) { 2133 SDLoc dl(N); 2134 EVT VT = N->getValueType(0); 2135 bool Narrow = VT.getSizeInBits() == 64; 2136 2137 // Form a REG_SEQUENCE to force register allocation. 2138 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2139 2140 if (Narrow) 2141 transform(Regs, Regs.begin(), 2142 WidenVector(*CurDAG)); 2143 2144 SDValue RegSeq = createQTuple(Regs); 2145 2146 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 2147 2148 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2); 2149 2150 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2151 N->getOperand(NumVecs + 3), N->getOperand(0)}; 2152 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2153 SDValue SuperReg = SDValue(Ld, 0); 2154 2155 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 2156 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 2157 AArch64::qsub2, AArch64::qsub3 }; 2158 for (unsigned i = 0; i < NumVecs; ++i) { 2159 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 2160 if (Narrow) 2161 NV = NarrowVector(NV, *CurDAG); 2162 ReplaceUses(SDValue(N, i), NV); 2163 } 2164 2165 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 2166 CurDAG->RemoveDeadNode(N); 2167 } 2168 2169 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 2170 unsigned Opc) { 2171 SDLoc dl(N); 2172 EVT VT = N->getValueType(0); 2173 bool Narrow = VT.getSizeInBits() == 64; 2174 2175 // Form a REG_SEQUENCE to force register allocation. 
2176 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2177 2178 if (Narrow) 2179 transform(Regs, Regs.begin(), 2180 WidenVector(*CurDAG)); 2181 2182 SDValue RegSeq = createQTuple(Regs); 2183 2184 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2185 RegSeq->getValueType(0), MVT::Other}; 2186 2187 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1); 2188 2189 SDValue Ops[] = {RegSeq, 2190 CurDAG->getTargetConstant(LaneNo, dl, 2191 MVT::i64), // Lane Number 2192 N->getOperand(NumVecs + 2), // Base register 2193 N->getOperand(NumVecs + 3), // Incremental 2194 N->getOperand(0)}; 2195 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2196 2197 // Update uses of the write back register 2198 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 2199 2200 // Update uses of the vector list 2201 SDValue SuperReg = SDValue(Ld, 1); 2202 if (NumVecs == 1) { 2203 ReplaceUses(SDValue(N, 0), 2204 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 2205 } else { 2206 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 2207 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 2208 AArch64::qsub2, AArch64::qsub3 }; 2209 for (unsigned i = 0; i < NumVecs; ++i) { 2210 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 2211 SuperReg); 2212 if (Narrow) 2213 NV = NarrowVector(NV, *CurDAG); 2214 ReplaceUses(SDValue(N, i), NV); 2215 } 2216 } 2217 2218 // Update the Chain 2219 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 2220 CurDAG->RemoveDeadNode(N); 2221 } 2222 2223 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 2224 unsigned Opc) { 2225 SDLoc dl(N); 2226 EVT VT = N->getOperand(2)->getValueType(0); 2227 bool Narrow = VT.getSizeInBits() == 64; 2228 2229 // Form a REG_SEQUENCE to force register allocation. 2230 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2231 2232 if (Narrow) 2233 transform(Regs, Regs.begin(), 2234 WidenVector(*CurDAG)); 2235 2236 SDValue RegSeq = createQTuple(Regs); 2237 2238 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2); 2239 2240 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2241 N->getOperand(NumVecs + 3), N->getOperand(0)}; 2242 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 2243 2244 // Transfer memoperands. 2245 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2246 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2247 2248 ReplaceNode(N, St); 2249 } 2250 2251 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 2252 unsigned Opc) { 2253 SDLoc dl(N); 2254 EVT VT = N->getOperand(2)->getValueType(0); 2255 bool Narrow = VT.getSizeInBits() == 64; 2256 2257 // Form a REG_SEQUENCE to force register allocation. 2258 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2259 2260 if (Narrow) 2261 transform(Regs, Regs.begin(), 2262 WidenVector(*CurDAG)); 2263 2264 SDValue RegSeq = createQTuple(Regs); 2265 2266 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2267 MVT::Other}; 2268 2269 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1); 2270 2271 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2272 N->getOperand(NumVecs + 2), // Base Register 2273 N->getOperand(NumVecs + 3), // Incremental 2274 N->getOperand(0)}; 2275 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2276 2277 // Transfer memoperands. 
2278 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2279 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2280 2281 ReplaceNode(N, St); 2282 } 2283 2284 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 2285 unsigned &Opc, SDValue &Opd0, 2286 unsigned &LSB, unsigned &MSB, 2287 unsigned NumberOfIgnoredLowBits, 2288 bool BiggerPattern) { 2289 assert(N->getOpcode() == ISD::AND && 2290 "N must be a AND operation to call this function"); 2291 2292 EVT VT = N->getValueType(0); 2293 2294 // Here we can test the type of VT and return false when the type does not 2295 // match, but since it is done prior to that call in the current context 2296 // we turned that into an assert to avoid redundant code. 2297 assert((VT == MVT::i32 || VT == MVT::i64) && 2298 "Type checking must have been done before calling this function"); 2299 2300 // FIXME: simplify-demanded-bits in DAGCombine will probably have 2301 // changed the AND node to a 32-bit mask operation. We'll have to 2302 // undo that as part of the transform here if we want to catch all 2303 // the opportunities. 2304 // Currently the NumberOfIgnoredLowBits argument helps to recover 2305 // from these situations when matching bigger pattern (bitfield insert). 2306 2307 // For unsigned extracts, check for a shift right and mask 2308 uint64_t AndImm = 0; 2309 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) 2310 return false; 2311 2312 const SDNode *Op0 = N->getOperand(0).getNode(); 2313 2314 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 2315 // simplified. Try to undo that 2316 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 2317 2318 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 2319 if (AndImm & (AndImm + 1)) 2320 return false; 2321 2322 bool ClampMSB = false; 2323 uint64_t SrlImm = 0; 2324 // Handle the SRL + ANY_EXTEND case. 2325 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 2326 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 2327 // Extend the incoming operand of the SRL to 64-bit. 2328 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 2329 // Make sure to clamp the MSB so that we preserve the semantics of the 2330 // original operations. 2331 ClampMSB = true; 2332 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 2333 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 2334 SrlImm)) { 2335 // If the shift result was truncated, we can still combine them. 2336 Opd0 = Op0->getOperand(0).getOperand(0); 2337 2338 // Use the type of SRL node. 2339 VT = Opd0->getValueType(0); 2340 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 2341 Opd0 = Op0->getOperand(0); 2342 ClampMSB = (VT == MVT::i32); 2343 } else if (BiggerPattern) { 2344 // Let's pretend a 0 shift right has been performed. 2345 // The resulting code will be at least as good as the original one 2346 // plus it may expose more opportunities for bitfield insert pattern. 2347 // FIXME: Currently we limit this to the bigger pattern, because 2348 // some optimizations expect AND and not UBFM. 2349 Opd0 = N->getOperand(0); 2350 } else 2351 return false; 2352 2353 // Bail out on large immediates. This happens when no proper 2354 // combining/constant folding was performed. 
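  // When the immediates are in range, the extract below maps directly onto
  // UBFM with LSB = SrlImm and MSB = SrlImm + countr_one(AndImm) - 1. A worked
  // example (x and the constants are illustrative): for i32
  // (and (srl x, 4), 0xff) we get LSB = 4 and MSB = 11, i.e. UBFMWri #4, #11,
  // which is UBFX Wd, Wn, #4, #8.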
  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  LSB = SrlImm;
  MSB = SrlImm +
        (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
                        : llvm::countr_one<uint64_t>(AndImm)) -
        1;
  if (ClampMSB)
    // Since we're moving the extend before the right shift operation, we need
    // to clamp the MSB to make sure we don't shift in undefined bits instead
    // of the zeros which would get shifted in with the original right shift
    // operation.
    MSB = MSB > 31 ? 31 : MSB;

  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  return true;
}

static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
                                             SDValue &Opd0, unsigned &Immr,
                                             unsigned &Imms) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  EVT VT = N->getValueType(0);
  unsigned BitWidth = VT.getSizeInBits();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  SDValue Op = N->getOperand(0);
  if (Op->getOpcode() == ISD::TRUNCATE) {
    Op = Op->getOperand(0);
    VT = Op->getValueType(0);
    BitWidth = VT.getSizeInBits();
  }

  uint64_t ShiftImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
      !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    return false;

  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
  if (ShiftImm + Width > BitWidth)
    return false;

  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
  Opd0 = Op.getOperand(0);
  Immr = ShiftImm;
  Imms = ShiftImm + Width - 1;
  return true;
}

static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
                                          SDValue &Opd0, unsigned &LSB,
                                          unsigned &MSB) {
  // We are looking for the following pattern, which extracts a contiguous run
  // of bits from the source value and places them at the LSB of the
  // destination value, with all other bits of the destination set to zero:
  //
  // Value2 = AND Value, MaskImm
  // SRL Value2, ShiftImm
  //
  // where MaskImm >> ShiftImm determines the width of the extracted bitfield.
  //
  // This gets selected into a single UBFM:
  //
  // UBFM Value, ShiftImm, Log2_64(MaskImm)
  //

  if (N->getOpcode() != ISD::SRL)
    return false;

  uint64_t AndMask = 0;
  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
    return false;

  Opd0 = N->getOperand(0).getOperand(0);

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N->getOperand(1), SrlImm))
    return false;

  // Check whether we really have a multi-bit extract here.
  if (!isMask_64(AndMask >> SrlImm))
    return false;

  Opc = N->getValueType(0) == MVT::i32 ?
AArch64::UBFMWri : AArch64::UBFMXri; 2446 LSB = SrlImm; 2447 MSB = llvm::Log2_64(AndMask); 2448 return true; 2449 } 2450 2451 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 2452 unsigned &Immr, unsigned &Imms, 2453 bool BiggerPattern) { 2454 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 2455 "N must be a SHR/SRA operation to call this function"); 2456 2457 EVT VT = N->getValueType(0); 2458 2459 // Here we can test the type of VT and return false when the type does not 2460 // match, but since it is done prior to that call in the current context 2461 // we turned that into an assert to avoid redundant code. 2462 assert((VT == MVT::i32 || VT == MVT::i64) && 2463 "Type checking must have been done before calling this function"); 2464 2465 // Check for AND + SRL doing several bits extract. 2466 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 2467 return true; 2468 2469 // We're looking for a shift of a shift. 2470 uint64_t ShlImm = 0; 2471 uint64_t TruncBits = 0; 2472 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 2473 Opd0 = N->getOperand(0).getOperand(0); 2474 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 2475 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 2476 // We are looking for a shift of truncate. Truncate from i64 to i32 could 2477 // be considered as setting high 32 bits as zero. Our strategy here is to 2478 // always generate 64bit UBFM. This consistency will help the CSE pass 2479 // later find more redundancy. 2480 Opd0 = N->getOperand(0).getOperand(0); 2481 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 2482 VT = Opd0.getValueType(); 2483 assert(VT == MVT::i64 && "the promoted type should be i64"); 2484 } else if (BiggerPattern) { 2485 // Let's pretend a 0 shift left has been performed. 2486 // FIXME: Currently we limit this to the bigger pattern case, 2487 // because some optimizations expect AND and not UBFM 2488 Opd0 = N->getOperand(0); 2489 } else 2490 return false; 2491 2492 // Missing combines/constant folding may have left us with strange 2493 // constants. 2494 if (ShlImm >= VT.getSizeInBits()) { 2495 LLVM_DEBUG( 2496 (dbgs() << N 2497 << ": Found large shift immediate, this should not happen\n")); 2498 return false; 2499 } 2500 2501 uint64_t SrlImm = 0; 2502 if (!isIntImmediate(N->getOperand(1), SrlImm)) 2503 return false; 2504 2505 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 2506 "bad amount in shift node!"); 2507 int immr = SrlImm - ShlImm; 2508 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 2509 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 2510 // SRA requires a signed extraction 2511 if (VT == MVT::i32) 2512 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 2513 else 2514 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; 2515 return true; 2516 } 2517 2518 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 2519 assert(N->getOpcode() == ISD::SIGN_EXTEND); 2520 2521 EVT VT = N->getValueType(0); 2522 EVT NarrowVT = N->getOperand(0)->getValueType(0); 2523 if (VT != MVT::i64 || NarrowVT != MVT::i32) 2524 return false; 2525 2526 uint64_t ShiftImm; 2527 SDValue Op = N->getOperand(0); 2528 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2529 return false; 2530 2531 SDLoc dl(N); 2532 // Extend the incoming operand of the shift to 64-bits. 
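  // A worked example (x is illustrative): (i64 (sign_extend (i32 (sra x, 7))))
  // is selected below as SBFMXri with Immr = 7 and Imms = 31, i.e.
  // SBFX Xd, Xn, #7, #25 on the widened 64-bit register.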
2533 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 2534 unsigned Immr = ShiftImm; 2535 unsigned Imms = NarrowVT.getSizeInBits() - 1; 2536 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2537 CurDAG->getTargetConstant(Imms, dl, VT)}; 2538 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 2539 return true; 2540 } 2541 2542 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 2543 SDValue &Opd0, unsigned &Immr, unsigned &Imms, 2544 unsigned NumberOfIgnoredLowBits = 0, 2545 bool BiggerPattern = false) { 2546 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 2547 return false; 2548 2549 switch (N->getOpcode()) { 2550 default: 2551 if (!N->isMachineOpcode()) 2552 return false; 2553 break; 2554 case ISD::AND: 2555 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, 2556 NumberOfIgnoredLowBits, BiggerPattern); 2557 case ISD::SRL: 2558 case ISD::SRA: 2559 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); 2560 2561 case ISD::SIGN_EXTEND_INREG: 2562 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); 2563 } 2564 2565 unsigned NOpc = N->getMachineOpcode(); 2566 switch (NOpc) { 2567 default: 2568 return false; 2569 case AArch64::SBFMWri: 2570 case AArch64::UBFMWri: 2571 case AArch64::SBFMXri: 2572 case AArch64::UBFMXri: 2573 Opc = NOpc; 2574 Opd0 = N->getOperand(0); 2575 Immr = N->getConstantOperandVal(1); 2576 Imms = N->getConstantOperandVal(2); 2577 return true; 2578 } 2579 // Unreachable 2580 return false; 2581 } 2582 2583 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { 2584 unsigned Opc, Immr, Imms; 2585 SDValue Opd0; 2586 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) 2587 return false; 2588 2589 EVT VT = N->getValueType(0); 2590 SDLoc dl(N); 2591 2592 // If the bit extract operation is 64bit but the original type is 32bit, we 2593 // need to add one EXTRACT_SUBREG. 2594 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { 2595 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), 2596 CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; 2597 2598 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); 2599 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, 2600 MVT::i32, SDValue(BFM, 0)); 2601 ReplaceNode(N, Inner.getNode()); 2602 return true; 2603 } 2604 2605 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2606 CurDAG->getTargetConstant(Imms, dl, VT)}; 2607 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2608 return true; 2609 } 2610 2611 /// Does DstMask form a complementary pair with the mask provided by 2612 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, 2613 /// this asks whether DstMask zeroes precisely those bits that will be set by 2614 /// the other half. 2615 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, 2616 unsigned NumberOfIgnoredHighBits, EVT VT) { 2617 assert((VT == MVT::i32 || VT == MVT::i64) && 2618 "i32 or i64 mask type expected!"); 2619 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; 2620 2621 APInt SignificantDstMask = APInt(BitWidth, DstMask); 2622 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); 2623 2624 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && 2625 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes(); 2626 } 2627 2628 // Look for bits that will be useful for later uses. 
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
// E.g., looking for the useful bits of x:
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7; they live on through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used by an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);

static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
  getUsefulBits(Op, UsefulBits, Depth + 1);
}

static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
                                             uint64_t Imm, uint64_t MSB,
                                             unsigned Depth) {
  // Inherit the bit width from UsefulBits.
  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  if (MSB >= Imm) {
    OpUsefulBits <<= MSB - Imm + 1;
    --OpUsefulBits;
    // The interesting part will be in the lower part of the result.
    getUsefulBits(Op, OpUsefulBits, Depth + 1);
    // The interesting part was starting at Imm in the argument.
    OpUsefulBits <<= Imm;
  } else {
    OpUsefulBits <<= MSB + 1;
    --OpUsefulBits;
    // The interesting part will be shifted in the result.
    OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
    getUsefulBits(Op, OpUsefulBits, Depth + 1);
    // The interesting part was at zero in the argument.
    OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
  }

  UsefulBits &= OpUsefulBits;
}

static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
                                  unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();

  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
}

static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t ShiftTypeAndValue =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  APInt Mask(UsefulBits);
  Mask.clearAllBits();
  Mask.flipAllBits();

  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
    // Shift Left
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask <<= ShiftAmt;
    getUsefulBits(Op, Mask, Depth + 1);
    Mask.lshrInPlace(ShiftAmt);
  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
    // Shift Right
    // We do not handle AArch64_AM::ASR, because the sign would change the
    // number of useful bits.
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask.lshrInPlace(ShiftAmt);
    getUsefulBits(Op, Mask, Depth + 1);
    Mask <<= ShiftAmt;
  } else
    return;

  UsefulBits &= Mask;
}

static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
                                 unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  uint64_t MSB =
cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 2725 2726 APInt OpUsefulBits(UsefulBits); 2727 OpUsefulBits = 1; 2728 2729 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 2730 ResultUsefulBits.flipAllBits(); 2731 APInt Mask(UsefulBits.getBitWidth(), 0); 2732 2733 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 2734 2735 if (MSB >= Imm) { 2736 // The instruction is a BFXIL. 2737 uint64_t Width = MSB - Imm + 1; 2738 uint64_t LSB = Imm; 2739 2740 OpUsefulBits <<= Width; 2741 --OpUsefulBits; 2742 2743 if (Op.getOperand(1) == Orig) { 2744 // Copy the low bits from the result to bits starting from LSB. 2745 Mask = ResultUsefulBits & OpUsefulBits; 2746 Mask <<= LSB; 2747 } 2748 2749 if (Op.getOperand(0) == Orig) 2750 // Bits starting from LSB in the input contribute to the result. 2751 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2752 } else { 2753 // The instruction is a BFI. 2754 uint64_t Width = MSB + 1; 2755 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 2756 2757 OpUsefulBits <<= Width; 2758 --OpUsefulBits; 2759 OpUsefulBits <<= LSB; 2760 2761 if (Op.getOperand(1) == Orig) { 2762 // Copy the bits from the result to the zero bits. 2763 Mask = ResultUsefulBits & OpUsefulBits; 2764 Mask.lshrInPlace(LSB); 2765 } 2766 2767 if (Op.getOperand(0) == Orig) 2768 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2769 } 2770 2771 UsefulBits &= Mask; 2772 } 2773 2774 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2775 SDValue Orig, unsigned Depth) { 2776 2777 // Users of this node should have already been instruction selected 2778 // FIXME: Can we turn that into an assert? 2779 if (!UserNode->isMachineOpcode()) 2780 return; 2781 2782 switch (UserNode->getMachineOpcode()) { 2783 default: 2784 return; 2785 case AArch64::ANDSWri: 2786 case AArch64::ANDSXri: 2787 case AArch64::ANDWri: 2788 case AArch64::ANDXri: 2789 // We increment Depth only when we call the getUsefulBits 2790 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2791 Depth); 2792 case AArch64::UBFMWri: 2793 case AArch64::UBFMXri: 2794 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2795 2796 case AArch64::ORRWrs: 2797 case AArch64::ORRXrs: 2798 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig) 2799 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2800 Depth); 2801 return; 2802 case AArch64::BFMWri: 2803 case AArch64::BFMXri: 2804 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2805 2806 case AArch64::STRBBui: 2807 case AArch64::STURBBi: 2808 if (UserNode->getOperand(0) != Orig) 2809 return; 2810 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); 2811 return; 2812 2813 case AArch64::STRHHui: 2814 case AArch64::STURHHi: 2815 if (UserNode->getOperand(0) != Orig) 2816 return; 2817 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2818 return; 2819 } 2820 } 2821 2822 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2823 if (Depth >= SelectionDAG::MaxRecursionDepth) 2824 return; 2825 // Initialize UsefulBits 2826 if (!Depth) { 2827 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2828 // At the beginning, assume every produced bits is useful 2829 UsefulBits = APInt(Bitwidth, 0); 2830 UsefulBits.flipAllBits(); 2831 } 2832 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2833 2834 for (SDNode *Node : Op.getNode()->uses()) { 2835 // A use cannot produce useful bits 2836 APInt UsefulBitsForUse = APInt(UsefulBits); 2837 getUsefulBitsForUse(Node, 
UsefulBitsForUse, Op, Depth); 2838 UsersUsefulBits |= UsefulBitsForUse; 2839 } 2840 // UsefulBits contains the produced bits that are meaningful for the 2841 // current definition, thus a user cannot make a bit meaningful at 2842 // this point 2843 UsefulBits &= UsersUsefulBits; 2844 } 2845 2846 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 2847 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 2848 /// 0, return Op unchanged. 2849 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 2850 if (ShlAmount == 0) 2851 return Op; 2852 2853 EVT VT = Op.getValueType(); 2854 SDLoc dl(Op); 2855 unsigned BitWidth = VT.getSizeInBits(); 2856 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2857 2858 SDNode *ShiftNode; 2859 if (ShlAmount > 0) { 2860 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 2861 ShiftNode = CurDAG->getMachineNode( 2862 UBFMOpc, dl, VT, Op, 2863 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 2864 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 2865 } else { 2866 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 2867 assert(ShlAmount < 0 && "expected right shift"); 2868 int ShrAmount = -ShlAmount; 2869 ShiftNode = CurDAG->getMachineNode( 2870 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 2871 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 2872 } 2873 2874 return SDValue(ShiftNode, 0); 2875 } 2876 2877 // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)". 2878 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, 2879 bool BiggerPattern, 2880 const uint64_t NonZeroBits, 2881 SDValue &Src, int &DstLSB, 2882 int &Width); 2883 2884 // For bit-field-positioning pattern "shl VAL, N)". 2885 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, 2886 bool BiggerPattern, 2887 const uint64_t NonZeroBits, 2888 SDValue &Src, int &DstLSB, 2889 int &Width); 2890 2891 /// Does this tree qualify as an attempt to move a bitfield into position, 2892 /// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N). 
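/// A worked example (x and the constants are illustrative): for i32
/// Op = (and (shl x, 3), 0x78), the known-nonzero bits are 0x78, a shifted
/// mask, so DstLSB = 3 and Width = 4: the low four bits of x are being moved
/// to bit position 3, which can later be selected as a UBFIZ/BFI-style insert.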
2893 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 2894 bool BiggerPattern, SDValue &Src, 2895 int &DstLSB, int &Width) { 2896 EVT VT = Op.getValueType(); 2897 unsigned BitWidth = VT.getSizeInBits(); 2898 (void)BitWidth; 2899 assert(BitWidth == 32 || BitWidth == 64); 2900 2901 KnownBits Known = CurDAG->computeKnownBits(Op); 2902 2903 // Non-zero in the sense that they're not provably zero, which is the key 2904 // point if we want to use this value 2905 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 2906 if (!isShiftedMask_64(NonZeroBits)) 2907 return false; 2908 2909 switch (Op.getOpcode()) { 2910 default: 2911 break; 2912 case ISD::AND: 2913 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern, 2914 NonZeroBits, Src, DstLSB, Width); 2915 case ISD::SHL: 2916 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern, 2917 NonZeroBits, Src, DstLSB, Width); 2918 } 2919 2920 return false; 2921 } 2922 2923 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, 2924 bool BiggerPattern, 2925 const uint64_t NonZeroBits, 2926 SDValue &Src, int &DstLSB, 2927 int &Width) { 2928 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); 2929 2930 EVT VT = Op.getValueType(); 2931 assert((VT == MVT::i32 || VT == MVT::i64) && 2932 "Caller guarantees VT is one of i32 or i64"); 2933 (void)VT; 2934 2935 uint64_t AndImm; 2936 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) 2937 return false; 2938 2939 // If (~AndImm & NonZeroBits) is not zero at POS, we know that 2940 // 1) (AndImm & (1 << POS) == 0) 2941 // 2) the result of AND is not zero at POS bit (according to NonZeroBits) 2942 // 2943 // 1) and 2) don't agree so something must be wrong (e.g., in 2944 // 'SelectionDAG::computeKnownBits') 2945 assert((~AndImm & NonZeroBits) == 0 && 2946 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)"); 2947 2948 SDValue AndOp0 = Op.getOperand(0); 2949 2950 uint64_t ShlImm; 2951 SDValue ShlOp0; 2952 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) { 2953 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'. 2954 ShlOp0 = AndOp0.getOperand(0); 2955 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND && 2956 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL, 2957 ShlImm)) { 2958 // For pattern "and(any_extend(shl(val, N)), shifted-mask)" 2959 2960 // ShlVal == shl(val, N), which is a left shift on a smaller type. 2961 SDValue ShlVal = AndOp0.getOperand(0); 2962 2963 // Since this is after type legalization and ShlVal is extended to MVT::i64, 2964 // expect VT to be MVT::i32. 2965 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32."); 2966 2967 // Widens 'val' to MVT::i64 as the source of bit field positioning. 2968 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0)); 2969 } else 2970 return false; 2971 2972 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since 2973 // then we'll end up generating AndOp0+UBFIZ instead of just keeping 2974 // AndOp0+AND. 2975 if (!BiggerPattern && !AndOp0.hasOneUse()) 2976 return false; 2977 2978 DstLSB = llvm::countr_zero(NonZeroBits); 2979 Width = llvm::countr_one(NonZeroBits >> DstLSB); 2980 2981 // Bail out on large Width. This happens when no proper combining / constant 2982 // folding was performed. 
  if (Width >= (int)VT.getSizeInBits()) {
    // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t,
    // and Width == 64 indicates a missed dag-combine from "(and val, AllOnes)"
    // to "val".
    // If VT is i32, Width >= 32 means:
    // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
    //   demands at least 'Width' bits (after dag-combiner). This, together with
    //   the `any_extend` Op (undefined higher bits), indicates a missed combine
    //   when lowering the 'and' IR instruction to a machine IR instruction.
    LLVM_DEBUG(
        dbgs()
        << "Found large Width in bit-field-positioning -- this indicates no "
           "proper combining / constant folding was performed\n");
    return false;
  }

  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
  // amount. BiggerPattern is true when this pattern is being matched for BFI,
  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
  // which case it is not profitable to insert an extra shift.
  if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
    return false;

  Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
  return true;
}

// For the node (shl (and val, mask), N), returns true if the node is
// equivalent to UBFIZ.
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
                                              SDValue &Src, int &DstLSB,
                                              int &Width) {
  // The caller should have verified that Op is a left shift by the constant
  // amount ShlImm; the asserts below check that.
  assert(Op.getOpcode() == ISD::SHL &&
         "Op.getNode() should be a SHL node to call this function");
  assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
         "Op.getNode() should shift ShlImm to call this function");

  uint64_t AndImm = 0;
  SDValue Op0 = Op.getOperand(0);
  if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
    return false;

  const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
  if (isMask_64(ShiftedAndImm)) {
    // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
    // should end in a run of ones and may be prefixed with arbitrary bits, as
    // long as those bits are shifted out.
    //
    // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
    // the bits of the AND result that correspond to x, y and z are shifted
    // out, so it's fine not to extract them.
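    // Concretely (illustrative values): for i64 Op = (shl (and x, 0x1f), 8),
    // ShiftedAndImm == 0x1f, so Width = 5, DstLSB = 8 and Src = x, i.e. the
    // node behaves like UBFIZ Xd, Xn, #8, #5.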
3037 Width = llvm::countr_one(ShiftedAndImm); 3038 DstLSB = ShlImm; 3039 Src = Op0.getOperand(0); 3040 return true; 3041 } 3042 return false; 3043 } 3044 3045 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, 3046 bool BiggerPattern, 3047 const uint64_t NonZeroBits, 3048 SDValue &Src, int &DstLSB, 3049 int &Width) { 3050 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); 3051 3052 EVT VT = Op.getValueType(); 3053 assert((VT == MVT::i32 || VT == MVT::i64) && 3054 "Caller guarantees that type is i32 or i64"); 3055 (void)VT; 3056 3057 uint64_t ShlImm; 3058 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 3059 return false; 3060 3061 if (!BiggerPattern && !Op.hasOneUse()) 3062 return false; 3063 3064 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width)) 3065 return true; 3066 3067 DstLSB = llvm::countr_zero(NonZeroBits); 3068 Width = llvm::countr_one(NonZeroBits >> DstLSB); 3069 3070 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern) 3071 return false; 3072 3073 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB); 3074 return true; 3075 } 3076 3077 static bool isShiftedMask(uint64_t Mask, EVT VT) { 3078 assert(VT == MVT::i32 || VT == MVT::i64); 3079 if (VT == MVT::i32) 3080 return isShiftedMask_32(Mask); 3081 return isShiftedMask_64(Mask); 3082 } 3083 3084 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 3085 // inserted only sets known zero bits. 3086 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 3087 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 3088 3089 EVT VT = N->getValueType(0); 3090 if (VT != MVT::i32 && VT != MVT::i64) 3091 return false; 3092 3093 unsigned BitWidth = VT.getSizeInBits(); 3094 3095 uint64_t OrImm; 3096 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 3097 return false; 3098 3099 // Skip this transformation if the ORR immediate can be encoded in the ORR. 3100 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 3101 // performance neutral. 3102 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 3103 return false; 3104 3105 uint64_t MaskImm; 3106 SDValue And = N->getOperand(0); 3107 // Must be a single use AND with an immediate operand. 3108 if (!And.hasOneUse() || 3109 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 3110 return false; 3111 3112 // Compute the Known Zero for the AND as this allows us to catch more general 3113 // cases than just looking for AND with imm. 3114 KnownBits Known = CurDAG->computeKnownBits(And); 3115 3116 // Non-zero in the sense that they're not provably zero, which is the key 3117 // point if we want to use this value. 3118 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 3119 3120 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 3121 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 3122 return false; 3123 3124 // The bits being inserted must only set those bits that are known to be zero. 3125 if ((OrImm & NotKnownZero) != 0) { 3126 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 3127 // currently handle this case. 3128 return false; 3129 } 3130 3131 // BFI/BFXIL dst, src, #lsb, #width. 3132 int LSB = llvm::countr_one(NotKnownZero); 3133 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount(); 3134 3135 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 
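  // For example, a 32-bit insert of width 4 at LSB 8 (BFI Wd, Wn, #8, #4)
  // becomes BFM Wd, Wn, #24, #3: ImmR = (32 - 8) % 32 = 24 and ImmS = 4 - 1.
  // When LSB == 0 the alias is BFXIL and ImmR is simply 0.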
3136 unsigned ImmR = (BitWidth - LSB) % BitWidth; 3137 unsigned ImmS = Width - 1; 3138 3139 // If we're creating a BFI instruction avoid cases where we need more 3140 // instructions to materialize the BFI constant as compared to the original 3141 // ORR. A BFXIL will use the same constant as the original ORR, so the code 3142 // should be no worse in this case. 3143 bool IsBFI = LSB != 0; 3144 uint64_t BFIImm = OrImm >> LSB; 3145 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 3146 // We have a BFI instruction and we know the constant can't be materialized 3147 // with a ORR-immediate with the zero register. 3148 unsigned OrChunks = 0, BFIChunks = 0; 3149 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 3150 if (((OrImm >> Shift) & 0xFFFF) != 0) 3151 ++OrChunks; 3152 if (((BFIImm >> Shift) & 0xFFFF) != 0) 3153 ++BFIChunks; 3154 } 3155 if (BFIChunks > OrChunks) 3156 return false; 3157 } 3158 3159 // Materialize the constant to be inserted. 3160 SDLoc DL(N); 3161 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; 3162 SDNode *MOVI = CurDAG->getMachineNode( 3163 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); 3164 3165 // Create the BFI/BFXIL instruction. 3166 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), 3167 CurDAG->getTargetConstant(ImmR, DL, VT), 3168 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3169 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 3170 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3171 return true; 3172 } 3173 3174 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, 3175 SDValue &ShiftedOperand, 3176 uint64_t &EncodedShiftImm) { 3177 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR. 3178 if (!Dst.hasOneUse()) 3179 return false; 3180 3181 EVT VT = Dst.getValueType(); 3182 assert((VT == MVT::i32 || VT == MVT::i64) && 3183 "Caller should guarantee that VT is one of i32 or i64"); 3184 const unsigned SizeInBits = VT.getSizeInBits(); 3185 3186 SDLoc DL(Dst.getNode()); 3187 uint64_t AndImm, ShlImm; 3188 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) && 3189 isShiftedMask_64(AndImm)) { 3190 // Avoid transforming 'DstOp0' if it has other uses than the AND node. 3191 SDValue DstOp0 = Dst.getOperand(0); 3192 if (!DstOp0.hasOneUse()) 3193 return false; 3194 3195 // An example to illustrate the transformation 3196 // From: 3197 // lsr x8, x1, #1 3198 // and x8, x8, #0x3f80 3199 // bfxil x8, x1, #0, #7 3200 // To: 3201 // and x8, x23, #0x7f 3202 // ubfx x9, x23, #8, #7 3203 // orr x23, x8, x9, lsl #7 3204 // 3205 // The number of instructions remains the same, but ORR is faster than BFXIL 3206 // on many AArch64 processors (or as good as BFXIL if not faster). Besides, 3207 // the dependency chain is improved after the transformation. 3208 uint64_t SrlImm; 3209 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) { 3210 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm); 3211 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) { 3212 unsigned MaskWidth = 3213 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask); 3214 unsigned UBFMOpc = 3215 (VT == MVT::i32) ? 
AArch64::UBFMWri : AArch64::UBFMXri; 3216 SDNode *UBFMNode = CurDAG->getMachineNode( 3217 UBFMOpc, DL, VT, DstOp0.getOperand(0), 3218 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL, 3219 VT), 3220 CurDAG->getTargetConstant( 3221 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT)); 3222 ShiftedOperand = SDValue(UBFMNode, 0); 3223 EncodedShiftImm = AArch64_AM::getShifterImm( 3224 AArch64_AM::LSL, NumTrailingZeroInShiftedMask); 3225 return true; 3226 } 3227 } 3228 return false; 3229 } 3230 3231 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) { 3232 ShiftedOperand = Dst.getOperand(0); 3233 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm); 3234 return true; 3235 } 3236 3237 uint64_t SrlImm; 3238 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) { 3239 ShiftedOperand = Dst.getOperand(0); 3240 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm); 3241 return true; 3242 } 3243 return false; 3244 } 3245 3246 // Given an 'ISD::OR' node that is going to be selected as BFM, analyze 3247 // the operands and select it to AArch64::ORR with shifted registers if 3248 // that's more efficient. Returns true iff selection to AArch64::ORR happens. 3249 static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, 3250 SDValue Src, SDValue Dst, SelectionDAG *CurDAG, 3251 const bool BiggerPattern) { 3252 EVT VT = N->getValueType(0); 3253 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node"); 3254 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) || 3255 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) && 3256 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR"); 3257 assert((VT == MVT::i32 || VT == MVT::i64) && 3258 "Expect result type to be i32 or i64 since N is combinable to BFM"); 3259 SDLoc DL(N); 3260 3261 // Bail out if BFM simplifies away one node in BFM Dst. 3262 if (OrOpd1 != Dst) 3263 return false; 3264 3265 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs; 3266 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer 3267 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true. 3268 if (BiggerPattern) { 3269 uint64_t SrcAndImm; 3270 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) && 3271 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) { 3272 // OrOpd0 = AND Src, #Mask 3273 // So BFM simplifies away one AND node from Src and doesn't simplify away 3274 // nodes from Dst. If ORR with left-shifted operand also simplifies away 3275 // one node (from Rd), ORR is better since it has higher throughput and 3276 // smaller latency than BFM on many AArch64 processors (and for the rest 3277 // ORR is at least as good as BFM). 
3278 SDValue ShiftedOperand; 3279 uint64_t EncodedShiftImm; 3280 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand, 3281 EncodedShiftImm)) { 3282 SDValue Ops[] = {OrOpd0, ShiftedOperand, 3283 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)}; 3284 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3285 return true; 3286 } 3287 } 3288 return false; 3289 } 3290 3291 assert((!BiggerPattern) && "BiggerPattern should be handled above"); 3292 3293 uint64_t ShlImm; 3294 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) { 3295 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) { 3296 SDValue Ops[] = { 3297 Dst, Src, 3298 CurDAG->getTargetConstant( 3299 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; 3300 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3301 return true; 3302 } 3303 3304 // Select the following pattern to left-shifted operand rather than BFI. 3305 // %val1 = op .. 3306 // %val2 = shl %val1, #imm 3307 // %res = or %val1, %val2 3308 // 3309 // If N is selected to be BFI, we know that 3310 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into 3311 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved) 3312 // 3313 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly. 3314 if (OrOpd0.getOperand(0) == OrOpd1) { 3315 SDValue Ops[] = { 3316 OrOpd1, OrOpd1, 3317 CurDAG->getTargetConstant( 3318 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; 3319 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3320 return true; 3321 } 3322 } 3323 3324 uint64_t SrlImm; 3325 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) { 3326 // Select the following pattern to right-shifted operand rather than BFXIL. 3327 // %val1 = op .. 3328 // %val2 = lshr %val1, #imm 3329 // %res = or %val1, %val2 3330 // 3331 // If N is selected to be BFXIL, we know that 3332 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into 3333 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved) 3334 // 3335 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly. 3336 if (OrOpd0.getOperand(0) == OrOpd1) { 3337 SDValue Ops[] = { 3338 OrOpd1, OrOpd1, 3339 CurDAG->getTargetConstant( 3340 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)}; 3341 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3342 return true; 3343 } 3344 } 3345 3346 return false; 3347 } 3348 3349 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, 3350 SelectionDAG *CurDAG) { 3351 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 3352 3353 EVT VT = N->getValueType(0); 3354 if (VT != MVT::i32 && VT != MVT::i64) 3355 return false; 3356 3357 unsigned BitWidth = VT.getSizeInBits(); 3358 3359 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 3360 // have the expected shape. Try to undo that. 
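  // For instance (illustrative case): in "or (shl x, 8), y" where only the
  // low 16 bits of the result are demanded, simplify-demanded-bits may have
  // stripped the "and ..., 0xff00" that originally accompanied the shl, so
  // the relevant bit ranges are recovered from UsefulBits below rather than
  // from an explicit mask.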

  unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();

  // Given an OR operation, check if we have the following pattern
  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
  // isBitfieldExtractOp)
  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
  //                 countTrailingZeros(mask2) == imm2 - imm + 1
  // f = d | c
  // if yes, replace the OR instruction with:
  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2

  // OR is commutative, check all combinations of operand order and values of
  // BiggerPattern, i.e.
  // Opd0, Opd1, BiggerPattern=false
  // Opd1, Opd0, BiggerPattern=false
  // Opd0, Opd1, BiggerPattern=true
  // Opd1, Opd0, BiggerPattern=true
  // Several of these combinations may match, so check with BiggerPattern=false
  // first since that will produce better results by matching more instructions
  // and/or inserting fewer extra instructions.
  for (int I = 0; I < 4; ++I) {

    SDValue Dst, Src;
    unsigned ImmR, ImmS;
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();

    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S)
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint is here to catch the bitfield-insertion case
      // only; we may want to widen the pattern if we want to grab the general
      // bitfield move case.
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
                                       BiggerPattern,
                                       Src, DstLSB, Width)) {
      ImmR = (BitWidth - DstLSB) % BitWidth;
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern
    EVT VT = OrOpd1Val.getValueType();
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the known-zero bits for the candidate of the first (destination)
    // operand. This allows us to catch more general cases than just looking
    // for an AND with an immediate. Indeed, simplify-demanded-bits may have
    // removed the AND instruction because it proved it was useless.
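    // E.g. (illustrative): if the insertion covers bits [8, 16) and OrOpd1 is
    // known to have those bits clear (either through an explicit
    // "and y, #0xffff00ff" or because earlier combines already proved it),
    // the OR can still become a bitfield insert with OrOpd1 as the
    // destination.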
3430 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 3431 3432 // Check if there is enough room for the second operand to appear 3433 // in the first one 3434 APInt BitsToBeInserted = 3435 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 3436 3437 if ((BitsToBeInserted & ~Known.Zero) != 0) 3438 continue; 3439 3440 // Set the first operand 3441 uint64_t Imm; 3442 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 3443 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 3444 // In that case, we can eliminate the AND 3445 Dst = OrOpd1->getOperand(0); 3446 else 3447 // Maybe the AND has been removed by simplify-demanded-bits 3448 // or is useful because it discards more bits 3449 Dst = OrOpd1Val; 3450 3451 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR 3452 // with shifted operand is more efficient. 3453 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG, 3454 BiggerPattern)) 3455 return true; 3456 3457 // both parts match 3458 SDLoc DL(N); 3459 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 3460 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3461 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 3462 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3463 return true; 3464 } 3465 3466 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 3467 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 3468 // mask (e.g., 0x000ffff0). 3469 uint64_t Mask0Imm, Mask1Imm; 3470 SDValue And0 = N->getOperand(0); 3471 SDValue And1 = N->getOperand(1); 3472 if (And0.hasOneUse() && And1.hasOneUse() && 3473 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 3474 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 3475 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 3476 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 3477 3478 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 3479 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 3480 // bits to be inserted. 3481 if (isShiftedMask(Mask0Imm, VT)) { 3482 std::swap(And0, And1); 3483 std::swap(Mask0Imm, Mask1Imm); 3484 } 3485 3486 SDValue Src = And1->getOperand(0); 3487 SDValue Dst = And0->getOperand(0); 3488 unsigned LSB = llvm::countr_zero(Mask1Imm); 3489 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount(); 3490 3491 // The BFXIL inserts the low-order bits from a source register, so right 3492 // shift the needed bits into place. 3493 SDLoc DL(N); 3494 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 3495 uint64_t LsrImm = LSB; 3496 if (Src->hasOneUse() && 3497 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) && 3498 (LsrImm + LSB) < BitWidth) { 3499 Src = Src->getOperand(0); 3500 LsrImm += LSB; 3501 } 3502 3503 SDNode *LSR = CurDAG->getMachineNode( 3504 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT), 3505 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 3506 3507 // BFXIL is an alias of BFM, so translate to BFM operands. 3508 unsigned ImmR = (BitWidth - LSB) % BitWidth; 3509 unsigned ImmS = Width - 1; 3510 3511 // Create the BFXIL instruction. 3512 SDValue Ops[] = {Dst, SDValue(LSR, 0), 3513 CurDAG->getTargetConstant(ImmR, DL, VT), 3514 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3515 unsigned Opc = (VT == MVT::i32) ? 
AArch64::BFMWri : AArch64::BFMXri; 3516 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3517 return true; 3518 } 3519 3520 return false; 3521 } 3522 3523 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 3524 if (N->getOpcode() != ISD::OR) 3525 return false; 3526 3527 APInt NUsefulBits; 3528 getUsefulBits(SDValue(N, 0), NUsefulBits); 3529 3530 // If all bits are not useful, just return UNDEF. 3531 if (!NUsefulBits) { 3532 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 3533 return true; 3534 } 3535 3536 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 3537 return true; 3538 3539 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 3540 } 3541 3542 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 3543 /// equivalent of a left shift by a constant amount followed by an and masking 3544 /// out a contiguous set of bits. 3545 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 3546 if (N->getOpcode() != ISD::AND) 3547 return false; 3548 3549 EVT VT = N->getValueType(0); 3550 if (VT != MVT::i32 && VT != MVT::i64) 3551 return false; 3552 3553 SDValue Op0; 3554 int DstLSB, Width; 3555 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 3556 Op0, DstLSB, Width)) 3557 return false; 3558 3559 // ImmR is the rotate right amount. 3560 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 3561 // ImmS is the most significant bit of the source to be moved. 3562 unsigned ImmS = Width - 1; 3563 3564 SDLoc DL(N); 3565 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 3566 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3567 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 3568 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3569 return true; 3570 } 3571 3572 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 3573 /// variable shift/rotate instructions. 3574 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 3575 EVT VT = N->getValueType(0); 3576 3577 unsigned Opc; 3578 switch (N->getOpcode()) { 3579 case ISD::ROTR: 3580 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 3581 break; 3582 case ISD::SHL: 3583 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 3584 break; 3585 case ISD::SRL: 3586 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 3587 break; 3588 case ISD::SRA: 3589 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 3590 break; 3591 default: 3592 return false; 3593 } 3594 3595 uint64_t Size; 3596 uint64_t Bits; 3597 if (VT == MVT::i32) { 3598 Bits = 5; 3599 Size = 32; 3600 } else if (VT == MVT::i64) { 3601 Bits = 6; 3602 Size = 64; 3603 } else 3604 return false; 3605 3606 SDValue ShiftAmt = N->getOperand(1); 3607 SDLoc DL(N); 3608 SDValue NewShiftAmt; 3609 3610 // Skip over an extend of the shift amount. 3611 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 3612 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 3613 ShiftAmt = ShiftAmt->getOperand(0); 3614 3615 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 3616 SDValue Add0 = ShiftAmt->getOperand(0); 3617 SDValue Add1 = ShiftAmt->getOperand(1); 3618 uint64_t Add0Imm; 3619 uint64_t Add1Imm; 3620 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) { 3621 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 3622 // to avoid the ADD/SUB. 
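      // Illustrative example: for a 64-bit LSLV the hardware already takes
      // the shift amount modulo 64, so "shl x, (add y, 64)" shifts by exactly
      // the same amount as "shl x, y" and the ADD can simply be dropped.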
3623 NewShiftAmt = Add0; 3624 } else if (ShiftAmt->getOpcode() == ISD::SUB && 3625 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 3626 (Add0Imm % Size == 0)) { 3627 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X 3628 // to generate a NEG instead of a SUB from a constant. 3629 unsigned NegOpc; 3630 unsigned ZeroReg; 3631 EVT SubVT = ShiftAmt->getValueType(0); 3632 if (SubVT == MVT::i32) { 3633 NegOpc = AArch64::SUBWrr; 3634 ZeroReg = AArch64::WZR; 3635 } else { 3636 assert(SubVT == MVT::i64); 3637 NegOpc = AArch64::SUBXrr; 3638 ZeroReg = AArch64::XZR; 3639 } 3640 SDValue Zero = 3641 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 3642 MachineSDNode *Neg = 3643 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 3644 NewShiftAmt = SDValue(Neg, 0); 3645 } else if (ShiftAmt->getOpcode() == ISD::SUB && 3646 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) { 3647 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X 3648 // to generate a NOT instead of a SUB from a constant. 3649 unsigned NotOpc; 3650 unsigned ZeroReg; 3651 EVT SubVT = ShiftAmt->getValueType(0); 3652 if (SubVT == MVT::i32) { 3653 NotOpc = AArch64::ORNWrr; 3654 ZeroReg = AArch64::WZR; 3655 } else { 3656 assert(SubVT == MVT::i64); 3657 NotOpc = AArch64::ORNXrr; 3658 ZeroReg = AArch64::XZR; 3659 } 3660 SDValue Zero = 3661 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 3662 MachineSDNode *Not = 3663 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1); 3664 NewShiftAmt = SDValue(Not, 0); 3665 } else 3666 return false; 3667 } else { 3668 // If the shift amount is masked with an AND, check that the mask covers the 3669 // bits that are implicitly ANDed off by the above opcodes and if so, skip 3670 // the AND. 3671 uint64_t MaskImm; 3672 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && 3673 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) 3674 return false; 3675 3676 if ((unsigned)llvm::countr_one(MaskImm) < Bits) 3677 return false; 3678 3679 NewShiftAmt = ShiftAmt->getOperand(0); 3680 } 3681 3682 // Narrow/widen the shift amount to match the size of the shift operation. 3683 if (VT == MVT::i32) 3684 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 3685 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 3686 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 3687 MachineSDNode *Ext = CurDAG->getMachineNode( 3688 AArch64::SUBREG_TO_REG, DL, VT, 3689 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 3690 NewShiftAmt = SDValue(Ext, 0); 3691 } 3692 3693 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 3694 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3695 return true; 3696 } 3697 3698 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, 3699 SDValue &FixedPos, 3700 unsigned RegWidth, 3701 bool isReciprocal) { 3702 APFloat FVal(0.0); 3703 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 3704 FVal = CN->getValueAPF(); 3705 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 3706 // Some otherwise illegal constants are allowed in this case. 
    if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
      return false;

    ConstantPoolSDNode *CN =
        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
  } else
    return false;

  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
  // x-register.
  //
  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
  // integers.
  bool IsExact;

  if (isReciprocal)
    if (!FVal.getExactInverse(&FVal))
      return false;

  // fbits is between 1 and 64 in the worst-case, which means the fmul
  // could have 2^64 as an actual operand. Need 65 bits of precision.
  APSInt IntVal(65, true);
  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);

  // N.b. isPowerOf2 also checks for > 0.
  if (!IsExact || !IntVal.isPowerOf2())
    return false;
  unsigned FBits = IntVal.logBase2();

  // Checks above should have guaranteed that we haven't lost information in
  // finding FBits, but it must still be in range.
  if (FBits == 0 || FBits > RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
  return true;
}

bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                                   unsigned RegWidth) {
  return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
                                            false);
}

bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
                                                        SDValue &FixedPos,
                                                        unsigned RegWidth) {
  return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
                                            true);
}

// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
// individual fields, converts them to integers, and combines them into the
// single immediate value used in the MRS/MSR instruction.
static int getIntOperandFromRegisterString(StringRef RegString) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() == 1)
    return -1;

  assert(Fields.size() == 5
            && "Invalid number of fields in read register string");

  SmallVector<int, 5> Ops;
  bool AllIntFields = true;

  for (StringRef Field : Fields) {
    unsigned IntField;
    AllIntFields &= !Field.getAsInteger(10, IntField);
    Ops.push_back(IntField);
  }

  assert(AllIntFields &&
         "Unexpected non-integer value in special register string.");
  (void)AllIntFields;

  // Need to combine the integer fields of the string into a single value
  // based on the bit encoding of the MRS/MSR instruction.
  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
         (Ops[3] << 3) | (Ops[4]);
}

// Lower the read_register intrinsic to an MRS instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
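// Illustrative example (register string assumed, not from the source): the
// string "3:0:4:2:0" (o0=3, op1=0, CRn=4, CRm=2, op2=0) combines to
// (3 << 14) | (0 << 11) | (4 << 7) | (2 << 3) | 0 = 0xC210, which is the
// immediate used as the MRS/MSR system register operand.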
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;

  unsigned Opcode64Bit = AArch64::MRS;
  int Imm = getIntOperandFromRegisterString(RegString->getString());
  if (Imm == -1) {
    // No match. Use the sysreg mapper to map the remaining possible strings to
    // the value for the register to be used for the instruction operand.
    const auto *TheReg =
        AArch64SysReg::lookupSysRegByName(RegString->getString());
    if (TheReg && TheReg->Readable &&
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());

    if (Imm == -1) {
      // Still no match, see if this is "pc" or give up.
      if (!ReadIs128Bit && RegString->getString() == "pc") {
        Opcode64Bit = AArch64::ADR;
        Imm = 0;
      } else {
        return false;
      }
    }
  }

  SDValue InChain = N->getOperand(0);
  SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
  if (!ReadIs128Bit) {
    CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
                         {SysRegImm, InChain});
  } else {
    SDNode *MRRS = CurDAG->getMachineNode(
        AArch64::MRRS, DL,
        {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
        {SysRegImm, InChain});

    // Sysregs are not endian. The even register always contains the low half
    // of the register.
    SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue OutChain = SDValue(MRRS, 1);

    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), OutChain);
  }
  return true;
}

// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;

  if (!WriteIs128Bit) {
    // Check if the register was one of those allowed as the pstatefield value
    // in the MSR (immediate) instruction. To accept the values allowed in the
    // pstatefield for the MSR (immediate) instruction, we also require that an
    // immediate value has been provided as an argument; we know this is the
    // case because it has been ensured by semantic checking.
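    // For example (illustrative), a write_register call that names a PSTATE
    // field such as "pan" with a constant argument is selected here as the
    // MSR-immediate form rather than falling through to the generic
    // MSR-register path below.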
3871 auto trySelectPState = [&](auto PMapper, unsigned State) { 3872 if (PMapper) { 3873 assert(isa<ConstantSDNode>(N->getOperand(2)) && 3874 "Expected a constant integer expression."); 3875 unsigned Reg = PMapper->Encoding; 3876 uint64_t Immed = N->getConstantOperandVal(2); 3877 CurDAG->SelectNodeTo( 3878 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3879 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0)); 3880 return true; 3881 } 3882 return false; 3883 }; 3884 3885 if (trySelectPState( 3886 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()), 3887 AArch64::MSRpstateImm4)) 3888 return true; 3889 if (trySelectPState( 3890 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()), 3891 AArch64::MSRpstateImm1)) 3892 return true; 3893 } 3894 3895 int Imm = getIntOperandFromRegisterString(RegString->getString()); 3896 if (Imm == -1) { 3897 // Use the sysreg mapper to attempt to map the remaining possible strings 3898 // to the value for the register to be used for the MSR (register) 3899 // instruction operand. 3900 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 3901 if (TheReg && TheReg->Writeable && 3902 TheReg->haveFeatures(Subtarget->getFeatureBits())) 3903 Imm = TheReg->Encoding; 3904 else 3905 Imm = AArch64SysReg::parseGenericRegister(RegString->getString()); 3906 3907 if (Imm == -1) 3908 return false; 3909 } 3910 3911 SDValue InChain = N->getOperand(0); 3912 if (!WriteIs128Bit) { 3913 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other, 3914 CurDAG->getTargetConstant(Imm, DL, MVT::i32), 3915 N->getOperand(2), InChain); 3916 } else { 3917 // No endian swap. The lower half always goes into the even subreg, and the 3918 // higher half always into the odd supreg. 3919 SDNode *Pair = CurDAG->getMachineNode( 3920 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */, 3921 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL, 3922 MVT::i32), 3923 N->getOperand(2), 3924 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32), 3925 N->getOperand(3), 3926 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)}); 3927 3928 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other, 3929 CurDAG->getTargetConstant(Imm, DL, MVT::i32), 3930 SDValue(Pair, 0), InChain); 3931 } 3932 3933 return true; 3934 } 3935 3936 /// We've got special pseudo-instructions for these 3937 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3938 unsigned Opcode; 3939 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3940 3941 // Leave IR for LSE if subtarget supports it. 3942 if (Subtarget->hasLSE()) return false; 3943 3944 if (MemTy == MVT::i8) 3945 Opcode = AArch64::CMP_SWAP_8; 3946 else if (MemTy == MVT::i16) 3947 Opcode = AArch64::CMP_SWAP_16; 3948 else if (MemTy == MVT::i32) 3949 Opcode = AArch64::CMP_SWAP_32; 3950 else if (MemTy == MVT::i64) 3951 Opcode = AArch64::CMP_SWAP_64; 3952 else 3953 llvm_unreachable("Unknown AtomicCmpSwap type"); 3954 3955 MVT RegTy = MemTy == MVT::i64 ? 
MVT::i64 : MVT::i32; 3956 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3957 N->getOperand(0)}; 3958 SDNode *CmpSwap = CurDAG->getMachineNode( 3959 Opcode, SDLoc(N), 3960 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 3961 3962 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3963 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3964 3965 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3966 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3967 CurDAG->RemoveDeadNode(N); 3968 3969 return true; 3970 } 3971 3972 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, 3973 SDValue &Shift) { 3974 if (!isa<ConstantSDNode>(N)) 3975 return false; 3976 3977 SDLoc DL(N); 3978 uint64_t Val = cast<ConstantSDNode>(N) 3979 ->getAPIntValue() 3980 .trunc(VT.getFixedSizeInBits()) 3981 .getZExtValue(); 3982 3983 switch (VT.SimpleTy) { 3984 case MVT::i8: 3985 // All immediates are supported. 3986 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3987 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 3988 return true; 3989 case MVT::i16: 3990 case MVT::i32: 3991 case MVT::i64: 3992 // Support 8bit unsigned immediates. 3993 if (Val <= 255) { 3994 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3995 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 3996 return true; 3997 } 3998 // Support 16bit unsigned immediates that are a multiple of 256. 3999 if (Val <= 65280 && Val % 256 == 0) { 4000 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 4001 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); 4002 return true; 4003 } 4004 break; 4005 default: 4006 break; 4007 } 4008 4009 return false; 4010 } 4011 4012 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, 4013 SDValue &Shift) { 4014 if (!isa<ConstantSDNode>(N)) 4015 return false; 4016 4017 SDLoc DL(N); 4018 int64_t Val = cast<ConstantSDNode>(N) 4019 ->getAPIntValue() 4020 .trunc(VT.getFixedSizeInBits()) 4021 .getSExtValue(); 4022 4023 switch (VT.SimpleTy) { 4024 case MVT::i8: 4025 // All immediates are supported. 4026 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 4027 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 4028 return true; 4029 case MVT::i16: 4030 case MVT::i32: 4031 case MVT::i64: 4032 // Support 8bit signed immediates. 4033 if (Val >= -128 && Val <= 127) { 4034 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 4035 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 4036 return true; 4037 } 4038 // Support 16bit signed immediates that are a multiple of 256. 
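    // E.g. (illustrative): Val = -1280 satisfies the check below and is
    // encoded as Imm = 0xFB with Shift = 8, since (-1280 >> 8) & 0xFF == 0xFB.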
4039 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { 4040 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 4041 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32); 4042 return true; 4043 } 4044 break; 4045 default: 4046 break; 4047 } 4048 4049 return false; 4050 } 4051 4052 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { 4053 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 4054 int64_t ImmVal = CNode->getSExtValue(); 4055 SDLoc DL(N); 4056 if (ImmVal >= -128 && ImmVal < 128) { 4057 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 4058 return true; 4059 } 4060 } 4061 return false; 4062 } 4063 4064 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { 4065 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 4066 uint64_t ImmVal = CNode->getZExtValue(); 4067 4068 switch (VT.SimpleTy) { 4069 case MVT::i8: 4070 ImmVal &= 0xFF; 4071 break; 4072 case MVT::i16: 4073 ImmVal &= 0xFFFF; 4074 break; 4075 case MVT::i32: 4076 ImmVal &= 0xFFFFFFFF; 4077 break; 4078 case MVT::i64: 4079 break; 4080 default: 4081 llvm_unreachable("Unexpected type"); 4082 } 4083 4084 if (ImmVal < 256) { 4085 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 4086 return true; 4087 } 4088 } 4089 return false; 4090 } 4091 4092 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, 4093 bool Invert) { 4094 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 4095 uint64_t ImmVal = CNode->getZExtValue(); 4096 SDLoc DL(N); 4097 4098 if (Invert) 4099 ImmVal = ~ImmVal; 4100 4101 // Shift mask depending on type size. 4102 switch (VT.SimpleTy) { 4103 case MVT::i8: 4104 ImmVal &= 0xFF; 4105 ImmVal |= ImmVal << 8; 4106 ImmVal |= ImmVal << 16; 4107 ImmVal |= ImmVal << 32; 4108 break; 4109 case MVT::i16: 4110 ImmVal &= 0xFFFF; 4111 ImmVal |= ImmVal << 16; 4112 ImmVal |= ImmVal << 32; 4113 break; 4114 case MVT::i32: 4115 ImmVal &= 0xFFFFFFFF; 4116 ImmVal |= ImmVal << 32; 4117 break; 4118 case MVT::i64: 4119 break; 4120 default: 4121 llvm_unreachable("Unexpected type"); 4122 } 4123 4124 uint64_t encoding; 4125 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { 4126 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); 4127 return true; 4128 } 4129 } 4130 return false; 4131 } 4132 4133 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. 4134 // Rather than attempt to normalise everything we can sometimes saturate the 4135 // shift amount during selection. This function also allows for consistent 4136 // isel patterns by ensuring the resulting "Imm" node is of the i32 type 4137 // required by the instructions. 4138 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, 4139 uint64_t High, bool AllowSaturation, 4140 SDValue &Imm) { 4141 if (auto *CN = dyn_cast<ConstantSDNode>(N)) { 4142 uint64_t ImmVal = CN->getZExtValue(); 4143 4144 // Reject shift amounts that are too small. 4145 if (ImmVal < Low) 4146 return false; 4147 4148 // Reject or saturate shift amounts that are too big. 4149 if (ImmVal > High) { 4150 if (!AllowSaturation) 4151 return false; 4152 ImmVal = High; 4153 } 4154 4155 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 4156 return true; 4157 } 4158 4159 return false; 4160 } 4161 4162 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 4163 // tagp(FrameIndex, IRGstack, tag_offset): 4164 // since the offset between FrameIndex and IRGstack is a compile-time 4165 // constant, this can be lowered to a single ADDG instruction. 
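  // Illustrative IR shape (names assumed):
  //   %t = call ptr @llvm.aarch64.tagp.p0(ptr %fi, ptr %irg_sp, i64 2)
  // where %fi is a FrameIndex and %irg_sp is produced by @llvm.aarch64.irg.sp;
  // the code below selects this directly to "TAGPstack FI, 0, %irg_sp, 2".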
4166 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 4167 return false; 4168 } 4169 4170 SDValue IRG_SP = N->getOperand(2); 4171 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 4172 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) { 4173 return false; 4174 } 4175 4176 const TargetLowering *TLI = getTargetLowering(); 4177 SDLoc DL(N); 4178 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 4179 SDValue FiOp = CurDAG->getTargetFrameIndex( 4180 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 4181 int TagOffset = N->getConstantOperandVal(3); 4182 4183 SDNode *Out = CurDAG->getMachineNode( 4184 AArch64::TAGPstack, DL, MVT::i64, 4185 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 4186 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 4187 ReplaceNode(N, Out); 4188 return true; 4189 } 4190 4191 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 4192 assert(isa<ConstantSDNode>(N->getOperand(3)) && 4193 "llvm.aarch64.tagp third argument must be an immediate"); 4194 if (trySelectStackSlotTagP(N)) 4195 return; 4196 // FIXME: above applies in any case when offset between Op1 and Op2 is a 4197 // compile-time constant, not just for stack allocations. 4198 4199 // General case for unrelated pointers in Op1 and Op2. 4200 SDLoc DL(N); 4201 int TagOffset = N->getConstantOperandVal(3); 4202 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 4203 {N->getOperand(1), N->getOperand(2)}); 4204 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 4205 {SDValue(N1, 0), N->getOperand(2)}); 4206 SDNode *N3 = CurDAG->getMachineNode( 4207 AArch64::ADDG, DL, MVT::i64, 4208 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 4209 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 4210 ReplaceNode(N, N3); 4211 } 4212 4213 bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) { 4214 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!"); 4215 4216 // Bail when not a "cast" like insert_subvector. 4217 if (N->getConstantOperandVal(2) != 0) 4218 return false; 4219 if (!N->getOperand(0).isUndef()) 4220 return false; 4221 4222 // Bail when normal isel should do the job. 4223 EVT VT = N->getValueType(0); 4224 EVT InVT = N->getOperand(1).getValueType(); 4225 if (VT.isFixedLengthVector() || InVT.isScalableVector()) 4226 return false; 4227 if (InVT.getSizeInBits() <= 128) 4228 return false; 4229 4230 // NOTE: We can only get here when doing fixed length SVE code generation. 4231 // We do manual selection because the types involved are not linked to real 4232 // registers (despite being legal) and must be coerced into SVE registers. 4233 4234 assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && 4235 "Expected to insert into a packed scalable vector!"); 4236 4237 SDLoc DL(N); 4238 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 4239 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, 4240 N->getOperand(1), RC)); 4241 return true; 4242 } 4243 4244 bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) { 4245 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!"); 4246 4247 // Bail when not a "cast" like extract_subvector. 4248 if (N->getConstantOperandVal(1) != 0) 4249 return false; 4250 4251 // Bail when normal isel can do the job. 
4252 EVT VT = N->getValueType(0); 4253 EVT InVT = N->getOperand(0).getValueType(); 4254 if (VT.isScalableVector() || InVT.isFixedLengthVector()) 4255 return false; 4256 if (VT.getSizeInBits() <= 128) 4257 return false; 4258 4259 // NOTE: We can only get here when doing fixed length SVE code generation. 4260 // We do manual selection because the types involved are not linked to real 4261 // registers (despite being legal) and must be coerced into SVE registers. 4262 4263 assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && 4264 "Expected to extract from a packed scalable vector!"); 4265 4266 SDLoc DL(N); 4267 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 4268 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, 4269 N->getOperand(0), RC)); 4270 return true; 4271 } 4272 4273 bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) { 4274 assert(N->getOpcode() == ISD::OR && "Expected OR instruction"); 4275 4276 SDValue N0 = N->getOperand(0); 4277 SDValue N1 = N->getOperand(1); 4278 EVT VT = N->getValueType(0); 4279 4280 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm) 4281 // Rotate by a constant is a funnel shift in IR which is exanded to 4282 // an OR with shifted operands. 4283 // We do the following transform: 4284 // OR N0, N1 -> xar (x, y, imm) 4285 // Where: 4286 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount 4287 // N0 = SHL_PRED true, V, splat(bits-imm) 4288 // V = (xor x, y) 4289 if (VT.isScalableVector() && Subtarget->hasSVE2orSME()) { 4290 if (N0.getOpcode() != AArch64ISD::SHL_PRED || 4291 N1.getOpcode() != AArch64ISD::SRL_PRED) 4292 std::swap(N0, N1); 4293 if (N0.getOpcode() != AArch64ISD::SHL_PRED || 4294 N1.getOpcode() != AArch64ISD::SRL_PRED) 4295 return false; 4296 4297 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering()); 4298 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) || 4299 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0))) 4300 return false; 4301 4302 SDValue XOR = N0.getOperand(1); 4303 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1)) 4304 return false; 4305 4306 APInt ShlAmt, ShrAmt; 4307 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) || 4308 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt)) 4309 return false; 4310 4311 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits()) 4312 return false; 4313 4314 SDLoc DL(N); 4315 SDValue Imm = 4316 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32); 4317 4318 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm}; 4319 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>( 4320 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S, 4321 AArch64::XAR_ZZZI_D})) { 4322 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 4323 return true; 4324 } 4325 return false; 4326 } 4327 4328 if (!Subtarget->hasSHA3()) 4329 return false; 4330 4331 if (N0->getOpcode() != AArch64ISD::VSHL || 4332 N1->getOpcode() != AArch64ISD::VLSHR) 4333 return false; 4334 4335 if (N0->getOperand(0) != N1->getOperand(0) || 4336 N1->getOperand(0)->getOpcode() != ISD::XOR) 4337 return false; 4338 4339 SDValue XOR = N0.getOperand(0); 4340 SDValue R1 = XOR.getOperand(0); 4341 SDValue R2 = XOR.getOperand(1); 4342 4343 unsigned HsAmt = N0.getConstantOperandVal(1); 4344 unsigned ShAmt = N1.getConstantOperandVal(1); 4345 4346 SDLoc DL = SDLoc(N0.getOperand(1)); 4347 SDValue Imm = CurDAG->getTargetConstant( 4348 ShAmt, DL, N0.getOperand(1).getValueType(), false); 4349 4350 if (ShAmt + 
HsAmt != 64) 4351 return false; 4352 4353 SDValue Ops[] = {R1, R2, Imm}; 4354 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops); 4355 4356 return true; 4357 } 4358 4359 void AArch64DAGToDAGISel::Select(SDNode *Node) { 4360 // If we have a custom node, we already have selected! 4361 if (Node->isMachineOpcode()) { 4362 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 4363 Node->setNodeId(-1); 4364 return; 4365 } 4366 4367 // Few custom selection stuff. 4368 EVT VT = Node->getValueType(0); 4369 4370 switch (Node->getOpcode()) { 4371 default: 4372 break; 4373 4374 case ISD::ATOMIC_CMP_SWAP: 4375 if (SelectCMP_SWAP(Node)) 4376 return; 4377 break; 4378 4379 case ISD::READ_REGISTER: 4380 case AArch64ISD::MRRS: 4381 if (tryReadRegister(Node)) 4382 return; 4383 break; 4384 4385 case ISD::WRITE_REGISTER: 4386 case AArch64ISD::MSRR: 4387 if (tryWriteRegister(Node)) 4388 return; 4389 break; 4390 4391 case ISD::LOAD: { 4392 // Try to select as an indexed load. Fall through to normal processing 4393 // if we can't. 4394 if (tryIndexedLoad(Node)) 4395 return; 4396 break; 4397 } 4398 4399 case ISD::SRL: 4400 case ISD::AND: 4401 case ISD::SRA: 4402 case ISD::SIGN_EXTEND_INREG: 4403 if (tryBitfieldExtractOp(Node)) 4404 return; 4405 if (tryBitfieldInsertInZeroOp(Node)) 4406 return; 4407 [[fallthrough]]; 4408 case ISD::ROTR: 4409 case ISD::SHL: 4410 if (tryShiftAmountMod(Node)) 4411 return; 4412 break; 4413 4414 case ISD::SIGN_EXTEND: 4415 if (tryBitfieldExtractOpFromSExt(Node)) 4416 return; 4417 break; 4418 4419 case ISD::OR: 4420 if (tryBitfieldInsertOp(Node)) 4421 return; 4422 if (trySelectXAR(Node)) 4423 return; 4424 break; 4425 4426 case ISD::EXTRACT_SUBVECTOR: { 4427 if (trySelectCastScalableToFixedLengthVector(Node)) 4428 return; 4429 break; 4430 } 4431 4432 case ISD::INSERT_SUBVECTOR: { 4433 if (trySelectCastFixedLengthToScalableVector(Node)) 4434 return; 4435 break; 4436 } 4437 4438 case ISD::Constant: { 4439 // Materialize zero constants as copies from WZR/XZR. This allows 4440 // the coalescer to propagate these into other instructions. 4441 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 4442 if (ConstNode->isZero()) { 4443 if (VT == MVT::i32) { 4444 SDValue New = CurDAG->getCopyFromReg( 4445 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 4446 ReplaceNode(Node, New.getNode()); 4447 return; 4448 } else if (VT == MVT::i64) { 4449 SDValue New = CurDAG->getCopyFromReg( 4450 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 4451 ReplaceNode(Node, New.getNode()); 4452 return; 4453 } 4454 } 4455 break; 4456 } 4457 4458 case ISD::FrameIndex: { 4459 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 4460 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 4461 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 4462 const TargetLowering *TLI = getTargetLowering(); 4463 SDValue TFI = CurDAG->getTargetFrameIndex( 4464 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 4465 SDLoc DL(Node); 4466 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 4467 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 4468 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 4469 return; 4470 } 4471 case ISD::INTRINSIC_W_CHAIN: { 4472 unsigned IntNo = Node->getConstantOperandVal(1); 4473 switch (IntNo) { 4474 default: 4475 break; 4476 case Intrinsic::aarch64_ldaxp: 4477 case Intrinsic::aarch64_ldxp: { 4478 unsigned Op = 4479 IntNo == Intrinsic::aarch64_ldaxp ? 
AArch64::LDAXPX : AArch64::LDXPX; 4480 SDValue MemAddr = Node->getOperand(2); 4481 SDLoc DL(Node); 4482 SDValue Chain = Node->getOperand(0); 4483 4484 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 4485 MVT::Other, MemAddr, Chain); 4486 4487 // Transfer memoperands. 4488 MachineMemOperand *MemOp = 4489 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 4490 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4491 ReplaceNode(Node, Ld); 4492 return; 4493 } 4494 case Intrinsic::aarch64_stlxp: 4495 case Intrinsic::aarch64_stxp: { 4496 unsigned Op = 4497 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; 4498 SDLoc DL(Node); 4499 SDValue Chain = Node->getOperand(0); 4500 SDValue ValLo = Node->getOperand(2); 4501 SDValue ValHi = Node->getOperand(3); 4502 SDValue MemAddr = Node->getOperand(4); 4503 4504 // Place arguments in the right order. 4505 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 4506 4507 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 4508 // Transfer memoperands. 4509 MachineMemOperand *MemOp = 4510 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 4511 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4512 4513 ReplaceNode(Node, St); 4514 return; 4515 } 4516 case Intrinsic::aarch64_neon_ld1x2: 4517 if (VT == MVT::v8i8) { 4518 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 4519 return; 4520 } else if (VT == MVT::v16i8) { 4521 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 4522 return; 4523 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4524 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 4525 return; 4526 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4527 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 4528 return; 4529 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4530 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 4531 return; 4532 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4533 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 4534 return; 4535 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4536 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 4537 return; 4538 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4539 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 4540 return; 4541 } 4542 break; 4543 case Intrinsic::aarch64_neon_ld1x3: 4544 if (VT == MVT::v8i8) { 4545 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 4546 return; 4547 } else if (VT == MVT::v16i8) { 4548 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 4549 return; 4550 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4551 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 4552 return; 4553 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4554 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 4555 return; 4556 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4557 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 4558 return; 4559 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4560 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 4561 return; 4562 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4563 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 4564 return; 4565 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4566 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 4567 return; 4568 } 4569 break; 4570 case Intrinsic::aarch64_neon_ld1x4: 4571 if (VT == MVT::v8i8) { 4572 
SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 4573 return; 4574 } else if (VT == MVT::v16i8) { 4575 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 4576 return; 4577 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4578 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 4579 return; 4580 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4581 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 4582 return; 4583 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4584 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 4585 return; 4586 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4587 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 4588 return; 4589 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4590 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 4591 return; 4592 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4593 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 4594 return; 4595 } 4596 break; 4597 case Intrinsic::aarch64_neon_ld2: 4598 if (VT == MVT::v8i8) { 4599 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 4600 return; 4601 } else if (VT == MVT::v16i8) { 4602 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 4603 return; 4604 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4605 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 4606 return; 4607 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4608 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 4609 return; 4610 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4611 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 4612 return; 4613 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4614 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 4615 return; 4616 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4617 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 4618 return; 4619 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4620 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 4621 return; 4622 } 4623 break; 4624 case Intrinsic::aarch64_neon_ld3: 4625 if (VT == MVT::v8i8) { 4626 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 4627 return; 4628 } else if (VT == MVT::v16i8) { 4629 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 4630 return; 4631 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4632 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 4633 return; 4634 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4635 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 4636 return; 4637 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4638 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 4639 return; 4640 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4641 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 4642 return; 4643 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4644 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 4645 return; 4646 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4647 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 4648 return; 4649 } 4650 break; 4651 case Intrinsic::aarch64_neon_ld4: 4652 if (VT == MVT::v8i8) { 4653 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 4654 return; 4655 } else if (VT == MVT::v16i8) { 4656 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 4657 return; 4658 } else if (VT == MVT::v4i16 || VT == 
MVT::v4f16 || VT == MVT::v4bf16) { 4659 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 4660 return; 4661 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4662 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 4663 return; 4664 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4665 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 4666 return; 4667 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4668 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 4669 return; 4670 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4671 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 4672 return; 4673 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4674 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 4675 return; 4676 } 4677 break; 4678 case Intrinsic::aarch64_neon_ld2r: 4679 if (VT == MVT::v8i8) { 4680 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 4681 return; 4682 } else if (VT == MVT::v16i8) { 4683 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 4684 return; 4685 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4686 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 4687 return; 4688 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4689 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 4690 return; 4691 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4692 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 4693 return; 4694 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4695 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 4696 return; 4697 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4698 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 4699 return; 4700 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4701 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 4702 return; 4703 } 4704 break; 4705 case Intrinsic::aarch64_neon_ld3r: 4706 if (VT == MVT::v8i8) { 4707 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 4708 return; 4709 } else if (VT == MVT::v16i8) { 4710 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 4711 return; 4712 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4713 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 4714 return; 4715 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4716 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 4717 return; 4718 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4719 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 4720 return; 4721 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4722 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 4723 return; 4724 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4725 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 4726 return; 4727 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4728 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 4729 return; 4730 } 4731 break; 4732 case Intrinsic::aarch64_neon_ld4r: 4733 if (VT == MVT::v8i8) { 4734 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 4735 return; 4736 } else if (VT == MVT::v16i8) { 4737 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 4738 return; 4739 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4740 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 4741 return; 4742 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4743 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 4744 return; 4745 } else if (VT 
== MVT::v2i32 || VT == MVT::v2f32) { 4746 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 4747 return; 4748 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4749 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 4750 return; 4751 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4752 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 4753 return; 4754 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4755 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 4756 return; 4757 } 4758 break; 4759 case Intrinsic::aarch64_neon_ld2lane: 4760 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4761 SelectLoadLane(Node, 2, AArch64::LD2i8); 4762 return; 4763 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4764 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4765 SelectLoadLane(Node, 2, AArch64::LD2i16); 4766 return; 4767 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4768 VT == MVT::v2f32) { 4769 SelectLoadLane(Node, 2, AArch64::LD2i32); 4770 return; 4771 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4772 VT == MVT::v1f64) { 4773 SelectLoadLane(Node, 2, AArch64::LD2i64); 4774 return; 4775 } 4776 break; 4777 case Intrinsic::aarch64_neon_ld3lane: 4778 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4779 SelectLoadLane(Node, 3, AArch64::LD3i8); 4780 return; 4781 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4782 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4783 SelectLoadLane(Node, 3, AArch64::LD3i16); 4784 return; 4785 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4786 VT == MVT::v2f32) { 4787 SelectLoadLane(Node, 3, AArch64::LD3i32); 4788 return; 4789 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4790 VT == MVT::v1f64) { 4791 SelectLoadLane(Node, 3, AArch64::LD3i64); 4792 return; 4793 } 4794 break; 4795 case Intrinsic::aarch64_neon_ld4lane: 4796 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4797 SelectLoadLane(Node, 4, AArch64::LD4i8); 4798 return; 4799 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4800 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4801 SelectLoadLane(Node, 4, AArch64::LD4i16); 4802 return; 4803 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4804 VT == MVT::v2f32) { 4805 SelectLoadLane(Node, 4, AArch64::LD4i32); 4806 return; 4807 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4808 VT == MVT::v1f64) { 4809 SelectLoadLane(Node, 4, AArch64::LD4i64); 4810 return; 4811 } 4812 break; 4813 case Intrinsic::aarch64_ld64b: 4814 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); 4815 return; 4816 case Intrinsic::aarch64_sve_ld2q_sret: { 4817 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true); 4818 return; 4819 } 4820 case Intrinsic::aarch64_sve_ld3q_sret: { 4821 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true); 4822 return; 4823 } 4824 case Intrinsic::aarch64_sve_ld4q_sret: { 4825 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true); 4826 return; 4827 } 4828 case Intrinsic::aarch64_sve_ld2_sret: { 4829 if (VT == MVT::nxv16i8) { 4830 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, 4831 true); 4832 return; 4833 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4834 VT == MVT::nxv8bf16) { 4835 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, 4836 true); 4837 return; 4838 } else if (VT == MVT::nxv4i32 || VT == 
    case Intrinsic::aarch64_sve_ld1_pn_x2: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
                                          AArch64::LD1B_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
                                          AArch64::LD1H_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
                                          AArch64::LD1W_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
                                          AArch64::LD1D_2Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld1_pn_x4: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
                                          AArch64::LD1B_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
                                          AArch64::LD1H_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
                                          AArch64::LD1W_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
                                          AArch64::LD1D_4Z);
        else
          break;
        return;
      }
      break;
    }
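    // Non-temporal counterparts of the contiguous multi-vector loads above;
    // the selection mirrors ld1_pn_x2/x4 with the LDNT1 opcodes.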
    case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 0,
                                          AArch64::LDNT1B_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1B_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
                                          AArch64::LDNT1B_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 1,
                                          AArch64::LDNT1H_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1H_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
                                          AArch64::LDNT1H_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 2,
                                          AArch64::LDNT1W_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1W_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
                                          AArch64::LDNT1W_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 3,
                                          AArch64::LDNT1D_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1D_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
                                          AArch64::LDNT1D_2Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 0,
                                          AArch64::LDNT1B_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1B_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
                                          AArch64::LDNT1B_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 1,
                                          AArch64::LDNT1H_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1H_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
                                          AArch64::LDNT1H_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 2,
                                          AArch64::LDNT1W_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1W_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
                                          AArch64::LDNT1W_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 3,
                                          AArch64::LDNT1D_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1D_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
                                          AArch64::LDNT1D_4Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld3_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
                             true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
                             true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
                             true);
        return;
      }
else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5052 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D, 5053 true); 5054 return; 5055 } 5056 break; 5057 } 5058 case Intrinsic::aarch64_sve_ld4_sret: { 5059 if (VT == MVT::nxv16i8) { 5060 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B, 5061 true); 5062 return; 5063 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5064 VT == MVT::nxv8bf16) { 5065 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, 5066 true); 5067 return; 5068 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5069 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W, 5070 true); 5071 return; 5072 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5073 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D, 5074 true); 5075 return; 5076 } 5077 break; 5078 } 5079 case Intrinsic::aarch64_sme_read_hor_vg2: { 5080 if (VT == MVT::nxv16i8) { 5081 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0, 5082 AArch64::MOVA_2ZMXI_H_B); 5083 return; 5084 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5085 VT == MVT::nxv8bf16) { 5086 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0, 5087 AArch64::MOVA_2ZMXI_H_H); 5088 return; 5089 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5090 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0, 5091 AArch64::MOVA_2ZMXI_H_S); 5092 return; 5093 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5094 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0, 5095 AArch64::MOVA_2ZMXI_H_D); 5096 return; 5097 } 5098 break; 5099 } 5100 case Intrinsic::aarch64_sme_read_ver_vg2: { 5101 if (VT == MVT::nxv16i8) { 5102 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0, 5103 AArch64::MOVA_2ZMXI_V_B); 5104 return; 5105 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5106 VT == MVT::nxv8bf16) { 5107 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0, 5108 AArch64::MOVA_2ZMXI_V_H); 5109 return; 5110 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5111 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0, 5112 AArch64::MOVA_2ZMXI_V_S); 5113 return; 5114 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5115 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0, 5116 AArch64::MOVA_2ZMXI_V_D); 5117 return; 5118 } 5119 break; 5120 } 5121 case Intrinsic::aarch64_sme_read_hor_vg4: { 5122 if (VT == MVT::nxv16i8) { 5123 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0, 5124 AArch64::MOVA_4ZMXI_H_B); 5125 return; 5126 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5127 VT == MVT::nxv8bf16) { 5128 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0, 5129 AArch64::MOVA_4ZMXI_H_H); 5130 return; 5131 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5132 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0, 5133 AArch64::MOVA_4ZMXI_H_S); 5134 return; 5135 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5136 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0, 5137 AArch64::MOVA_4ZMXI_H_D); 5138 return; 5139 } 5140 break; 5141 } 5142 case Intrinsic::aarch64_sme_read_ver_vg4: { 5143 if (VT == MVT::nxv16i8) { 5144 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0, 5145 AArch64::MOVA_4ZMXI_V_B); 5146 return; 5147 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5148 VT == MVT::nxv8bf16) { 5149 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0, 5150 AArch64::MOVA_4ZMXI_V_H); 5151 return; 5152 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5153 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0, 5154 AArch64::MOVA_4ZMXI_V_S); 5155 
return; 5156 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5157 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0, 5158 AArch64::MOVA_4ZMXI_V_D); 5159 return; 5160 } 5161 break; 5162 } 5163 case Intrinsic::aarch64_sme_read_vg1x2: { 5164 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA, 5165 AArch64::MOVA_VG2_2ZMXI); 5166 return; 5167 } 5168 case Intrinsic::aarch64_sme_read_vg1x4: { 5169 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA, 5170 AArch64::MOVA_VG4_4ZMXI); 5171 return; 5172 } 5173 case Intrinsic::swift_async_context_addr: { 5174 SDLoc DL(Node); 5175 SDValue Chain = Node->getOperand(0); 5176 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64); 5177 SDValue Res = SDValue( 5178 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP, 5179 CurDAG->getTargetConstant(8, DL, MVT::i32), 5180 CurDAG->getTargetConstant(0, DL, MVT::i32)), 5181 0); 5182 ReplaceUses(SDValue(Node, 0), Res); 5183 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1)); 5184 CurDAG->RemoveDeadNode(Node); 5185 5186 auto &MF = CurDAG->getMachineFunction(); 5187 MF.getFrameInfo().setFrameAddressIsTaken(true); 5188 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); 5189 return; 5190 } 5191 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: { 5192 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5193 Node->getValueType(0), 5194 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H, 5195 AArch64::LUTI2_4ZTZI_S})) 5196 // Second Immediate must be <= 3: 5197 SelectMultiVectorLuti(Node, 4, Opc, 3); 5198 return; 5199 } 5200 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: { 5201 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5202 Node->getValueType(0), 5203 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S})) 5204 // Second Immediate must be <= 1: 5205 SelectMultiVectorLuti(Node, 4, Opc, 1); 5206 return; 5207 } 5208 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: { 5209 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5210 Node->getValueType(0), 5211 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H, 5212 AArch64::LUTI2_2ZTZI_S})) 5213 // Second Immediate must be <= 7: 5214 SelectMultiVectorLuti(Node, 2, Opc, 7); 5215 return; 5216 } 5217 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: { 5218 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5219 Node->getValueType(0), 5220 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H, 5221 AArch64::LUTI4_2ZTZI_S})) 5222 // Second Immediate must be <= 3: 5223 SelectMultiVectorLuti(Node, 2, Opc, 3); 5224 return; 5225 } 5226 } 5227 } break; 5228 case ISD::INTRINSIC_WO_CHAIN: { 5229 unsigned IntNo = Node->getConstantOperandVal(0); 5230 switch (IntNo) { 5231 default: 5232 break; 5233 case Intrinsic::aarch64_tagp: 5234 SelectTagP(Node); 5235 return; 5236 case Intrinsic::aarch64_neon_tbl2: 5237 SelectTable(Node, 2, 5238 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, 5239 false); 5240 return; 5241 case Intrinsic::aarch64_neon_tbl3: 5242 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 5243 : AArch64::TBLv16i8Three, 5244 false); 5245 return; 5246 case Intrinsic::aarch64_neon_tbl4: 5247 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 5248 : AArch64::TBLv16i8Four, 5249 false); 5250 return; 5251 case Intrinsic::aarch64_neon_tbx2: 5252 SelectTable(Node, 2, 5253 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, 5254 true); 5255 return; 5256 case Intrinsic::aarch64_neon_tbx3: 5257 SelectTable(Node, 3, VT == MVT::v8i8 ? 
AArch64::TBXv8i8Three 5258 : AArch64::TBXv16i8Three, 5259 true); 5260 return; 5261 case Intrinsic::aarch64_neon_tbx4: 5262 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four 5263 : AArch64::TBXv16i8Four, 5264 true); 5265 return; 5266 case Intrinsic::aarch64_sve_srshl_single_x2: 5267 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5268 Node->getValueType(0), 5269 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H, 5270 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D})) 5271 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5272 return; 5273 case Intrinsic::aarch64_sve_srshl_single_x4: 5274 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5275 Node->getValueType(0), 5276 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H, 5277 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D})) 5278 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5279 return; 5280 case Intrinsic::aarch64_sve_urshl_single_x2: 5281 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5282 Node->getValueType(0), 5283 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H, 5284 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D})) 5285 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5286 return; 5287 case Intrinsic::aarch64_sve_urshl_single_x4: 5288 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5289 Node->getValueType(0), 5290 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H, 5291 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D})) 5292 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5293 return; 5294 case Intrinsic::aarch64_sve_srshl_x2: 5295 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5296 Node->getValueType(0), 5297 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H, 5298 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D})) 5299 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5300 return; 5301 case Intrinsic::aarch64_sve_srshl_x4: 5302 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5303 Node->getValueType(0), 5304 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H, 5305 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D})) 5306 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5307 return; 5308 case Intrinsic::aarch64_sve_urshl_x2: 5309 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5310 Node->getValueType(0), 5311 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H, 5312 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D})) 5313 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5314 return; 5315 case Intrinsic::aarch64_sve_urshl_x4: 5316 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5317 Node->getValueType(0), 5318 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H, 5319 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D})) 5320 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5321 return; 5322 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2: 5323 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5324 Node->getValueType(0), 5325 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H, 5326 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D})) 5327 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5328 return; 5329 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4: 5330 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5331 Node->getValueType(0), 5332 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H, 5333 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D})) 5334 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5335 return; 5336 case Intrinsic::aarch64_sve_sqdmulh_vgx2: 
5337 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5338 Node->getValueType(0), 5339 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H, 5340 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D})) 5341 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5342 return; 5343 case Intrinsic::aarch64_sve_sqdmulh_vgx4: 5344 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5345 Node->getValueType(0), 5346 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H, 5347 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D})) 5348 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5349 return; 5350 case Intrinsic::aarch64_sve_whilege_x2: 5351 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5352 Node->getValueType(0), 5353 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H, 5354 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D})) 5355 SelectWhilePair(Node, Op); 5356 return; 5357 case Intrinsic::aarch64_sve_whilegt_x2: 5358 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5359 Node->getValueType(0), 5360 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H, 5361 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D})) 5362 SelectWhilePair(Node, Op); 5363 return; 5364 case Intrinsic::aarch64_sve_whilehi_x2: 5365 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5366 Node->getValueType(0), 5367 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H, 5368 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D})) 5369 SelectWhilePair(Node, Op); 5370 return; 5371 case Intrinsic::aarch64_sve_whilehs_x2: 5372 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5373 Node->getValueType(0), 5374 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H, 5375 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D})) 5376 SelectWhilePair(Node, Op); 5377 return; 5378 case Intrinsic::aarch64_sve_whilele_x2: 5379 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5380 Node->getValueType(0), 5381 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H, 5382 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D})) 5383 SelectWhilePair(Node, Op); 5384 return; 5385 case Intrinsic::aarch64_sve_whilelo_x2: 5386 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5387 Node->getValueType(0), 5388 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H, 5389 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D})) 5390 SelectWhilePair(Node, Op); 5391 return; 5392 case Intrinsic::aarch64_sve_whilels_x2: 5393 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5394 Node->getValueType(0), 5395 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H, 5396 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D})) 5397 SelectWhilePair(Node, Op); 5398 return; 5399 case Intrinsic::aarch64_sve_whilelt_x2: 5400 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5401 Node->getValueType(0), 5402 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H, 5403 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D})) 5404 SelectWhilePair(Node, Op); 5405 return; 5406 case Intrinsic::aarch64_sve_smax_single_x2: 5407 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5408 Node->getValueType(0), 5409 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H, 5410 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D})) 5411 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5412 return; 5413 case Intrinsic::aarch64_sve_umax_single_x2: 5414 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5415 Node->getValueType(0), 5416 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H, 5417 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D})) 5418 
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5419 return; 5420 case Intrinsic::aarch64_sve_fmax_single_x2: 5421 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5422 Node->getValueType(0), 5423 {0, AArch64::FMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_S, 5424 AArch64::FMAX_VG2_2ZZ_D})) 5425 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5426 return; 5427 case Intrinsic::aarch64_sve_smax_single_x4: 5428 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5429 Node->getValueType(0), 5430 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H, 5431 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D})) 5432 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5433 return; 5434 case Intrinsic::aarch64_sve_umax_single_x4: 5435 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5436 Node->getValueType(0), 5437 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H, 5438 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D})) 5439 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5440 return; 5441 case Intrinsic::aarch64_sve_fmax_single_x4: 5442 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5443 Node->getValueType(0), 5444 {0, AArch64::FMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_S, 5445 AArch64::FMAX_VG4_4ZZ_D})) 5446 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5447 return; 5448 case Intrinsic::aarch64_sve_smin_single_x2: 5449 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5450 Node->getValueType(0), 5451 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H, 5452 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D})) 5453 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5454 return; 5455 case Intrinsic::aarch64_sve_umin_single_x2: 5456 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5457 Node->getValueType(0), 5458 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H, 5459 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D})) 5460 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5461 return; 5462 case Intrinsic::aarch64_sve_fmin_single_x2: 5463 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5464 Node->getValueType(0), 5465 {0, AArch64::FMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_S, 5466 AArch64::FMIN_VG2_2ZZ_D})) 5467 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5468 return; 5469 case Intrinsic::aarch64_sve_smin_single_x4: 5470 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5471 Node->getValueType(0), 5472 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H, 5473 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D})) 5474 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5475 return; 5476 case Intrinsic::aarch64_sve_umin_single_x4: 5477 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5478 Node->getValueType(0), 5479 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H, 5480 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D})) 5481 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5482 return; 5483 case Intrinsic::aarch64_sve_fmin_single_x4: 5484 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5485 Node->getValueType(0), 5486 {0, AArch64::FMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_S, 5487 AArch64::FMIN_VG4_4ZZ_D})) 5488 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5489 return; 5490 case Intrinsic::aarch64_sve_smax_x2: 5491 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5492 Node->getValueType(0), 5493 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H, 5494 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D})) 5495 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5496 return; 5497 case Intrinsic::aarch64_sve_umax_x2: 5498 if 
(auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5499 Node->getValueType(0), 5500 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H, 5501 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D})) 5502 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5503 return; 5504 case Intrinsic::aarch64_sve_fmax_x2: 5505 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5506 Node->getValueType(0), 5507 {0, AArch64::FMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_S, 5508 AArch64::FMAX_VG2_2Z2Z_D})) 5509 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5510 return; 5511 case Intrinsic::aarch64_sve_smax_x4: 5512 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5513 Node->getValueType(0), 5514 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H, 5515 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D})) 5516 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5517 return; 5518 case Intrinsic::aarch64_sve_umax_x4: 5519 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5520 Node->getValueType(0), 5521 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H, 5522 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D})) 5523 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5524 return; 5525 case Intrinsic::aarch64_sve_fmax_x4: 5526 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5527 Node->getValueType(0), 5528 {0, AArch64::FMAX_VG4_4Z4Z_H, AArch64::FMAX_VG4_4Z4Z_S, 5529 AArch64::FMAX_VG4_4Z4Z_D})) 5530 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5531 return; 5532 case Intrinsic::aarch64_sve_smin_x2: 5533 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5534 Node->getValueType(0), 5535 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H, 5536 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D})) 5537 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5538 return; 5539 case Intrinsic::aarch64_sve_umin_x2: 5540 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5541 Node->getValueType(0), 5542 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H, 5543 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D})) 5544 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5545 return; 5546 case Intrinsic::aarch64_sve_fmin_x2: 5547 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5548 Node->getValueType(0), 5549 {0, AArch64::FMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_S, 5550 AArch64::FMIN_VG2_2Z2Z_D})) 5551 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5552 return; 5553 case Intrinsic::aarch64_sve_smin_x4: 5554 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5555 Node->getValueType(0), 5556 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H, 5557 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D})) 5558 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5559 return; 5560 case Intrinsic::aarch64_sve_umin_x4: 5561 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5562 Node->getValueType(0), 5563 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H, 5564 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D})) 5565 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5566 return; 5567 case Intrinsic::aarch64_sve_fmin_x4: 5568 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5569 Node->getValueType(0), 5570 {0, AArch64::FMIN_VG4_4Z4Z_H, AArch64::FMIN_VG4_4Z4Z_S, 5571 AArch64::FMIN_VG4_4Z4Z_D})) 5572 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5573 return; 5574 case Intrinsic::aarch64_sve_fmaxnm_single_x2 : 5575 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5576 Node->getValueType(0), 5577 {0, AArch64::FMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_S, 5578 
AArch64::FMAXNM_VG2_2ZZ_D})) 5579 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5580 return; 5581 case Intrinsic::aarch64_sve_fmaxnm_single_x4 : 5582 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5583 Node->getValueType(0), 5584 {0, AArch64::FMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_S, 5585 AArch64::FMAXNM_VG4_4ZZ_D})) 5586 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5587 return; 5588 case Intrinsic::aarch64_sve_fminnm_single_x2: 5589 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5590 Node->getValueType(0), 5591 {0, AArch64::FMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_S, 5592 AArch64::FMINNM_VG2_2ZZ_D})) 5593 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5594 return; 5595 case Intrinsic::aarch64_sve_fminnm_single_x4: 5596 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5597 Node->getValueType(0), 5598 {0, AArch64::FMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_S, 5599 AArch64::FMINNM_VG4_4ZZ_D})) 5600 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5601 return; 5602 case Intrinsic::aarch64_sve_fmaxnm_x2: 5603 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5604 Node->getValueType(0), 5605 {0, AArch64::FMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_S, 5606 AArch64::FMAXNM_VG2_2Z2Z_D})) 5607 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5608 return; 5609 case Intrinsic::aarch64_sve_fmaxnm_x4: 5610 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5611 Node->getValueType(0), 5612 {0, AArch64::FMAXNM_VG4_4Z4Z_H, AArch64::FMAXNM_VG4_4Z4Z_S, 5613 AArch64::FMAXNM_VG4_4Z4Z_D})) 5614 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5615 return; 5616 case Intrinsic::aarch64_sve_fminnm_x2: 5617 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5618 Node->getValueType(0), 5619 {0, AArch64::FMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_S, 5620 AArch64::FMINNM_VG2_2Z2Z_D})) 5621 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5622 return; 5623 case Intrinsic::aarch64_sve_fminnm_x4: 5624 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5625 Node->getValueType(0), 5626 {0, AArch64::FMINNM_VG4_4Z4Z_H, AArch64::FMINNM_VG4_4Z4Z_S, 5627 AArch64::FMINNM_VG4_4Z4Z_D})) 5628 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5629 return; 5630 case Intrinsic::aarch64_sve_fcvtzs_x2: 5631 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); 5632 return; 5633 case Intrinsic::aarch64_sve_scvtf_x2: 5634 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS); 5635 return; 5636 case Intrinsic::aarch64_sve_fcvtzu_x2: 5637 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS); 5638 return; 5639 case Intrinsic::aarch64_sve_ucvtf_x2: 5640 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS); 5641 return; 5642 case Intrinsic::aarch64_sve_fcvtzs_x4: 5643 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS); 5644 return; 5645 case Intrinsic::aarch64_sve_scvtf_x4: 5646 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS); 5647 return; 5648 case Intrinsic::aarch64_sve_fcvtzu_x4: 5649 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS); 5650 return; 5651 case Intrinsic::aarch64_sve_ucvtf_x4: 5652 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS); 5653 return; 5654 case Intrinsic::aarch64_sve_sclamp_single_x2: 5655 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5656 Node->getValueType(0), 5657 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H, 5658 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D})) 5659 SelectClamp(Node, 2, Op); 5660 return; 5661 case Intrinsic::aarch64_sve_uclamp_single_x2: 5662 if (auto Op = 
SelectOpcodeFromVT<SelectTypeKind::Int>( 5663 Node->getValueType(0), 5664 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H, 5665 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D})) 5666 SelectClamp(Node, 2, Op); 5667 return; 5668 case Intrinsic::aarch64_sve_fclamp_single_x2: 5669 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5670 Node->getValueType(0), 5671 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S, 5672 AArch64::FCLAMP_VG2_2Z2Z_D})) 5673 SelectClamp(Node, 2, Op); 5674 return; 5675 case Intrinsic::aarch64_sve_sclamp_single_x4: 5676 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5677 Node->getValueType(0), 5678 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H, 5679 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D})) 5680 SelectClamp(Node, 4, Op); 5681 return; 5682 case Intrinsic::aarch64_sve_uclamp_single_x4: 5683 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5684 Node->getValueType(0), 5685 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H, 5686 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D})) 5687 SelectClamp(Node, 4, Op); 5688 return; 5689 case Intrinsic::aarch64_sve_fclamp_single_x4: 5690 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5691 Node->getValueType(0), 5692 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S, 5693 AArch64::FCLAMP_VG4_4Z4Z_D})) 5694 SelectClamp(Node, 4, Op); 5695 return; 5696 case Intrinsic::aarch64_sve_add_single_x2: 5697 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5698 Node->getValueType(0), 5699 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H, 5700 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D})) 5701 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5702 return; 5703 case Intrinsic::aarch64_sve_add_single_x4: 5704 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5705 Node->getValueType(0), 5706 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H, 5707 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D})) 5708 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5709 return; 5710 case Intrinsic::aarch64_sve_zip_x2: 5711 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5712 Node->getValueType(0), 5713 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H, 5714 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D})) 5715 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 5716 return; 5717 case Intrinsic::aarch64_sve_zipq_x2: 5718 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, 5719 AArch64::ZIP_VG2_2ZZZ_Q); 5720 return; 5721 case Intrinsic::aarch64_sve_zip_x4: 5722 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5723 Node->getValueType(0), 5724 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H, 5725 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D})) 5726 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 5727 return; 5728 case Intrinsic::aarch64_sve_zipq_x4: 5729 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, 5730 AArch64::ZIP_VG4_4Z4Z_Q); 5731 return; 5732 case Intrinsic::aarch64_sve_uzp_x2: 5733 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5734 Node->getValueType(0), 5735 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H, 5736 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D})) 5737 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 5738 return; 5739 case Intrinsic::aarch64_sve_uzpq_x2: 5740 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, 5741 AArch64::UZP_VG2_2ZZZ_Q); 5742 return; 5743 case Intrinsic::aarch64_sve_uzp_x4: 5744 if (auto Op = 
SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5745 Node->getValueType(0), 5746 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H, 5747 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D})) 5748 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 5749 return; 5750 case Intrinsic::aarch64_sve_uzpq_x4: 5751 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, 5752 AArch64::UZP_VG4_4Z4Z_Q); 5753 return; 5754 case Intrinsic::aarch64_sve_sel_x2: 5755 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5756 Node->getValueType(0), 5757 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H, 5758 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D})) 5759 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true); 5760 return; 5761 case Intrinsic::aarch64_sve_sel_x4: 5762 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5763 Node->getValueType(0), 5764 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H, 5765 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D})) 5766 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true); 5767 return; 5768 case Intrinsic::aarch64_sve_frinta_x2: 5769 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S); 5770 return; 5771 case Intrinsic::aarch64_sve_frinta_x4: 5772 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S); 5773 return; 5774 case Intrinsic::aarch64_sve_frintm_x2: 5775 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S); 5776 return; 5777 case Intrinsic::aarch64_sve_frintm_x4: 5778 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S); 5779 return; 5780 case Intrinsic::aarch64_sve_frintn_x2: 5781 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S); 5782 return; 5783 case Intrinsic::aarch64_sve_frintn_x4: 5784 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S); 5785 return; 5786 case Intrinsic::aarch64_sve_frintp_x2: 5787 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S); 5788 return; 5789 case Intrinsic::aarch64_sve_frintp_x4: 5790 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S); 5791 return; 5792 case Intrinsic::aarch64_sve_sunpk_x2: 5793 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5794 Node->getValueType(0), 5795 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S, 5796 AArch64::SUNPK_VG2_2ZZ_D})) 5797 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 5798 return; 5799 case Intrinsic::aarch64_sve_uunpk_x2: 5800 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5801 Node->getValueType(0), 5802 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S, 5803 AArch64::UUNPK_VG2_2ZZ_D})) 5804 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 5805 return; 5806 case Intrinsic::aarch64_sve_sunpk_x4: 5807 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5808 Node->getValueType(0), 5809 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S, 5810 AArch64::SUNPK_VG4_4Z2Z_D})) 5811 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 5812 return; 5813 case Intrinsic::aarch64_sve_uunpk_x4: 5814 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5815 Node->getValueType(0), 5816 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S, 5817 AArch64::UUNPK_VG4_4Z2Z_D})) 5818 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 5819 return; 5820 case Intrinsic::aarch64_sve_pext_x2: { 5821 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5822 Node->getValueType(0), 5823 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S, 5824 AArch64::PEXT_2PCI_D})) 5825 SelectPExtPair(Node, Op); 5826 return; 5827 } 5828 } 5829 
break; 5830 } 5831 case ISD::INTRINSIC_VOID: { 5832 unsigned IntNo = Node->getConstantOperandVal(1); 5833 if (Node->getNumOperands() >= 3) 5834 VT = Node->getOperand(2)->getValueType(0); 5835 switch (IntNo) { 5836 default: 5837 break; 5838 case Intrinsic::aarch64_neon_st1x2: { 5839 if (VT == MVT::v8i8) { 5840 SelectStore(Node, 2, AArch64::ST1Twov8b); 5841 return; 5842 } else if (VT == MVT::v16i8) { 5843 SelectStore(Node, 2, AArch64::ST1Twov16b); 5844 return; 5845 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5846 VT == MVT::v4bf16) { 5847 SelectStore(Node, 2, AArch64::ST1Twov4h); 5848 return; 5849 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5850 VT == MVT::v8bf16) { 5851 SelectStore(Node, 2, AArch64::ST1Twov8h); 5852 return; 5853 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5854 SelectStore(Node, 2, AArch64::ST1Twov2s); 5855 return; 5856 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5857 SelectStore(Node, 2, AArch64::ST1Twov4s); 5858 return; 5859 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5860 SelectStore(Node, 2, AArch64::ST1Twov2d); 5861 return; 5862 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5863 SelectStore(Node, 2, AArch64::ST1Twov1d); 5864 return; 5865 } 5866 break; 5867 } 5868 case Intrinsic::aarch64_neon_st1x3: { 5869 if (VT == MVT::v8i8) { 5870 SelectStore(Node, 3, AArch64::ST1Threev8b); 5871 return; 5872 } else if (VT == MVT::v16i8) { 5873 SelectStore(Node, 3, AArch64::ST1Threev16b); 5874 return; 5875 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5876 VT == MVT::v4bf16) { 5877 SelectStore(Node, 3, AArch64::ST1Threev4h); 5878 return; 5879 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5880 VT == MVT::v8bf16) { 5881 SelectStore(Node, 3, AArch64::ST1Threev8h); 5882 return; 5883 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5884 SelectStore(Node, 3, AArch64::ST1Threev2s); 5885 return; 5886 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5887 SelectStore(Node, 3, AArch64::ST1Threev4s); 5888 return; 5889 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5890 SelectStore(Node, 3, AArch64::ST1Threev2d); 5891 return; 5892 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5893 SelectStore(Node, 3, AArch64::ST1Threev1d); 5894 return; 5895 } 5896 break; 5897 } 5898 case Intrinsic::aarch64_neon_st1x4: { 5899 if (VT == MVT::v8i8) { 5900 SelectStore(Node, 4, AArch64::ST1Fourv8b); 5901 return; 5902 } else if (VT == MVT::v16i8) { 5903 SelectStore(Node, 4, AArch64::ST1Fourv16b); 5904 return; 5905 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5906 VT == MVT::v4bf16) { 5907 SelectStore(Node, 4, AArch64::ST1Fourv4h); 5908 return; 5909 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5910 VT == MVT::v8bf16) { 5911 SelectStore(Node, 4, AArch64::ST1Fourv8h); 5912 return; 5913 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5914 SelectStore(Node, 4, AArch64::ST1Fourv2s); 5915 return; 5916 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5917 SelectStore(Node, 4, AArch64::ST1Fourv4s); 5918 return; 5919 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5920 SelectStore(Node, 4, AArch64::ST1Fourv2d); 5921 return; 5922 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5923 SelectStore(Node, 4, AArch64::ST1Fourv1d); 5924 return; 5925 } 5926 break; 5927 } 5928 case Intrinsic::aarch64_neon_st2: { 5929 if (VT == MVT::v8i8) { 5930 SelectStore(Node, 2, AArch64::ST2Twov8b); 5931 return; 5932 } else if (VT == MVT::v16i8) { 5933 SelectStore(Node, 2, AArch64::ST2Twov16b); 5934 return; 5935 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5936 VT 
== MVT::v4bf16) { 5937 SelectStore(Node, 2, AArch64::ST2Twov4h); 5938 return; 5939 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5940 VT == MVT::v8bf16) { 5941 SelectStore(Node, 2, AArch64::ST2Twov8h); 5942 return; 5943 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5944 SelectStore(Node, 2, AArch64::ST2Twov2s); 5945 return; 5946 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5947 SelectStore(Node, 2, AArch64::ST2Twov4s); 5948 return; 5949 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5950 SelectStore(Node, 2, AArch64::ST2Twov2d); 5951 return; 5952 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5953 SelectStore(Node, 2, AArch64::ST1Twov1d); 5954 return; 5955 } 5956 break; 5957 } 5958 case Intrinsic::aarch64_neon_st3: { 5959 if (VT == MVT::v8i8) { 5960 SelectStore(Node, 3, AArch64::ST3Threev8b); 5961 return; 5962 } else if (VT == MVT::v16i8) { 5963 SelectStore(Node, 3, AArch64::ST3Threev16b); 5964 return; 5965 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5966 VT == MVT::v4bf16) { 5967 SelectStore(Node, 3, AArch64::ST3Threev4h); 5968 return; 5969 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5970 VT == MVT::v8bf16) { 5971 SelectStore(Node, 3, AArch64::ST3Threev8h); 5972 return; 5973 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5974 SelectStore(Node, 3, AArch64::ST3Threev2s); 5975 return; 5976 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5977 SelectStore(Node, 3, AArch64::ST3Threev4s); 5978 return; 5979 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5980 SelectStore(Node, 3, AArch64::ST3Threev2d); 5981 return; 5982 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5983 SelectStore(Node, 3, AArch64::ST1Threev1d); 5984 return; 5985 } 5986 break; 5987 } 5988 case Intrinsic::aarch64_neon_st4: { 5989 if (VT == MVT::v8i8) { 5990 SelectStore(Node, 4, AArch64::ST4Fourv8b); 5991 return; 5992 } else if (VT == MVT::v16i8) { 5993 SelectStore(Node, 4, AArch64::ST4Fourv16b); 5994 return; 5995 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5996 VT == MVT::v4bf16) { 5997 SelectStore(Node, 4, AArch64::ST4Fourv4h); 5998 return; 5999 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 6000 VT == MVT::v8bf16) { 6001 SelectStore(Node, 4, AArch64::ST4Fourv8h); 6002 return; 6003 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6004 SelectStore(Node, 4, AArch64::ST4Fourv2s); 6005 return; 6006 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6007 SelectStore(Node, 4, AArch64::ST4Fourv4s); 6008 return; 6009 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6010 SelectStore(Node, 4, AArch64::ST4Fourv2d); 6011 return; 6012 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6013 SelectStore(Node, 4, AArch64::ST1Fourv1d); 6014 return; 6015 } 6016 break; 6017 } 6018 case Intrinsic::aarch64_neon_st2lane: { 6019 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 6020 SelectStoreLane(Node, 2, AArch64::ST2i8); 6021 return; 6022 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 6023 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 6024 SelectStoreLane(Node, 2, AArch64::ST2i16); 6025 return; 6026 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 6027 VT == MVT::v2f32) { 6028 SelectStoreLane(Node, 2, AArch64::ST2i32); 6029 return; 6030 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 6031 VT == MVT::v1f64) { 6032 SelectStoreLane(Node, 2, AArch64::ST2i64); 6033 return; 6034 } 6035 break; 6036 } 6037 case Intrinsic::aarch64_neon_st3lane: { 6038 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 6039 
SelectStoreLane(Node, 3, AArch64::ST3i8); 6040 return; 6041 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 6042 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 6043 SelectStoreLane(Node, 3, AArch64::ST3i16); 6044 return; 6045 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 6046 VT == MVT::v2f32) { 6047 SelectStoreLane(Node, 3, AArch64::ST3i32); 6048 return; 6049 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 6050 VT == MVT::v1f64) { 6051 SelectStoreLane(Node, 3, AArch64::ST3i64); 6052 return; 6053 } 6054 break; 6055 } 6056 case Intrinsic::aarch64_neon_st4lane: { 6057 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 6058 SelectStoreLane(Node, 4, AArch64::ST4i8); 6059 return; 6060 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 6061 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 6062 SelectStoreLane(Node, 4, AArch64::ST4i16); 6063 return; 6064 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 6065 VT == MVT::v2f32) { 6066 SelectStoreLane(Node, 4, AArch64::ST4i32); 6067 return; 6068 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 6069 VT == MVT::v1f64) { 6070 SelectStoreLane(Node, 4, AArch64::ST4i64); 6071 return; 6072 } 6073 break; 6074 } 6075 case Intrinsic::aarch64_sve_st2q: { 6076 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM); 6077 return; 6078 } 6079 case Intrinsic::aarch64_sve_st3q: { 6080 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM); 6081 return; 6082 } 6083 case Intrinsic::aarch64_sve_st4q: { 6084 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM); 6085 return; 6086 } 6087 case Intrinsic::aarch64_sve_st2: { 6088 if (VT == MVT::nxv16i8) { 6089 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); 6090 return; 6091 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 6092 VT == MVT::nxv8bf16) { 6093 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); 6094 return; 6095 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 6096 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); 6097 return; 6098 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 6099 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); 6100 return; 6101 } 6102 break; 6103 } 6104 case Intrinsic::aarch64_sve_st3: { 6105 if (VT == MVT::nxv16i8) { 6106 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); 6107 return; 6108 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 6109 VT == MVT::nxv8bf16) { 6110 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); 6111 return; 6112 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 6113 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); 6114 return; 6115 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 6116 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM); 6117 return; 6118 } 6119 break; 6120 } 6121 case Intrinsic::aarch64_sve_st4: { 6122 if (VT == MVT::nxv16i8) { 6123 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); 6124 return; 6125 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 6126 VT == MVT::nxv8bf16) { 6127 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); 6128 return; 6129 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 6130 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); 6131 return; 6132 } else if (VT == MVT::nxv2i64 || VT == 
MVT::nxv2f64) { 6133 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM); 6134 return; 6135 } 6136 break; 6137 } 6138 } 6139 break; 6140 } 6141 case AArch64ISD::LD2post: { 6142 if (VT == MVT::v8i8) { 6143 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 6144 return; 6145 } else if (VT == MVT::v16i8) { 6146 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 6147 return; 6148 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6149 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 6150 return; 6151 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6152 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 6153 return; 6154 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6155 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 6156 return; 6157 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6158 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 6159 return; 6160 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6161 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 6162 return; 6163 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6164 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 6165 return; 6166 } 6167 break; 6168 } 6169 case AArch64ISD::LD3post: { 6170 if (VT == MVT::v8i8) { 6171 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 6172 return; 6173 } else if (VT == MVT::v16i8) { 6174 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 6175 return; 6176 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6177 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 6178 return; 6179 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6180 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 6181 return; 6182 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6183 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 6184 return; 6185 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6186 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 6187 return; 6188 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6189 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 6190 return; 6191 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6192 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 6193 return; 6194 } 6195 break; 6196 } 6197 case AArch64ISD::LD4post: { 6198 if (VT == MVT::v8i8) { 6199 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 6200 return; 6201 } else if (VT == MVT::v16i8) { 6202 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 6203 return; 6204 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6205 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 6206 return; 6207 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6208 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 6209 return; 6210 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6211 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 6212 return; 6213 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6214 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 6215 return; 6216 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6217 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 6218 return; 6219 } else if (VT 
== MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x2post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD2DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOptLevel OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated to a memory operation in \p
/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
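  // For example, the LD1/LDNF1 merge-zero nodes below carry the memory VT
  // directly as a VTSDNode operand, whereas the structured SVE loads only
  // carry a governing predicate, so their memory VT is reconstructed from the
  // predicate type (e.g. an nxv4i1 predicate with NumVec = 2 corresponds to
  // nxv8i32 worth of data).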
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
    return EVT();

  switch (Root->getConstantOperandVal(1)) {
  default:
    return EVT();
  case Intrinsic::aarch64_sme_ldr:
  case Intrinsic::aarch64_sme_str:
    return MVT::nxv16i8;
  case Intrinsic::aarch64_sve_prf:
    // We are using an SVE prefetch intrinsic. Type must be inferred from the
    // width of the predicate.
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_st2q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_st3q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_st4q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_st1dq:
    return EVT(MVT::nxv1i64);
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_st1wq:
    return EVT(MVT::nxv1i32);
  }
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
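    // (Illustrative note.) The immediate selected here is a multiple of the
    // vector length, i.e. the [<Xn|SP>, #imm, MUL VL] addressing form, which
    // a fixed-size stack object's byte offset cannot satisfy in general.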
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
      return true;
    }

    return false;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8 bit data does not come with the SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
    if (ImmOff % Size)
      return false;

    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
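  // That is, match (add LHS, (shl Index, Scale)) so the shifted operand can
  // be used directly as the register offset (illustratively, Scale == 2
  // corresponds to an [Xn, Xm, LSL #2] style address).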
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(*CurDAG, N);
}

bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
  EVT VT = N.getValueType();
  return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
}

bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                             SDValue &Base, SDValue &Offset,
                                             unsigned Scale) {
  // Try to untangle an ADD node into a 'reg + offset'.
  if (N.getOpcode() == ISD::ADD)
    if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t ImmOff = C->getSExtValue();
      if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
        Base = N.getOperand(0);
        Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
        return true;
      }
    }

  // By default, just match reg + 0.
  Base = N;
  Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
  return true;
}