//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"
#define PASS_NAME "AArch64 Instruction Selection"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  AArch64DAGToDAGISel() = delete;

  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
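  /// For the memory constraints handled here (m, o, Q) the address is
  /// constrained to a general-purpose base register (never XZR), e.g. a plain
  /// "ldr x1, [x0]" form.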
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }
  template <unsigned Size, unsigned Max>
  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
    // Test if there is an appropriate addressing mode and check if the
    // immediate fits.
    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (Found) {
      if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
        int64_t C = CI->getSExtValue();
        if (C <= Max)
          return true;
      }
    }

    // Otherwise, base only, materialize address in register.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  template <int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template <int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  bool SelectExtractHigh(SDValue N, SDValue &Res) {
    if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
      N = N->getOperand(0);
    if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
        !isa<ConstantSDNode>(N->getOperand(1)))
      return false;
    EVT VT = N->getValueType(0);
    EVT LVT = N->getOperand(0).getValueType();
    unsigned Index = N->getConstantOperandVal(1);
    if (!VT.is64BitVector() || !LVT.is128BitVector() ||
        Index != VT.getVectorNumElements())
      return false;
    Res = N->getOperand(0);
    return true;
  }

  bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
    if (N.getOpcode() != AArch64ISD::VLSHR)
      return false;
    SDValue Op = N->getOperand(0);
    EVT VT = Op.getValueType();
    unsigned ShtAmt = N->getConstantOperandVal(1);
    if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
      return false;

    APInt Imm;
    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0)
                      << Op.getOperand(1).getConstantOperandVal(1));
    else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
             isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0));
    else
      return false;

    if (Imm != 1ULL << (ShtAmt - 1))
      return false;

    Res1 = Op.getOperand(0);
    Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
    return true;
  }

  bool SelectDupZeroOrUndef(SDValue N) {
    switch (N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    default:
      break;
    }

    return false;
  }

  bool SelectDupZero(SDValue N) {
    switch (N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    }

    return false;
  }

  bool SelectDupNegativeZero(SDValue N) {
    switch (N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
      return Const && Const->isZero() && Const->isNegative();
    }
    }

    return false;
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Negate>
  bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVECpyDupImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Invert = false>
  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    return SelectSVELogicalImm(N, VT, Imm, Invert);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    return SelectSVEArithImm(N, VT, Imm);
  }

  template <unsigned Low, unsigned High, bool AllowSaturation = false>
  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  }

  bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
    if (N->getOpcode() != ISD::SPLAT_VECTOR)
      return false;

    EVT EltVT = N->getValueType(0).getVectorElementType();
    return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
                             /* High */ EltVT.getFixedSizeInBits(),
                             /* AllowSaturation */ true, Imm);
  }

  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
  template <signed Min, signed Max, signed Scale, bool Shift>
  bool SelectCntImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    if (Shift)
      MulImm = 1LL << MulImm;

    if ((MulImm % std::abs(Scale)) != 0)
      return false;

    MulImm /= Scale;
    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  template <signed Max, signed Scale>
  bool SelectEXTImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();

    if (MulImm >= 0 && MulImm <= Max) {
      MulImm *= Scale;
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  template <unsigned BaseReg, unsigned Max>
  bool ImmToReg(SDValue N, SDValue &Imm) {
    if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
      uint64_t C = CI->getZExtValue();

      if (C > Max)
        return false;

      Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
      return true;
    }
    return false;
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that is
  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);
  // Form a sequence of SVE registers for instructions using a list of vectors,
  // e.g. structured loads and stores (ldN, stN).
  SDValue createZTuple(ArrayRef<SDValue> Vecs);

  // Similar to above, except the register must start at a multiple of the
  // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
  SDValue createZMulTuple(ArrayRef<SDValue> Regs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  void SelectPtrauthAuth(SDNode *N);
  void SelectPtrauthResign(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri,
                            bool IsIntr = false);
  void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
                                       unsigned Scale, unsigned Opc_ri,
                                       unsigned Opc_rr);
  void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
                                       bool IsZmMulti, unsigned Opcode,
                                       bool HasPred = false);
  void SelectPExtPair(SDNode *N, unsigned Opc);
  void SelectWhilePair(SDNode *N, unsigned Opc);
  void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
                                 bool IsTupleInput, unsigned Opc);
  void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);

  template <unsigned MaxIdx, unsigned Scale>
  void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
                             unsigned Op);
  void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
                              unsigned Op, unsigned MaxIdx, unsigned Scale,
                              unsigned BaseReg = 0);
  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  /// SVE Reg+Imm addressing mode.
  template <int64_t Min, int64_t Max>
  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
                                SDValue &OffImm);
  /// SVE Reg+Reg address mode.
  template <unsigned Scale>
  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  }

  void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
                             uint32_t MaxImm);

  template <unsigned MaxIdx, unsigned Scale>
  bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
    return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
  }

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
                           const SDValue &OldBase, const SDValue &OldOffset,
                           unsigned Scale);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool trySelectCastFixedLengthToScalableVector(SDNode *N);
  bool trySelectCastScalableToFixedLengthVector(SDNode *N);

  bool trySelectXAR(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
  bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template <unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  template <unsigned RegWidth>
  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
                                     unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm,
                              SDValue &Shift, bool Negate);
  bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);

  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);
  bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
                          SDValue &Offset, unsigned Scale = 1);

  bool SelectAllActivePredicate(SDValue N);
  bool SelectAnyPredicate(SDValue N);
};

class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
                                     CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
};
} // end anonymous namespace

char AArch64DAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so, Imm will receive the 32-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if N is a constant operand.
// If so, Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

// isIntImmediateEq - This method tests to see if N is a constant operand that
// is equivalent to 'ImmExpected'.
#ifndef NDEBUG
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
  uint64_t Imm;
  if (!isIntImmediate(N.getNode(), Imm))
    return false;
  return Imm == ImmExpected;
}
#endif

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it is interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = N.getNode()->getAsZExtVal();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it is interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = N.getNode()->getAsZExtVal();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
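/// For example, (shl x, #2) that only feeds loads/stores can become the
/// "lsl #2" of an [Xn, Xm, lsl #2] operand; shifts wider than #3, or values
/// reused by non-memory nodes, are not folded.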
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}

/// Determine whether it is worth folding V into an extended register
/// addressing mode.
bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;

  // If a subtarget has a slow shift, folding a shift into multiple loads
  // costs additional micro-ops.
  if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
    return false;

  // Check whether we're going to emit the address arithmetic anyway because
  // it's used by a non-address operation.
  if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
    return true;
  if (V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
/// to select more shifted register
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
                                                       SDValue &Shift) {
  EVT VT = N.getValueType();
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  if (N->getOpcode() != ISD::AND || !N->hasOneUse())
    return false;
  SDValue LHS = N.getOperand(0);
  if (!LHS->hasOneUse())
    return false;

  unsigned LHSOpcode = LHS->getOpcode();
  if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
    return false;

  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
  if (!ShiftAmtNode)
    return false;

  uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHSC)
    return false;

  APInt AndMask = RHSC->getAPIntValue();
  unsigned LowZBits, MaskLen;
  if (!AndMask.isShiftedMask(LowZBits, MaskLen))
    return false;

  unsigned BitWidth = N.getValueSizeInBits();
  SDLoc DL(LHS);
  uint64_t NewShiftC;
  unsigned NewShiftOp;
  if (LHSOpcode == ISD::SHL) {
    // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
    // BitWidth != LowZBits + MaskLen doesn't match the pattern
    if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
      return false;

    NewShiftC = LowZBits - ShiftAmtC;
    NewShiftOp = VT == MVT::i64 ?
        AArch64::UBFMXri : AArch64::UBFMWri;
  } else {
    if (LowZBits == 0)
      return false;

    // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
    NewShiftC = LowZBits + ShiftAmtC;
    if (NewShiftC >= BitWidth)
      return false;

    // SRA needs all high bits
    if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
      return false;

    // SRL high bits can be 0 or 1
    if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
      return false;

    if (LHSOpcode == ISD::SRL)
      NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
    else
      NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
  }

  assert(NewShiftC < BitWidth && "Invalid shift amount");
  SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
  SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
  Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
                                       NewShiftAmt, BitWidthMinus1),
                0);
  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
  Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
  return true;
}

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}

/// Determine whether it is worth folding V into an extended register of an
/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
/// instruction, and the shift should be treated as worth folding even if it
/// has multiple uses.
bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;

  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the add/sub and save a cycle.
  if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
      V.getConstantOperandVal(1) <= 4 &&
      getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
    return true;

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to default of "LSL 0". The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
/// instructions do not. The AllowROR parameter specifies whether ROR is
/// supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
    return true;

  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFoldingALU(N, true);
  }

  return false;
}

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
}

// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template <signed Low, signed High, signed Scale>
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((MulImm % std::abs(Scale)) == 0) {
    int64_t RDVLImm = MulImm / Scale;
    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
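/// For example, (add x0, (shl (zext w1 to i64), #2)) can be selected as
/// "add x0, x0, w1, uxtw #2".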
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
    // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
    auto isDef32 = [](SDValue N) {
      unsigned Opc = N.getOpcode();
      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
             Opc != ISD::FREEZE;
    };
    if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
        isDef32(Reg))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}

/// SelectArithUXTXRegister - Select a "UXTX register" operand. This operand is
/// used by instructions that have an SP operand.
bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
                                                  SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() != ISD::SHL)
    return false;

  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD)
    return false;
  ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 4)
    return false;

  Ext = AArch64_AM::UXTX;
  Reg = N.getOperand(0);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto *Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
      return false;
  }

  return true;
}

/// Check if the immediate offset is valid as a scaled immediate.
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
                                     unsigned Size) {
  if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
      Offset < (Range << Log2_32(Size)))
    return true;
  return false;
}

/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed
/// BW-bit immediate" address. The "Size" argument is the size in bytes of the
/// memory reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
  // signed addressing mode selected here doesn't support labels/immediates,
  // only base+offset.
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      if (IsSignedImm) {
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      } else {
        // Unsigned immediate.
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Size);
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
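/// For example, with Size == 8 the offset must be a multiple of 8 in
/// [0, 32760], i.e. "ldr x1, [x0, #32760]" is the largest encodable form.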
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base, SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    if (GAN->getOffset() % Size == 0 &&
        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
      return true;
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address. This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode. The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
      return true;
    }
  }
  return false;
}

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
                                       N);
}

/// Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFoldingAddr(N, Size);
}

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFoldingAddr(LHS, Size))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFoldingAddr(RHS, Size))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB.
  // Instead it will use the [BaseReg + 0] address mode and generate
  // instructions like:
  //     MOV  X0, WideImmediate
  //     ADD  X1, BaseReg, X0
  //     LDR  X2, [X1, 0]
  // For such situations, using the [BaseReg, XReg] addressing mode can save
  // one ADD/SUB:
  //     MOV  X0, WideImmediate
  //     LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
    // Skip if the immediate can be selected by the load/store addressing
    // mode. Also skip if the immediate can be encoded by a single ADD
    // (SUB is also checked by using -ImmOff).
    if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
                                         AArch64::ZPR3RegClassID,
                                         AArch64::ZPR4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
  assert(Regs.size() == 2 || Regs.size() == 4);

  // The createTuple interface requires 3 RegClassIDs for each possible
  // tuple type even though we only have them for ZPR2 and ZPR4.
  static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
                                         AArch64::ZPR4Mul4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};
  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}

static std::tuple<SDValue, SDValue>
extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
  SDLoc DL(Disc);
  SDValue AddrDisc;
  SDValue ConstDisc;

  // If this is a blend, remember the constant and address discriminators.
  // Otherwise, it's either a constant discriminator, or a non-blended
  // address discriminator.
  if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
      Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
    AddrDisc = Disc->getOperand(1);
    ConstDisc = Disc->getOperand(2);
  } else {
    ConstDisc = Disc;
  }

  // If the constant discriminator (either the blend RHS, or the entire
  // discriminator value) isn't a 16-bit constant, bail out, and let the
  // discriminator be computed separately.
  auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
  if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
    return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);

  // If there's no address discriminator, use XZR directly.
  if (!AddrDisc)
    AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);

  return std::make_tuple(
      DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
      AddrDisc);
}

void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0
  SDValue Val = N->getOperand(1);
  SDValue AUTKey = N->getOperand(2);
  SDValue AUTDisc = N->getOperand(3);

  unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
  AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);

  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(AUTConstDisc, AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);

  SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                         AArch64::X16, Val, SDValue());
  SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};

  SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
  ReplaceNode(N, AUT);
  return;
}

void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0
  SDValue Val = N->getOperand(1);
  SDValue AUTKey = N->getOperand(2);
  SDValue AUTDisc = N->getOperand(3);
  SDValue PACKey = N->getOperand(4);
  SDValue PACDisc = N->getOperand(5);

  unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
  unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();

  AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
  PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);

  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(AUTConstDisc, AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);

  SDValue PACAddrDisc, PACConstDisc;
  std::tie(PACConstDisc, PACAddrDisc) =
      extractPtrauthBlendDiscriminators(PACDisc, CurDAG);

  SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                         AArch64::X16, Val,
SDValue()); 1568 1569 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey, 1570 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)}; 1571 1572 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops); 1573 ReplaceNode(N, AUTPAC); 1574 return; 1575 } 1576 1577 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { 1578 LoadSDNode *LD = cast<LoadSDNode>(N); 1579 if (LD->isUnindexed()) 1580 return false; 1581 EVT VT = LD->getMemoryVT(); 1582 EVT DstVT = N->getValueType(0); 1583 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1584 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; 1585 1586 // We're not doing validity checking here. That was done when checking 1587 // if we should mark the load as indexed or not. We're just selecting 1588 // the right instruction. 1589 unsigned Opcode = 0; 1590 1591 ISD::LoadExtType ExtType = LD->getExtensionType(); 1592 bool InsertTo64 = false; 1593 if (VT == MVT::i64) 1594 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; 1595 else if (VT == MVT::i32) { 1596 if (ExtType == ISD::NON_EXTLOAD) 1597 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1598 else if (ExtType == ISD::SEXTLOAD) 1599 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; 1600 else { 1601 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1602 InsertTo64 = true; 1603 // The result of the load is only i32. It's the subreg_to_reg that makes 1604 // it into an i64. 1605 DstVT = MVT::i32; 1606 } 1607 } else if (VT == MVT::i16) { 1608 if (ExtType == ISD::SEXTLOAD) { 1609 if (DstVT == MVT::i64) 1610 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; 1611 else 1612 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; 1613 } else { 1614 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; 1615 InsertTo64 = DstVT == MVT::i64; 1616 // The result of the load is only i32. It's the subreg_to_reg that makes 1617 // it into an i64. 1618 DstVT = MVT::i32; 1619 } 1620 } else if (VT == MVT::i8) { 1621 if (ExtType == ISD::SEXTLOAD) { 1622 if (DstVT == MVT::i64) 1623 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; 1624 else 1625 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; 1626 } else { 1627 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; 1628 InsertTo64 = DstVT == MVT::i64; 1629 // The result of the load is only i32. It's the subreg_to_reg that makes 1630 // it into an i64. 1631 DstVT = MVT::i32; 1632 } 1633 } else if (VT == MVT::f16) { 1634 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1635 } else if (VT == MVT::bf16) { 1636 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1637 } else if (VT == MVT::f32) { 1638 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; 1639 } else if (VT == MVT::f64 || VT.is64BitVector()) { 1640 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; 1641 } else if (VT.is128BitVector()) { 1642 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; 1643 } else 1644 return false; 1645 SDValue Chain = LD->getChain(); 1646 SDValue Base = LD->getBasePtr(); 1647 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); 1648 int OffsetVal = (int)OffsetOp->getZExtValue(); 1649 SDLoc dl(N); 1650 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); 1651 SDValue Ops[] = { Base, Offset, Chain }; 1652 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, 1653 MVT::Other, Ops); 1654 1655 // Transfer memoperands. 
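// Keeping the MachineMemOperand on the new node lets later passes (alias
// analysis, scheduling) still see the memory access being performed.
// Illustrative example of the overall selection (register names assumed):
// a post-incremented i32 load that DAGCombine turned into an ISD::POST_INC
// LoadSDNode is selected to LDRWpost, e.g.
//   ldr w8, [x0], #4
// The machine node's results are (write-back base : i64, loaded value,
// chain); the ReplaceUses calls below map them back onto the load's
// (value, new base pointer, chain) results.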
1656 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1657 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp}); 1658 1659 // Either way, we're replacing the node, so tell the caller that. 1660 SDValue LoadedVal = SDValue(Res, 1); 1661 if (InsertTo64) { 1662 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 1663 LoadedVal = 1664 SDValue(CurDAG->getMachineNode( 1665 AArch64::SUBREG_TO_REG, dl, MVT::i64, 1666 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, 1667 SubReg), 1668 0); 1669 } 1670 1671 ReplaceUses(SDValue(N, 0), LoadedVal); 1672 ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); 1673 ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); 1674 CurDAG->RemoveDeadNode(N); 1675 return true; 1676 } 1677 1678 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 1679 unsigned SubRegIdx) { 1680 SDLoc dl(N); 1681 EVT VT = N->getValueType(0); 1682 SDValue Chain = N->getOperand(0); 1683 1684 SDValue Ops[] = {N->getOperand(2), // Mem operand; 1685 Chain}; 1686 1687 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1688 1689 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1690 SDValue SuperReg = SDValue(Ld, 0); 1691 for (unsigned i = 0; i < NumVecs; ++i) 1692 ReplaceUses(SDValue(N, i), 1693 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1694 1695 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1696 1697 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one, 1698 // because it's too simple to have needed special treatment during lowering. 1699 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) { 1700 MachineMemOperand *MemOp = MemIntr->getMemOperand(); 1701 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 1702 } 1703 1704 CurDAG->RemoveDeadNode(N); 1705 } 1706 1707 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, 1708 unsigned Opc, unsigned SubRegIdx) { 1709 SDLoc dl(N); 1710 EVT VT = N->getValueType(0); 1711 SDValue Chain = N->getOperand(0); 1712 1713 SDValue Ops[] = {N->getOperand(1), // Mem operand 1714 N->getOperand(2), // Incremental 1715 Chain}; 1716 1717 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1718 MVT::Untyped, MVT::Other}; 1719 1720 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1721 1722 // Update uses of write back register 1723 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1724 1725 // Update uses of vector list 1726 SDValue SuperReg = SDValue(Ld, 1); 1727 if (NumVecs == 1) 1728 ReplaceUses(SDValue(N, 0), SuperReg); 1729 else 1730 for (unsigned i = 0; i < NumVecs; ++i) 1731 ReplaceUses(SDValue(N, i), 1732 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1733 1734 // Update the chain 1735 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1736 CurDAG->RemoveDeadNode(N); 1737 } 1738 1739 /// Optimize \param OldBase and \param OldOffset selecting the best addressing 1740 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the 1741 /// new Base and an SDValue representing the new offset. 1742 std::tuple<unsigned, SDValue, SDValue> 1743 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, 1744 unsigned Opc_ri, 1745 const SDValue &OldBase, 1746 const SDValue &OldOffset, 1747 unsigned Scale) { 1748 SDValue NewBase = OldBase; 1749 SDValue NewOffset = OldOffset; 1750 // Detect a possible Reg+Imm addressing mode. 
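// Illustrative sketch (operands assumed): a base of the form
// (add x0, (shl x1, #Scale)) can be folded by the reg+reg check below into
// an "[x0, x1, lsl #Scale]" operand pair (Opc_rr), while a base that is a
// frame index or a plain register plus a small vector-length multiple keeps
// the reg+imm form "[xN, #imm, mul vl]" (Opc_ri).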
1751 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>( 1752 N, OldBase, NewBase, NewOffset); 1753 1754 // Detect a possible reg+reg addressing mode, but only if we haven't already 1755 // detected a Reg+Imm one. 1756 const bool IsRegReg = 1757 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); 1758 1759 // Select the instruction. 1760 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); 1761 } 1762 1763 enum class SelectTypeKind { 1764 Int1 = 0, 1765 Int = 1, 1766 FP = 2, 1767 AnyType = 3, 1768 }; 1769 1770 /// This function selects an opcode from a list of opcodes, which is 1771 /// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit } 1772 /// element types, in this order. 1773 template <SelectTypeKind Kind> 1774 static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) { 1775 // Only match scalable vector VTs 1776 if (!VT.isScalableVector()) 1777 return 0; 1778 1779 EVT EltVT = VT.getVectorElementType(); 1780 unsigned Key = VT.getVectorMinNumElements(); 1781 switch (Kind) { 1782 case SelectTypeKind::AnyType: 1783 break; 1784 case SelectTypeKind::Int: 1785 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 && 1786 EltVT != MVT::i64) 1787 return 0; 1788 break; 1789 case SelectTypeKind::Int1: 1790 if (EltVT != MVT::i1) 1791 return 0; 1792 break; 1793 case SelectTypeKind::FP: 1794 if (EltVT == MVT::bf16) 1795 Key = 16; 1796 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 && 1797 EltVT != MVT::f64) 1798 return 0; 1799 break; 1800 } 1801 1802 unsigned Offset; 1803 switch (Key) { 1804 case 16: // 8-bit or bf16 1805 Offset = 0; 1806 break; 1807 case 8: // 16-bit 1808 Offset = 1; 1809 break; 1810 case 4: // 32-bit 1811 Offset = 2; 1812 break; 1813 case 2: // 64-bit 1814 Offset = 3; 1815 break; 1816 default: 1817 return 0; 1818 } 1819 1820 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset]; 1821 } 1822 1823 // This function is almost identical to SelectWhilePair, but has an 1824 // extra check on the range of the immediate operand. 1825 // TODO: Merge these two functions together at some point? 1826 void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) { 1827 // Immediate can be either 0 or 1. 
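// (Illustrative note:) the predicate-pair PEXT encoding has a single-bit
// pair selector, i.e. "pext { p0.h, p1.h }, pn8[imm]" only accepts an imm of
// 0 or 1, so a larger constant is rejected here and the node is left to the
// remaining selection logic rather than being mis-encoded.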
1828 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2))) 1829 if (Imm->getZExtValue() > 1) 1830 return; 1831 1832 SDLoc DL(N); 1833 EVT VT = N->getValueType(0); 1834 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; 1835 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 1836 SDValue SuperReg = SDValue(WhilePair, 0); 1837 1838 for (unsigned I = 0; I < 2; ++I) 1839 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 1840 AArch64::psub0 + I, DL, VT, SuperReg)); 1841 1842 CurDAG->RemoveDeadNode(N); 1843 } 1844 1845 void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) { 1846 SDLoc DL(N); 1847 EVT VT = N->getValueType(0); 1848 1849 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; 1850 1851 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 1852 SDValue SuperReg = SDValue(WhilePair, 0); 1853 1854 for (unsigned I = 0; I < 2; ++I) 1855 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 1856 AArch64::psub0 + I, DL, VT, SuperReg)); 1857 1858 CurDAG->RemoveDeadNode(N); 1859 } 1860 1861 void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, 1862 unsigned Opcode) { 1863 EVT VT = N->getValueType(0); 1864 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1865 SDValue Ops = createZTuple(Regs); 1866 SDLoc DL(N); 1867 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops); 1868 SDValue SuperReg = SDValue(Intrinsic, 0); 1869 for (unsigned i = 0; i < NumVecs; ++i) 1870 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1871 AArch64::zsub0 + i, DL, VT, SuperReg)); 1872 1873 CurDAG->RemoveDeadNode(N); 1874 } 1875 1876 void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N, 1877 unsigned NumVecs, 1878 bool IsZmMulti, 1879 unsigned Opcode, 1880 bool HasPred) { 1881 assert(Opcode != 0 && "Unexpected opcode"); 1882 1883 SDLoc DL(N); 1884 EVT VT = N->getValueType(0); 1885 unsigned FirstVecIdx = HasPred ? 2 : 1; 1886 1887 auto GetMultiVecOperand = [=](unsigned StartIdx) { 1888 SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx, 1889 N->op_begin() + StartIdx + NumVecs); 1890 return createZMulTuple(Regs); 1891 }; 1892 1893 SDValue Zdn = GetMultiVecOperand(FirstVecIdx); 1894 1895 SDValue Zm; 1896 if (IsZmMulti) 1897 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx); 1898 else 1899 Zm = N->getOperand(NumVecs + FirstVecIdx); 1900 1901 SDNode *Intrinsic; 1902 if (HasPred) 1903 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, 1904 N->getOperand(1), Zdn, Zm); 1905 else 1906 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm); 1907 SDValue SuperReg = SDValue(Intrinsic, 0); 1908 for (unsigned i = 0; i < NumVecs; ++i) 1909 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1910 AArch64::zsub0 + i, DL, VT, SuperReg)); 1911 1912 CurDAG->RemoveDeadNode(N); 1913 } 1914 1915 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, 1916 unsigned Scale, unsigned Opc_ri, 1917 unsigned Opc_rr, bool IsIntr) { 1918 assert(Scale < 5 && "Invalid scaling value."); 1919 SDLoc DL(N); 1920 EVT VT = N->getValueType(0); 1921 SDValue Chain = N->getOperand(0); 1922 1923 // Optimize addressing mode. 1924 SDValue Base, Offset; 1925 unsigned Opc; 1926 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1927 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2), 1928 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); 1929 1930 SDValue Ops[] = {N->getOperand(IsIntr ? 
2 : 1), // Predicate 1931 Base, // Memory operand 1932 Offset, Chain}; 1933 1934 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1935 1936 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1937 SDValue SuperReg = SDValue(Load, 0); 1938 for (unsigned i = 0; i < NumVecs; ++i) 1939 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1940 AArch64::zsub0 + i, DL, VT, SuperReg)); 1941 1942 // Copy chain 1943 unsigned ChainIdx = NumVecs; 1944 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1945 CurDAG->RemoveDeadNode(N); 1946 } 1947 1948 void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N, 1949 unsigned NumVecs, 1950 unsigned Scale, 1951 unsigned Opc_ri, 1952 unsigned Opc_rr) { 1953 assert(Scale < 4 && "Invalid scaling value."); 1954 SDLoc DL(N); 1955 EVT VT = N->getValueType(0); 1956 SDValue Chain = N->getOperand(0); 1957 1958 SDValue PNg = N->getOperand(2); 1959 SDValue Base = N->getOperand(3); 1960 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); 1961 unsigned Opc; 1962 std::tie(Opc, Base, Offset) = 1963 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale); 1964 1965 SDValue Ops[] = {PNg, // Predicate-as-counter 1966 Base, // Memory operand 1967 Offset, Chain}; 1968 1969 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1970 1971 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1972 SDValue SuperReg = SDValue(Load, 0); 1973 for (unsigned i = 0; i < NumVecs; ++i) 1974 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1975 AArch64::zsub0 + i, DL, VT, SuperReg)); 1976 1977 // Copy chain 1978 unsigned ChainIdx = NumVecs; 1979 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1980 CurDAG->RemoveDeadNode(N); 1981 } 1982 1983 void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, 1984 unsigned Opcode) { 1985 if (N->getValueType(0) != MVT::nxv4f32) 1986 return; 1987 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode); 1988 } 1989 1990 void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, 1991 unsigned NumOutVecs, 1992 unsigned Opc, uint32_t MaxImm) { 1993 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4))) 1994 if (Imm->getZExtValue() > MaxImm) 1995 return; 1996 1997 SDValue ZtValue; 1998 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue)) 1999 return; 2000 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)}; 2001 SDLoc DL(Node); 2002 EVT VT = Node->getValueType(0); 2003 2004 SDNode *Instruction = 2005 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops); 2006 SDValue SuperReg = SDValue(Instruction, 0); 2007 2008 for (unsigned I = 0; I < NumOutVecs; ++I) 2009 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg( 2010 AArch64::zsub0 + I, DL, VT, SuperReg)); 2011 2012 // Copy chain 2013 unsigned ChainIdx = NumOutVecs; 2014 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1)); 2015 CurDAG->RemoveDeadNode(Node); 2016 } 2017 2018 void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs, 2019 unsigned Op) { 2020 SDLoc DL(N); 2021 EVT VT = N->getValueType(0); 2022 2023 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2024 SDValue Zd = createZMulTuple(Regs); 2025 SDValue Zn = N->getOperand(1 + NumVecs); 2026 SDValue Zm = N->getOperand(2 + NumVecs); 2027 2028 SDValue Ops[] = {Zd, Zn, Zm}; 2029 2030 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops); 2031 SDValue SuperReg = SDValue(Intrinsic, 0); 2032 for (unsigned i = 0; i < NumVecs; ++i) 2033 ReplaceUses(SDValue(N, 
i), CurDAG->getTargetExtractSubreg( 2034 AArch64::zsub0 + i, DL, VT, SuperReg)); 2035 2036 CurDAG->RemoveDeadNode(N); 2037 } 2038 2039 bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) { 2040 switch (BaseReg) { 2041 default: 2042 return false; 2043 case AArch64::ZA: 2044 case AArch64::ZAB0: 2045 if (TileNum == 0) 2046 break; 2047 return false; 2048 case AArch64::ZAH0: 2049 if (TileNum <= 1) 2050 break; 2051 return false; 2052 case AArch64::ZAS0: 2053 if (TileNum <= 3) 2054 break; 2055 return false; 2056 case AArch64::ZAD0: 2057 if (TileNum <= 7) 2058 break; 2059 return false; 2060 } 2061 2062 BaseReg += TileNum; 2063 return true; 2064 } 2065 2066 template <unsigned MaxIdx, unsigned Scale> 2067 void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, 2068 unsigned BaseReg, unsigned Op) { 2069 unsigned TileNum = 0; 2070 if (BaseReg != AArch64::ZA) 2071 TileNum = N->getConstantOperandVal(2); 2072 2073 if (!SelectSMETile(BaseReg, TileNum)) 2074 return; 2075 2076 SDValue SliceBase, Base, Offset; 2077 if (BaseReg == AArch64::ZA) 2078 SliceBase = N->getOperand(2); 2079 else 2080 SliceBase = N->getOperand(3); 2081 2082 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale)) 2083 return; 2084 2085 SDLoc DL(N); 2086 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other); 2087 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)}; 2088 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops); 2089 2090 EVT VT = N->getValueType(0); 2091 for (unsigned I = 0; I < NumVecs; ++I) 2092 ReplaceUses(SDValue(N, I), 2093 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT, 2094 SDValue(Mov, 0))); 2095 // Copy chain 2096 unsigned ChainIdx = NumVecs; 2097 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1)); 2098 CurDAG->RemoveDeadNode(N); 2099 } 2100 2101 void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, 2102 unsigned Op, unsigned MaxIdx, 2103 unsigned Scale, unsigned BaseReg) { 2104 // Slice can be in different positions 2105 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice) 2106 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice) 2107 SDValue SliceBase = N->getOperand(2); 2108 if (BaseReg != AArch64::ZA) 2109 SliceBase = N->getOperand(3); 2110 2111 SDValue Base, Offset; 2112 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale)) 2113 return; 2114 // The correct Za tile number is computed in Machine Instruction 2115 // See EmitZAInstr 2116 // DAG cannot select Za tile as an output register with ZReg 2117 SDLoc DL(N); 2118 SmallVector<SDValue, 6> Ops; 2119 if (BaseReg != AArch64::ZA ) 2120 Ops.push_back(N->getOperand(2)); 2121 Ops.push_back(Base); 2122 Ops.push_back(Offset); 2123 Ops.push_back(N->getOperand(0)); //Chain 2124 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops); 2125 2126 EVT VT = N->getValueType(0); 2127 for (unsigned I = 0; I < NumVecs; ++I) 2128 ReplaceUses(SDValue(N, I), 2129 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT, 2130 SDValue(Mov, 0))); 2131 2132 // Copy chain 2133 unsigned ChainIdx = NumVecs; 2134 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1)); 2135 CurDAG->RemoveDeadNode(N); 2136 } 2137 2138 void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N, 2139 unsigned NumOutVecs, 2140 bool IsTupleInput, 2141 unsigned Opc) { 2142 SDLoc DL(N); 2143 EVT VT = N->getValueType(0); 2144 unsigned NumInVecs = N->getNumOperands() - 1; 2145 2146 SmallVector<SDValue, 6> Ops; 2147 if (IsTupleInput) { 2148 
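// Illustrative note: callers such as SelectFrintFromVT above pass
// IsTupleInput == true because the corresponding instructions read their
// sources as one consecutive Z-register tuple; the loose operands are
// therefore folded into a ZPR2Mul2/ZPR4Mul4 REG_SEQUENCE via
// createZMulTuple before being handed to the machine node.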
assert((NumInVecs == 2 || NumInVecs == 4) && 2149 "Don't know how to handle multi-register input!"); 2150 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, 2151 N->op_begin() + 1 + NumInVecs); 2152 Ops.push_back(createZMulTuple(Regs)); 2153 } else { 2154 // All intrinsic nodes have the ID as the first operand, hence the "1 + I". 2155 for (unsigned I = 0; I < NumInVecs; I++) 2156 Ops.push_back(N->getOperand(1 + I)); 2157 } 2158 2159 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 2160 SDValue SuperReg = SDValue(Res, 0); 2161 2162 for (unsigned I = 0; I < NumOutVecs; I++) 2163 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 2164 AArch64::zsub0 + I, DL, VT, SuperReg)); 2165 CurDAG->RemoveDeadNode(N); 2166 } 2167 2168 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, 2169 unsigned Opc) { 2170 SDLoc dl(N); 2171 EVT VT = N->getOperand(2)->getValueType(0); 2172 2173 // Form a REG_SEQUENCE to force register allocation. 2174 bool Is128Bit = VT.getSizeInBits() == 128; 2175 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2176 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 2177 2178 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; 2179 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 2180 2181 // Transfer memoperands. 2182 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2183 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2184 2185 ReplaceNode(N, St); 2186 } 2187 2188 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, 2189 unsigned Scale, unsigned Opc_rr, 2190 unsigned Opc_ri) { 2191 SDLoc dl(N); 2192 2193 // Form a REG_SEQUENCE to force register allocation. 2194 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2195 SDValue RegSeq = createZTuple(Regs); 2196 2197 // Optimize addressing mode. 2198 unsigned Opc; 2199 SDValue Offset, Base; 2200 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 2201 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), 2202 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); 2203 2204 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate 2205 Base, // address 2206 Offset, // offset 2207 N->getOperand(0)}; // chain 2208 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 2209 2210 ReplaceNode(N, St); 2211 } 2212 2213 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, 2214 SDValue &OffImm) { 2215 SDLoc dl(N); 2216 const DataLayout &DL = CurDAG->getDataLayout(); 2217 const TargetLowering *TLI = getTargetLowering(); 2218 2219 // Try to match it for the frame address 2220 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) { 2221 int FI = FINode->getIndex(); 2222 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 2223 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 2224 return true; 2225 } 2226 2227 return false; 2228 } 2229 2230 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 2231 unsigned Opc) { 2232 SDLoc dl(N); 2233 EVT VT = N->getOperand(2)->getValueType(0); 2234 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2235 MVT::Other}; // Type for the Chain 2236 2237 // Form a REG_SEQUENCE to force register allocation. 2238 bool Is128Bit = VT.getSizeInBits() == 128; 2239 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2240 SDValue RegSeq = Is128Bit ? 
createQTuple(Regs) : createDTuple(Regs); 2241 2242 SDValue Ops[] = {RegSeq, 2243 N->getOperand(NumVecs + 1), // base register 2244 N->getOperand(NumVecs + 2), // Incremental 2245 N->getOperand(0)}; // Chain 2246 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2247 2248 ReplaceNode(N, St); 2249 } 2250 2251 namespace { 2252 /// WidenVector - Given a value in the V64 register class, produce the 2253 /// equivalent value in the V128 register class. 2254 class WidenVector { 2255 SelectionDAG &DAG; 2256 2257 public: 2258 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 2259 2260 SDValue operator()(SDValue V64Reg) { 2261 EVT VT = V64Reg.getValueType(); 2262 unsigned NarrowSize = VT.getVectorNumElements(); 2263 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 2264 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 2265 SDLoc DL(V64Reg); 2266 2267 SDValue Undef = 2268 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 2269 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 2270 } 2271 }; 2272 } // namespace 2273 2274 /// NarrowVector - Given a value in the V128 register class, produce the 2275 /// equivalent value in the V64 register class. 2276 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 2277 EVT VT = V128Reg.getValueType(); 2278 unsigned WideSize = VT.getVectorNumElements(); 2279 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 2280 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 2281 2282 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 2283 V128Reg); 2284 } 2285 2286 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 2287 unsigned Opc) { 2288 SDLoc dl(N); 2289 EVT VT = N->getValueType(0); 2290 bool Narrow = VT.getSizeInBits() == 64; 2291 2292 // Form a REG_SEQUENCE to force register allocation. 2293 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2294 2295 if (Narrow) 2296 transform(Regs, Regs.begin(), 2297 WidenVector(*CurDAG)); 2298 2299 SDValue RegSeq = createQTuple(Regs); 2300 2301 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 2302 2303 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2); 2304 2305 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2306 N->getOperand(NumVecs + 3), N->getOperand(0)}; 2307 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2308 SDValue SuperReg = SDValue(Ld, 0); 2309 2310 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 2311 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 2312 AArch64::qsub2, AArch64::qsub3 }; 2313 for (unsigned i = 0; i < NumVecs; ++i) { 2314 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 2315 if (Narrow) 2316 NV = NarrowVector(NV, *CurDAG); 2317 ReplaceUses(SDValue(N, i), NV); 2318 } 2319 2320 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 2321 CurDAG->RemoveDeadNode(N); 2322 } 2323 2324 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 2325 unsigned Opc) { 2326 SDLoc dl(N); 2327 EVT VT = N->getValueType(0); 2328 bool Narrow = VT.getSizeInBits() == 64; 2329 2330 // Form a REG_SEQUENCE to force register allocation. 
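// Illustrative example (register names and opcode assumed): a
// post-incremented two-register lane load on 64-bit vectors, e.g.
//   ld2 { v0.s, v1.s }[1], [x0], #8
// first widens its 64-bit inputs to Q registers (WidenVector) so they can
// form a QQ REG_SEQUENCE, and the extracted results are narrowed back to
// D registers via NarrowVector once the post-indexed machine node is built.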
2331 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2332 2333 if (Narrow) 2334 transform(Regs, Regs.begin(), 2335 WidenVector(*CurDAG)); 2336 2337 SDValue RegSeq = createQTuple(Regs); 2338 2339 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2340 RegSeq->getValueType(0), MVT::Other}; 2341 2342 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1); 2343 2344 SDValue Ops[] = {RegSeq, 2345 CurDAG->getTargetConstant(LaneNo, dl, 2346 MVT::i64), // Lane Number 2347 N->getOperand(NumVecs + 2), // Base register 2348 N->getOperand(NumVecs + 3), // Incremental 2349 N->getOperand(0)}; 2350 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2351 2352 // Update uses of the write back register 2353 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 2354 2355 // Update uses of the vector list 2356 SDValue SuperReg = SDValue(Ld, 1); 2357 if (NumVecs == 1) { 2358 ReplaceUses(SDValue(N, 0), 2359 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 2360 } else { 2361 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 2362 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 2363 AArch64::qsub2, AArch64::qsub3 }; 2364 for (unsigned i = 0; i < NumVecs; ++i) { 2365 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 2366 SuperReg); 2367 if (Narrow) 2368 NV = NarrowVector(NV, *CurDAG); 2369 ReplaceUses(SDValue(N, i), NV); 2370 } 2371 } 2372 2373 // Update the Chain 2374 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 2375 CurDAG->RemoveDeadNode(N); 2376 } 2377 2378 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 2379 unsigned Opc) { 2380 SDLoc dl(N); 2381 EVT VT = N->getOperand(2)->getValueType(0); 2382 bool Narrow = VT.getSizeInBits() == 64; 2383 2384 // Form a REG_SEQUENCE to force register allocation. 2385 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2386 2387 if (Narrow) 2388 transform(Regs, Regs.begin(), 2389 WidenVector(*CurDAG)); 2390 2391 SDValue RegSeq = createQTuple(Regs); 2392 2393 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2); 2394 2395 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2396 N->getOperand(NumVecs + 3), N->getOperand(0)}; 2397 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 2398 2399 // Transfer memoperands. 2400 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2401 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2402 2403 ReplaceNode(N, St); 2404 } 2405 2406 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 2407 unsigned Opc) { 2408 SDLoc dl(N); 2409 EVT VT = N->getOperand(2)->getValueType(0); 2410 bool Narrow = VT.getSizeInBits() == 64; 2411 2412 // Form a REG_SEQUENCE to force register allocation. 2413 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2414 2415 if (Narrow) 2416 transform(Regs, Regs.begin(), 2417 WidenVector(*CurDAG)); 2418 2419 SDValue RegSeq = createQTuple(Regs); 2420 2421 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2422 MVT::Other}; 2423 2424 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1); 2425 2426 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2427 N->getOperand(NumVecs + 2), // Base Register 2428 N->getOperand(NumVecs + 3), // Incremental 2429 N->getOperand(0)}; 2430 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2431 2432 // Transfer memoperands. 
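// As with the loads above, the original memory operand is re-attached so the
// access is still visible to later passes. Illustrative example of what the
// node built above corresponds to (registers assumed):
//   st2 { v0.h, v1.h }[3], [x1], #4
// where the write-back base is produced as the i64 result and the lane
// number and source vectors come from the REG_SEQUENCE operand.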
2433 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2434 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2435 2436 ReplaceNode(N, St); 2437 } 2438 2439 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 2440 unsigned &Opc, SDValue &Opd0, 2441 unsigned &LSB, unsigned &MSB, 2442 unsigned NumberOfIgnoredLowBits, 2443 bool BiggerPattern) { 2444 assert(N->getOpcode() == ISD::AND && 2445 "N must be a AND operation to call this function"); 2446 2447 EVT VT = N->getValueType(0); 2448 2449 // Here we can test the type of VT and return false when the type does not 2450 // match, but since it is done prior to that call in the current context 2451 // we turned that into an assert to avoid redundant code. 2452 assert((VT == MVT::i32 || VT == MVT::i64) && 2453 "Type checking must have been done before calling this function"); 2454 2455 // FIXME: simplify-demanded-bits in DAGCombine will probably have 2456 // changed the AND node to a 32-bit mask operation. We'll have to 2457 // undo that as part of the transform here if we want to catch all 2458 // the opportunities. 2459 // Currently the NumberOfIgnoredLowBits argument helps to recover 2460 // from these situations when matching bigger pattern (bitfield insert). 2461 2462 // For unsigned extracts, check for a shift right and mask 2463 uint64_t AndImm = 0; 2464 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) 2465 return false; 2466 2467 const SDNode *Op0 = N->getOperand(0).getNode(); 2468 2469 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 2470 // simplified. Try to undo that 2471 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 2472 2473 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 2474 if (AndImm & (AndImm + 1)) 2475 return false; 2476 2477 bool ClampMSB = false; 2478 uint64_t SrlImm = 0; 2479 // Handle the SRL + ANY_EXTEND case. 2480 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 2481 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 2482 // Extend the incoming operand of the SRL to 64-bit. 2483 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 2484 // Make sure to clamp the MSB so that we preserve the semantics of the 2485 // original operations. 2486 ClampMSB = true; 2487 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 2488 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 2489 SrlImm)) { 2490 // If the shift result was truncated, we can still combine them. 2491 Opd0 = Op0->getOperand(0).getOperand(0); 2492 2493 // Use the type of SRL node. 2494 VT = Opd0->getValueType(0); 2495 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 2496 Opd0 = Op0->getOperand(0); 2497 ClampMSB = (VT == MVT::i32); 2498 } else if (BiggerPattern) { 2499 // Let's pretend a 0 shift right has been performed. 2500 // The resulting code will be at least as good as the original one 2501 // plus it may expose more opportunities for bitfield insert pattern. 2502 // FIXME: Currently we limit this to the bigger pattern, because 2503 // some optimizations expect AND and not UBFM. 2504 Opd0 = N->getOperand(0); 2505 } else 2506 return false; 2507 2508 // Bail out on large immediates. This happens when no proper 2509 // combining/constant folding was performed. 
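// Worked example of the normal case (illustrative only): for i32
//   %t = lshr i32 %x, 3
//   %r = and i32 %t, 255
// we get SrlImm == 3 and AndImm == 0xff, so the code below computes
// LSB = 3 and MSB = 3 + countr_one(0xff) - 1 = 10, selecting
// UBFMWri %x, 3, 10 (the "ubfx wD, wN, #3, #8" alias).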
2510 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { 2511 LLVM_DEBUG( 2512 (dbgs() << N 2513 << ": Found large shift immediate, this should not happen\n")); 2514 return false; 2515 } 2516 2517 LSB = SrlImm; 2518 MSB = SrlImm + 2519 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm) 2520 : llvm::countr_one<uint64_t>(AndImm)) - 2521 1; 2522 if (ClampMSB) 2523 // Since we're moving the extend before the right shift operation, we need 2524 // to clamp the MSB to make sure we don't shift in undefined bits instead of 2525 // the zeros which would get shifted in with the original right shift 2526 // operation. 2527 MSB = MSB > 31 ? 31 : MSB; 2528 2529 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2530 return true; 2531 } 2532 2533 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, 2534 SDValue &Opd0, unsigned &Immr, 2535 unsigned &Imms) { 2536 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); 2537 2538 EVT VT = N->getValueType(0); 2539 unsigned BitWidth = VT.getSizeInBits(); 2540 assert((VT == MVT::i32 || VT == MVT::i64) && 2541 "Type checking must have been done before calling this function"); 2542 2543 SDValue Op = N->getOperand(0); 2544 if (Op->getOpcode() == ISD::TRUNCATE) { 2545 Op = Op->getOperand(0); 2546 VT = Op->getValueType(0); 2547 BitWidth = VT.getSizeInBits(); 2548 } 2549 2550 uint64_t ShiftImm; 2551 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && 2552 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2553 return false; 2554 2555 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 2556 if (ShiftImm + Width > BitWidth) 2557 return false; 2558 2559 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri; 2560 Opd0 = Op.getOperand(0); 2561 Immr = ShiftImm; 2562 Imms = ShiftImm + Width - 1; 2563 return true; 2564 } 2565 2566 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 2567 SDValue &Opd0, unsigned &LSB, 2568 unsigned &MSB) { 2569 // We are looking for the following pattern which basically extracts several 2570 // continuous bits from the source value and places it from the LSB of the 2571 // destination value, all other bits of the destination value or set to zero: 2572 // 2573 // Value2 = AND Value, MaskImm 2574 // SRL Value2, ShiftImm 2575 // 2576 // with MaskImm >> ShiftImm to search for the bit width. 2577 // 2578 // This gets selected into a single UBFM: 2579 // 2580 // UBFM Value, ShiftImm, Log2_64(MaskImm) 2581 // 2582 2583 if (N->getOpcode() != ISD::SRL) 2584 return false; 2585 2586 uint64_t AndMask = 0; 2587 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) 2588 return false; 2589 2590 Opd0 = N->getOperand(0).getOperand(0); 2591 2592 uint64_t SrlImm = 0; 2593 if (!isIntImmediate(N->getOperand(1), SrlImm)) 2594 return false; 2595 2596 // Check whether we really have several bits extract here. 2597 if (!isMask_64(AndMask >> SrlImm)) 2598 return false; 2599 2600 Opc = N->getValueType(0) == MVT::i32 ? 
AArch64::UBFMWri : AArch64::UBFMXri; 2601 LSB = SrlImm; 2602 MSB = llvm::Log2_64(AndMask); 2603 return true; 2604 } 2605 2606 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 2607 unsigned &Immr, unsigned &Imms, 2608 bool BiggerPattern) { 2609 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 2610 "N must be a SHR/SRA operation to call this function"); 2611 2612 EVT VT = N->getValueType(0); 2613 2614 // Here we can test the type of VT and return false when the type does not 2615 // match, but since it is done prior to that call in the current context 2616 // we turned that into an assert to avoid redundant code. 2617 assert((VT == MVT::i32 || VT == MVT::i64) && 2618 "Type checking must have been done before calling this function"); 2619 2620 // Check for AND + SRL doing several bits extract. 2621 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 2622 return true; 2623 2624 // We're looking for a shift of a shift. 2625 uint64_t ShlImm = 0; 2626 uint64_t TruncBits = 0; 2627 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 2628 Opd0 = N->getOperand(0).getOperand(0); 2629 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 2630 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 2631 // We are looking for a shift of truncate. Truncate from i64 to i32 could 2632 // be considered as setting high 32 bits as zero. Our strategy here is to 2633 // always generate 64bit UBFM. This consistency will help the CSE pass 2634 // later find more redundancy. 2635 Opd0 = N->getOperand(0).getOperand(0); 2636 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 2637 VT = Opd0.getValueType(); 2638 assert(VT == MVT::i64 && "the promoted type should be i64"); 2639 } else if (BiggerPattern) { 2640 // Let's pretend a 0 shift left has been performed. 2641 // FIXME: Currently we limit this to the bigger pattern case, 2642 // because some optimizations expect AND and not UBFM 2643 Opd0 = N->getOperand(0); 2644 } else 2645 return false; 2646 2647 // Missing combines/constant folding may have left us with strange 2648 // constants. 2649 if (ShlImm >= VT.getSizeInBits()) { 2650 LLVM_DEBUG( 2651 (dbgs() << N 2652 << ": Found large shift immediate, this should not happen\n")); 2653 return false; 2654 } 2655 2656 uint64_t SrlImm = 0; 2657 if (!isIntImmediate(N->getOperand(1), SrlImm)) 2658 return false; 2659 2660 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 2661 "bad amount in shift node!"); 2662 int immr = SrlImm - ShlImm; 2663 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 2664 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 2665 // SRA requires a signed extraction 2666 if (VT == MVT::i32) 2667 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 2668 else 2669 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; 2670 return true; 2671 } 2672 2673 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 2674 assert(N->getOpcode() == ISD::SIGN_EXTEND); 2675 2676 EVT VT = N->getValueType(0); 2677 EVT NarrowVT = N->getOperand(0)->getValueType(0); 2678 if (VT != MVT::i64 || NarrowVT != MVT::i32) 2679 return false; 2680 2681 uint64_t ShiftImm; 2682 SDValue Op = N->getOperand(0); 2683 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2684 return false; 2685 2686 SDLoc dl(N); 2687 // Extend the incoming operand of the shift to 64-bits. 
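// Worked example (illustrative): for
//   %s = ashr i32 %x, 5
//   %r = sext i32 %s to i64
// ShiftImm == 5, so Immr = 5 and Imms = 31 below, and the node is selected
// to SBFMXri on the widened source -- the "sbfx xD, xN, #5, #27" alias,
// which also provides the required sign extension to 64 bits.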
2688 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 2689 unsigned Immr = ShiftImm; 2690 unsigned Imms = NarrowVT.getSizeInBits() - 1; 2691 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2692 CurDAG->getTargetConstant(Imms, dl, VT)}; 2693 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 2694 return true; 2695 } 2696 2697 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 2698 SDValue &Opd0, unsigned &Immr, unsigned &Imms, 2699 unsigned NumberOfIgnoredLowBits = 0, 2700 bool BiggerPattern = false) { 2701 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 2702 return false; 2703 2704 switch (N->getOpcode()) { 2705 default: 2706 if (!N->isMachineOpcode()) 2707 return false; 2708 break; 2709 case ISD::AND: 2710 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, 2711 NumberOfIgnoredLowBits, BiggerPattern); 2712 case ISD::SRL: 2713 case ISD::SRA: 2714 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); 2715 2716 case ISD::SIGN_EXTEND_INREG: 2717 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); 2718 } 2719 2720 unsigned NOpc = N->getMachineOpcode(); 2721 switch (NOpc) { 2722 default: 2723 return false; 2724 case AArch64::SBFMWri: 2725 case AArch64::UBFMWri: 2726 case AArch64::SBFMXri: 2727 case AArch64::UBFMXri: 2728 Opc = NOpc; 2729 Opd0 = N->getOperand(0); 2730 Immr = N->getConstantOperandVal(1); 2731 Imms = N->getConstantOperandVal(2); 2732 return true; 2733 } 2734 // Unreachable 2735 return false; 2736 } 2737 2738 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { 2739 unsigned Opc, Immr, Imms; 2740 SDValue Opd0; 2741 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) 2742 return false; 2743 2744 EVT VT = N->getValueType(0); 2745 SDLoc dl(N); 2746 2747 // If the bit extract operation is 64bit but the original type is 32bit, we 2748 // need to add one EXTRACT_SUBREG. 2749 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { 2750 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), 2751 CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; 2752 2753 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); 2754 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, 2755 MVT::i32, SDValue(BFM, 0)); 2756 ReplaceNode(N, Inner.getNode()); 2757 return true; 2758 } 2759 2760 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2761 CurDAG->getTargetConstant(Imms, dl, VT)}; 2762 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2763 return true; 2764 } 2765 2766 /// Does DstMask form a complementary pair with the mask provided by 2767 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, 2768 /// this asks whether DstMask zeroes precisely those bits that will be set by 2769 /// the other half. 2770 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, 2771 unsigned NumberOfIgnoredHighBits, EVT VT) { 2772 assert((VT == MVT::i32 || VT == MVT::i64) && 2773 "i32 or i64 mask type expected!"); 2774 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; 2775 2776 APInt SignificantDstMask = APInt(BitWidth, DstMask); 2777 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); 2778 2779 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && 2780 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes(); 2781 } 2782 2783 // Look for bits that will be useful for later uses. 
2784 // A bit is consider useless as soon as it is dropped and never used 2785 // before it as been dropped. 2786 // E.g., looking for useful bit of x 2787 // 1. y = x & 0x7 2788 // 2. z = y >> 2 2789 // After #1, x useful bits are 0x7, then the useful bits of x, live through 2790 // y. 2791 // After #2, the useful bits of x are 0x4. 2792 // However, if x is used on an unpredicatable instruction, then all its bits 2793 // are useful. 2794 // E.g. 2795 // 1. y = x & 0x7 2796 // 2. z = y >> 2 2797 // 3. str x, [@x] 2798 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); 2799 2800 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, 2801 unsigned Depth) { 2802 uint64_t Imm = 2803 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2804 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); 2805 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); 2806 getUsefulBits(Op, UsefulBits, Depth + 1); 2807 } 2808 2809 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, 2810 uint64_t Imm, uint64_t MSB, 2811 unsigned Depth) { 2812 // inherit the bitwidth value 2813 APInt OpUsefulBits(UsefulBits); 2814 OpUsefulBits = 1; 2815 2816 if (MSB >= Imm) { 2817 OpUsefulBits <<= MSB - Imm + 1; 2818 --OpUsefulBits; 2819 // The interesting part will be in the lower part of the result 2820 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2821 // The interesting part was starting at Imm in the argument 2822 OpUsefulBits <<= Imm; 2823 } else { 2824 OpUsefulBits <<= MSB + 1; 2825 --OpUsefulBits; 2826 // The interesting part will be shifted in the result 2827 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; 2828 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2829 // The interesting part was at zero in the argument 2830 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); 2831 } 2832 2833 UsefulBits &= OpUsefulBits; 2834 } 2835 2836 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, 2837 unsigned Depth) { 2838 uint64_t Imm = 2839 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2840 uint64_t MSB = 2841 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2842 2843 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 2844 } 2845 2846 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, 2847 unsigned Depth) { 2848 uint64_t ShiftTypeAndValue = 2849 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2850 APInt Mask(UsefulBits); 2851 Mask.clearAllBits(); 2852 Mask.flipAllBits(); 2853 2854 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { 2855 // Shift Left 2856 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2857 Mask <<= ShiftAmt; 2858 getUsefulBits(Op, Mask, Depth + 1); 2859 Mask.lshrInPlace(ShiftAmt); 2860 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 2861 // Shift Right 2862 // We do not handle AArch64_AM::ASR, because the sign will change the 2863 // number of useful bits 2864 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2865 Mask.lshrInPlace(ShiftAmt); 2866 getUsefulBits(Op, Mask, Depth + 1); 2867 Mask <<= ShiftAmt; 2868 } else 2869 return; 2870 2871 UsefulBits &= Mask; 2872 } 2873 2874 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 2875 unsigned Depth) { 2876 uint64_t Imm = 2877 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2878 uint64_t MSB = 2879 
cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 2880 2881 APInt OpUsefulBits(UsefulBits); 2882 OpUsefulBits = 1; 2883 2884 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 2885 ResultUsefulBits.flipAllBits(); 2886 APInt Mask(UsefulBits.getBitWidth(), 0); 2887 2888 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 2889 2890 if (MSB >= Imm) { 2891 // The instruction is a BFXIL. 2892 uint64_t Width = MSB - Imm + 1; 2893 uint64_t LSB = Imm; 2894 2895 OpUsefulBits <<= Width; 2896 --OpUsefulBits; 2897 2898 if (Op.getOperand(1) == Orig) { 2899 // Copy the low bits from the result to bits starting from LSB. 2900 Mask = ResultUsefulBits & OpUsefulBits; 2901 Mask <<= LSB; 2902 } 2903 2904 if (Op.getOperand(0) == Orig) 2905 // Bits starting from LSB in the input contribute to the result. 2906 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2907 } else { 2908 // The instruction is a BFI. 2909 uint64_t Width = MSB + 1; 2910 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 2911 2912 OpUsefulBits <<= Width; 2913 --OpUsefulBits; 2914 OpUsefulBits <<= LSB; 2915 2916 if (Op.getOperand(1) == Orig) { 2917 // Copy the bits from the result to the zero bits. 2918 Mask = ResultUsefulBits & OpUsefulBits; 2919 Mask.lshrInPlace(LSB); 2920 } 2921 2922 if (Op.getOperand(0) == Orig) 2923 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2924 } 2925 2926 UsefulBits &= Mask; 2927 } 2928 2929 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2930 SDValue Orig, unsigned Depth) { 2931 2932 // Users of this node should have already been instruction selected 2933 // FIXME: Can we turn that into an assert? 2934 if (!UserNode->isMachineOpcode()) 2935 return; 2936 2937 switch (UserNode->getMachineOpcode()) { 2938 default: 2939 return; 2940 case AArch64::ANDSWri: 2941 case AArch64::ANDSXri: 2942 case AArch64::ANDWri: 2943 case AArch64::ANDXri: 2944 // We increment Depth only when we call the getUsefulBits 2945 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2946 Depth); 2947 case AArch64::UBFMWri: 2948 case AArch64::UBFMXri: 2949 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2950 2951 case AArch64::ORRWrs: 2952 case AArch64::ORRXrs: 2953 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig) 2954 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2955 Depth); 2956 return; 2957 case AArch64::BFMWri: 2958 case AArch64::BFMXri: 2959 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2960 2961 case AArch64::STRBBui: 2962 case AArch64::STURBBi: 2963 if (UserNode->getOperand(0) != Orig) 2964 return; 2965 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); 2966 return; 2967 2968 case AArch64::STRHHui: 2969 case AArch64::STURHHi: 2970 if (UserNode->getOperand(0) != Orig) 2971 return; 2972 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2973 return; 2974 } 2975 } 2976 2977 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2978 if (Depth >= SelectionDAG::MaxRecursionDepth) 2979 return; 2980 // Initialize UsefulBits 2981 if (!Depth) { 2982 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2983 // At the beginning, assume every produced bits is useful 2984 UsefulBits = APInt(Bitwidth, 0); 2985 UsefulBits.flipAllBits(); 2986 } 2987 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2988 2989 for (SDNode *Node : Op.getNode()->uses()) { 2990 // A use cannot produce useful bits 2991 APInt UsefulBitsForUse = APInt(UsefulBits); 2992 getUsefulBitsForUse(Node, 
UsefulBitsForUse, Op, Depth); 2993 UsersUsefulBits |= UsefulBitsForUse; 2994 } 2995 // UsefulBits contains the produced bits that are meaningful for the 2996 // current definition, thus a user cannot make a bit meaningful at 2997 // this point 2998 UsefulBits &= UsersUsefulBits; 2999 } 3000 3001 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 3002 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 3003 /// 0, return Op unchanged. 3004 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 3005 if (ShlAmount == 0) 3006 return Op; 3007 3008 EVT VT = Op.getValueType(); 3009 SDLoc dl(Op); 3010 unsigned BitWidth = VT.getSizeInBits(); 3011 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 3012 3013 SDNode *ShiftNode; 3014 if (ShlAmount > 0) { 3015 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 3016 ShiftNode = CurDAG->getMachineNode( 3017 UBFMOpc, dl, VT, Op, 3018 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 3019 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 3020 } else { 3021 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 3022 assert(ShlAmount < 0 && "expected right shift"); 3023 int ShrAmount = -ShlAmount; 3024 ShiftNode = CurDAG->getMachineNode( 3025 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 3026 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 3027 } 3028 3029 return SDValue(ShiftNode, 0); 3030 } 3031 3032 // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)". 3033 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, 3034 bool BiggerPattern, 3035 const uint64_t NonZeroBits, 3036 SDValue &Src, int &DstLSB, 3037 int &Width); 3038 3039 // For bit-field-positioning pattern "shl VAL, N)". 3040 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, 3041 bool BiggerPattern, 3042 const uint64_t NonZeroBits, 3043 SDValue &Src, int &DstLSB, 3044 int &Width); 3045 3046 /// Does this tree qualify as an attempt to move a bitfield into position, 3047 /// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N). 
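///
/// Worked example (illustrative): for i32
///   %t = shl i32 %x, 6
///   %r = and i32 %t, 0x1fc0   ; seven ones shifted left by 6
/// NonZeroBits is 0x1fc0, a shifted mask, so DstLSB = 6, Width = 7 and
/// Src is %x, which the callers can then turn into a UBFIZ- or BFI-style
/// insertion.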
3048 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 3049 bool BiggerPattern, SDValue &Src, 3050 int &DstLSB, int &Width) { 3051 EVT VT = Op.getValueType(); 3052 unsigned BitWidth = VT.getSizeInBits(); 3053 (void)BitWidth; 3054 assert(BitWidth == 32 || BitWidth == 64); 3055 3056 KnownBits Known = CurDAG->computeKnownBits(Op); 3057 3058 // Non-zero in the sense that they're not provably zero, which is the key 3059 // point if we want to use this value 3060 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 3061 if (!isShiftedMask_64(NonZeroBits)) 3062 return false; 3063 3064 switch (Op.getOpcode()) { 3065 default: 3066 break; 3067 case ISD::AND: 3068 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern, 3069 NonZeroBits, Src, DstLSB, Width); 3070 case ISD::SHL: 3071 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern, 3072 NonZeroBits, Src, DstLSB, Width); 3073 } 3074 3075 return false; 3076 } 3077 3078 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, 3079 bool BiggerPattern, 3080 const uint64_t NonZeroBits, 3081 SDValue &Src, int &DstLSB, 3082 int &Width) { 3083 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); 3084 3085 EVT VT = Op.getValueType(); 3086 assert((VT == MVT::i32 || VT == MVT::i64) && 3087 "Caller guarantees VT is one of i32 or i64"); 3088 (void)VT; 3089 3090 uint64_t AndImm; 3091 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) 3092 return false; 3093 3094 // If (~AndImm & NonZeroBits) is not zero at POS, we know that 3095 // 1) (AndImm & (1 << POS) == 0) 3096 // 2) the result of AND is not zero at POS bit (according to NonZeroBits) 3097 // 3098 // 1) and 2) don't agree so something must be wrong (e.g., in 3099 // 'SelectionDAG::computeKnownBits') 3100 assert((~AndImm & NonZeroBits) == 0 && 3101 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)"); 3102 3103 SDValue AndOp0 = Op.getOperand(0); 3104 3105 uint64_t ShlImm; 3106 SDValue ShlOp0; 3107 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) { 3108 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'. 3109 ShlOp0 = AndOp0.getOperand(0); 3110 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND && 3111 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL, 3112 ShlImm)) { 3113 // For pattern "and(any_extend(shl(val, N)), shifted-mask)" 3114 3115 // ShlVal == shl(val, N), which is a left shift on a smaller type. 3116 SDValue ShlVal = AndOp0.getOperand(0); 3117 3118 // Since this is after type legalization and ShlVal is extended to MVT::i64, 3119 // expect VT to be MVT::i32. 3120 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32."); 3121 3122 // Widens 'val' to MVT::i64 as the source of bit field positioning. 3123 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0)); 3124 } else 3125 return false; 3126 3127 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since 3128 // then we'll end up generating AndOp0+UBFIZ instead of just keeping 3129 // AndOp0+AND. 3130 if (!BiggerPattern && !AndOp0.hasOneUse()) 3131 return false; 3132 3133 DstLSB = llvm::countr_zero(NonZeroBits); 3134 Width = llvm::countr_one(NonZeroBits >> DstLSB); 3135 3136 // Bail out on large Width. This happens when no proper combining / constant 3137 // folding was performed. 
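// Illustrative example of the any_extend path above (values assumed): for
//   (and (any_extend (shl val:i32, 2)):i64, 0x3c)
// ShlVal is the 32-bit shl, ShlOp0 is the widened 'val', and with
// NonZeroBits == 0x3c this gives DstLSB = 2 and Width = 4; the check below
// only rejects degenerate widths left behind by missing combines.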
3138 if (Width >= (int)VT.getSizeInBits()) {
3139 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3140 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3141 // "val".
3142 // If VT is i32, what Width >= 32 means:
3143 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3144 // demands at least 'Width' bits (after dag-combiner). This together with
3145 // `any_extend` Op (undefined higher bits) indicates a missed combination
3146 // when lowering the 'and' IR instruction to a machine IR instruction.
3147 LLVM_DEBUG(
3148 dbgs()
3149 << "Found large Width in bit-field-positioning -- this indicates no "
3150 "proper combining / constant folding was performed\n");
3151 return false;
3152 }
3153
3154 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3155 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3156 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3157 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3158 // which case it is not profitable to insert an extra shift.
3159 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3160 return false;
3161
3162 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3163 return true;
3164 }
3165
3166 // For node (shl (and val, mask), N), returns true if the node is equivalent to
3167 // UBFIZ.
3168 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3169 SDValue &Src, int &DstLSB,
3170 int &Width) {
3171 // The caller should have verified that Op is a left shift with a constant
3172 // shift amount; the asserts below check that.
3173 assert(Op.getOpcode() == ISD::SHL &&
3174 "Op.getNode() should be a SHL node to call this function");
3175 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3176 "Op.getNode() should shift ShlImm to call this function");
3177
3178 uint64_t AndImm = 0;
3179 SDValue Op0 = Op.getOperand(0);
3180 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3181 return false;
3182
3183 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3184 if (isMask_64(ShiftedAndImm)) {
3185 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3186 // should end with Mask, and could be prefixed with random bits if those
3187 // bits are shifted out.
3188 //
3189 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3190 // the bits of the AND result corresponding to x, y and z are shifted out,
3191 // so it's fine not to extract them.
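// Concrete instance (illustrative): for i64
//   %t = and i64 %x, 31
//   %r = shl i64 %t, 8
// ShiftedAndImm == 31 is a plain mask, so Width = 5 and DstLSB = 8, which a
// UBFIZ-emitting caller can encode as "ubfiz xD, xN, #8, #5"
// (UBFMXri with immr = 56, imms = 4).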
3192 Width = llvm::countr_one(ShiftedAndImm); 3193 DstLSB = ShlImm; 3194 Src = Op0.getOperand(0); 3195 return true; 3196 } 3197 return false; 3198 } 3199 3200 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, 3201 bool BiggerPattern, 3202 const uint64_t NonZeroBits, 3203 SDValue &Src, int &DstLSB, 3204 int &Width) { 3205 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); 3206 3207 EVT VT = Op.getValueType(); 3208 assert((VT == MVT::i32 || VT == MVT::i64) && 3209 "Caller guarantees that type is i32 or i64"); 3210 (void)VT; 3211 3212 uint64_t ShlImm; 3213 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 3214 return false; 3215 3216 if (!BiggerPattern && !Op.hasOneUse()) 3217 return false; 3218 3219 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width)) 3220 return true; 3221 3222 DstLSB = llvm::countr_zero(NonZeroBits); 3223 Width = llvm::countr_one(NonZeroBits >> DstLSB); 3224 3225 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern) 3226 return false; 3227 3228 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB); 3229 return true; 3230 } 3231 3232 static bool isShiftedMask(uint64_t Mask, EVT VT) { 3233 assert(VT == MVT::i32 || VT == MVT::i64); 3234 if (VT == MVT::i32) 3235 return isShiftedMask_32(Mask); 3236 return isShiftedMask_64(Mask); 3237 } 3238 3239 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 3240 // inserted only sets known zero bits. 3241 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 3242 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 3243 3244 EVT VT = N->getValueType(0); 3245 if (VT != MVT::i32 && VT != MVT::i64) 3246 return false; 3247 3248 unsigned BitWidth = VT.getSizeInBits(); 3249 3250 uint64_t OrImm; 3251 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 3252 return false; 3253 3254 // Skip this transformation if the ORR immediate can be encoded in the ORR. 3255 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 3256 // performance neutral. 3257 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 3258 return false; 3259 3260 uint64_t MaskImm; 3261 SDValue And = N->getOperand(0); 3262 // Must be a single use AND with an immediate operand. 3263 if (!And.hasOneUse() || 3264 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 3265 return false; 3266 3267 // Compute the Known Zero for the AND as this allows us to catch more general 3268 // cases than just looking for AND with imm. 3269 KnownBits Known = CurDAG->computeKnownBits(And); 3270 3271 // Non-zero in the sense that they're not provably zero, which is the key 3272 // point if we want to use this value. 3273 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 3274 3275 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 3276 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 3277 return false; 3278 3279 // The bits being inserted must only set those bits that are known to be zero. 3280 if ((OrImm & NotKnownZero) != 0) { 3281 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 3282 // currently handle this case. 3283 return false; 3284 } 3285 3286 // BFI/BFXIL dst, src, #lsb, #width. 3287 int LSB = llvm::countr_one(NotKnownZero); 3288 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount(); 3289 3290 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 
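  // For illustration, on i32 with LSB == 8 and Width == 8 this gives
  // ImmR == (32 - 8) % 32 == 24 and ImmS == 7, i.e. BFM Wd, Wn, #24, #7,
  // which is the encoding behind the BFI Wd, Wn, #8, #8 alias.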
3291 unsigned ImmR = (BitWidth - LSB) % BitWidth; 3292 unsigned ImmS = Width - 1; 3293 3294 // If we're creating a BFI instruction avoid cases where we need more 3295 // instructions to materialize the BFI constant as compared to the original 3296 // ORR. A BFXIL will use the same constant as the original ORR, so the code 3297 // should be no worse in this case. 3298 bool IsBFI = LSB != 0; 3299 uint64_t BFIImm = OrImm >> LSB; 3300 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 3301 // We have a BFI instruction and we know the constant can't be materialized 3302 // with a ORR-immediate with the zero register. 3303 unsigned OrChunks = 0, BFIChunks = 0; 3304 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 3305 if (((OrImm >> Shift) & 0xFFFF) != 0) 3306 ++OrChunks; 3307 if (((BFIImm >> Shift) & 0xFFFF) != 0) 3308 ++BFIChunks; 3309 } 3310 if (BFIChunks > OrChunks) 3311 return false; 3312 } 3313 3314 // Materialize the constant to be inserted. 3315 SDLoc DL(N); 3316 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; 3317 SDNode *MOVI = CurDAG->getMachineNode( 3318 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); 3319 3320 // Create the BFI/BFXIL instruction. 3321 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), 3322 CurDAG->getTargetConstant(ImmR, DL, VT), 3323 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3324 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 3325 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3326 return true; 3327 } 3328 3329 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, 3330 SDValue &ShiftedOperand, 3331 uint64_t &EncodedShiftImm) { 3332 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR. 3333 if (!Dst.hasOneUse()) 3334 return false; 3335 3336 EVT VT = Dst.getValueType(); 3337 assert((VT == MVT::i32 || VT == MVT::i64) && 3338 "Caller should guarantee that VT is one of i32 or i64"); 3339 const unsigned SizeInBits = VT.getSizeInBits(); 3340 3341 SDLoc DL(Dst.getNode()); 3342 uint64_t AndImm, ShlImm; 3343 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) && 3344 isShiftedMask_64(AndImm)) { 3345 // Avoid transforming 'DstOp0' if it has other uses than the AND node. 3346 SDValue DstOp0 = Dst.getOperand(0); 3347 if (!DstOp0.hasOneUse()) 3348 return false; 3349 3350 // An example to illustrate the transformation 3351 // From: 3352 // lsr x8, x1, #1 3353 // and x8, x8, #0x3f80 3354 // bfxil x8, x1, #0, #7 3355 // To: 3356 // and x8, x23, #0x7f 3357 // ubfx x9, x23, #8, #7 3358 // orr x23, x8, x9, lsl #7 3359 // 3360 // The number of instructions remains the same, but ORR is faster than BFXIL 3361 // on many AArch64 processors (or as good as BFXIL if not faster). Besides, 3362 // the dependency chain is improved after the transformation. 3363 uint64_t SrlImm; 3364 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) { 3365 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm); 3366 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) { 3367 unsigned MaskWidth = 3368 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask); 3369 unsigned UBFMOpc = 3370 (VT == MVT::i32) ? 
AArch64::UBFMWri : AArch64::UBFMXri; 3371 SDNode *UBFMNode = CurDAG->getMachineNode( 3372 UBFMOpc, DL, VT, DstOp0.getOperand(0), 3373 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL, 3374 VT), 3375 CurDAG->getTargetConstant( 3376 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT)); 3377 ShiftedOperand = SDValue(UBFMNode, 0); 3378 EncodedShiftImm = AArch64_AM::getShifterImm( 3379 AArch64_AM::LSL, NumTrailingZeroInShiftedMask); 3380 return true; 3381 } 3382 } 3383 return false; 3384 } 3385 3386 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) { 3387 ShiftedOperand = Dst.getOperand(0); 3388 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm); 3389 return true; 3390 } 3391 3392 uint64_t SrlImm; 3393 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) { 3394 ShiftedOperand = Dst.getOperand(0); 3395 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm); 3396 return true; 3397 } 3398 return false; 3399 } 3400 3401 // Given an 'ISD::OR' node that is going to be selected as BFM, analyze 3402 // the operands and select it to AArch64::ORR with shifted registers if 3403 // that's more efficient. Returns true iff selection to AArch64::ORR happens. 3404 static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, 3405 SDValue Src, SDValue Dst, SelectionDAG *CurDAG, 3406 const bool BiggerPattern) { 3407 EVT VT = N->getValueType(0); 3408 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node"); 3409 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) || 3410 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) && 3411 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR"); 3412 assert((VT == MVT::i32 || VT == MVT::i64) && 3413 "Expect result type to be i32 or i64 since N is combinable to BFM"); 3414 SDLoc DL(N); 3415 3416 // Bail out if BFM simplifies away one node in BFM Dst. 3417 if (OrOpd1 != Dst) 3418 return false; 3419 3420 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs; 3421 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer 3422 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true. 3423 if (BiggerPattern) { 3424 uint64_t SrcAndImm; 3425 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) && 3426 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) { 3427 // OrOpd0 = AND Src, #Mask 3428 // So BFM simplifies away one AND node from Src and doesn't simplify away 3429 // nodes from Dst. If ORR with left-shifted operand also simplifies away 3430 // one node (from Rd), ORR is better since it has higher throughput and 3431 // smaller latency than BFM on many AArch64 processors (and for the rest 3432 // ORR is at least as good as BFM). 
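      //
      // For instance (illustrative values): when OrOpd0 is (and x, 0xff) and
      // Dst is (shl y, #8), the fold below is expected to turn the node into
      //   orr Wd, Wand, Wy, lsl #8
      // rather than a BFM.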
3433 SDValue ShiftedOperand; 3434 uint64_t EncodedShiftImm; 3435 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand, 3436 EncodedShiftImm)) { 3437 SDValue Ops[] = {OrOpd0, ShiftedOperand, 3438 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)}; 3439 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3440 return true; 3441 } 3442 } 3443 return false; 3444 } 3445 3446 assert((!BiggerPattern) && "BiggerPattern should be handled above"); 3447 3448 uint64_t ShlImm; 3449 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) { 3450 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) { 3451 SDValue Ops[] = { 3452 Dst, Src, 3453 CurDAG->getTargetConstant( 3454 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; 3455 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3456 return true; 3457 } 3458 3459 // Select the following pattern to left-shifted operand rather than BFI. 3460 // %val1 = op .. 3461 // %val2 = shl %val1, #imm 3462 // %res = or %val1, %val2 3463 // 3464 // If N is selected to be BFI, we know that 3465 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into 3466 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved) 3467 // 3468 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly. 3469 if (OrOpd0.getOperand(0) == OrOpd1) { 3470 SDValue Ops[] = { 3471 OrOpd1, OrOpd1, 3472 CurDAG->getTargetConstant( 3473 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; 3474 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3475 return true; 3476 } 3477 } 3478 3479 uint64_t SrlImm; 3480 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) { 3481 // Select the following pattern to right-shifted operand rather than BFXIL. 3482 // %val1 = op .. 3483 // %val2 = lshr %val1, #imm 3484 // %res = or %val1, %val2 3485 // 3486 // If N is selected to be BFXIL, we know that 3487 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into 3488 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved) 3489 // 3490 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly. 3491 if (OrOpd0.getOperand(0) == OrOpd1) { 3492 SDValue Ops[] = { 3493 OrOpd1, OrOpd1, 3494 CurDAG->getTargetConstant( 3495 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)}; 3496 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3497 return true; 3498 } 3499 } 3500 3501 return false; 3502 } 3503 3504 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, 3505 SelectionDAG *CurDAG) { 3506 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 3507 3508 EVT VT = N->getValueType(0); 3509 if (VT != MVT::i32 && VT != MVT::i64) 3510 return false; 3511 3512 unsigned BitWidth = VT.getSizeInBits(); 3513 3514 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 3515 // have the expected shape. Try to undo that. 
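  // (For illustration: if only bits [8, 24) of the OR are demanded, UsefulBits
  // is 0x00ffff00, so 8 low and 8 high bits are ignored by the matching
  // below.)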

  unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();

  // Given an OR operation, check if we have the following pattern
  // ubfm c, b, imm, imm2 (or something that does the same job, see
  // isBitfieldExtractOp)
  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
  // countTrailingZeros(mask2) == imm2 - imm + 1
  // f = d | c
  // if yes, replace the OR instruction with:
  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2

  // OR is commutative, check all combinations of operand order and values of
  // BiggerPattern, i.e.
  //     Opd0, Opd1, BiggerPattern=false
  //     Opd1, Opd0, BiggerPattern=false
  //     Opd0, Opd1, BiggerPattern=true
  //     Opd1, Opd0, BiggerPattern=true
  // Several of these combinations may match, so check with BiggerPattern=false
  // first since that will produce better results by matching more instructions
  // and/or inserting fewer extra instructions.
  for (int I = 0; I < 4; ++I) {

    SDValue Dst, Src;
    unsigned ImmR, ImmS;
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();

    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S).
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion.
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint is to catch bitfield insertion; we may want to
      // widen the pattern if we want to grab the general bitfield move case.
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
                                       BiggerPattern,
                                       Src, DstLSB, Width)) {
      ImmR = (BitWidth - DstLSB) % BitWidth;
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern.
    EVT VT = OrOpd1Val.getValueType();
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the known-zero bits for the insertee candidate (OrOpd1Val).
    // This allows us to catch more general cases than just looking for an
    // AND with an immediate. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proved it was useless.
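    //
    // Illustrative case: for "(or (shl x, 16), (and y, 0xffff))" on i32 the
    // positioning op above gives DstLSB == 16 and Width == 16; the known-zero
    // check below then proves that bits [16, 32) of the other operand are
    // zero, so the node can become BFM y, x, #16, #15 (the BFI y, x, #16, #16
    // alias).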
3585 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 3586 3587 // Check if there is enough room for the second operand to appear 3588 // in the first one 3589 APInt BitsToBeInserted = 3590 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 3591 3592 if ((BitsToBeInserted & ~Known.Zero) != 0) 3593 continue; 3594 3595 // Set the first operand 3596 uint64_t Imm; 3597 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 3598 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 3599 // In that case, we can eliminate the AND 3600 Dst = OrOpd1->getOperand(0); 3601 else 3602 // Maybe the AND has been removed by simplify-demanded-bits 3603 // or is useful because it discards more bits 3604 Dst = OrOpd1Val; 3605 3606 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR 3607 // with shifted operand is more efficient. 3608 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG, 3609 BiggerPattern)) 3610 return true; 3611 3612 // both parts match 3613 SDLoc DL(N); 3614 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 3615 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3616 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 3617 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3618 return true; 3619 } 3620 3621 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 3622 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 3623 // mask (e.g., 0x000ffff0). 3624 uint64_t Mask0Imm, Mask1Imm; 3625 SDValue And0 = N->getOperand(0); 3626 SDValue And1 = N->getOperand(1); 3627 if (And0.hasOneUse() && And1.hasOneUse() && 3628 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 3629 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 3630 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 3631 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 3632 3633 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 3634 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 3635 // bits to be inserted. 3636 if (isShiftedMask(Mask0Imm, VT)) { 3637 std::swap(And0, And1); 3638 std::swap(Mask0Imm, Mask1Imm); 3639 } 3640 3641 SDValue Src = And1->getOperand(0); 3642 SDValue Dst = And0->getOperand(0); 3643 unsigned LSB = llvm::countr_zero(Mask1Imm); 3644 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount(); 3645 3646 // The BFXIL inserts the low-order bits from a source register, so right 3647 // shift the needed bits into place. 3648 SDLoc DL(N); 3649 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 3650 uint64_t LsrImm = LSB; 3651 if (Src->hasOneUse() && 3652 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) && 3653 (LsrImm + LSB) < BitWidth) { 3654 Src = Src->getOperand(0); 3655 LsrImm += LSB; 3656 } 3657 3658 SDNode *LSR = CurDAG->getMachineNode( 3659 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT), 3660 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 3661 3662 // BFXIL is an alias of BFM, so translate to BFM operands. 3663 unsigned ImmR = (BitWidth - LSB) % BitWidth; 3664 unsigned ImmS = Width - 1; 3665 3666 // Create the BFXIL instruction. 3667 SDValue Ops[] = {Dst, SDValue(LSR, 0), 3668 CurDAG->getTargetConstant(ImmR, DL, VT), 3669 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3670 unsigned Opc = (VT == MVT::i32) ? 
AArch64::BFMWri : AArch64::BFMXri; 3671 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3672 return true; 3673 } 3674 3675 return false; 3676 } 3677 3678 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 3679 if (N->getOpcode() != ISD::OR) 3680 return false; 3681 3682 APInt NUsefulBits; 3683 getUsefulBits(SDValue(N, 0), NUsefulBits); 3684 3685 // If all bits are not useful, just return UNDEF. 3686 if (!NUsefulBits) { 3687 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 3688 return true; 3689 } 3690 3691 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 3692 return true; 3693 3694 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 3695 } 3696 3697 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 3698 /// equivalent of a left shift by a constant amount followed by an and masking 3699 /// out a contiguous set of bits. 3700 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 3701 if (N->getOpcode() != ISD::AND) 3702 return false; 3703 3704 EVT VT = N->getValueType(0); 3705 if (VT != MVT::i32 && VT != MVT::i64) 3706 return false; 3707 3708 SDValue Op0; 3709 int DstLSB, Width; 3710 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 3711 Op0, DstLSB, Width)) 3712 return false; 3713 3714 // ImmR is the rotate right amount. 3715 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 3716 // ImmS is the most significant bit of the source to be moved. 3717 unsigned ImmS = Width - 1; 3718 3719 SDLoc DL(N); 3720 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 3721 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3722 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 3723 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3724 return true; 3725 } 3726 3727 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 3728 /// variable shift/rotate instructions. 3729 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 3730 EVT VT = N->getValueType(0); 3731 3732 unsigned Opc; 3733 switch (N->getOpcode()) { 3734 case ISD::ROTR: 3735 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 3736 break; 3737 case ISD::SHL: 3738 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 3739 break; 3740 case ISD::SRL: 3741 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 3742 break; 3743 case ISD::SRA: 3744 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 3745 break; 3746 default: 3747 return false; 3748 } 3749 3750 uint64_t Size; 3751 uint64_t Bits; 3752 if (VT == MVT::i32) { 3753 Bits = 5; 3754 Size = 32; 3755 } else if (VT == MVT::i64) { 3756 Bits = 6; 3757 Size = 64; 3758 } else 3759 return false; 3760 3761 SDValue ShiftAmt = N->getOperand(1); 3762 SDLoc DL(N); 3763 SDValue NewShiftAmt; 3764 3765 // Skip over an extend of the shift amount. 3766 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 3767 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 3768 ShiftAmt = ShiftAmt->getOperand(0); 3769 3770 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 3771 SDValue Add0 = ShiftAmt->getOperand(0); 3772 SDValue Add1 = ShiftAmt->getOperand(1); 3773 uint64_t Add0Imm; 3774 uint64_t Add1Imm; 3775 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) { 3776 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 3777 // to avoid the ADD/SUB. 
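      // (Illustrative: for an i64 "shl x, (add y, 64)", LSLV only reads the
      // low 6 bits of the amount, so the ADD is dropped and the shift amount
      // becomes y directly.)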
3778 NewShiftAmt = Add0; 3779 } else if (ShiftAmt->getOpcode() == ISD::SUB && 3780 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 3781 (Add0Imm % Size == 0)) { 3782 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X 3783 // to generate a NEG instead of a SUB from a constant. 3784 unsigned NegOpc; 3785 unsigned ZeroReg; 3786 EVT SubVT = ShiftAmt->getValueType(0); 3787 if (SubVT == MVT::i32) { 3788 NegOpc = AArch64::SUBWrr; 3789 ZeroReg = AArch64::WZR; 3790 } else { 3791 assert(SubVT == MVT::i64); 3792 NegOpc = AArch64::SUBXrr; 3793 ZeroReg = AArch64::XZR; 3794 } 3795 SDValue Zero = 3796 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 3797 MachineSDNode *Neg = 3798 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 3799 NewShiftAmt = SDValue(Neg, 0); 3800 } else if (ShiftAmt->getOpcode() == ISD::SUB && 3801 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) { 3802 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X 3803 // to generate a NOT instead of a SUB from a constant. 3804 unsigned NotOpc; 3805 unsigned ZeroReg; 3806 EVT SubVT = ShiftAmt->getValueType(0); 3807 if (SubVT == MVT::i32) { 3808 NotOpc = AArch64::ORNWrr; 3809 ZeroReg = AArch64::WZR; 3810 } else { 3811 assert(SubVT == MVT::i64); 3812 NotOpc = AArch64::ORNXrr; 3813 ZeroReg = AArch64::XZR; 3814 } 3815 SDValue Zero = 3816 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 3817 MachineSDNode *Not = 3818 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1); 3819 NewShiftAmt = SDValue(Not, 0); 3820 } else 3821 return false; 3822 } else { 3823 // If the shift amount is masked with an AND, check that the mask covers the 3824 // bits that are implicitly ANDed off by the above opcodes and if so, skip 3825 // the AND. 3826 uint64_t MaskImm; 3827 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && 3828 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) 3829 return false; 3830 3831 if ((unsigned)llvm::countr_one(MaskImm) < Bits) 3832 return false; 3833 3834 NewShiftAmt = ShiftAmt->getOperand(0); 3835 } 3836 3837 // Narrow/widen the shift amount to match the size of the shift operation. 3838 if (VT == MVT::i32) 3839 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 3840 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 3841 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 3842 MachineSDNode *Ext = CurDAG->getMachineNode( 3843 AArch64::SUBREG_TO_REG, DL, VT, 3844 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 3845 NewShiftAmt = SDValue(Ext, 0); 3846 } 3847 3848 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 3849 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3850 return true; 3851 } 3852 3853 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, 3854 SDValue &FixedPos, 3855 unsigned RegWidth, 3856 bool isReciprocal) { 3857 APFloat FVal(0.0); 3858 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 3859 FVal = CN->getValueAPF(); 3860 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 3861 // Some otherwise illegal constants are allowed in this case. 
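    // (What is matched here, for illustration, is a constant that only exists
    // as a literal-pool load: a LoadSDNode whose address is typically
    // (ADDlow (ADRP constpool), constpool); the APFloat is then read straight
    // out of the pool entry below.)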
3862 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || 3863 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) 3864 return false; 3865 3866 ConstantPoolSDNode *CN = 3867 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); 3868 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); 3869 } else 3870 return false; 3871 3872 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits 3873 // is between 1 and 32 for a destination w-register, or 1 and 64 for an 3874 // x-register. 3875 // 3876 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we 3877 // want THIS_NODE to be 2^fbits. This is much easier to deal with using 3878 // integers. 3879 bool IsExact; 3880 3881 if (isReciprocal) 3882 if (!FVal.getExactInverse(&FVal)) 3883 return false; 3884 3885 // fbits is between 1 and 64 in the worst-case, which means the fmul 3886 // could have 2^64 as an actual operand. Need 65 bits of precision. 3887 APSInt IntVal(65, true); 3888 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); 3889 3890 // N.b. isPowerOf2 also checks for > 0. 3891 if (!IsExact || !IntVal.isPowerOf2()) 3892 return false; 3893 unsigned FBits = IntVal.logBase2(); 3894 3895 // Checks above should have guaranteed that we haven't lost information in 3896 // finding FBits, but it must still be in range. 3897 if (FBits == 0 || FBits > RegWidth) return false; 3898 3899 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32); 3900 return true; 3901 } 3902 3903 bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 3904 unsigned RegWidth) { 3905 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth, 3906 false); 3907 } 3908 3909 bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N, 3910 SDValue &FixedPos, 3911 unsigned RegWidth) { 3912 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth, 3913 true); 3914 } 3915 3916 // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields 3917 // of the string and obtains the integer values from them and combines these 3918 // into a single value to be used in the MRS/MSR instruction. 3919 static int getIntOperandFromRegisterString(StringRef RegString) { 3920 SmallVector<StringRef, 5> Fields; 3921 RegString.split(Fields, ':'); 3922 3923 if (Fields.size() == 1) 3924 return -1; 3925 3926 assert(Fields.size() == 5 3927 && "Invalid number of fields in read register string"); 3928 3929 SmallVector<int, 5> Ops; 3930 bool AllIntFields = true; 3931 3932 for (StringRef Field : Fields) { 3933 unsigned IntField; 3934 AllIntFields &= !Field.getAsInteger(10, IntField); 3935 Ops.push_back(IntField); 3936 } 3937 3938 assert(AllIntFields && 3939 "Unexpected non-integer value in special register string."); 3940 (void)AllIntFields; 3941 3942 // Need to combine the integer fields of the string into a single value 3943 // based on the bit encoding of MRS/MSR instruction. 3944 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | 3945 (Ops[3] << 3) | (Ops[4]); 3946 } 3947 3948 // Lower the read_register intrinsic to an MRS instruction node if the special 3949 // register string argument is either of the form detailed in the ALCE (the 3950 // form described in getIntOperandsFromRegsterString) or is a named register 3951 // known by the MRS SysReg mapper. 
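// (Illustrative example of the first form: a register string "1:3:7:4:0",
// i.e. o0:op1:CRn:CRm:op2, combines to
// (1 << 14) | (3 << 11) | (7 << 7) | (4 << 3) | 0 == 0x5ba0.)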
3952 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) { 3953 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1)); 3954 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0)); 3955 SDLoc DL(N); 3956 3957 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS; 3958 3959 unsigned Opcode64Bit = AArch64::MRS; 3960 int Imm = getIntOperandFromRegisterString(RegString->getString()); 3961 if (Imm == -1) { 3962 // No match, Use the sysreg mapper to map the remaining possible strings to 3963 // the value for the register to be used for the instruction operand. 3964 const auto *TheReg = 3965 AArch64SysReg::lookupSysRegByName(RegString->getString()); 3966 if (TheReg && TheReg->Readable && 3967 TheReg->haveFeatures(Subtarget->getFeatureBits())) 3968 Imm = TheReg->Encoding; 3969 else 3970 Imm = AArch64SysReg::parseGenericRegister(RegString->getString()); 3971 3972 if (Imm == -1) { 3973 // Still no match, see if this is "pc" or give up. 3974 if (!ReadIs128Bit && RegString->getString() == "pc") { 3975 Opcode64Bit = AArch64::ADR; 3976 Imm = 0; 3977 } else { 3978 return false; 3979 } 3980 } 3981 } 3982 3983 SDValue InChain = N->getOperand(0); 3984 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32); 3985 if (!ReadIs128Bit) { 3986 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */, 3987 {SysRegImm, InChain}); 3988 } else { 3989 SDNode *MRRS = CurDAG->getMachineNode( 3990 AArch64::MRRS, DL, 3991 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */}, 3992 {SysRegImm, InChain}); 3993 3994 // Sysregs are not endian. The even register always contains the low half 3995 // of the register. 3996 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64, 3997 SDValue(MRRS, 0)); 3998 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64, 3999 SDValue(MRRS, 0)); 4000 SDValue OutChain = SDValue(MRRS, 1); 4001 4002 ReplaceUses(SDValue(N, 0), Lo); 4003 ReplaceUses(SDValue(N, 1), Hi); 4004 ReplaceUses(SDValue(N, 2), OutChain); 4005 }; 4006 return true; 4007 } 4008 4009 // Lower the write_register intrinsic to an MSR instruction node if the special 4010 // register string argument is either of the form detailed in the ALCE (the 4011 // form described in getIntOperandsFromRegsterString) or is a named register 4012 // known by the MSR SysReg mapper. 4013 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { 4014 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1)); 4015 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0)); 4016 SDLoc DL(N); 4017 4018 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR; 4019 4020 if (!WriteIs128Bit) { 4021 // Check if the register was one of those allowed as the pstatefield value 4022 // in the MSR (immediate) instruction. To accept the values allowed in the 4023 // pstatefield for the MSR (immediate) instruction, we also require that an 4024 // immediate value has been provided as an argument, we know that this is 4025 // the case as it has been ensured by semantic checking. 
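    // (For illustration: writing a pstate field such as "daifset" with a
    // constant argument is expected to be handled here and selected to
    // MSRpstateImm4 rather than to a general MSR (register).)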
4026 auto trySelectPState = [&](auto PMapper, unsigned State) { 4027 if (PMapper) { 4028 assert(isa<ConstantSDNode>(N->getOperand(2)) && 4029 "Expected a constant integer expression."); 4030 unsigned Reg = PMapper->Encoding; 4031 uint64_t Immed = N->getConstantOperandVal(2); 4032 CurDAG->SelectNodeTo( 4033 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), 4034 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0)); 4035 return true; 4036 } 4037 return false; 4038 }; 4039 4040 if (trySelectPState( 4041 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()), 4042 AArch64::MSRpstateImm4)) 4043 return true; 4044 if (trySelectPState( 4045 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()), 4046 AArch64::MSRpstateImm1)) 4047 return true; 4048 } 4049 4050 int Imm = getIntOperandFromRegisterString(RegString->getString()); 4051 if (Imm == -1) { 4052 // Use the sysreg mapper to attempt to map the remaining possible strings 4053 // to the value for the register to be used for the MSR (register) 4054 // instruction operand. 4055 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 4056 if (TheReg && TheReg->Writeable && 4057 TheReg->haveFeatures(Subtarget->getFeatureBits())) 4058 Imm = TheReg->Encoding; 4059 else 4060 Imm = AArch64SysReg::parseGenericRegister(RegString->getString()); 4061 4062 if (Imm == -1) 4063 return false; 4064 } 4065 4066 SDValue InChain = N->getOperand(0); 4067 if (!WriteIs128Bit) { 4068 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other, 4069 CurDAG->getTargetConstant(Imm, DL, MVT::i32), 4070 N->getOperand(2), InChain); 4071 } else { 4072 // No endian swap. The lower half always goes into the even subreg, and the 4073 // higher half always into the odd supreg. 4074 SDNode *Pair = CurDAG->getMachineNode( 4075 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */, 4076 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL, 4077 MVT::i32), 4078 N->getOperand(2), 4079 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32), 4080 N->getOperand(3), 4081 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)}); 4082 4083 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other, 4084 CurDAG->getTargetConstant(Imm, DL, MVT::i32), 4085 SDValue(Pair, 0), InChain); 4086 } 4087 4088 return true; 4089 } 4090 4091 /// We've got special pseudo-instructions for these 4092 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 4093 unsigned Opcode; 4094 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 4095 4096 // Leave IR for LSE if subtarget supports it. 4097 if (Subtarget->hasLSE()) return false; 4098 4099 if (MemTy == MVT::i8) 4100 Opcode = AArch64::CMP_SWAP_8; 4101 else if (MemTy == MVT::i16) 4102 Opcode = AArch64::CMP_SWAP_16; 4103 else if (MemTy == MVT::i32) 4104 Opcode = AArch64::CMP_SWAP_32; 4105 else if (MemTy == MVT::i64) 4106 Opcode = AArch64::CMP_SWAP_64; 4107 else 4108 llvm_unreachable("Unknown AtomicCmpSwap type"); 4109 4110 MVT RegTy = MemTy == MVT::i64 ? 
MVT::i64 : MVT::i32; 4111 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 4112 N->getOperand(0)}; 4113 SDNode *CmpSwap = CurDAG->getMachineNode( 4114 Opcode, SDLoc(N), 4115 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 4116 4117 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 4118 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 4119 4120 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 4121 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 4122 CurDAG->RemoveDeadNode(N); 4123 4124 return true; 4125 } 4126 4127 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, 4128 SDValue &Shift) { 4129 if (!isa<ConstantSDNode>(N)) 4130 return false; 4131 4132 SDLoc DL(N); 4133 uint64_t Val = cast<ConstantSDNode>(N) 4134 ->getAPIntValue() 4135 .trunc(VT.getFixedSizeInBits()) 4136 .getZExtValue(); 4137 4138 switch (VT.SimpleTy) { 4139 case MVT::i8: 4140 // All immediates are supported. 4141 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 4142 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 4143 return true; 4144 case MVT::i16: 4145 case MVT::i32: 4146 case MVT::i64: 4147 // Support 8bit unsigned immediates. 4148 if (Val <= 255) { 4149 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 4150 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 4151 return true; 4152 } 4153 // Support 16bit unsigned immediates that are a multiple of 256. 4154 if (Val <= 65280 && Val % 256 == 0) { 4155 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 4156 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); 4157 return true; 4158 } 4159 break; 4160 default: 4161 break; 4162 } 4163 4164 return false; 4165 } 4166 4167 bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT, 4168 SDValue &Imm, SDValue &Shift, 4169 bool Negate) { 4170 if (!isa<ConstantSDNode>(N)) 4171 return false; 4172 4173 SDLoc DL(N); 4174 int64_t Val = cast<ConstantSDNode>(N) 4175 ->getAPIntValue() 4176 .trunc(VT.getFixedSizeInBits()) 4177 .getSExtValue(); 4178 4179 if (Negate) 4180 Val = -Val; 4181 4182 // Signed saturating instructions treat their immediate operand as unsigned, 4183 // whereas the related intrinsics define their operands to be signed. This 4184 // means we can only use the immediate form when the operand is non-negative. 4185 if (Val < 0) 4186 return false; 4187 4188 switch (VT.SimpleTy) { 4189 case MVT::i8: 4190 // All positive immediates are supported. 4191 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 4192 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 4193 return true; 4194 case MVT::i16: 4195 case MVT::i32: 4196 case MVT::i64: 4197 // Support 8bit positive immediates. 4198 if (Val <= 255) { 4199 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 4200 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 4201 return true; 4202 } 4203 // Support 16bit positive immediates that are a multiple of 256. 
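    // (Illustrative: Val == 0x1200 (4608) is encoded as immediate 0x12 with
    // shift 8.)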
4204 if (Val <= 65280 && Val % 256 == 0) { 4205 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 4206 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); 4207 return true; 4208 } 4209 break; 4210 default: 4211 break; 4212 } 4213 4214 return false; 4215 } 4216 4217 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, 4218 SDValue &Shift) { 4219 if (!isa<ConstantSDNode>(N)) 4220 return false; 4221 4222 SDLoc DL(N); 4223 int64_t Val = cast<ConstantSDNode>(N) 4224 ->getAPIntValue() 4225 .trunc(VT.getFixedSizeInBits()) 4226 .getSExtValue(); 4227 4228 switch (VT.SimpleTy) { 4229 case MVT::i8: 4230 // All immediates are supported. 4231 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 4232 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 4233 return true; 4234 case MVT::i16: 4235 case MVT::i32: 4236 case MVT::i64: 4237 // Support 8bit signed immediates. 4238 if (Val >= -128 && Val <= 127) { 4239 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 4240 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 4241 return true; 4242 } 4243 // Support 16bit signed immediates that are a multiple of 256. 4244 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { 4245 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 4246 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32); 4247 return true; 4248 } 4249 break; 4250 default: 4251 break; 4252 } 4253 4254 return false; 4255 } 4256 4257 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { 4258 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 4259 int64_t ImmVal = CNode->getSExtValue(); 4260 SDLoc DL(N); 4261 if (ImmVal >= -128 && ImmVal < 128) { 4262 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 4263 return true; 4264 } 4265 } 4266 return false; 4267 } 4268 4269 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { 4270 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 4271 uint64_t ImmVal = CNode->getZExtValue(); 4272 4273 switch (VT.SimpleTy) { 4274 case MVT::i8: 4275 ImmVal &= 0xFF; 4276 break; 4277 case MVT::i16: 4278 ImmVal &= 0xFFFF; 4279 break; 4280 case MVT::i32: 4281 ImmVal &= 0xFFFFFFFF; 4282 break; 4283 case MVT::i64: 4284 break; 4285 default: 4286 llvm_unreachable("Unexpected type"); 4287 } 4288 4289 if (ImmVal < 256) { 4290 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 4291 return true; 4292 } 4293 } 4294 return false; 4295 } 4296 4297 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, 4298 bool Invert) { 4299 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 4300 uint64_t ImmVal = CNode->getZExtValue(); 4301 SDLoc DL(N); 4302 4303 if (Invert) 4304 ImmVal = ~ImmVal; 4305 4306 // Shift mask depending on type size. 4307 switch (VT.SimpleTy) { 4308 case MVT::i8: 4309 ImmVal &= 0xFF; 4310 ImmVal |= ImmVal << 8; 4311 ImmVal |= ImmVal << 16; 4312 ImmVal |= ImmVal << 32; 4313 break; 4314 case MVT::i16: 4315 ImmVal &= 0xFFFF; 4316 ImmVal |= ImmVal << 16; 4317 ImmVal |= ImmVal << 32; 4318 break; 4319 case MVT::i32: 4320 ImmVal &= 0xFFFFFFFF; 4321 ImmVal |= ImmVal << 32; 4322 break; 4323 case MVT::i64: 4324 break; 4325 default: 4326 llvm_unreachable("Unexpected type"); 4327 } 4328 4329 uint64_t encoding; 4330 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { 4331 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); 4332 return true; 4333 } 4334 } 4335 return false; 4336 } 4337 4338 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. 
4339 // Rather than attempt to normalise everything we can sometimes saturate the 4340 // shift amount during selection. This function also allows for consistent 4341 // isel patterns by ensuring the resulting "Imm" node is of the i32 type 4342 // required by the instructions. 4343 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, 4344 uint64_t High, bool AllowSaturation, 4345 SDValue &Imm) { 4346 if (auto *CN = dyn_cast<ConstantSDNode>(N)) { 4347 uint64_t ImmVal = CN->getZExtValue(); 4348 4349 // Reject shift amounts that are too small. 4350 if (ImmVal < Low) 4351 return false; 4352 4353 // Reject or saturate shift amounts that are too big. 4354 if (ImmVal > High) { 4355 if (!AllowSaturation) 4356 return false; 4357 ImmVal = High; 4358 } 4359 4360 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 4361 return true; 4362 } 4363 4364 return false; 4365 } 4366 4367 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 4368 // tagp(FrameIndex, IRGstack, tag_offset): 4369 // since the offset between FrameIndex and IRGstack is a compile-time 4370 // constant, this can be lowered to a single ADDG instruction. 4371 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 4372 return false; 4373 } 4374 4375 SDValue IRG_SP = N->getOperand(2); 4376 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 4377 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) { 4378 return false; 4379 } 4380 4381 const TargetLowering *TLI = getTargetLowering(); 4382 SDLoc DL(N); 4383 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 4384 SDValue FiOp = CurDAG->getTargetFrameIndex( 4385 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 4386 int TagOffset = N->getConstantOperandVal(3); 4387 4388 SDNode *Out = CurDAG->getMachineNode( 4389 AArch64::TAGPstack, DL, MVT::i64, 4390 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 4391 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 4392 ReplaceNode(N, Out); 4393 return true; 4394 } 4395 4396 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 4397 assert(isa<ConstantSDNode>(N->getOperand(3)) && 4398 "llvm.aarch64.tagp third argument must be an immediate"); 4399 if (trySelectStackSlotTagP(N)) 4400 return; 4401 // FIXME: above applies in any case when offset between Op1 and Op2 is a 4402 // compile-time constant, not just for stack allocations. 4403 4404 // General case for unrelated pointers in Op1 and Op2. 4405 SDLoc DL(N); 4406 int TagOffset = N->getConstantOperandVal(3); 4407 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 4408 {N->getOperand(1), N->getOperand(2)}); 4409 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 4410 {SDValue(N1, 0), N->getOperand(2)}); 4411 SDNode *N3 = CurDAG->getMachineNode( 4412 AArch64::ADDG, DL, MVT::i64, 4413 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 4414 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 4415 ReplaceNode(N, N3); 4416 } 4417 4418 bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) { 4419 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!"); 4420 4421 // Bail when not a "cast" like insert_subvector. 4422 if (N->getConstantOperandVal(2) != 0) 4423 return false; 4424 if (!N->getOperand(0).isUndef()) 4425 return false; 4426 4427 // Bail when normal isel should do the job. 
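  // (Illustrative: inserting a 256-bit fixed-length v8i32 into an undef
  // nxv4i32 at element 0 falls through to the manual selection below, while
  // inserts of 128 bits or fewer are left to the normal patterns.)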
4428 EVT VT = N->getValueType(0); 4429 EVT InVT = N->getOperand(1).getValueType(); 4430 if (VT.isFixedLengthVector() || InVT.isScalableVector()) 4431 return false; 4432 if (InVT.getSizeInBits() <= 128) 4433 return false; 4434 4435 // NOTE: We can only get here when doing fixed length SVE code generation. 4436 // We do manual selection because the types involved are not linked to real 4437 // registers (despite being legal) and must be coerced into SVE registers. 4438 4439 assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && 4440 "Expected to insert into a packed scalable vector!"); 4441 4442 SDLoc DL(N); 4443 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 4444 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, 4445 N->getOperand(1), RC)); 4446 return true; 4447 } 4448 4449 bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) { 4450 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!"); 4451 4452 // Bail when not a "cast" like extract_subvector. 4453 if (N->getConstantOperandVal(1) != 0) 4454 return false; 4455 4456 // Bail when normal isel can do the job. 4457 EVT VT = N->getValueType(0); 4458 EVT InVT = N->getOperand(0).getValueType(); 4459 if (VT.isScalableVector() || InVT.isFixedLengthVector()) 4460 return false; 4461 if (VT.getSizeInBits() <= 128) 4462 return false; 4463 4464 // NOTE: We can only get here when doing fixed length SVE code generation. 4465 // We do manual selection because the types involved are not linked to real 4466 // registers (despite being legal) and must be coerced into SVE registers. 4467 4468 assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && 4469 "Expected to extract from a packed scalable vector!"); 4470 4471 SDLoc DL(N); 4472 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 4473 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, 4474 N->getOperand(0), RC)); 4475 return true; 4476 } 4477 4478 bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) { 4479 assert(N->getOpcode() == ISD::OR && "Expected OR instruction"); 4480 4481 SDValue N0 = N->getOperand(0); 4482 SDValue N1 = N->getOperand(1); 4483 EVT VT = N->getValueType(0); 4484 4485 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm) 4486 // Rotate by a constant is a funnel shift in IR which is exanded to 4487 // an OR with shifted operands. 
4488 // We do the following transform: 4489 // OR N0, N1 -> xar (x, y, imm) 4490 // Where: 4491 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount 4492 // N0 = SHL_PRED true, V, splat(bits-imm) 4493 // V = (xor x, y) 4494 if (VT.isScalableVector() && 4495 (Subtarget->hasSVE2() || 4496 (Subtarget->hasSME() && Subtarget->isStreaming()))) { 4497 if (N0.getOpcode() != AArch64ISD::SHL_PRED || 4498 N1.getOpcode() != AArch64ISD::SRL_PRED) 4499 std::swap(N0, N1); 4500 if (N0.getOpcode() != AArch64ISD::SHL_PRED || 4501 N1.getOpcode() != AArch64ISD::SRL_PRED) 4502 return false; 4503 4504 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering()); 4505 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) || 4506 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0))) 4507 return false; 4508 4509 SDValue XOR = N0.getOperand(1); 4510 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1)) 4511 return false; 4512 4513 APInt ShlAmt, ShrAmt; 4514 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) || 4515 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt)) 4516 return false; 4517 4518 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits()) 4519 return false; 4520 4521 SDLoc DL(N); 4522 SDValue Imm = 4523 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32); 4524 4525 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm}; 4526 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>( 4527 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S, 4528 AArch64::XAR_ZZZI_D})) { 4529 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 4530 return true; 4531 } 4532 return false; 4533 } 4534 4535 if (!Subtarget->hasSHA3()) 4536 return false; 4537 4538 if (N0->getOpcode() != AArch64ISD::VSHL || 4539 N1->getOpcode() != AArch64ISD::VLSHR) 4540 return false; 4541 4542 if (N0->getOperand(0) != N1->getOperand(0) || 4543 N1->getOperand(0)->getOpcode() != ISD::XOR) 4544 return false; 4545 4546 SDValue XOR = N0.getOperand(0); 4547 SDValue R1 = XOR.getOperand(0); 4548 SDValue R2 = XOR.getOperand(1); 4549 4550 unsigned HsAmt = N0.getConstantOperandVal(1); 4551 unsigned ShAmt = N1.getConstantOperandVal(1); 4552 4553 SDLoc DL = SDLoc(N0.getOperand(1)); 4554 SDValue Imm = CurDAG->getTargetConstant( 4555 ShAmt, DL, N0.getOperand(1).getValueType(), false); 4556 4557 if (ShAmt + HsAmt != 64) 4558 return false; 4559 4560 SDValue Ops[] = {R1, R2, Imm}; 4561 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops); 4562 4563 return true; 4564 } 4565 4566 void AArch64DAGToDAGISel::Select(SDNode *Node) { 4567 // If we have a custom node, we already have selected! 4568 if (Node->isMachineOpcode()) { 4569 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 4570 Node->setNodeId(-1); 4571 return; 4572 } 4573 4574 // Few custom selection stuff. 4575 EVT VT = Node->getValueType(0); 4576 4577 switch (Node->getOpcode()) { 4578 default: 4579 break; 4580 4581 case ISD::ATOMIC_CMP_SWAP: 4582 if (SelectCMP_SWAP(Node)) 4583 return; 4584 break; 4585 4586 case ISD::READ_REGISTER: 4587 case AArch64ISD::MRRS: 4588 if (tryReadRegister(Node)) 4589 return; 4590 break; 4591 4592 case ISD::WRITE_REGISTER: 4593 case AArch64ISD::MSRR: 4594 if (tryWriteRegister(Node)) 4595 return; 4596 break; 4597 4598 case ISD::LOAD: { 4599 // Try to select as an indexed load. Fall through to normal processing 4600 // if we can't. 
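    // (For instance, a load with a post-increment addressing mode can be
    // selected to a pre/post-indexed form such as LDRXpost here; plain loads
    // fall through to the TableGen patterns.)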
4601 if (tryIndexedLoad(Node)) 4602 return; 4603 break; 4604 } 4605 4606 case ISD::SRL: 4607 case ISD::AND: 4608 case ISD::SRA: 4609 case ISD::SIGN_EXTEND_INREG: 4610 if (tryBitfieldExtractOp(Node)) 4611 return; 4612 if (tryBitfieldInsertInZeroOp(Node)) 4613 return; 4614 [[fallthrough]]; 4615 case ISD::ROTR: 4616 case ISD::SHL: 4617 if (tryShiftAmountMod(Node)) 4618 return; 4619 break; 4620 4621 case ISD::SIGN_EXTEND: 4622 if (tryBitfieldExtractOpFromSExt(Node)) 4623 return; 4624 break; 4625 4626 case ISD::OR: 4627 if (tryBitfieldInsertOp(Node)) 4628 return; 4629 if (trySelectXAR(Node)) 4630 return; 4631 break; 4632 4633 case ISD::EXTRACT_SUBVECTOR: { 4634 if (trySelectCastScalableToFixedLengthVector(Node)) 4635 return; 4636 break; 4637 } 4638 4639 case ISD::INSERT_SUBVECTOR: { 4640 if (trySelectCastFixedLengthToScalableVector(Node)) 4641 return; 4642 break; 4643 } 4644 4645 case ISD::Constant: { 4646 // Materialize zero constants as copies from WZR/XZR. This allows 4647 // the coalescer to propagate these into other instructions. 4648 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 4649 if (ConstNode->isZero()) { 4650 if (VT == MVT::i32) { 4651 SDValue New = CurDAG->getCopyFromReg( 4652 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 4653 ReplaceNode(Node, New.getNode()); 4654 return; 4655 } else if (VT == MVT::i64) { 4656 SDValue New = CurDAG->getCopyFromReg( 4657 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 4658 ReplaceNode(Node, New.getNode()); 4659 return; 4660 } 4661 } 4662 break; 4663 } 4664 4665 case ISD::FrameIndex: { 4666 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 4667 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 4668 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 4669 const TargetLowering *TLI = getTargetLowering(); 4670 SDValue TFI = CurDAG->getTargetFrameIndex( 4671 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 4672 SDLoc DL(Node); 4673 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 4674 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 4675 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 4676 return; 4677 } 4678 case ISD::INTRINSIC_W_CHAIN: { 4679 unsigned IntNo = Node->getConstantOperandVal(1); 4680 switch (IntNo) { 4681 default: 4682 break; 4683 case Intrinsic::aarch64_gcsss: { 4684 SDLoc DL(Node); 4685 SDValue Chain = Node->getOperand(0); 4686 SDValue Val = Node->getOperand(2); 4687 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64); 4688 SDNode *SS1 = 4689 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain); 4690 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64, 4691 MVT::Other, Zero, SDValue(SS1, 0)); 4692 ReplaceNode(Node, SS2); 4693 return; 4694 } 4695 case Intrinsic::aarch64_ldaxp: 4696 case Intrinsic::aarch64_ldxp: { 4697 unsigned Op = 4698 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 4699 SDValue MemAddr = Node->getOperand(2); 4700 SDLoc DL(Node); 4701 SDValue Chain = Node->getOperand(0); 4702 4703 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 4704 MVT::Other, MemAddr, Chain); 4705 4706 // Transfer memoperands. 4707 MachineMemOperand *MemOp = 4708 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 4709 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4710 ReplaceNode(Node, Ld); 4711 return; 4712 } 4713 case Intrinsic::aarch64_stlxp: 4714 case Intrinsic::aarch64_stxp: { 4715 unsigned Op = 4716 IntNo == Intrinsic::aarch64_stlxp ? 
AArch64::STLXPX : AArch64::STXPX; 4717 SDLoc DL(Node); 4718 SDValue Chain = Node->getOperand(0); 4719 SDValue ValLo = Node->getOperand(2); 4720 SDValue ValHi = Node->getOperand(3); 4721 SDValue MemAddr = Node->getOperand(4); 4722 4723 // Place arguments in the right order. 4724 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 4725 4726 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 4727 // Transfer memoperands. 4728 MachineMemOperand *MemOp = 4729 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 4730 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4731 4732 ReplaceNode(Node, St); 4733 return; 4734 } 4735 case Intrinsic::aarch64_neon_ld1x2: 4736 if (VT == MVT::v8i8) { 4737 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 4738 return; 4739 } else if (VT == MVT::v16i8) { 4740 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 4741 return; 4742 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4743 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 4744 return; 4745 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4746 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 4747 return; 4748 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4749 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 4750 return; 4751 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4752 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 4753 return; 4754 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4755 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 4756 return; 4757 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4758 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 4759 return; 4760 } 4761 break; 4762 case Intrinsic::aarch64_neon_ld1x3: 4763 if (VT == MVT::v8i8) { 4764 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 4765 return; 4766 } else if (VT == MVT::v16i8) { 4767 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 4768 return; 4769 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4770 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 4771 return; 4772 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4773 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 4774 return; 4775 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4776 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 4777 return; 4778 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4779 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 4780 return; 4781 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4782 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 4783 return; 4784 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4785 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 4786 return; 4787 } 4788 break; 4789 case Intrinsic::aarch64_neon_ld1x4: 4790 if (VT == MVT::v8i8) { 4791 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 4792 return; 4793 } else if (VT == MVT::v16i8) { 4794 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 4795 return; 4796 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4797 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 4798 return; 4799 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4800 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 4801 return; 4802 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4803 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 4804 
return; 4805 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4806 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 4807 return; 4808 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4809 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 4810 return; 4811 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4812 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 4813 return; 4814 } 4815 break; 4816 case Intrinsic::aarch64_neon_ld2: 4817 if (VT == MVT::v8i8) { 4818 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 4819 return; 4820 } else if (VT == MVT::v16i8) { 4821 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 4822 return; 4823 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4824 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 4825 return; 4826 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4827 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 4828 return; 4829 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4830 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 4831 return; 4832 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4833 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 4834 return; 4835 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4836 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 4837 return; 4838 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4839 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 4840 return; 4841 } 4842 break; 4843 case Intrinsic::aarch64_neon_ld3: 4844 if (VT == MVT::v8i8) { 4845 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 4846 return; 4847 } else if (VT == MVT::v16i8) { 4848 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 4849 return; 4850 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4851 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 4852 return; 4853 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4854 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 4855 return; 4856 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4857 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 4858 return; 4859 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4860 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 4861 return; 4862 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4863 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 4864 return; 4865 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4866 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 4867 return; 4868 } 4869 break; 4870 case Intrinsic::aarch64_neon_ld4: 4871 if (VT == MVT::v8i8) { 4872 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 4873 return; 4874 } else if (VT == MVT::v16i8) { 4875 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 4876 return; 4877 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4878 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 4879 return; 4880 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4881 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 4882 return; 4883 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4884 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 4885 return; 4886 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4887 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 4888 return; 4889 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4890 SelectLoad(Node, 4, 
AArch64::LD1Fourv1d, AArch64::dsub0); 4891 return; 4892 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4893 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 4894 return; 4895 } 4896 break; 4897 case Intrinsic::aarch64_neon_ld2r: 4898 if (VT == MVT::v8i8) { 4899 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 4900 return; 4901 } else if (VT == MVT::v16i8) { 4902 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 4903 return; 4904 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4905 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 4906 return; 4907 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4908 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 4909 return; 4910 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4911 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 4912 return; 4913 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4914 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 4915 return; 4916 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4917 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 4918 return; 4919 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4920 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 4921 return; 4922 } 4923 break; 4924 case Intrinsic::aarch64_neon_ld3r: 4925 if (VT == MVT::v8i8) { 4926 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 4927 return; 4928 } else if (VT == MVT::v16i8) { 4929 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 4930 return; 4931 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4932 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 4933 return; 4934 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4935 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 4936 return; 4937 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4938 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 4939 return; 4940 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4941 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 4942 return; 4943 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4944 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 4945 return; 4946 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4947 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 4948 return; 4949 } 4950 break; 4951 case Intrinsic::aarch64_neon_ld4r: 4952 if (VT == MVT::v8i8) { 4953 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 4954 return; 4955 } else if (VT == MVT::v16i8) { 4956 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 4957 return; 4958 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4959 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 4960 return; 4961 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4962 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 4963 return; 4964 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4965 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 4966 return; 4967 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4968 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 4969 return; 4970 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4971 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 4972 return; 4973 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4974 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 4975 return; 4976 } 4977 break; 4978 case Intrinsic::aarch64_neon_ld2lane: 4979 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 
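// Lane loads are keyed on element size only, so the 64-bit and 128-bit
// vector forms share one opcode per element width (LD2i8/LD2i16/LD2i32/LD2i64).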
4980 SelectLoadLane(Node, 2, AArch64::LD2i8); 4981 return; 4982 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4983 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4984 SelectLoadLane(Node, 2, AArch64::LD2i16); 4985 return; 4986 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4987 VT == MVT::v2f32) { 4988 SelectLoadLane(Node, 2, AArch64::LD2i32); 4989 return; 4990 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4991 VT == MVT::v1f64) { 4992 SelectLoadLane(Node, 2, AArch64::LD2i64); 4993 return; 4994 } 4995 break; 4996 case Intrinsic::aarch64_neon_ld3lane: 4997 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4998 SelectLoadLane(Node, 3, AArch64::LD3i8); 4999 return; 5000 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5001 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5002 SelectLoadLane(Node, 3, AArch64::LD3i16); 5003 return; 5004 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5005 VT == MVT::v2f32) { 5006 SelectLoadLane(Node, 3, AArch64::LD3i32); 5007 return; 5008 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5009 VT == MVT::v1f64) { 5010 SelectLoadLane(Node, 3, AArch64::LD3i64); 5011 return; 5012 } 5013 break; 5014 case Intrinsic::aarch64_neon_ld4lane: 5015 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5016 SelectLoadLane(Node, 4, AArch64::LD4i8); 5017 return; 5018 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5019 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5020 SelectLoadLane(Node, 4, AArch64::LD4i16); 5021 return; 5022 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5023 VT == MVT::v2f32) { 5024 SelectLoadLane(Node, 4, AArch64::LD4i32); 5025 return; 5026 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5027 VT == MVT::v1f64) { 5028 SelectLoadLane(Node, 4, AArch64::LD4i64); 5029 return; 5030 } 5031 break; 5032 case Intrinsic::aarch64_ld64b: 5033 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); 5034 return; 5035 case Intrinsic::aarch64_sve_ld2q_sret: { 5036 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true); 5037 return; 5038 } 5039 case Intrinsic::aarch64_sve_ld3q_sret: { 5040 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true); 5041 return; 5042 } 5043 case Intrinsic::aarch64_sve_ld4q_sret: { 5044 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true); 5045 return; 5046 } 5047 case Intrinsic::aarch64_sve_ld2_sret: { 5048 if (VT == MVT::nxv16i8) { 5049 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, 5050 true); 5051 return; 5052 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5053 VT == MVT::nxv8bf16) { 5054 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, 5055 true); 5056 return; 5057 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5058 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W, 5059 true); 5060 return; 5061 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5062 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D, 5063 true); 5064 return; 5065 } 5066 break; 5067 } 5068 case Intrinsic::aarch64_sve_ld1_pn_x2: { 5069 if (VT == MVT::nxv16i8) { 5070 if (Subtarget->hasSME2()) 5071 SelectContiguousMultiVectorLoad( 5072 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO); 5073 else if (Subtarget->hasSVE2p1()) 5074 SelectContiguousMultiVectorLoad(Node, 2, 0, 
AArch64::LD1B_2Z_IMM, 5075 AArch64::LD1B_2Z); 5076 else 5077 break; 5078 return; 5079 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5080 VT == MVT::nxv8bf16) { 5081 if (Subtarget->hasSME2()) 5082 SelectContiguousMultiVectorLoad( 5083 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO); 5084 else if (Subtarget->hasSVE2p1()) 5085 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM, 5086 AArch64::LD1H_2Z); 5087 else 5088 break; 5089 return; 5090 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5091 if (Subtarget->hasSME2()) 5092 SelectContiguousMultiVectorLoad( 5093 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO); 5094 else if (Subtarget->hasSVE2p1()) 5095 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM, 5096 AArch64::LD1W_2Z); 5097 else 5098 break; 5099 return; 5100 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5101 if (Subtarget->hasSME2()) 5102 SelectContiguousMultiVectorLoad( 5103 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO); 5104 else if (Subtarget->hasSVE2p1()) 5105 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM, 5106 AArch64::LD1D_2Z); 5107 else 5108 break; 5109 return; 5110 } 5111 break; 5112 } 5113 case Intrinsic::aarch64_sve_ld1_pn_x4: { 5114 if (VT == MVT::nxv16i8) { 5115 if (Subtarget->hasSME2()) 5116 SelectContiguousMultiVectorLoad( 5117 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO); 5118 else if (Subtarget->hasSVE2p1()) 5119 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM, 5120 AArch64::LD1B_4Z); 5121 else 5122 break; 5123 return; 5124 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5125 VT == MVT::nxv8bf16) { 5126 if (Subtarget->hasSME2()) 5127 SelectContiguousMultiVectorLoad( 5128 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO); 5129 else if (Subtarget->hasSVE2p1()) 5130 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM, 5131 AArch64::LD1H_4Z); 5132 else 5133 break; 5134 return; 5135 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5136 if (Subtarget->hasSME2()) 5137 SelectContiguousMultiVectorLoad( 5138 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO); 5139 else if (Subtarget->hasSVE2p1()) 5140 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM, 5141 AArch64::LD1W_4Z); 5142 else 5143 break; 5144 return; 5145 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5146 if (Subtarget->hasSME2()) 5147 SelectContiguousMultiVectorLoad( 5148 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO); 5149 else if (Subtarget->hasSVE2p1()) 5150 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM, 5151 AArch64::LD1D_4Z); 5152 else 5153 break; 5154 return; 5155 } 5156 break; 5157 } 5158 case Intrinsic::aarch64_sve_ldnt1_pn_x2: { 5159 if (VT == MVT::nxv16i8) { 5160 if (Subtarget->hasSME2()) 5161 SelectContiguousMultiVectorLoad(Node, 2, 0, 5162 AArch64::LDNT1B_2Z_IMM_PSEUDO, 5163 AArch64::LDNT1B_2Z_PSEUDO); 5164 else if (Subtarget->hasSVE2p1()) 5165 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM, 5166 AArch64::LDNT1B_2Z); 5167 else 5168 break; 5169 return; 5170 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5171 VT == MVT::nxv8bf16) { 5172 if (Subtarget->hasSME2()) 5173 SelectContiguousMultiVectorLoad(Node, 2, 1, 5174 AArch64::LDNT1H_2Z_IMM_PSEUDO, 5175 AArch64::LDNT1H_2Z_PSEUDO); 5176 else if (Subtarget->hasSVE2p1()) 5177 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM, 5178 
AArch64::LDNT1H_2Z); 5179 else 5180 break; 5181 return; 5182 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5183 if (Subtarget->hasSME2()) 5184 SelectContiguousMultiVectorLoad(Node, 2, 2, 5185 AArch64::LDNT1W_2Z_IMM_PSEUDO, 5186 AArch64::LDNT1W_2Z_PSEUDO); 5187 else if (Subtarget->hasSVE2p1()) 5188 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM, 5189 AArch64::LDNT1W_2Z); 5190 else 5191 break; 5192 return; 5193 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5194 if (Subtarget->hasSME2()) 5195 SelectContiguousMultiVectorLoad(Node, 2, 3, 5196 AArch64::LDNT1D_2Z_IMM_PSEUDO, 5197 AArch64::LDNT1D_2Z_PSEUDO); 5198 else if (Subtarget->hasSVE2p1()) 5199 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM, 5200 AArch64::LDNT1D_2Z); 5201 else 5202 break; 5203 return; 5204 } 5205 break; 5206 } 5207 case Intrinsic::aarch64_sve_ldnt1_pn_x4: { 5208 if (VT == MVT::nxv16i8) { 5209 if (Subtarget->hasSME2()) 5210 SelectContiguousMultiVectorLoad(Node, 4, 0, 5211 AArch64::LDNT1B_4Z_IMM_PSEUDO, 5212 AArch64::LDNT1B_4Z_PSEUDO); 5213 else if (Subtarget->hasSVE2p1()) 5214 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM, 5215 AArch64::LDNT1B_4Z); 5216 else 5217 break; 5218 return; 5219 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5220 VT == MVT::nxv8bf16) { 5221 if (Subtarget->hasSME2()) 5222 SelectContiguousMultiVectorLoad(Node, 4, 1, 5223 AArch64::LDNT1H_4Z_IMM_PSEUDO, 5224 AArch64::LDNT1H_4Z_PSEUDO); 5225 else if (Subtarget->hasSVE2p1()) 5226 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM, 5227 AArch64::LDNT1H_4Z); 5228 else 5229 break; 5230 return; 5231 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5232 if (Subtarget->hasSME2()) 5233 SelectContiguousMultiVectorLoad(Node, 4, 2, 5234 AArch64::LDNT1W_4Z_IMM_PSEUDO, 5235 AArch64::LDNT1W_4Z_PSEUDO); 5236 else if (Subtarget->hasSVE2p1()) 5237 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM, 5238 AArch64::LDNT1W_4Z); 5239 else 5240 break; 5241 return; 5242 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5243 if (Subtarget->hasSME2()) 5244 SelectContiguousMultiVectorLoad(Node, 4, 3, 5245 AArch64::LDNT1D_4Z_IMM_PSEUDO, 5246 AArch64::LDNT1D_4Z_PSEUDO); 5247 else if (Subtarget->hasSVE2p1()) 5248 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM, 5249 AArch64::LDNT1D_4Z); 5250 else 5251 break; 5252 return; 5253 } 5254 break; 5255 } 5256 case Intrinsic::aarch64_sve_ld3_sret: { 5257 if (VT == MVT::nxv16i8) { 5258 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, 5259 true); 5260 return; 5261 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5262 VT == MVT::nxv8bf16) { 5263 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H, 5264 true); 5265 return; 5266 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5267 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W, 5268 true); 5269 return; 5270 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5271 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D, 5272 true); 5273 return; 5274 } 5275 break; 5276 } 5277 case Intrinsic::aarch64_sve_ld4_sret: { 5278 if (VT == MVT::nxv16i8) { 5279 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B, 5280 true); 5281 return; 5282 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5283 VT == MVT::nxv8bf16) { 5284 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, 5285 true); 5286 return; 5287 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) 
{ 5288 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W, 5289 true); 5290 return; 5291 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5292 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D, 5293 true); 5294 return; 5295 } 5296 break; 5297 } 5298 case Intrinsic::aarch64_sme_read_hor_vg2: { 5299 if (VT == MVT::nxv16i8) { 5300 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0, 5301 AArch64::MOVA_2ZMXI_H_B); 5302 return; 5303 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5304 VT == MVT::nxv8bf16) { 5305 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0, 5306 AArch64::MOVA_2ZMXI_H_H); 5307 return; 5308 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5309 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0, 5310 AArch64::MOVA_2ZMXI_H_S); 5311 return; 5312 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5313 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0, 5314 AArch64::MOVA_2ZMXI_H_D); 5315 return; 5316 } 5317 break; 5318 } 5319 case Intrinsic::aarch64_sme_read_ver_vg2: { 5320 if (VT == MVT::nxv16i8) { 5321 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0, 5322 AArch64::MOVA_2ZMXI_V_B); 5323 return; 5324 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5325 VT == MVT::nxv8bf16) { 5326 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0, 5327 AArch64::MOVA_2ZMXI_V_H); 5328 return; 5329 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5330 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0, 5331 AArch64::MOVA_2ZMXI_V_S); 5332 return; 5333 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5334 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0, 5335 AArch64::MOVA_2ZMXI_V_D); 5336 return; 5337 } 5338 break; 5339 } 5340 case Intrinsic::aarch64_sme_read_hor_vg4: { 5341 if (VT == MVT::nxv16i8) { 5342 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0, 5343 AArch64::MOVA_4ZMXI_H_B); 5344 return; 5345 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5346 VT == MVT::nxv8bf16) { 5347 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0, 5348 AArch64::MOVA_4ZMXI_H_H); 5349 return; 5350 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5351 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0, 5352 AArch64::MOVA_4ZMXI_H_S); 5353 return; 5354 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5355 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0, 5356 AArch64::MOVA_4ZMXI_H_D); 5357 return; 5358 } 5359 break; 5360 } 5361 case Intrinsic::aarch64_sme_read_ver_vg4: { 5362 if (VT == MVT::nxv16i8) { 5363 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0, 5364 AArch64::MOVA_4ZMXI_V_B); 5365 return; 5366 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5367 VT == MVT::nxv8bf16) { 5368 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0, 5369 AArch64::MOVA_4ZMXI_V_H); 5370 return; 5371 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5372 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0, 5373 AArch64::MOVA_4ZMXI_V_S); 5374 return; 5375 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5376 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0, 5377 AArch64::MOVA_4ZMXI_V_D); 5378 return; 5379 } 5380 break; 5381 } 5382 case Intrinsic::aarch64_sme_read_vg1x2: { 5383 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA, 5384 AArch64::MOVA_VG2_2ZMXI); 5385 return; 5386 } 5387 case Intrinsic::aarch64_sme_read_vg1x4: { 5388 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA, 5389 AArch64::MOVA_VG4_4ZMXI); 5390 return; 5391 } 5392 case Intrinsic::aarch64_sme_readz_horiz_x2: { 5393 if (VT == MVT::nxv16i8) { 5394 
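// The readz forms read ZA tile slices and then zero them (move-and-zero);
// the MOVAZ_*_PSEUDO opcodes selected here are expanded to the architectural
// instructions later.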
SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2); 5395 return; 5396 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5397 VT == MVT::nxv8bf16) { 5398 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2); 5399 return; 5400 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5401 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2); 5402 return; 5403 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5404 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2); 5405 return; 5406 } 5407 break; 5408 } 5409 case Intrinsic::aarch64_sme_readz_vert_x2: { 5410 if (VT == MVT::nxv16i8) { 5411 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2); 5412 return; 5413 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5414 VT == MVT::nxv8bf16) { 5415 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2); 5416 return; 5417 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5418 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2); 5419 return; 5420 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5421 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2); 5422 return; 5423 } 5424 break; 5425 } 5426 case Intrinsic::aarch64_sme_readz_horiz_x4: { 5427 if (VT == MVT::nxv16i8) { 5428 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4); 5429 return; 5430 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5431 VT == MVT::nxv8bf16) { 5432 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4); 5433 return; 5434 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5435 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4); 5436 return; 5437 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5438 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4); 5439 return; 5440 } 5441 break; 5442 } 5443 case Intrinsic::aarch64_sme_readz_vert_x4: { 5444 if (VT == MVT::nxv16i8) { 5445 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4); 5446 return; 5447 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5448 VT == MVT::nxv8bf16) { 5449 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4); 5450 return; 5451 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5452 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4); 5453 return; 5454 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5455 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4); 5456 return; 5457 } 5458 break; 5459 } 5460 case Intrinsic::aarch64_sme_readz_x2: { 5461 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1, 5462 AArch64::ZA); 5463 return; 5464 } 5465 case Intrinsic::aarch64_sme_readz_x4: { 5466 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1, 5467 AArch64::ZA); 5468 return; 5469 } 5470 case Intrinsic::swift_async_context_addr: { 5471 SDLoc DL(Node); 5472 SDValue Chain = Node->getOperand(0); 5473 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64); 5474 SDValue Res = SDValue( 5475 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP, 5476 CurDAG->getTargetConstant(8, DL, MVT::i32), 5477 CurDAG->getTargetConstant(0, DL, MVT::i32)), 5478 0); 5479 ReplaceUses(SDValue(Node, 0), Res); 5480 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1)); 5481 CurDAG->RemoveDeadNode(Node); 5482 5483 auto &MF = CurDAG->getMachineFunction(); 5484 MF.getFrameInfo().setFrameAddressIsTaken(true); 
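// The address produced above is FP - 8, the Swift async context slot in the
// extended frame record; record its presence for frame lowering.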
5485 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); 5486 return; 5487 } 5488 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: { 5489 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5490 Node->getValueType(0), 5491 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H, 5492 AArch64::LUTI2_4ZTZI_S})) 5493 // Second Immediate must be <= 3: 5494 SelectMultiVectorLuti(Node, 4, Opc, 3); 5495 return; 5496 } 5497 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: { 5498 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5499 Node->getValueType(0), 5500 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S})) 5501 // Second Immediate must be <= 1: 5502 SelectMultiVectorLuti(Node, 4, Opc, 1); 5503 return; 5504 } 5505 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: { 5506 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5507 Node->getValueType(0), 5508 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H, 5509 AArch64::LUTI2_2ZTZI_S})) 5510 // Second Immediate must be <= 7: 5511 SelectMultiVectorLuti(Node, 2, Opc, 7); 5512 return; 5513 } 5514 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: { 5515 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5516 Node->getValueType(0), 5517 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H, 5518 AArch64::LUTI4_2ZTZI_S})) 5519 // Second Immediate must be <= 3: 5520 SelectMultiVectorLuti(Node, 2, Opc, 3); 5521 return; 5522 } 5523 } 5524 } break; 5525 case ISD::INTRINSIC_WO_CHAIN: { 5526 unsigned IntNo = Node->getConstantOperandVal(0); 5527 switch (IntNo) { 5528 default: 5529 break; 5530 case Intrinsic::aarch64_tagp: 5531 SelectTagP(Node); 5532 return; 5533 5534 case Intrinsic::ptrauth_auth: 5535 SelectPtrauthAuth(Node); 5536 return; 5537 5538 case Intrinsic::ptrauth_resign: 5539 SelectPtrauthResign(Node); 5540 return; 5541 5542 case Intrinsic::aarch64_neon_tbl2: 5543 SelectTable(Node, 2, 5544 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, 5545 false); 5546 return; 5547 case Intrinsic::aarch64_neon_tbl3: 5548 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 5549 : AArch64::TBLv16i8Three, 5550 false); 5551 return; 5552 case Intrinsic::aarch64_neon_tbl4: 5553 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 5554 : AArch64::TBLv16i8Four, 5555 false); 5556 return; 5557 case Intrinsic::aarch64_neon_tbx2: 5558 SelectTable(Node, 2, 5559 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, 5560 true); 5561 return; 5562 case Intrinsic::aarch64_neon_tbx3: 5563 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 5564 : AArch64::TBXv16i8Three, 5565 true); 5566 return; 5567 case Intrinsic::aarch64_neon_tbx4: 5568 SelectTable(Node, 4, VT == MVT::v8i8 ? 
AArch64::TBXv8i8Four 5569 : AArch64::TBXv16i8Four, 5570 true); 5571 return; 5572 case Intrinsic::aarch64_sve_srshl_single_x2: 5573 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5574 Node->getValueType(0), 5575 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H, 5576 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D})) 5577 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5578 return; 5579 case Intrinsic::aarch64_sve_srshl_single_x4: 5580 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5581 Node->getValueType(0), 5582 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H, 5583 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D})) 5584 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5585 return; 5586 case Intrinsic::aarch64_sve_urshl_single_x2: 5587 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5588 Node->getValueType(0), 5589 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H, 5590 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D})) 5591 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5592 return; 5593 case Intrinsic::aarch64_sve_urshl_single_x4: 5594 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5595 Node->getValueType(0), 5596 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H, 5597 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D})) 5598 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5599 return; 5600 case Intrinsic::aarch64_sve_srshl_x2: 5601 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5602 Node->getValueType(0), 5603 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H, 5604 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D})) 5605 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5606 return; 5607 case Intrinsic::aarch64_sve_srshl_x4: 5608 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5609 Node->getValueType(0), 5610 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H, 5611 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D})) 5612 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5613 return; 5614 case Intrinsic::aarch64_sve_urshl_x2: 5615 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5616 Node->getValueType(0), 5617 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H, 5618 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D})) 5619 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5620 return; 5621 case Intrinsic::aarch64_sve_urshl_x4: 5622 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5623 Node->getValueType(0), 5624 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H, 5625 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D})) 5626 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5627 return; 5628 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2: 5629 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5630 Node->getValueType(0), 5631 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H, 5632 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D})) 5633 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5634 return; 5635 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4: 5636 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5637 Node->getValueType(0), 5638 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H, 5639 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D})) 5640 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5641 return; 5642 case Intrinsic::aarch64_sve_sqdmulh_vgx2: 5643 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5644 Node->getValueType(0), 5645 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H, 5646 
AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D})) 5647 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5648 return; 5649 case Intrinsic::aarch64_sve_sqdmulh_vgx4: 5650 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5651 Node->getValueType(0), 5652 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H, 5653 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D})) 5654 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5655 return; 5656 case Intrinsic::aarch64_sve_whilege_x2: 5657 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5658 Node->getValueType(0), 5659 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H, 5660 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D})) 5661 SelectWhilePair(Node, Op); 5662 return; 5663 case Intrinsic::aarch64_sve_whilegt_x2: 5664 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5665 Node->getValueType(0), 5666 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H, 5667 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D})) 5668 SelectWhilePair(Node, Op); 5669 return; 5670 case Intrinsic::aarch64_sve_whilehi_x2: 5671 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5672 Node->getValueType(0), 5673 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H, 5674 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D})) 5675 SelectWhilePair(Node, Op); 5676 return; 5677 case Intrinsic::aarch64_sve_whilehs_x2: 5678 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5679 Node->getValueType(0), 5680 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H, 5681 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D})) 5682 SelectWhilePair(Node, Op); 5683 return; 5684 case Intrinsic::aarch64_sve_whilele_x2: 5685 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5686 Node->getValueType(0), 5687 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H, 5688 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D})) 5689 SelectWhilePair(Node, Op); 5690 return; 5691 case Intrinsic::aarch64_sve_whilelo_x2: 5692 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5693 Node->getValueType(0), 5694 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H, 5695 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D})) 5696 SelectWhilePair(Node, Op); 5697 return; 5698 case Intrinsic::aarch64_sve_whilels_x2: 5699 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5700 Node->getValueType(0), 5701 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H, 5702 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D})) 5703 SelectWhilePair(Node, Op); 5704 return; 5705 case Intrinsic::aarch64_sve_whilelt_x2: 5706 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5707 Node->getValueType(0), 5708 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H, 5709 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D})) 5710 SelectWhilePair(Node, Op); 5711 return; 5712 case Intrinsic::aarch64_sve_smax_single_x2: 5713 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5714 Node->getValueType(0), 5715 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H, 5716 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D})) 5717 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5718 return; 5719 case Intrinsic::aarch64_sve_umax_single_x2: 5720 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5721 Node->getValueType(0), 5722 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H, 5723 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D})) 5724 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5725 return; 5726 case Intrinsic::aarch64_sve_fmax_single_x2: 5727 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5728 
Node->getValueType(0), 5729 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H, 5730 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D})) 5731 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5732 return; 5733 case Intrinsic::aarch64_sve_smax_single_x4: 5734 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5735 Node->getValueType(0), 5736 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H, 5737 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D})) 5738 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5739 return; 5740 case Intrinsic::aarch64_sve_umax_single_x4: 5741 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5742 Node->getValueType(0), 5743 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H, 5744 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D})) 5745 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5746 return; 5747 case Intrinsic::aarch64_sve_fmax_single_x4: 5748 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5749 Node->getValueType(0), 5750 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H, 5751 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D})) 5752 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5753 return; 5754 case Intrinsic::aarch64_sve_smin_single_x2: 5755 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5756 Node->getValueType(0), 5757 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H, 5758 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D})) 5759 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5760 return; 5761 case Intrinsic::aarch64_sve_umin_single_x2: 5762 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5763 Node->getValueType(0), 5764 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H, 5765 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D})) 5766 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5767 return; 5768 case Intrinsic::aarch64_sve_fmin_single_x2: 5769 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5770 Node->getValueType(0), 5771 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H, 5772 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D})) 5773 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5774 return; 5775 case Intrinsic::aarch64_sve_smin_single_x4: 5776 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5777 Node->getValueType(0), 5778 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H, 5779 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D})) 5780 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5781 return; 5782 case Intrinsic::aarch64_sve_umin_single_x4: 5783 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5784 Node->getValueType(0), 5785 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H, 5786 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D})) 5787 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5788 return; 5789 case Intrinsic::aarch64_sve_fmin_single_x4: 5790 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5791 Node->getValueType(0), 5792 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H, 5793 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D})) 5794 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5795 return; 5796 case Intrinsic::aarch64_sve_smax_x2: 5797 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5798 Node->getValueType(0), 5799 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H, 5800 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D})) 5801 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5802 return; 5803 case Intrinsic::aarch64_sve_umax_x2: 5804 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5805 Node->getValueType(0), 5806 
{AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H, 5807 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D})) 5808 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5809 return; 5810 case Intrinsic::aarch64_sve_fmax_x2: 5811 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5812 Node->getValueType(0), 5813 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H, 5814 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D})) 5815 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5816 return; 5817 case Intrinsic::aarch64_sve_smax_x4: 5818 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5819 Node->getValueType(0), 5820 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H, 5821 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D})) 5822 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5823 return; 5824 case Intrinsic::aarch64_sve_umax_x4: 5825 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5826 Node->getValueType(0), 5827 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H, 5828 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D})) 5829 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5830 return; 5831 case Intrinsic::aarch64_sve_fmax_x4: 5832 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5833 Node->getValueType(0), 5834 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H, 5835 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D})) 5836 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5837 return; 5838 case Intrinsic::aarch64_sve_smin_x2: 5839 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5840 Node->getValueType(0), 5841 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H, 5842 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D})) 5843 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5844 return; 5845 case Intrinsic::aarch64_sve_umin_x2: 5846 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5847 Node->getValueType(0), 5848 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H, 5849 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D})) 5850 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5851 return; 5852 case Intrinsic::aarch64_sve_fmin_x2: 5853 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5854 Node->getValueType(0), 5855 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H, 5856 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D})) 5857 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5858 return; 5859 case Intrinsic::aarch64_sve_smin_x4: 5860 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5861 Node->getValueType(0), 5862 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H, 5863 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D})) 5864 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5865 return; 5866 case Intrinsic::aarch64_sve_umin_x4: 5867 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5868 Node->getValueType(0), 5869 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H, 5870 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D})) 5871 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5872 return; 5873 case Intrinsic::aarch64_sve_fmin_x4: 5874 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5875 Node->getValueType(0), 5876 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H, 5877 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D})) 5878 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5879 return; 5880 case Intrinsic::aarch64_sve_fmaxnm_single_x2 : 5881 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5882 Node->getValueType(0), 5883 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H, 
5884 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D})) 5885 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5886 return; 5887 case Intrinsic::aarch64_sve_fmaxnm_single_x4 : 5888 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5889 Node->getValueType(0), 5890 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H, 5891 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D})) 5892 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5893 return; 5894 case Intrinsic::aarch64_sve_fminnm_single_x2: 5895 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5896 Node->getValueType(0), 5897 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H, 5898 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D})) 5899 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5900 return; 5901 case Intrinsic::aarch64_sve_fminnm_single_x4: 5902 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5903 Node->getValueType(0), 5904 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H, 5905 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D})) 5906 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5907 return; 5908 case Intrinsic::aarch64_sve_fmaxnm_x2: 5909 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5910 Node->getValueType(0), 5911 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H, 5912 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D})) 5913 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5914 return; 5915 case Intrinsic::aarch64_sve_fmaxnm_x4: 5916 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5917 Node->getValueType(0), 5918 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H, 5919 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D})) 5920 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5921 return; 5922 case Intrinsic::aarch64_sve_fminnm_x2: 5923 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5924 Node->getValueType(0), 5925 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H, 5926 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D})) 5927 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5928 return; 5929 case Intrinsic::aarch64_sve_fminnm_x4: 5930 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5931 Node->getValueType(0), 5932 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H, 5933 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D})) 5934 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5935 return; 5936 case Intrinsic::aarch64_sve_fcvtzs_x2: 5937 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); 5938 return; 5939 case Intrinsic::aarch64_sve_scvtf_x2: 5940 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS); 5941 return; 5942 case Intrinsic::aarch64_sve_fcvtzu_x2: 5943 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS); 5944 return; 5945 case Intrinsic::aarch64_sve_ucvtf_x2: 5946 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS); 5947 return; 5948 case Intrinsic::aarch64_sve_fcvtzs_x4: 5949 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS); 5950 return; 5951 case Intrinsic::aarch64_sve_scvtf_x4: 5952 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS); 5953 return; 5954 case Intrinsic::aarch64_sve_fcvtzu_x4: 5955 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS); 5956 return; 5957 case Intrinsic::aarch64_sve_ucvtf_x4: 5958 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS); 5959 return; 5960 case Intrinsic::aarch64_sve_fcvt_widen_x2: 5961 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S); 5962 return; 5963 case Intrinsic::aarch64_sve_fcvtl_widen_x2: 
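// Widening convert: two packed half-precision vectors to single precision
// (FCVTL, .H -> .S).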
5964 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S); 5965 return; 5966 case Intrinsic::aarch64_sve_sclamp_single_x2: 5967 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5968 Node->getValueType(0), 5969 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H, 5970 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D})) 5971 SelectClamp(Node, 2, Op); 5972 return; 5973 case Intrinsic::aarch64_sve_uclamp_single_x2: 5974 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5975 Node->getValueType(0), 5976 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H, 5977 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D})) 5978 SelectClamp(Node, 2, Op); 5979 return; 5980 case Intrinsic::aarch64_sve_fclamp_single_x2: 5981 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5982 Node->getValueType(0), 5983 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S, 5984 AArch64::FCLAMP_VG2_2Z2Z_D})) 5985 SelectClamp(Node, 2, Op); 5986 return; 5987 case Intrinsic::aarch64_sve_bfclamp_single_x2: 5988 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H); 5989 return; 5990 case Intrinsic::aarch64_sve_sclamp_single_x4: 5991 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5992 Node->getValueType(0), 5993 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H, 5994 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D})) 5995 SelectClamp(Node, 4, Op); 5996 return; 5997 case Intrinsic::aarch64_sve_uclamp_single_x4: 5998 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5999 Node->getValueType(0), 6000 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H, 6001 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D})) 6002 SelectClamp(Node, 4, Op); 6003 return; 6004 case Intrinsic::aarch64_sve_fclamp_single_x4: 6005 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 6006 Node->getValueType(0), 6007 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S, 6008 AArch64::FCLAMP_VG4_4Z4Z_D})) 6009 SelectClamp(Node, 4, Op); 6010 return; 6011 case Intrinsic::aarch64_sve_bfclamp_single_x4: 6012 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H); 6013 return; 6014 case Intrinsic::aarch64_sve_add_single_x2: 6015 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 6016 Node->getValueType(0), 6017 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H, 6018 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D})) 6019 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 6020 return; 6021 case Intrinsic::aarch64_sve_add_single_x4: 6022 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 6023 Node->getValueType(0), 6024 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H, 6025 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D})) 6026 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 6027 return; 6028 case Intrinsic::aarch64_sve_zip_x2: 6029 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 6030 Node->getValueType(0), 6031 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H, 6032 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D})) 6033 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 6034 return; 6035 case Intrinsic::aarch64_sve_zipq_x2: 6036 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, 6037 AArch64::ZIP_VG2_2ZZZ_Q); 6038 return; 6039 case Intrinsic::aarch64_sve_zip_x4: 6040 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 6041 Node->getValueType(0), 6042 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H, 6043 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D})) 6044 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 6045 
return; 6046 case Intrinsic::aarch64_sve_zipq_x4: 6047 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, 6048 AArch64::ZIP_VG4_4Z4Z_Q); 6049 return; 6050 case Intrinsic::aarch64_sve_uzp_x2: 6051 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 6052 Node->getValueType(0), 6053 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H, 6054 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D})) 6055 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 6056 return; 6057 case Intrinsic::aarch64_sve_uzpq_x2: 6058 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, 6059 AArch64::UZP_VG2_2ZZZ_Q); 6060 return; 6061 case Intrinsic::aarch64_sve_uzp_x4: 6062 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 6063 Node->getValueType(0), 6064 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H, 6065 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D})) 6066 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 6067 return; 6068 case Intrinsic::aarch64_sve_uzpq_x4: 6069 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, 6070 AArch64::UZP_VG4_4Z4Z_Q); 6071 return; 6072 case Intrinsic::aarch64_sve_sel_x2: 6073 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 6074 Node->getValueType(0), 6075 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H, 6076 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D})) 6077 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true); 6078 return; 6079 case Intrinsic::aarch64_sve_sel_x4: 6080 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 6081 Node->getValueType(0), 6082 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H, 6083 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D})) 6084 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true); 6085 return; 6086 case Intrinsic::aarch64_sve_frinta_x2: 6087 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S); 6088 return; 6089 case Intrinsic::aarch64_sve_frinta_x4: 6090 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S); 6091 return; 6092 case Intrinsic::aarch64_sve_frintm_x2: 6093 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S); 6094 return; 6095 case Intrinsic::aarch64_sve_frintm_x4: 6096 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S); 6097 return; 6098 case Intrinsic::aarch64_sve_frintn_x2: 6099 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S); 6100 return; 6101 case Intrinsic::aarch64_sve_frintn_x4: 6102 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S); 6103 return; 6104 case Intrinsic::aarch64_sve_frintp_x2: 6105 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S); 6106 return; 6107 case Intrinsic::aarch64_sve_frintp_x4: 6108 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S); 6109 return; 6110 case Intrinsic::aarch64_sve_sunpk_x2: 6111 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 6112 Node->getValueType(0), 6113 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S, 6114 AArch64::SUNPK_VG2_2ZZ_D})) 6115 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 6116 return; 6117 case Intrinsic::aarch64_sve_uunpk_x2: 6118 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 6119 Node->getValueType(0), 6120 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S, 6121 AArch64::UUNPK_VG2_2ZZ_D})) 6122 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 6123 return; 6124 case Intrinsic::aarch64_sve_sunpk_x4: 6125 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 6126 Node->getValueType(0), 6127 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S, 6128 
AArch64::SUNPK_VG4_4Z2Z_D})) 6129 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 6130 return; 6131 case Intrinsic::aarch64_sve_uunpk_x4: 6132 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 6133 Node->getValueType(0), 6134 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S, 6135 AArch64::UUNPK_VG4_4Z2Z_D})) 6136 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 6137 return; 6138 case Intrinsic::aarch64_sve_pext_x2: { 6139 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 6140 Node->getValueType(0), 6141 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S, 6142 AArch64::PEXT_2PCI_D})) 6143 SelectPExtPair(Node, Op); 6144 return; 6145 } 6146 } 6147 break; 6148 } 6149 case ISD::INTRINSIC_VOID: { 6150 unsigned IntNo = Node->getConstantOperandVal(1); 6151 if (Node->getNumOperands() >= 3) 6152 VT = Node->getOperand(2)->getValueType(0); 6153 switch (IntNo) { 6154 default: 6155 break; 6156 case Intrinsic::aarch64_neon_st1x2: { 6157 if (VT == MVT::v8i8) { 6158 SelectStore(Node, 2, AArch64::ST1Twov8b); 6159 return; 6160 } else if (VT == MVT::v16i8) { 6161 SelectStore(Node, 2, AArch64::ST1Twov16b); 6162 return; 6163 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 6164 VT == MVT::v4bf16) { 6165 SelectStore(Node, 2, AArch64::ST1Twov4h); 6166 return; 6167 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 6168 VT == MVT::v8bf16) { 6169 SelectStore(Node, 2, AArch64::ST1Twov8h); 6170 return; 6171 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6172 SelectStore(Node, 2, AArch64::ST1Twov2s); 6173 return; 6174 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6175 SelectStore(Node, 2, AArch64::ST1Twov4s); 6176 return; 6177 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6178 SelectStore(Node, 2, AArch64::ST1Twov2d); 6179 return; 6180 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6181 SelectStore(Node, 2, AArch64::ST1Twov1d); 6182 return; 6183 } 6184 break; 6185 } 6186 case Intrinsic::aarch64_neon_st1x3: { 6187 if (VT == MVT::v8i8) { 6188 SelectStore(Node, 3, AArch64::ST1Threev8b); 6189 return; 6190 } else if (VT == MVT::v16i8) { 6191 SelectStore(Node, 3, AArch64::ST1Threev16b); 6192 return; 6193 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 6194 VT == MVT::v4bf16) { 6195 SelectStore(Node, 3, AArch64::ST1Threev4h); 6196 return; 6197 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 6198 VT == MVT::v8bf16) { 6199 SelectStore(Node, 3, AArch64::ST1Threev8h); 6200 return; 6201 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6202 SelectStore(Node, 3, AArch64::ST1Threev2s); 6203 return; 6204 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6205 SelectStore(Node, 3, AArch64::ST1Threev4s); 6206 return; 6207 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6208 SelectStore(Node, 3, AArch64::ST1Threev2d); 6209 return; 6210 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6211 SelectStore(Node, 3, AArch64::ST1Threev1d); 6212 return; 6213 } 6214 break; 6215 } 6216 case Intrinsic::aarch64_neon_st1x4: { 6217 if (VT == MVT::v8i8) { 6218 SelectStore(Node, 4, AArch64::ST1Fourv8b); 6219 return; 6220 } else if (VT == MVT::v16i8) { 6221 SelectStore(Node, 4, AArch64::ST1Fourv16b); 6222 return; 6223 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 6224 VT == MVT::v4bf16) { 6225 SelectStore(Node, 4, AArch64::ST1Fourv4h); 6226 return; 6227 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 6228 VT == MVT::v8bf16) { 6229 SelectStore(Node, 4, AArch64::ST1Fourv8h); 6230 return; 6231 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6232 
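// v2i32/v2f32 tuples live in four D registers, stored with ST1Fourv2s.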
SelectStore(Node, 4, AArch64::ST1Fourv2s); 6233 return; 6234 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6235 SelectStore(Node, 4, AArch64::ST1Fourv4s); 6236 return; 6237 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6238 SelectStore(Node, 4, AArch64::ST1Fourv2d); 6239 return; 6240 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6241 SelectStore(Node, 4, AArch64::ST1Fourv1d); 6242 return; 6243 } 6244 break; 6245 } 6246 case Intrinsic::aarch64_neon_st2: { 6247 if (VT == MVT::v8i8) { 6248 SelectStore(Node, 2, AArch64::ST2Twov8b); 6249 return; 6250 } else if (VT == MVT::v16i8) { 6251 SelectStore(Node, 2, AArch64::ST2Twov16b); 6252 return; 6253 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 6254 VT == MVT::v4bf16) { 6255 SelectStore(Node, 2, AArch64::ST2Twov4h); 6256 return; 6257 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 6258 VT == MVT::v8bf16) { 6259 SelectStore(Node, 2, AArch64::ST2Twov8h); 6260 return; 6261 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6262 SelectStore(Node, 2, AArch64::ST2Twov2s); 6263 return; 6264 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6265 SelectStore(Node, 2, AArch64::ST2Twov4s); 6266 return; 6267 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6268 SelectStore(Node, 2, AArch64::ST2Twov2d); 6269 return; 6270 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6271 SelectStore(Node, 2, AArch64::ST1Twov1d); 6272 return; 6273 } 6274 break; 6275 } 6276 case Intrinsic::aarch64_neon_st3: { 6277 if (VT == MVT::v8i8) { 6278 SelectStore(Node, 3, AArch64::ST3Threev8b); 6279 return; 6280 } else if (VT == MVT::v16i8) { 6281 SelectStore(Node, 3, AArch64::ST3Threev16b); 6282 return; 6283 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 6284 VT == MVT::v4bf16) { 6285 SelectStore(Node, 3, AArch64::ST3Threev4h); 6286 return; 6287 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 6288 VT == MVT::v8bf16) { 6289 SelectStore(Node, 3, AArch64::ST3Threev8h); 6290 return; 6291 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6292 SelectStore(Node, 3, AArch64::ST3Threev2s); 6293 return; 6294 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6295 SelectStore(Node, 3, AArch64::ST3Threev4s); 6296 return; 6297 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6298 SelectStore(Node, 3, AArch64::ST3Threev2d); 6299 return; 6300 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6301 SelectStore(Node, 3, AArch64::ST1Threev1d); 6302 return; 6303 } 6304 break; 6305 } 6306 case Intrinsic::aarch64_neon_st4: { 6307 if (VT == MVT::v8i8) { 6308 SelectStore(Node, 4, AArch64::ST4Fourv8b); 6309 return; 6310 } else if (VT == MVT::v16i8) { 6311 SelectStore(Node, 4, AArch64::ST4Fourv16b); 6312 return; 6313 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 6314 VT == MVT::v4bf16) { 6315 SelectStore(Node, 4, AArch64::ST4Fourv4h); 6316 return; 6317 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 6318 VT == MVT::v8bf16) { 6319 SelectStore(Node, 4, AArch64::ST4Fourv8h); 6320 return; 6321 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6322 SelectStore(Node, 4, AArch64::ST4Fourv2s); 6323 return; 6324 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6325 SelectStore(Node, 4, AArch64::ST4Fourv4s); 6326 return; 6327 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6328 SelectStore(Node, 4, AArch64::ST4Fourv2d); 6329 return; 6330 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6331 SelectStore(Node, 4, AArch64::ST1Fourv1d); 6332 return; 6333 } 6334 break; 6335 } 6336 case Intrinsic::aarch64_neon_st2lane: { 6337 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 6338 
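// As with the lane loads, lane stores depend only on the element size, so the
// 64-bit and 128-bit byte vectors both map to ST2i8.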
        SelectStoreLane(Node, 2, AArch64::ST2i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 2, AArch64::ST2i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 2, AArch64::ST2i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 2, AArch64::ST2i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st3lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 3, AArch64::ST3i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 3, AArch64::ST3i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 3, AArch64::ST3i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 3, AArch64::ST3i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st4lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 4, AArch64::ST4i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 4, AArch64::ST4i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 4, AArch64::ST4i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 4, AArch64::ST4i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st2q: {
      SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st3q: {
      SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st4q: {
      SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st2: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st3: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st4: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
        return;
      }
      break;
    }
    }
    break;
  }
  case AArch64ISD::LD2post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x2post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD2DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4post: {
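    // Post-indexed stores carry the stored vectors as operands (the node's
    // own results are only the updated address and the chain), so re-derive
    // the element type from operand 1 rather than from the node's value type.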
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
      return;
    }
    break;
  }
  }

  // Select the default instruction.
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOptLevel OptLevel) {
  return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
}

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. For example, nxv4i1
/// yields nxv4i32 (and nxv8i32 for NumVec = 2). If the input PredVT is
/// not in the form MVT::nx<M>xi1, it returns an invalid EVT.
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated with a memory operation in \p
/// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
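/// For instance (illustrative): a plain MemSDNode simply reports its
/// MemoryVT, whereas a custom node such as AArch64ISD::SVE_LD2_MERGE_ZERO
/// carries no memory VT of its own, so the width is derived from its
/// predicate operand via getPackedVectorTypeFromPredicateType (an nxv4i1
/// predicate with NumVec = 2 yields nxv8i32).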
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
    return EVT();

  switch (Root->getConstantOperandVal(1)) {
  default:
    return EVT();
  case Intrinsic::aarch64_sme_ldr:
  case Intrinsic::aarch64_sme_str:
    return MVT::nxv16i8;
  case Intrinsic::aarch64_sve_prf:
    // We are using an SVE prefetch intrinsic. Type must be inferred from the
    // width of the predicate.
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_st2q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_st3q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_st4q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_st1dq:
    return EVT(MVT::nxv1i64);
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_st1wq:
    return EVT(MVT::nxv1i32);
  }
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max,
/// where Root is the memory access using N for its address.
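/// For instance (illustrative): with MemVT = nxv4i32 the known-minimum
/// memory width is 16 bytes per vscale increment, so an address of the
/// form (add Base, (vscale 32)) is selected as Base with OffImm = 2,
/// i.e. the "[Base, #2, mul vl]" form, provided 2 lies within [Min, Max].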
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
      return true;
    }

    return false;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8-bit data does not come with an SHL node, so it is treated separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
    if (ImmOff % Size)
      return false;

    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
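  // For instance, (add Base, (shl Idx, Scale)) corresponds to the scaled
  // register-register form [Base, Idx, lsl #Scale]; the shift amount must
  // equal Scale for the fold to apply.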
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(*CurDAG, N);
}

bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
  EVT VT = N.getValueType();
  return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
}

bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                             SDValue &Base, SDValue &Offset,
                                             unsigned Scale) {
  // Try to untangle an ADD node into a 'reg + offset'.
  if (N.getOpcode() == ISD::ADD)
    if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t ImmOff = C->getSExtValue();
      if (ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)) {
        Base = N.getOperand(0);
        Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
        return true;
      }
    }

  // By default, just match reg + 0.
  Base = N;
  Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
  return true;
}