1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the AArch64 target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64MachineFunctionInfo.h" 14 #include "AArch64TargetMachine.h" 15 #include "MCTargetDesc/AArch64AddressingModes.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/CodeGen/ISDOpcodes.h" 18 #include "llvm/CodeGen/SelectionDAGISel.h" 19 #include "llvm/IR/Function.h" // To access function attributes. 20 #include "llvm/IR/GlobalValue.h" 21 #include "llvm/IR/Intrinsics.h" 22 #include "llvm/IR/IntrinsicsAArch64.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/KnownBits.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/raw_ostream.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "aarch64-isel" 32 #define PASS_NAME "AArch64 Instruction Selection" 33 34 //===--------------------------------------------------------------------===// 35 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine 36 /// instructions for SelectionDAG operations. 37 /// 38 namespace { 39 40 class AArch64DAGToDAGISel : public SelectionDAGISel { 41 42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 43 /// make the right decision when generating code for different targets. 44 const AArch64Subtarget *Subtarget; 45 46 public: 47 static char ID; 48 49 AArch64DAGToDAGISel() = delete; 50 51 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, 52 CodeGenOpt::Level OptLevel) 53 : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {} 54 55 bool runOnMachineFunction(MachineFunction &MF) override { 56 Subtarget = &MF.getSubtarget<AArch64Subtarget>(); 57 return SelectionDAGISel::runOnMachineFunction(MF); 58 } 59 60 void Select(SDNode *Node) override; 61 62 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 63 /// inline asm expressions. 
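  /// Currently the memory constraints handled are "m", "o" and "Q"; see the
  /// definition further down in this file.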
64 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 65 unsigned ConstraintID, 66 std::vector<SDValue> &OutOps) override; 67 68 template <signed Low, signed High, signed Scale> 69 bool SelectRDVLImm(SDValue N, SDValue &Imm); 70 71 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); 72 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift); 73 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 74 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 75 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 76 return SelectShiftedRegister(N, false, Reg, Shift); 77 } 78 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 79 return SelectShiftedRegister(N, true, Reg, Shift); 80 } 81 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { 82 return SelectAddrModeIndexed7S(N, 1, Base, OffImm); 83 } 84 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { 85 return SelectAddrModeIndexed7S(N, 2, Base, OffImm); 86 } 87 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { 88 return SelectAddrModeIndexed7S(N, 4, Base, OffImm); 89 } 90 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { 91 return SelectAddrModeIndexed7S(N, 8, Base, OffImm); 92 } 93 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { 94 return SelectAddrModeIndexed7S(N, 16, Base, OffImm); 95 } 96 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) { 97 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm); 98 } 99 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) { 100 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm); 101 } 102 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { 103 return SelectAddrModeIndexed(N, 1, Base, OffImm); 104 } 105 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { 106 return SelectAddrModeIndexed(N, 2, Base, OffImm); 107 } 108 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { 109 return SelectAddrModeIndexed(N, 4, Base, OffImm); 110 } 111 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { 112 return SelectAddrModeIndexed(N, 8, Base, OffImm); 113 } 114 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { 115 return SelectAddrModeIndexed(N, 16, Base, OffImm); 116 } 117 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { 118 return SelectAddrModeUnscaled(N, 1, Base, OffImm); 119 } 120 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { 121 return SelectAddrModeUnscaled(N, 2, Base, OffImm); 122 } 123 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { 124 return SelectAddrModeUnscaled(N, 4, Base, OffImm); 125 } 126 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { 127 return SelectAddrModeUnscaled(N, 8, Base, OffImm); 128 } 129 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { 130 return SelectAddrModeUnscaled(N, 16, Base, OffImm); 131 } 132 template <unsigned Size, unsigned Max> 133 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) { 134 // Test if there is an appropriate addressing mode and check if the 135 // immediate fits. 
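    // Note that SelectAddrModeIndexed returns OffImm already divided by Size,
    // so Max below bounds the scaled immediate. For example (illustrative
    // values), with Size = 16 and Max = 3 the byte offsets 0, 16, 32 and 48
    // are accepted.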
136 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm); 137 if (Found) { 138 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) { 139 int64_t C = CI->getSExtValue(); 140 if (C <= Max) 141 return true; 142 } 143 } 144 145 // Otherwise, base only, materialize address in register. 146 Base = N; 147 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); 148 return true; 149 } 150 151 template<int Width> 152 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, 153 SDValue &SignExtend, SDValue &DoShift) { 154 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 155 } 156 157 template<int Width> 158 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, 159 SDValue &SignExtend, SDValue &DoShift) { 160 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 161 } 162 163 bool SelectExtractHigh(SDValue N, SDValue &Res) { 164 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST) 165 N = N->getOperand(0); 166 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR || 167 !isa<ConstantSDNode>(N->getOperand(1))) 168 return false; 169 EVT VT = N->getValueType(0); 170 EVT LVT = N->getOperand(0).getValueType(); 171 unsigned Index = N->getConstantOperandVal(1); 172 if (!VT.is64BitVector() || !LVT.is128BitVector() || 173 Index != VT.getVectorNumElements()) 174 return false; 175 Res = N->getOperand(0); 176 return true; 177 } 178 179 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) { 180 if (N.getOpcode() != AArch64ISD::VLSHR) 181 return false; 182 SDValue Op = N->getOperand(0); 183 EVT VT = Op.getValueType(); 184 unsigned ShtAmt = N->getConstantOperandVal(1); 185 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD) 186 return false; 187 188 APInt Imm; 189 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift) 190 Imm = APInt(VT.getScalarSizeInBits(), 191 Op.getOperand(1).getConstantOperandVal(0) 192 << Op.getOperand(1).getConstantOperandVal(1)); 193 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP && 194 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0))) 195 Imm = APInt(VT.getScalarSizeInBits(), 196 Op.getOperand(1).getConstantOperandVal(0)); 197 else 198 return false; 199 200 if (Imm != 1ULL << (ShtAmt - 1)) 201 return false; 202 203 Res1 = Op.getOperand(0); 204 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32); 205 return true; 206 } 207 208 bool SelectDupZeroOrUndef(SDValue N) { 209 switch(N->getOpcode()) { 210 case ISD::UNDEF: 211 return true; 212 case AArch64ISD::DUP: 213 case ISD::SPLAT_VECTOR: { 214 auto Opnd0 = N->getOperand(0); 215 if (isNullConstant(Opnd0)) 216 return true; 217 if (isNullFPConstant(Opnd0)) 218 return true; 219 break; 220 } 221 default: 222 break; 223 } 224 225 return false; 226 } 227 228 bool SelectDupZero(SDValue N) { 229 switch(N->getOpcode()) { 230 case AArch64ISD::DUP: 231 case ISD::SPLAT_VECTOR: { 232 auto Opnd0 = N->getOperand(0); 233 if (isNullConstant(Opnd0)) 234 return true; 235 if (isNullFPConstant(Opnd0)) 236 return true; 237 break; 238 } 239 } 240 241 return false; 242 } 243 244 bool SelectDupNegativeZero(SDValue N) { 245 switch(N->getOpcode()) { 246 case AArch64ISD::DUP: 247 case ISD::SPLAT_VECTOR: { 248 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0)); 249 return Const && Const->isZero() && Const->isNegative(); 250 } 251 } 252 253 return false; 254 } 255 256 template<MVT::SimpleValueType VT> 257 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) { 258 return SelectSVEAddSubImm(N, VT, Imm, Shift); 
259 } 260 261 template <MVT::SimpleValueType VT> 262 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) { 263 return SelectSVECpyDupImm(N, VT, Imm, Shift); 264 } 265 266 template <MVT::SimpleValueType VT, bool Invert = false> 267 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) { 268 return SelectSVELogicalImm(N, VT, Imm, Invert); 269 } 270 271 template <MVT::SimpleValueType VT> 272 bool SelectSVEArithImm(SDValue N, SDValue &Imm) { 273 return SelectSVEArithImm(N, VT, Imm); 274 } 275 276 template <unsigned Low, unsigned High, bool AllowSaturation = false> 277 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { 278 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); 279 } 280 281 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) { 282 if (N->getOpcode() != ISD::SPLAT_VECTOR) 283 return false; 284 285 EVT EltVT = N->getValueType(0).getVectorElementType(); 286 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1, 287 /* High */ EltVT.getFixedSizeInBits(), 288 /* AllowSaturation */ true, Imm); 289 } 290 291 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 292 template<signed Min, signed Max, signed Scale, bool Shift> 293 bool SelectCntImm(SDValue N, SDValue &Imm) { 294 if (!isa<ConstantSDNode>(N)) 295 return false; 296 297 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 298 if (Shift) 299 MulImm = 1LL << MulImm; 300 301 if ((MulImm % std::abs(Scale)) != 0) 302 return false; 303 304 MulImm /= Scale; 305 if ((MulImm >= Min) && (MulImm <= Max)) { 306 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); 307 return true; 308 } 309 310 return false; 311 } 312 313 template <signed Max, signed Scale> 314 bool SelectEXTImm(SDValue N, SDValue &Imm) { 315 if (!isa<ConstantSDNode>(N)) 316 return false; 317 318 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 319 320 if (MulImm >= 0 && MulImm <= Max) { 321 MulImm *= Scale; 322 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); 323 return true; 324 } 325 326 return false; 327 } 328 329 template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) { 330 if (auto *CI = dyn_cast<ConstantSDNode>(N)) { 331 uint64_t C = CI->getZExtValue(); 332 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other); 333 return true; 334 } 335 return false; 336 } 337 338 /// Form sequences of consecutive 64/128-bit registers for use in NEON 339 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have 340 /// between 1 and 4 elements. If it contains a single element that is returned 341 /// unchanged; otherwise a REG_SEQUENCE value is returned. 342 SDValue createDTuple(ArrayRef<SDValue> Vecs); 343 SDValue createQTuple(ArrayRef<SDValue> Vecs); 344 // Form a sequence of SVE registers for instructions using list of vectors, 345 // e.g. structured loads and stores (ldN, stN). 346 SDValue createZTuple(ArrayRef<SDValue> Vecs); 347 348 // Similar to above, except the register must start at a multiple of the 349 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple. 350 SDValue createZMulTuple(ArrayRef<SDValue> Regs); 351 352 /// Generic helper for the createDTuple/createQTuple 353 /// functions. Those should almost always be called instead. 
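  /// RegClassIDs names the register classes for 2-, 3- and 4-element tuples
  /// (in that order) and SubRegs the sub-register index for each position, as
  /// in the createDTuple/createQTuple implementations further down.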
354 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[], 355 const unsigned SubRegs[]); 356 357 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); 358 359 bool tryIndexedLoad(SDNode *N); 360 361 bool trySelectStackSlotTagP(SDNode *N); 362 void SelectTagP(SDNode *N); 363 364 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 365 unsigned SubRegIdx); 366 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 367 unsigned SubRegIdx); 368 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 369 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 370 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, 371 unsigned Opc_rr, unsigned Opc_ri, 372 bool IsIntr = false); 373 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs, 374 unsigned Scale, unsigned Opc_ri, 375 unsigned Opc_rr); 376 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs, 377 bool IsZmMulti, unsigned Opcode, 378 bool HasPred = false); 379 void SelectPExtPair(SDNode *N, unsigned Opc); 380 void SelectWhilePair(SDNode *N, unsigned Opc); 381 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode); 382 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode); 383 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs, 384 bool IsTupleInput, unsigned Opc); 385 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode); 386 387 template <unsigned MaxIdx, unsigned Scale> 388 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg, 389 unsigned Op); 390 391 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); 392 /// SVE Reg+Imm addressing mode. 393 template <int64_t Min, int64_t Max> 394 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base, 395 SDValue &OffImm); 396 /// SVE Reg+Reg address mode. 397 template <unsigned Scale> 398 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) { 399 return SelectSVERegRegAddrMode(N, Scale, Base, Offset); 400 } 401 402 template <unsigned MaxIdx, unsigned Scale> 403 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { 404 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale); 405 } 406 407 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); 408 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); 409 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 410 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 411 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale, 412 unsigned Opc_rr, unsigned Opc_ri); 413 std::tuple<unsigned, SDValue, SDValue> 414 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri, 415 const SDValue &OldBase, const SDValue &OldOffset, 416 unsigned Scale); 417 418 bool tryBitfieldExtractOp(SDNode *N); 419 bool tryBitfieldExtractOpFromSExt(SDNode *N); 420 bool tryBitfieldInsertOp(SDNode *N); 421 bool tryBitfieldInsertInZeroOp(SDNode *N); 422 bool tryShiftAmountMod(SDNode *N); 423 424 bool tryReadRegister(SDNode *N); 425 bool tryWriteRegister(SDNode *N); 426 427 bool trySelectCastFixedLengthToScalableVector(SDNode *N); 428 bool trySelectCastScalableToFixedLengthVector(SDNode *N); 429 430 // Include the pieces autogenerated from the target description. 
431 #include "AArch64GenDAGISel.inc" 432 433 private: 434 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, 435 SDValue &Shift); 436 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift); 437 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, 438 SDValue &OffImm) { 439 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm); 440 } 441 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW, 442 unsigned Size, SDValue &Base, 443 SDValue &OffImm); 444 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, 445 SDValue &OffImm); 446 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, 447 SDValue &OffImm); 448 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, 449 SDValue &Offset, SDValue &SignExtend, 450 SDValue &DoShift); 451 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, 452 SDValue &Offset, SDValue &SignExtend, 453 SDValue &DoShift); 454 bool isWorthFolding(SDValue V) const; 455 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, 456 SDValue &Offset, SDValue &SignExtend); 457 458 template<unsigned RegWidth> 459 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { 460 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); 461 } 462 463 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); 464 465 bool SelectCMP_SWAP(SDNode *N); 466 467 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); 468 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); 469 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert); 470 471 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); 472 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High, 473 bool AllowSaturation, SDValue &Imm); 474 475 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm); 476 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, 477 SDValue &Offset); 478 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector, 479 SDValue &Offset, unsigned Scale = 1); 480 481 bool SelectAllActivePredicate(SDValue N); 482 bool SelectAnyPredicate(SDValue N); 483 }; 484 } // end anonymous namespace 485 486 char AArch64DAGToDAGISel::ID = 0; 487 488 INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) 489 490 /// isIntImmediate - This method tests to see if the node is a constant 491 /// operand. If so Imm will receive the 32-bit value. 492 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { 493 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { 494 Imm = C->getZExtValue(); 495 return true; 496 } 497 return false; 498 } 499 500 // isIntImmediate - This method tests to see if a constant operand. 501 // If so Imm will receive the value. 502 static bool isIntImmediate(SDValue N, uint64_t &Imm) { 503 return isIntImmediate(N.getNode(), Imm); 504 } 505 506 // isOpcWithIntImmediate - This method tests to see if the node is a specific 507 // opcode and that it has a immediate integer right operand. 508 // If so Imm will receive the 32 bit value. 509 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, 510 uint64_t &Imm) { 511 return N->getOpcode() == Opc && 512 isIntImmediate(N->getOperand(1).getNode(), Imm); 513 } 514 515 // isIntImmediateEq - This method tests to see if N is a constant operand that 516 // is equivalent to 'ImmExpected'. 
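// (Compiled only when assertions are enabled; see the NDEBUG guard below.)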
517 #ifndef NDEBUG 518 static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) { 519 uint64_t Imm; 520 if (!isIntImmediate(N.getNode(), Imm)) 521 return false; 522 return Imm == ImmExpected; 523 } 524 #endif 525 526 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( 527 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 528 switch(ConstraintID) { 529 default: 530 llvm_unreachable("Unexpected asm memory constraint"); 531 case InlineAsm::Constraint_m: 532 case InlineAsm::Constraint_o: 533 case InlineAsm::Constraint_Q: 534 // We need to make sure that this one operand does not end up in XZR, thus 535 // require the address to be in a PointerRegClass register. 536 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); 537 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); 538 SDLoc dl(Op); 539 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); 540 SDValue NewOp = 541 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 542 dl, Op.getValueType(), 543 Op, RC), 0); 544 OutOps.push_back(NewOp); 545 return false; 546 } 547 return true; 548 } 549 550 /// SelectArithImmed - Select an immediate value that can be represented as 551 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 552 /// Val set to the 12-bit value and Shift set to the shifter operand. 553 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, 554 SDValue &Shift) { 555 // This function is called from the addsub_shifted_imm ComplexPattern, 556 // which lists [imm] as the list of opcode it's interested in, however 557 // we still need to check whether the operand is actually an immediate 558 // here because the ComplexPattern opcode list is only used in 559 // root-level opcode matching. 560 if (!isa<ConstantSDNode>(N.getNode())) 561 return false; 562 563 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 564 unsigned ShiftAmt; 565 566 if (Immed >> 12 == 0) { 567 ShiftAmt = 0; 568 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 569 ShiftAmt = 12; 570 Immed = Immed >> 12; 571 } else 572 return false; 573 574 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 575 SDLoc dl(N); 576 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); 577 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); 578 return true; 579 } 580 581 /// SelectNegArithImmed - As above, but negates the value before trying to 582 /// select it. 583 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, 584 SDValue &Shift) { 585 // This function is called from the addsub_shifted_imm ComplexPattern, 586 // which lists [imm] as the list of opcode it's interested in, however 587 // we still need to check whether the operand is actually an immediate 588 // here because the ComplexPattern opcode list is only used in 589 // root-level opcode matching. 590 if (!isa<ConstantSDNode>(N.getNode())) 591 return false; 592 593 // The immediate operand must be a 24-bit zero-extended immediate. 594 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 595 596 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 597 // have the opposite effect on the C flag, so this pattern mustn't match under 598 // those circumstances. 
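  // (Hence the Immed == 0 bail-out below.) For example, an i32 immediate of
  // 0xFFFFF000 negates to 0x1000, which the SelectArithImmed call at the end
  // of this function encodes as "#1, LSL #12".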
599 if (Immed == 0) 600 return false; 601 602 if (N.getValueType() == MVT::i32) 603 Immed = ~((uint32_t)Immed) + 1; 604 else 605 Immed = ~Immed + 1ULL; 606 if (Immed & 0xFFFFFFFFFF000000ULL) 607 return false; 608 609 Immed &= 0xFFFFFFULL; 610 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, 611 Shift); 612 } 613 614 /// getShiftTypeForNode - Translate a shift node to the corresponding 615 /// ShiftType value. 616 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { 617 switch (N.getOpcode()) { 618 default: 619 return AArch64_AM::InvalidShiftExtend; 620 case ISD::SHL: 621 return AArch64_AM::LSL; 622 case ISD::SRL: 623 return AArch64_AM::LSR; 624 case ISD::SRA: 625 return AArch64_AM::ASR; 626 case ISD::ROTR: 627 return AArch64_AM::ROR; 628 } 629 } 630 631 /// Determine whether it is worth it to fold SHL into the addressing 632 /// mode. 633 static bool isWorthFoldingSHL(SDValue V) { 634 assert(V.getOpcode() == ISD::SHL && "invalid opcode"); 635 // It is worth folding logical shift of up to three places. 636 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1)); 637 if (!CSD) 638 return false; 639 unsigned ShiftVal = CSD->getZExtValue(); 640 if (ShiftVal > 3) 641 return false; 642 643 // Check if this particular node is reused in any non-memory related 644 // operation. If yes, do not try to fold this node into the address 645 // computation, since the computation will be kept. 646 const SDNode *Node = V.getNode(); 647 for (SDNode *UI : Node->uses()) 648 if (!isa<MemSDNode>(*UI)) 649 for (SDNode *UII : UI->uses()) 650 if (!isa<MemSDNode>(*UII)) 651 return false; 652 return true; 653 } 654 655 /// Determine whether it is worth to fold V into an extended register. 656 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { 657 // Trivial if we are optimizing for code size or if there is only 658 // one use of the value. 659 if (CurDAG->shouldOptForSize() || V.hasOneUse()) 660 return true; 661 // If a subtarget has a fastpath LSL we can fold a logical shift into 662 // the addressing mode and save a cycle. 663 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL && 664 isWorthFoldingSHL(V)) 665 return true; 666 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) { 667 const SDValue LHS = V.getOperand(0); 668 const SDValue RHS = V.getOperand(1); 669 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) 670 return true; 671 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) 672 return true; 673 } 674 675 // It hurts otherwise, since the value will be reused. 
676 return false; 677 } 678 679 /// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2 680 /// to select more shifted register 681 bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, 682 SDValue &Shift) { 683 EVT VT = N.getValueType(); 684 if (VT != MVT::i32 && VT != MVT::i64) 685 return false; 686 687 if (N->getOpcode() != ISD::AND || !N->hasOneUse()) 688 return false; 689 SDValue LHS = N.getOperand(0); 690 if (!LHS->hasOneUse()) 691 return false; 692 693 unsigned LHSOpcode = LHS->getOpcode(); 694 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA) 695 return false; 696 697 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1)); 698 if (!ShiftAmtNode) 699 return false; 700 701 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue(); 702 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1)); 703 if (!RHSC) 704 return false; 705 706 APInt AndMask = RHSC->getAPIntValue(); 707 unsigned LowZBits, MaskLen; 708 if (!AndMask.isShiftedMask(LowZBits, MaskLen)) 709 return false; 710 711 unsigned BitWidth = N.getValueSizeInBits(); 712 SDLoc DL(LHS); 713 uint64_t NewShiftC; 714 unsigned NewShiftOp; 715 if (LHSOpcode == ISD::SHL) { 716 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp 717 // BitWidth != LowZBits + MaskLen doesn't match the pattern 718 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen)) 719 return false; 720 721 NewShiftC = LowZBits - ShiftAmtC; 722 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri; 723 } else { 724 if (LowZBits == 0) 725 return false; 726 727 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp 728 NewShiftC = LowZBits + ShiftAmtC; 729 if (NewShiftC >= BitWidth) 730 return false; 731 732 // SRA need all high bits 733 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen))) 734 return false; 735 736 // SRL high bits can be 0 or 1 737 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen))) 738 return false; 739 740 if (LHSOpcode == ISD::SRL) 741 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri; 742 else 743 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri; 744 } 745 746 assert(NewShiftC < BitWidth && "Invalid shift amount"); 747 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT); 748 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT); 749 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0), 750 NewShiftAmt, BitWidthMinus1), 751 0); 752 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits); 753 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32); 754 return true; 755 } 756 757 /// SelectShiftedRegister - Select a "shifted register" operand. If the value 758 /// is not shifted, set the Shift operand to default of "LSL 0". The logical 759 /// instructions allow the shifted register to be rotated, but the arithmetic 760 /// instructions do not. The AllowROR parameter specifies whether ROR is 761 /// supported. 
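/// For example, an operand of the form (shl x, 4) yields Reg = x and Shift =
/// "LSL #4" (subject to the isWorthFolding heuristic), so the shift folds into
/// the using instruction's shifted-register form.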
762 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, 763 SDValue &Reg, SDValue &Shift) { 764 if (SelectShiftedRegisterFromAnd(N, Reg, Shift)) 765 return true; 766 767 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); 768 if (ShType == AArch64_AM::InvalidShiftExtend) 769 return false; 770 if (!AllowROR && ShType == AArch64_AM::ROR) 771 return false; 772 773 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 774 unsigned BitSize = N.getValueSizeInBits(); 775 unsigned Val = RHS->getZExtValue() & (BitSize - 1); 776 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); 777 778 Reg = N.getOperand(0); 779 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); 780 return isWorthFolding(N); 781 } 782 783 return false; 784 } 785 786 /// getExtendTypeForNode - Translate an extend node to the corresponding 787 /// ExtendType value. 788 static AArch64_AM::ShiftExtendType 789 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { 790 if (N.getOpcode() == ISD::SIGN_EXTEND || 791 N.getOpcode() == ISD::SIGN_EXTEND_INREG) { 792 EVT SrcVT; 793 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) 794 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); 795 else 796 SrcVT = N.getOperand(0).getValueType(); 797 798 if (!IsLoadStore && SrcVT == MVT::i8) 799 return AArch64_AM::SXTB; 800 else if (!IsLoadStore && SrcVT == MVT::i16) 801 return AArch64_AM::SXTH; 802 else if (SrcVT == MVT::i32) 803 return AArch64_AM::SXTW; 804 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 805 806 return AArch64_AM::InvalidShiftExtend; 807 } else if (N.getOpcode() == ISD::ZERO_EXTEND || 808 N.getOpcode() == ISD::ANY_EXTEND) { 809 EVT SrcVT = N.getOperand(0).getValueType(); 810 if (!IsLoadStore && SrcVT == MVT::i8) 811 return AArch64_AM::UXTB; 812 else if (!IsLoadStore && SrcVT == MVT::i16) 813 return AArch64_AM::UXTH; 814 else if (SrcVT == MVT::i32) 815 return AArch64_AM::UXTW; 816 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 817 818 return AArch64_AM::InvalidShiftExtend; 819 } else if (N.getOpcode() == ISD::AND) { 820 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 821 if (!CSD) 822 return AArch64_AM::InvalidShiftExtend; 823 uint64_t AndMask = CSD->getZExtValue(); 824 825 switch (AndMask) { 826 default: 827 return AArch64_AM::InvalidShiftExtend; 828 case 0xFF: 829 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 830 case 0xFFFF: 831 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 832 case 0xFFFFFFFF: 833 return AArch64_AM::UXTW; 834 } 835 } 836 837 return AArch64_AM::InvalidShiftExtend; 838 } 839 840 /// Instructions that accept extend modifiers like UXTW expect the register 841 /// being extended to be a GPR32, but the incoming DAG might be acting on a 842 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if 843 /// this is the case. 844 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { 845 if (N.getValueType() == MVT::i32) 846 return N; 847 848 SDLoc dl(N); 849 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N); 850 } 851 852 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 
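// For example, with Scale = 16 a constant multiplier of 32 becomes the
// immediate 2, which is accepted if it lies within [Low, High].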
template<signed Low, signed High, signed Scale>
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((MulImm % std::abs(Scale)) == 0) {
    int64_t RDVLImm = MulImm / Scale;
    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
    // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
    auto isDef32 = [](SDValue N) {
      unsigned Opc = N.getOpcode();
      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
             Opc != ISD::FREEZE;
    };
    if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
        isDef32(Reg))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);
}

/// SelectArithUXTXRegister - Select a "UXTX register" operand. This operand is
/// referred to by instructions that have an SP operand.
bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
                                                  SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() != ISD::SHL)
    return false;

  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD)
    return false;
  ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 4)
    return false;

  Ext = AArch64_AM::UXTX;
  Reg = N.getOperand(0);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op.
Theoretically, it shouldn't matter, but there's 950 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding 951 /// leads to duplicated ADRP instructions. 952 static bool isWorthFoldingADDlow(SDValue N) { 953 for (auto *Use : N->uses()) { 954 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && 955 Use->getOpcode() != ISD::ATOMIC_LOAD && 956 Use->getOpcode() != ISD::ATOMIC_STORE) 957 return false; 958 959 // ldar and stlr have much more restrictive addressing modes (just a 960 // register). 961 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering())) 962 return false; 963 } 964 965 return true; 966 } 967 968 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit 969 /// immediate" address. The "Size" argument is the size in bytes of the memory 970 /// reference, which determines the scale. 971 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, 972 unsigned BW, unsigned Size, 973 SDValue &Base, 974 SDValue &OffImm) { 975 SDLoc dl(N); 976 const DataLayout &DL = CurDAG->getDataLayout(); 977 const TargetLowering *TLI = getTargetLowering(); 978 if (N.getOpcode() == ISD::FrameIndex) { 979 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 980 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 981 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 982 return true; 983 } 984 985 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed 986 // selected here doesn't support labels/immediates, only base+offset. 987 if (CurDAG->isBaseWithConstantOffset(N)) { 988 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 989 if (IsSignedImm) { 990 int64_t RHSC = RHS->getSExtValue(); 991 unsigned Scale = Log2_32(Size); 992 int64_t Range = 0x1LL << (BW - 1); 993 994 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && 995 RHSC < (Range << Scale)) { 996 Base = N.getOperand(0); 997 if (Base.getOpcode() == ISD::FrameIndex) { 998 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 999 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1000 } 1001 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 1002 return true; 1003 } 1004 } else { 1005 // unsigned Immediate 1006 uint64_t RHSC = RHS->getZExtValue(); 1007 unsigned Scale = Log2_32(Size); 1008 uint64_t Range = 0x1ULL << BW; 1009 1010 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { 1011 Base = N.getOperand(0); 1012 if (Base.getOpcode() == ISD::FrameIndex) { 1013 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1014 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1015 } 1016 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 1017 return true; 1018 } 1019 } 1020 } 1021 } 1022 // Base only. The address will be materialized into a register before 1023 // the memory is accessed. 1024 // add x0, Xbase, #offset 1025 // stp x1, x2, [x0] 1026 Base = N; 1027 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1028 return true; 1029 } 1030 1031 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit 1032 /// immediate" address. The "Size" argument is the size in bytes of the memory 1033 /// reference, which determines the scale. 
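/// For example, with Size = 8 the address (add x0, #48) yields Base = x0 and
/// OffImm = 6, i.e. the scaled form [x0, #48] with an encoded immediate of 6.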
1034 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, 1035 SDValue &Base, SDValue &OffImm) { 1036 SDLoc dl(N); 1037 const DataLayout &DL = CurDAG->getDataLayout(); 1038 const TargetLowering *TLI = getTargetLowering(); 1039 if (N.getOpcode() == ISD::FrameIndex) { 1040 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1041 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1042 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1043 return true; 1044 } 1045 1046 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { 1047 GlobalAddressSDNode *GAN = 1048 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); 1049 Base = N.getOperand(0); 1050 OffImm = N.getOperand(1); 1051 if (!GAN) 1052 return true; 1053 1054 if (GAN->getOffset() % Size == 0 && 1055 GAN->getGlobal()->getPointerAlignment(DL) >= Size) 1056 return true; 1057 } 1058 1059 if (CurDAG->isBaseWithConstantOffset(N)) { 1060 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1061 int64_t RHSC = (int64_t)RHS->getZExtValue(); 1062 unsigned Scale = Log2_32(Size); 1063 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 1064 Base = N.getOperand(0); 1065 if (Base.getOpcode() == ISD::FrameIndex) { 1066 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1067 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1068 } 1069 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 1070 return true; 1071 } 1072 } 1073 } 1074 1075 // Before falling back to our general case, check if the unscaled 1076 // instructions can handle this. If so, that's preferable. 1077 if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) 1078 return false; 1079 1080 // Base only. The address will be materialized into a register before 1081 // the memory is accessed. 1082 // add x0, Xbase, #offset 1083 // ldr x0, [x0] 1084 Base = N; 1085 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1086 return true; 1087 } 1088 1089 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit 1090 /// immediate" address. This should only match when there is an offset that 1091 /// is not valid for a scaled immediate addressing mode. The "Size" argument 1092 /// is the size in bytes of the memory reference, which is needed here to know 1093 /// what is valid for a scaled immediate. 1094 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, 1095 SDValue &Base, 1096 SDValue &OffImm) { 1097 if (!CurDAG->isBaseWithConstantOffset(N)) 1098 return false; 1099 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1100 int64_t RHSC = RHS->getSExtValue(); 1101 // If the offset is valid as a scaled immediate, don't match here. 
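    // (The scaled form covers multiples of Size in [0, 0xfff * Size]; other
    // offsets in [-256, 255] fall through to the unscaled LDUR/STUR-style
    // forms matched below.)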
1102 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && 1103 RHSC < (0x1000 << Log2_32(Size))) 1104 return false; 1105 if (RHSC >= -256 && RHSC < 256) { 1106 Base = N.getOperand(0); 1107 if (Base.getOpcode() == ISD::FrameIndex) { 1108 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1109 const TargetLowering *TLI = getTargetLowering(); 1110 Base = CurDAG->getTargetFrameIndex( 1111 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1112 } 1113 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); 1114 return true; 1115 } 1116 } 1117 return false; 1118 } 1119 1120 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { 1121 SDLoc dl(N); 1122 SDValue ImpDef = SDValue( 1123 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); 1124 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef, 1125 N); 1126 } 1127 1128 /// Check if the given SHL node (\p N), can be used to form an 1129 /// extended register for an addressing mode. 1130 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, 1131 bool WantExtend, SDValue &Offset, 1132 SDValue &SignExtend) { 1133 assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); 1134 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1135 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) 1136 return false; 1137 1138 SDLoc dl(N); 1139 if (WantExtend) { 1140 AArch64_AM::ShiftExtendType Ext = 1141 getExtendTypeForNode(N.getOperand(0), true); 1142 if (Ext == AArch64_AM::InvalidShiftExtend) 1143 return false; 1144 1145 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); 1146 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1147 MVT::i32); 1148 } else { 1149 Offset = N.getOperand(0); 1150 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); 1151 } 1152 1153 unsigned LegalShiftVal = Log2_32(Size); 1154 unsigned ShiftVal = CSD->getZExtValue(); 1155 1156 if (ShiftVal != 0 && ShiftVal != LegalShiftVal) 1157 return false; 1158 1159 return isWorthFolding(N); 1160 } 1161 1162 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, 1163 SDValue &Base, SDValue &Offset, 1164 SDValue &SignExtend, 1165 SDValue &DoShift) { 1166 if (N.getOpcode() != ISD::ADD) 1167 return false; 1168 SDValue LHS = N.getOperand(0); 1169 SDValue RHS = N.getOperand(1); 1170 SDLoc dl(N); 1171 1172 // We don't want to match immediate adds here, because they are better lowered 1173 // to the register-immediate addressing modes. 1174 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) 1175 return false; 1176 1177 // Check if this particular node is reused in any non-memory related 1178 // operation. If yes, do not try to fold this node into the address 1179 // computation, since the computation will be kept. 1180 const SDNode *Node = N.getNode(); 1181 for (SDNode *UI : Node->uses()) { 1182 if (!isa<MemSDNode>(*UI)) 1183 return false; 1184 } 1185 1186 // Remember if it is worth folding N when it produces extended register. 1187 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 1188 1189 // Try to match a shifted extend on the RHS. 1190 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1191 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { 1192 Base = LHS; 1193 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1194 return true; 1195 } 1196 1197 // Try to match a shifted extend on the LHS. 
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
// encoded by a single MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constants in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
  // either. Otherwise it would use the [BaseReg + 0] address mode and generate
  // instructions like:
  //     MOV  X0, WideImmediate
  //     ADD  X1, BaseReg, X0
  //     LDR  X2, [X1, 0]
  // In such situations, using the [BaseReg, XReg] addressing mode saves one
  // ADD/SUB:
  //     MOV  X0, WideImmediate
  //     LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
    unsigned Scale = Log2_32(Size);
    // Skip immediates that can be selected via the load/store addressing mode.
    // Also skip immediates that can be encoded by a single ADD (SUB is also
    // checked by using -ImmOff).
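    // For example, a wide offset such as 0x123456 is materialized with
    // MOVi64imm below so that the access can use the [BaseReg, Xm] form
    // instead of needing an extra ADD.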
1287 if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || 1288 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) 1289 return false; 1290 1291 SDValue Ops[] = { RHS }; 1292 SDNode *MOVI = 1293 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); 1294 SDValue MOVIV = SDValue(MOVI, 0); 1295 // This ADD of two X register will be selected into [Reg+Reg] mode. 1296 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); 1297 } 1298 1299 // Remember if it is worth folding N when it produces extended register. 1300 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 1301 1302 // Try to match a shifted extend on the RHS. 1303 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1304 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { 1305 Base = LHS; 1306 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1307 return true; 1308 } 1309 1310 // Try to match a shifted extend on the LHS. 1311 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1312 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { 1313 Base = RHS; 1314 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1315 return true; 1316 } 1317 1318 // Match any non-shifted, non-extend, non-immediate add expression. 1319 Base = LHS; 1320 Offset = RHS; 1321 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); 1322 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); 1323 // Reg1 + Reg2 is free: no check needed. 1324 return true; 1325 } 1326 1327 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { 1328 static const unsigned RegClassIDs[] = { 1329 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; 1330 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, 1331 AArch64::dsub2, AArch64::dsub3}; 1332 1333 return createTuple(Regs, RegClassIDs, SubRegs); 1334 } 1335 1336 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { 1337 static const unsigned RegClassIDs[] = { 1338 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; 1339 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, 1340 AArch64::qsub2, AArch64::qsub3}; 1341 1342 return createTuple(Regs, RegClassIDs, SubRegs); 1343 } 1344 1345 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) { 1346 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID, 1347 AArch64::ZPR3RegClassID, 1348 AArch64::ZPR4RegClassID}; 1349 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, 1350 AArch64::zsub2, AArch64::zsub3}; 1351 1352 return createTuple(Regs, RegClassIDs, SubRegs); 1353 } 1354 1355 SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) { 1356 assert(Regs.size() == 2 || Regs.size() == 4); 1357 1358 // The createTuple interface requires 3 RegClassIDs for each possible 1359 // tuple type even though we only have them for ZPR2 and ZPR4. 1360 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0, 1361 AArch64::ZPR4Mul4RegClassID}; 1362 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, 1363 AArch64::zsub2, AArch64::zsub3}; 1364 return createTuple(Regs, RegClassIDs, SubRegs); 1365 } 1366 1367 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, 1368 const unsigned RegClassIDs[], 1369 const unsigned SubRegs[]) { 1370 // There's no special register-class for a vector-list of 1 element: it's just 1371 // a vector. 
1372 if (Regs.size() == 1) 1373 return Regs[0]; 1374 1375 assert(Regs.size() >= 2 && Regs.size() <= 4); 1376 1377 SDLoc DL(Regs[0]); 1378 1379 SmallVector<SDValue, 4> Ops; 1380 1381 // First operand of REG_SEQUENCE is the desired RegClass. 1382 Ops.push_back( 1383 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); 1384 1385 // Then we get pairs of source & subregister-position for the components. 1386 for (unsigned i = 0; i < Regs.size(); ++i) { 1387 Ops.push_back(Regs[i]); 1388 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); 1389 } 1390 1391 SDNode *N = 1392 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 1393 return SDValue(N, 0); 1394 } 1395 1396 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, 1397 bool isExt) { 1398 SDLoc dl(N); 1399 EVT VT = N->getValueType(0); 1400 1401 unsigned ExtOff = isExt; 1402 1403 // Form a REG_SEQUENCE to force register allocation. 1404 unsigned Vec0Off = ExtOff + 1; 1405 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, 1406 N->op_begin() + Vec0Off + NumVecs); 1407 SDValue RegSeq = createQTuple(Regs); 1408 1409 SmallVector<SDValue, 6> Ops; 1410 if (isExt) 1411 Ops.push_back(N->getOperand(1)); 1412 Ops.push_back(RegSeq); 1413 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); 1414 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 1415 } 1416 1417 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { 1418 LoadSDNode *LD = cast<LoadSDNode>(N); 1419 if (LD->isUnindexed()) 1420 return false; 1421 EVT VT = LD->getMemoryVT(); 1422 EVT DstVT = N->getValueType(0); 1423 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1424 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; 1425 1426 // We're not doing validity checking here. That was done when checking 1427 // if we should mark the load as indexed or not. We're just selecting 1428 // the right instruction. 1429 unsigned Opcode = 0; 1430 1431 ISD::LoadExtType ExtType = LD->getExtensionType(); 1432 bool InsertTo64 = false; 1433 if (VT == MVT::i64) 1434 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; 1435 else if (VT == MVT::i32) { 1436 if (ExtType == ISD::NON_EXTLOAD) 1437 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1438 else if (ExtType == ISD::SEXTLOAD) 1439 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; 1440 else { 1441 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1442 InsertTo64 = true; 1443 // The result of the load is only i32. It's the subreg_to_reg that makes 1444 // it into an i64. 1445 DstVT = MVT::i32; 1446 } 1447 } else if (VT == MVT::i16) { 1448 if (ExtType == ISD::SEXTLOAD) { 1449 if (DstVT == MVT::i64) 1450 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; 1451 else 1452 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; 1453 } else { 1454 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; 1455 InsertTo64 = DstVT == MVT::i64; 1456 // The result of the load is only i32. It's the subreg_to_reg that makes 1457 // it into an i64. 1458 DstVT = MVT::i32; 1459 } 1460 } else if (VT == MVT::i8) { 1461 if (ExtType == ISD::SEXTLOAD) { 1462 if (DstVT == MVT::i64) 1463 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; 1464 else 1465 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; 1466 } else { 1467 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; 1468 InsertTo64 = DstVT == MVT::i64; 1469 // The result of the load is only i32. It's the subreg_to_reg that makes 1470 // it into an i64. 
1471 DstVT = MVT::i32; 1472 } 1473 } else if (VT == MVT::f16) { 1474 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1475 } else if (VT == MVT::bf16) { 1476 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1477 } else if (VT == MVT::f32) { 1478 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; 1479 } else if (VT == MVT::f64 || VT.is64BitVector()) { 1480 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; 1481 } else if (VT.is128BitVector()) { 1482 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; 1483 } else 1484 return false; 1485 SDValue Chain = LD->getChain(); 1486 SDValue Base = LD->getBasePtr(); 1487 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); 1488 int OffsetVal = (int)OffsetOp->getZExtValue(); 1489 SDLoc dl(N); 1490 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); 1491 SDValue Ops[] = { Base, Offset, Chain }; 1492 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, 1493 MVT::Other, Ops); 1494 1495 // Transfer memoperands. 1496 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1497 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp}); 1498 1499 // Either way, we're replacing the node, so tell the caller that. 1500 SDValue LoadedVal = SDValue(Res, 1); 1501 if (InsertTo64) { 1502 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 1503 LoadedVal = 1504 SDValue(CurDAG->getMachineNode( 1505 AArch64::SUBREG_TO_REG, dl, MVT::i64, 1506 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, 1507 SubReg), 1508 0); 1509 } 1510 1511 ReplaceUses(SDValue(N, 0), LoadedVal); 1512 ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); 1513 ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); 1514 CurDAG->RemoveDeadNode(N); 1515 return true; 1516 } 1517 1518 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 1519 unsigned SubRegIdx) { 1520 SDLoc dl(N); 1521 EVT VT = N->getValueType(0); 1522 SDValue Chain = N->getOperand(0); 1523 1524 SDValue Ops[] = {N->getOperand(2), // Mem operand; 1525 Chain}; 1526 1527 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1528 1529 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1530 SDValue SuperReg = SDValue(Ld, 0); 1531 for (unsigned i = 0; i < NumVecs; ++i) 1532 ReplaceUses(SDValue(N, i), 1533 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1534 1535 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1536 1537 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one, 1538 // because it's too simple to have needed special treatment during lowering. 
1539 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) { 1540 MachineMemOperand *MemOp = MemIntr->getMemOperand(); 1541 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 1542 } 1543 1544 CurDAG->RemoveDeadNode(N); 1545 } 1546 1547 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, 1548 unsigned Opc, unsigned SubRegIdx) { 1549 SDLoc dl(N); 1550 EVT VT = N->getValueType(0); 1551 SDValue Chain = N->getOperand(0); 1552 1553 SDValue Ops[] = {N->getOperand(1), // Mem operand 1554 N->getOperand(2), // Incremental 1555 Chain}; 1556 1557 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1558 MVT::Untyped, MVT::Other}; 1559 1560 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1561 1562 // Update uses of write back register 1563 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1564 1565 // Update uses of vector list 1566 SDValue SuperReg = SDValue(Ld, 1); 1567 if (NumVecs == 1) 1568 ReplaceUses(SDValue(N, 0), SuperReg); 1569 else 1570 for (unsigned i = 0; i < NumVecs; ++i) 1571 ReplaceUses(SDValue(N, i), 1572 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1573 1574 // Update the chain 1575 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1576 CurDAG->RemoveDeadNode(N); 1577 } 1578 1579 /// Optimize \param OldBase and \param OldOffset selecting the best addressing 1580 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the 1581 /// new Base and an SDValue representing the new offset. 1582 std::tuple<unsigned, SDValue, SDValue> 1583 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, 1584 unsigned Opc_ri, 1585 const SDValue &OldBase, 1586 const SDValue &OldOffset, 1587 unsigned Scale) { 1588 SDValue NewBase = OldBase; 1589 SDValue NewOffset = OldOffset; 1590 // Detect a possible Reg+Imm addressing mode. 1591 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>( 1592 N, OldBase, NewBase, NewOffset); 1593 1594 // Detect a possible reg+reg addressing mode, but only if we haven't already 1595 // detected a Reg+Imm one. 1596 const bool IsRegReg = 1597 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); 1598 1599 // Select the instruction. 1600 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); 1601 } 1602 1603 enum class SelectTypeKind { 1604 Int1 = 0, 1605 Int = 1, 1606 FP = 2, 1607 AnyType = 3, 1608 }; 1609 1610 /// This function selects an opcode from a list of opcodes, which is 1611 /// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit } 1612 /// element types, in this order. 
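/// For example, with Kind == SelectTypeKind::Int and VT == nxv8i16 (eight
/// 16-bit elements), Opcodes[1] is returned; a non-matching element type or
/// an out-of-range index yields 0.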
1613 template <SelectTypeKind Kind> 1614 static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) { 1615 // Only match scalable vector VTs 1616 if (!VT.isScalableVector()) 1617 return 0; 1618 1619 EVT EltVT = VT.getVectorElementType(); 1620 switch (Kind) { 1621 case SelectTypeKind::AnyType: 1622 break; 1623 case SelectTypeKind::Int: 1624 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 && 1625 EltVT != MVT::i64) 1626 return 0; 1627 break; 1628 case SelectTypeKind::Int1: 1629 if (EltVT != MVT::i1) 1630 return 0; 1631 break; 1632 case SelectTypeKind::FP: 1633 if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64) 1634 return 0; 1635 break; 1636 } 1637 1638 unsigned Offset; 1639 switch (VT.getVectorMinNumElements()) { 1640 case 16: // 8-bit 1641 Offset = 0; 1642 break; 1643 case 8: // 16-bit 1644 Offset = 1; 1645 break; 1646 case 4: // 32-bit 1647 Offset = 2; 1648 break; 1649 case 2: // 64-bit 1650 Offset = 3; 1651 break; 1652 default: 1653 return 0; 1654 } 1655 1656 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset]; 1657 } 1658 1659 // This function is almost identical to SelectWhilePair, but has an 1660 // extra check on the range of the immediate operand. 1661 // TODO: Merge these two functions together at some point? 1662 void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) { 1663 // Immediate can be either 0 or 1. 1664 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2))) 1665 if (Imm->getZExtValue() > 1) 1666 return; 1667 1668 SDLoc DL(N); 1669 EVT VT = N->getValueType(0); 1670 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; 1671 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 1672 SDValue SuperReg = SDValue(WhilePair, 0); 1673 1674 for (unsigned I = 0; I < 2; ++I) 1675 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 1676 AArch64::psub0 + I, DL, VT, SuperReg)); 1677 1678 CurDAG->RemoveDeadNode(N); 1679 } 1680 1681 void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) { 1682 SDLoc DL(N); 1683 EVT VT = N->getValueType(0); 1684 1685 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; 1686 1687 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 1688 SDValue SuperReg = SDValue(WhilePair, 0); 1689 1690 for (unsigned I = 0; I < 2; ++I) 1691 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 1692 AArch64::psub0 + I, DL, VT, SuperReg)); 1693 1694 CurDAG->RemoveDeadNode(N); 1695 } 1696 1697 void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, 1698 unsigned Opcode) { 1699 EVT VT = N->getValueType(0); 1700 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1701 SDValue Ops = createZTuple(Regs); 1702 SDLoc DL(N); 1703 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops); 1704 SDValue SuperReg = SDValue(Intrinsic, 0); 1705 for (unsigned i = 0; i < NumVecs; ++i) 1706 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1707 AArch64::zsub0 + i, DL, VT, SuperReg)); 1708 1709 CurDAG->RemoveDeadNode(N); 1710 } 1711 1712 void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N, 1713 unsigned NumVecs, 1714 bool IsZmMulti, 1715 unsigned Opcode, 1716 bool HasPred) { 1717 assert(Opcode != 0 && "Unexpected opcode"); 1718 1719 SDLoc DL(N); 1720 EVT VT = N->getValueType(0); 1721 unsigned FirstVecIdx = HasPred ? 
2 : 1; 1722 1723 auto GetMultiVecOperand = [=](unsigned StartIdx) { 1724 SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx, 1725 N->op_begin() + StartIdx + NumVecs); 1726 return createZMulTuple(Regs); 1727 }; 1728 1729 SDValue Zdn = GetMultiVecOperand(FirstVecIdx); 1730 1731 SDValue Zm; 1732 if (IsZmMulti) 1733 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx); 1734 else 1735 Zm = N->getOperand(NumVecs + FirstVecIdx); 1736 1737 SDNode *Intrinsic; 1738 if (HasPred) 1739 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, 1740 N->getOperand(1), Zdn, Zm); 1741 else 1742 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm); 1743 SDValue SuperReg = SDValue(Intrinsic, 0); 1744 for (unsigned i = 0; i < NumVecs; ++i) 1745 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1746 AArch64::zsub0 + i, DL, VT, SuperReg)); 1747 1748 CurDAG->RemoveDeadNode(N); 1749 } 1750 1751 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, 1752 unsigned Scale, unsigned Opc_ri, 1753 unsigned Opc_rr, bool IsIntr) { 1754 assert(Scale < 4 && "Invalid scaling value."); 1755 SDLoc DL(N); 1756 EVT VT = N->getValueType(0); 1757 SDValue Chain = N->getOperand(0); 1758 1759 // Optimize addressing mode. 1760 SDValue Base, Offset; 1761 unsigned Opc; 1762 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1763 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2), 1764 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); 1765 1766 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate 1767 Base, // Memory operand 1768 Offset, Chain}; 1769 1770 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1771 1772 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1773 SDValue SuperReg = SDValue(Load, 0); 1774 for (unsigned i = 0; i < NumVecs; ++i) 1775 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1776 AArch64::zsub0 + i, DL, VT, SuperReg)); 1777 1778 // Copy chain 1779 unsigned ChainIdx = NumVecs; 1780 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1781 CurDAG->RemoveDeadNode(N); 1782 } 1783 1784 void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N, 1785 unsigned NumVecs, 1786 unsigned Scale, 1787 unsigned Opc_ri, 1788 unsigned Opc_rr) { 1789 assert(Scale < 4 && "Invalid scaling value."); 1790 SDLoc DL(N); 1791 EVT VT = N->getValueType(0); 1792 SDValue Chain = N->getOperand(0); 1793 1794 SDValue PNg = N->getOperand(2); 1795 SDValue Base = N->getOperand(3); 1796 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); 1797 unsigned Opc; 1798 std::tie(Opc, Base, Offset) = 1799 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale); 1800 1801 SDValue Ops[] = {PNg, // Predicate-as-counter 1802 Base, // Memory operand 1803 Offset, Chain}; 1804 1805 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1806 1807 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1808 SDValue SuperReg = SDValue(Load, 0); 1809 for (unsigned i = 0; i < NumVecs; ++i) 1810 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1811 AArch64::zsub0 + i, DL, VT, SuperReg)); 1812 1813 // Copy chain 1814 unsigned ChainIdx = NumVecs; 1815 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1816 CurDAG->RemoveDeadNode(N); 1817 } 1818 1819 void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, 1820 unsigned Opcode) { 1821 if (N->getValueType(0) != MVT::nxv4f32) 1822 return; 1823 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode); 1824 } 1825 1826 void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs, 1827 unsigned 
Op) { 1828 SDLoc DL(N); 1829 EVT VT = N->getValueType(0); 1830 1831 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1832 SDValue Zd = createZMulTuple(Regs); 1833 SDValue Zn = N->getOperand(1 + NumVecs); 1834 SDValue Zm = N->getOperand(2 + NumVecs); 1835 1836 SDValue Ops[] = {Zd, Zn, Zm}; 1837 1838 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops); 1839 SDValue SuperReg = SDValue(Intrinsic, 0); 1840 for (unsigned i = 0; i < NumVecs; ++i) 1841 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1842 AArch64::zsub0 + i, DL, VT, SuperReg)); 1843 1844 CurDAG->RemoveDeadNode(N); 1845 } 1846 1847 bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) { 1848 switch (BaseReg) { 1849 default: 1850 return false; 1851 case AArch64::ZA: 1852 case AArch64::ZAB0: 1853 if (TileNum == 0) 1854 break; 1855 return false; 1856 case AArch64::ZAH0: 1857 if (TileNum <= 1) 1858 break; 1859 return false; 1860 case AArch64::ZAS0: 1861 if (TileNum <= 3) 1862 break; 1863 return false; 1864 case AArch64::ZAD0: 1865 if (TileNum <= 7) 1866 break; 1867 return false; 1868 } 1869 1870 BaseReg += TileNum; 1871 return true; 1872 } 1873 1874 template <unsigned MaxIdx, unsigned Scale> 1875 void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, 1876 unsigned BaseReg, unsigned Op) { 1877 unsigned TileNum = 0; 1878 if (BaseReg != AArch64::ZA) 1879 TileNum = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 1880 1881 if (!SelectSMETile(BaseReg, TileNum)) 1882 return; 1883 1884 SDValue SliceBase, Base, Offset; 1885 if (BaseReg == AArch64::ZA) 1886 SliceBase = N->getOperand(2); 1887 else 1888 SliceBase = N->getOperand(3); 1889 1890 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale)) 1891 return; 1892 1893 SDLoc DL(N); 1894 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other); 1895 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)}; 1896 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops); 1897 1898 EVT VT = N->getValueType(0); 1899 for (unsigned I = 0; I < NumVecs; ++I) 1900 ReplaceUses(SDValue(N, I), 1901 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT, 1902 SDValue(Mov, 0))); 1903 // Copy chain 1904 unsigned ChainIdx = NumVecs; 1905 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1)); 1906 CurDAG->RemoveDeadNode(N); 1907 } 1908 1909 void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N, 1910 unsigned NumOutVecs, 1911 bool IsTupleInput, 1912 unsigned Opc) { 1913 SDLoc DL(N); 1914 EVT VT = N->getValueType(0); 1915 unsigned NumInVecs = N->getNumOperands() - 1; 1916 1917 SmallVector<SDValue, 6> Ops; 1918 if (IsTupleInput) { 1919 assert((NumInVecs == 2 || NumInVecs == 4) && 1920 "Don't know how to handle multi-register input!"); 1921 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, 1922 N->op_begin() + 1 + NumInVecs); 1923 Ops.push_back(createZMulTuple(Regs)); 1924 } else { 1925 // All intrinsic nodes have the ID as the first operand, hence the "1 + I". 
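// e.g. a two-input intrinsic is laid out as (ID, Zn0, Zn1), so the vector
// sources are operands 1 and 2 and are passed through unchanged.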
1926 for (unsigned I = 0; I < NumInVecs; I++) 1927 Ops.push_back(N->getOperand(1 + I)); 1928 } 1929 1930 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 1931 SDValue SuperReg = SDValue(Res, 0); 1932 1933 for (unsigned I = 0; I < NumOutVecs; I++) 1934 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 1935 AArch64::zsub0 + I, DL, VT, SuperReg)); 1936 CurDAG->RemoveDeadNode(N); 1937 } 1938 1939 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, 1940 unsigned Opc) { 1941 SDLoc dl(N); 1942 EVT VT = N->getOperand(2)->getValueType(0); 1943 1944 // Form a REG_SEQUENCE to force register allocation. 1945 bool Is128Bit = VT.getSizeInBits() == 128; 1946 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1947 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1948 1949 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; 1950 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1951 1952 // Transfer memoperands. 1953 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1954 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1955 1956 ReplaceNode(N, St); 1957 } 1958 1959 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, 1960 unsigned Scale, unsigned Opc_rr, 1961 unsigned Opc_ri) { 1962 SDLoc dl(N); 1963 1964 // Form a REG_SEQUENCE to force register allocation. 1965 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1966 SDValue RegSeq = createZTuple(Regs); 1967 1968 // Optimize addressing mode. 1969 unsigned Opc; 1970 SDValue Offset, Base; 1971 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1972 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), 1973 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); 1974 1975 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate 1976 Base, // address 1977 Offset, // offset 1978 N->getOperand(0)}; // chain 1979 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1980 1981 ReplaceNode(N, St); 1982 } 1983 1984 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, 1985 SDValue &OffImm) { 1986 SDLoc dl(N); 1987 const DataLayout &DL = CurDAG->getDataLayout(); 1988 const TargetLowering *TLI = getTargetLowering(); 1989 1990 // Try to match it for the frame address 1991 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) { 1992 int FI = FINode->getIndex(); 1993 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1994 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1995 return true; 1996 } 1997 1998 return false; 1999 } 2000 2001 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 2002 unsigned Opc) { 2003 SDLoc dl(N); 2004 EVT VT = N->getOperand(2)->getValueType(0); 2005 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2006 MVT::Other}; // Type for the Chain 2007 2008 // Form a REG_SEQUENCE to force register allocation. 2009 bool Is128Bit = VT.getSizeInBits() == 128; 2010 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2011 SDValue RegSeq = Is128Bit ? 
createQTuple(Regs) : createDTuple(Regs); 2012 2013 SDValue Ops[] = {RegSeq, 2014 N->getOperand(NumVecs + 1), // base register 2015 N->getOperand(NumVecs + 2), // Incremental 2016 N->getOperand(0)}; // Chain 2017 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2018 2019 ReplaceNode(N, St); 2020 } 2021 2022 namespace { 2023 /// WidenVector - Given a value in the V64 register class, produce the 2024 /// equivalent value in the V128 register class. 2025 class WidenVector { 2026 SelectionDAG &DAG; 2027 2028 public: 2029 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 2030 2031 SDValue operator()(SDValue V64Reg) { 2032 EVT VT = V64Reg.getValueType(); 2033 unsigned NarrowSize = VT.getVectorNumElements(); 2034 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 2035 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 2036 SDLoc DL(V64Reg); 2037 2038 SDValue Undef = 2039 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 2040 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 2041 } 2042 }; 2043 } // namespace 2044 2045 /// NarrowVector - Given a value in the V128 register class, produce the 2046 /// equivalent value in the V64 register class. 2047 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 2048 EVT VT = V128Reg.getValueType(); 2049 unsigned WideSize = VT.getVectorNumElements(); 2050 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 2051 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 2052 2053 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 2054 V128Reg); 2055 } 2056 2057 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 2058 unsigned Opc) { 2059 SDLoc dl(N); 2060 EVT VT = N->getValueType(0); 2061 bool Narrow = VT.getSizeInBits() == 64; 2062 2063 // Form a REG_SEQUENCE to force register allocation. 2064 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2065 2066 if (Narrow) 2067 transform(Regs, Regs.begin(), 2068 WidenVector(*CurDAG)); 2069 2070 SDValue RegSeq = createQTuple(Regs); 2071 2072 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 2073 2074 unsigned LaneNo = 2075 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 2076 2077 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2078 N->getOperand(NumVecs + 3), N->getOperand(0)}; 2079 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2080 SDValue SuperReg = SDValue(Ld, 0); 2081 2082 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 2083 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 2084 AArch64::qsub2, AArch64::qsub3 }; 2085 for (unsigned i = 0; i < NumVecs; ++i) { 2086 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 2087 if (Narrow) 2088 NV = NarrowVector(NV, *CurDAG); 2089 ReplaceUses(SDValue(N, i), NV); 2090 } 2091 2092 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 2093 CurDAG->RemoveDeadNode(N); 2094 } 2095 2096 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 2097 unsigned Opc) { 2098 SDLoc dl(N); 2099 EVT VT = N->getValueType(0); 2100 bool Narrow = VT.getSizeInBits() == 64; 2101 2102 // Form a REG_SEQUENCE to force register allocation. 
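// The post-indexed LDn lane instructions operate on consecutive 128-bit
// registers, so any 64-bit inputs are widened to Q registers and, for the
// multi-vector forms, packed into a QQ/QQQ/QQQQ tuple.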
2103 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2104 2105 if (Narrow) 2106 transform(Regs, Regs.begin(), 2107 WidenVector(*CurDAG)); 2108 2109 SDValue RegSeq = createQTuple(Regs); 2110 2111 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2112 RegSeq->getValueType(0), MVT::Other}; 2113 2114 unsigned LaneNo = 2115 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 2116 2117 SDValue Ops[] = {RegSeq, 2118 CurDAG->getTargetConstant(LaneNo, dl, 2119 MVT::i64), // Lane Number 2120 N->getOperand(NumVecs + 2), // Base register 2121 N->getOperand(NumVecs + 3), // Incremental 2122 N->getOperand(0)}; 2123 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2124 2125 // Update uses of the write back register 2126 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 2127 2128 // Update uses of the vector list 2129 SDValue SuperReg = SDValue(Ld, 1); 2130 if (NumVecs == 1) { 2131 ReplaceUses(SDValue(N, 0), 2132 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 2133 } else { 2134 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 2135 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 2136 AArch64::qsub2, AArch64::qsub3 }; 2137 for (unsigned i = 0; i < NumVecs; ++i) { 2138 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 2139 SuperReg); 2140 if (Narrow) 2141 NV = NarrowVector(NV, *CurDAG); 2142 ReplaceUses(SDValue(N, i), NV); 2143 } 2144 } 2145 2146 // Update the Chain 2147 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 2148 CurDAG->RemoveDeadNode(N); 2149 } 2150 2151 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 2152 unsigned Opc) { 2153 SDLoc dl(N); 2154 EVT VT = N->getOperand(2)->getValueType(0); 2155 bool Narrow = VT.getSizeInBits() == 64; 2156 2157 // Form a REG_SEQUENCE to force register allocation. 2158 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2159 2160 if (Narrow) 2161 transform(Regs, Regs.begin(), 2162 WidenVector(*CurDAG)); 2163 2164 SDValue RegSeq = createQTuple(Regs); 2165 2166 unsigned LaneNo = 2167 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 2168 2169 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2170 N->getOperand(NumVecs + 3), N->getOperand(0)}; 2171 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 2172 2173 // Transfer memoperands. 2174 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2175 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2176 2177 ReplaceNode(N, St); 2178 } 2179 2180 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 2181 unsigned Opc) { 2182 SDLoc dl(N); 2183 EVT VT = N->getOperand(2)->getValueType(0); 2184 bool Narrow = VT.getSizeInBits() == 64; 2185 2186 // Form a REG_SEQUENCE to force register allocation. 
2187 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2188 2189 if (Narrow) 2190 transform(Regs, Regs.begin(), 2191 WidenVector(*CurDAG)); 2192 2193 SDValue RegSeq = createQTuple(Regs); 2194 2195 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2196 MVT::Other}; 2197 2198 unsigned LaneNo = 2199 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 2200 2201 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2202 N->getOperand(NumVecs + 2), // Base Register 2203 N->getOperand(NumVecs + 3), // Incremental 2204 N->getOperand(0)}; 2205 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2206 2207 // Transfer memoperands. 2208 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2209 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2210 2211 ReplaceNode(N, St); 2212 } 2213 2214 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 2215 unsigned &Opc, SDValue &Opd0, 2216 unsigned &LSB, unsigned &MSB, 2217 unsigned NumberOfIgnoredLowBits, 2218 bool BiggerPattern) { 2219 assert(N->getOpcode() == ISD::AND && 2220 "N must be a AND operation to call this function"); 2221 2222 EVT VT = N->getValueType(0); 2223 2224 // Here we can test the type of VT and return false when the type does not 2225 // match, but since it is done prior to that call in the current context 2226 // we turned that into an assert to avoid redundant code. 2227 assert((VT == MVT::i32 || VT == MVT::i64) && 2228 "Type checking must have been done before calling this function"); 2229 2230 // FIXME: simplify-demanded-bits in DAGCombine will probably have 2231 // changed the AND node to a 32-bit mask operation. We'll have to 2232 // undo that as part of the transform here if we want to catch all 2233 // the opportunities. 2234 // Currently the NumberOfIgnoredLowBits argument helps to recover 2235 // from these situations when matching bigger pattern (bitfield insert). 2236 2237 // For unsigned extracts, check for a shift right and mask 2238 uint64_t AndImm = 0; 2239 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) 2240 return false; 2241 2242 const SDNode *Op0 = N->getOperand(0).getNode(); 2243 2244 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 2245 // simplified. Try to undo that 2246 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 2247 2248 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 2249 if (AndImm & (AndImm + 1)) 2250 return false; 2251 2252 bool ClampMSB = false; 2253 uint64_t SrlImm = 0; 2254 // Handle the SRL + ANY_EXTEND case. 2255 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 2256 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 2257 // Extend the incoming operand of the SRL to 64-bit. 2258 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 2259 // Make sure to clamp the MSB so that we preserve the semantics of the 2260 // original operations. 2261 ClampMSB = true; 2262 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 2263 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 2264 SrlImm)) { 2265 // If the shift result was truncated, we can still combine them. 2266 Opd0 = Op0->getOperand(0).getOperand(0); 2267 2268 // Use the type of SRL node. 
2269 VT = Opd0->getValueType(0); 2270 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 2271 Opd0 = Op0->getOperand(0); 2272 ClampMSB = (VT == MVT::i32); 2273 } else if (BiggerPattern) { 2274 // Let's pretend a 0 shift right has been performed. 2275 // The resulting code will be at least as good as the original one 2276 // plus it may expose more opportunities for bitfield insert pattern. 2277 // FIXME: Currently we limit this to the bigger pattern, because 2278 // some optimizations expect AND and not UBFM. 2279 Opd0 = N->getOperand(0); 2280 } else 2281 return false; 2282 2283 // Bail out on large immediates. This happens when no proper 2284 // combining/constant folding was performed. 2285 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { 2286 LLVM_DEBUG( 2287 (dbgs() << N 2288 << ": Found large shift immediate, this should not happen\n")); 2289 return false; 2290 } 2291 2292 LSB = SrlImm; 2293 MSB = SrlImm + 2294 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm) 2295 : llvm::countr_one<uint64_t>(AndImm)) - 2296 1; 2297 if (ClampMSB) 2298 // Since we're moving the extend before the right shift operation, we need 2299 // to clamp the MSB to make sure we don't shift in undefined bits instead of 2300 // the zeros which would get shifted in with the original right shift 2301 // operation. 2302 MSB = MSB > 31 ? 31 : MSB; 2303 2304 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2305 return true; 2306 } 2307 2308 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, 2309 SDValue &Opd0, unsigned &Immr, 2310 unsigned &Imms) { 2311 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); 2312 2313 EVT VT = N->getValueType(0); 2314 unsigned BitWidth = VT.getSizeInBits(); 2315 assert((VT == MVT::i32 || VT == MVT::i64) && 2316 "Type checking must have been done before calling this function"); 2317 2318 SDValue Op = N->getOperand(0); 2319 if (Op->getOpcode() == ISD::TRUNCATE) { 2320 Op = Op->getOperand(0); 2321 VT = Op->getValueType(0); 2322 BitWidth = VT.getSizeInBits(); 2323 } 2324 2325 uint64_t ShiftImm; 2326 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && 2327 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2328 return false; 2329 2330 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 2331 if (ShiftImm + Width > BitWidth) 2332 return false; 2333 2334 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri; 2335 Opd0 = Op.getOperand(0); 2336 Immr = ShiftImm; 2337 Imms = ShiftImm + Width - 1; 2338 return true; 2339 } 2340 2341 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 2342 SDValue &Opd0, unsigned &LSB, 2343 unsigned &MSB) { 2344 // We are looking for the following pattern which basically extracts several 2345 // continuous bits from the source value and places it from the LSB of the 2346 // destination value, all other bits of the destination value or set to zero: 2347 // 2348 // Value2 = AND Value, MaskImm 2349 // SRL Value2, ShiftImm 2350 // 2351 // with MaskImm >> ShiftImm to search for the bit width. 
2352 // 2353 // This gets selected into a single UBFM: 2354 // 2355 // UBFM Value, ShiftImm, Log2_64(MaskImm) 2356 // 2357 2358 if (N->getOpcode() != ISD::SRL) 2359 return false; 2360 2361 uint64_t AndMask = 0; 2362 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) 2363 return false; 2364 2365 Opd0 = N->getOperand(0).getOperand(0); 2366 2367 uint64_t SrlImm = 0; 2368 if (!isIntImmediate(N->getOperand(1), SrlImm)) 2369 return false; 2370 2371 // Check whether we really have several bits extract here. 2372 if (!isMask_64(AndMask >> SrlImm)) 2373 return false; 2374 2375 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2376 LSB = SrlImm; 2377 MSB = llvm::Log2_64(AndMask); 2378 return true; 2379 } 2380 2381 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 2382 unsigned &Immr, unsigned &Imms, 2383 bool BiggerPattern) { 2384 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 2385 "N must be a SHR/SRA operation to call this function"); 2386 2387 EVT VT = N->getValueType(0); 2388 2389 // Here we can test the type of VT and return false when the type does not 2390 // match, but since it is done prior to that call in the current context 2391 // we turned that into an assert to avoid redundant code. 2392 assert((VT == MVT::i32 || VT == MVT::i64) && 2393 "Type checking must have been done before calling this function"); 2394 2395 // Check for AND + SRL doing several bits extract. 2396 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 2397 return true; 2398 2399 // We're looking for a shift of a shift. 2400 uint64_t ShlImm = 0; 2401 uint64_t TruncBits = 0; 2402 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 2403 Opd0 = N->getOperand(0).getOperand(0); 2404 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 2405 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 2406 // We are looking for a shift of truncate. Truncate from i64 to i32 could 2407 // be considered as setting high 32 bits as zero. Our strategy here is to 2408 // always generate 64bit UBFM. This consistency will help the CSE pass 2409 // later find more redundancy. 2410 Opd0 = N->getOperand(0).getOperand(0); 2411 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 2412 VT = Opd0.getValueType(); 2413 assert(VT == MVT::i64 && "the promoted type should be i64"); 2414 } else if (BiggerPattern) { 2415 // Let's pretend a 0 shift left has been performed. 2416 // FIXME: Currently we limit this to the bigger pattern case, 2417 // because some optimizations expect AND and not UBFM 2418 Opd0 = N->getOperand(0); 2419 } else 2420 return false; 2421 2422 // Missing combines/constant folding may have left us with strange 2423 // constants. 2424 if (ShlImm >= VT.getSizeInBits()) { 2425 LLVM_DEBUG( 2426 (dbgs() << N 2427 << ": Found large shift immediate, this should not happen\n")); 2428 return false; 2429 } 2430 2431 uint64_t SrlImm = 0; 2432 if (!isIntImmediate(N->getOperand(1), SrlImm)) 2433 return false; 2434 2435 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 2436 "bad amount in shift node!"); 2437 int immr = SrlImm - ShlImm; 2438 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 2439 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 2440 // SRA requires a signed extraction 2441 if (VT == MVT::i32) 2442 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 2443 else 2444 Opc = N->getOpcode() == ISD::SRA ? 
AArch64::SBFMXri : AArch64::UBFMXri; 2445 return true; 2446 } 2447 2448 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 2449 assert(N->getOpcode() == ISD::SIGN_EXTEND); 2450 2451 EVT VT = N->getValueType(0); 2452 EVT NarrowVT = N->getOperand(0)->getValueType(0); 2453 if (VT != MVT::i64 || NarrowVT != MVT::i32) 2454 return false; 2455 2456 uint64_t ShiftImm; 2457 SDValue Op = N->getOperand(0); 2458 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2459 return false; 2460 2461 SDLoc dl(N); 2462 // Extend the incoming operand of the shift to 64-bits. 2463 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 2464 unsigned Immr = ShiftImm; 2465 unsigned Imms = NarrowVT.getSizeInBits() - 1; 2466 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2467 CurDAG->getTargetConstant(Imms, dl, VT)}; 2468 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 2469 return true; 2470 } 2471 2472 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 2473 SDValue &Opd0, unsigned &Immr, unsigned &Imms, 2474 unsigned NumberOfIgnoredLowBits = 0, 2475 bool BiggerPattern = false) { 2476 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 2477 return false; 2478 2479 switch (N->getOpcode()) { 2480 default: 2481 if (!N->isMachineOpcode()) 2482 return false; 2483 break; 2484 case ISD::AND: 2485 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, 2486 NumberOfIgnoredLowBits, BiggerPattern); 2487 case ISD::SRL: 2488 case ISD::SRA: 2489 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); 2490 2491 case ISD::SIGN_EXTEND_INREG: 2492 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); 2493 } 2494 2495 unsigned NOpc = N->getMachineOpcode(); 2496 switch (NOpc) { 2497 default: 2498 return false; 2499 case AArch64::SBFMWri: 2500 case AArch64::UBFMWri: 2501 case AArch64::SBFMXri: 2502 case AArch64::UBFMXri: 2503 Opc = NOpc; 2504 Opd0 = N->getOperand(0); 2505 Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 2506 Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 2507 return true; 2508 } 2509 // Unreachable 2510 return false; 2511 } 2512 2513 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { 2514 unsigned Opc, Immr, Imms; 2515 SDValue Opd0; 2516 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) 2517 return false; 2518 2519 EVT VT = N->getValueType(0); 2520 SDLoc dl(N); 2521 2522 // If the bit extract operation is 64bit but the original type is 32bit, we 2523 // need to add one EXTRACT_SUBREG. 2524 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { 2525 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), 2526 CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; 2527 2528 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); 2529 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, 2530 MVT::i32, SDValue(BFM, 0)); 2531 ReplaceNode(N, Inner.getNode()); 2532 return true; 2533 } 2534 2535 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2536 CurDAG->getTargetConstant(Imms, dl, VT)}; 2537 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2538 return true; 2539 } 2540 2541 /// Does DstMask form a complementary pair with the mask provided by 2542 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, 2543 /// this asks whether DstMask zeroes precisely those bits that will be set by 2544 /// the other half. 
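/// For example, for i32 a DstMask of 0xffff00ff paired with inserted bits
/// confined to 0x0000ff00 qualifies: the mask zeroes exactly the byte the
/// other half may populate.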
2545 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2546 unsigned NumberOfIgnoredHighBits, EVT VT) {
2547 assert((VT == MVT::i32 || VT == MVT::i64) &&
2548 "i32 or i64 mask type expected!");
2549 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2550
2551 APInt SignificantDstMask = APInt(BitWidth, DstMask);
2552 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2553
2554 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2555 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2556 }
2557
2558 // Look for bits that will be useful for later uses.
2559 // A bit is considered useless as soon as it is dropped and never used
2560 // before it has been dropped.
2561 // E.g., looking for the useful bits of x:
2562 // 1. y = x & 0x7
2563 // 2. z = y >> 2
2564 // After #1, the useful bits of x are 0x7; they then live on through
2565 // y.
2566 // After #2, the useful bits of x are 0x4.
2567 // However, if x is used by an unpredictable instruction, then all its bits
2568 // are useful.
2569 // E.g.
2570 // 1. y = x & 0x7
2571 // 2. z = y >> 2
2572 // 3. str x, [@x]
2573 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2574
2575 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2576 unsigned Depth) {
2577 uint64_t Imm =
2578 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2579 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2580 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2581 getUsefulBits(Op, UsefulBits, Depth + 1);
2582 }
2583
2584 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2585 uint64_t Imm, uint64_t MSB,
2586 unsigned Depth) {
2587 // Inherit the bit width from UsefulBits.
2588 APInt OpUsefulBits(UsefulBits);
2589 OpUsefulBits = 1;
2590
2591 if (MSB >= Imm) {
2592 OpUsefulBits <<= MSB - Imm + 1;
2593 --OpUsefulBits;
2594 // The interesting part will be in the lower part of the result
2595 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2596 // The interesting part was starting at Imm in the argument
2597 OpUsefulBits <<= Imm;
2598 } else {
2599 OpUsefulBits <<= MSB + 1;
2600 --OpUsefulBits;
2601 // The interesting part will be shifted in the result
2602 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2603 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2604 // The interesting part was at zero in the argument
2605 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2606 }
2607
2608 UsefulBits &= OpUsefulBits;
2609 }
2610
2611 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2612 unsigned Depth) {
2613 uint64_t Imm =
2614 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2615 uint64_t MSB =
2616 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2617
2618 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2619 }
2620
2621 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2622 unsigned Depth) {
2623 uint64_t ShiftTypeAndValue =
2624 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2625 APInt Mask(UsefulBits);
2626 Mask.clearAllBits();
2627 Mask.flipAllBits();
2628
2629 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2630 // Shift Left
2631 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2632 Mask <<= ShiftAmt;
2633 getUsefulBits(Op, Mask, Depth + 1);
2634 Mask.lshrInPlace(ShiftAmt);
2635 } else if
(AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 2636 // Shift Right 2637 // We do not handle AArch64_AM::ASR, because the sign will change the 2638 // number of useful bits 2639 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2640 Mask.lshrInPlace(ShiftAmt); 2641 getUsefulBits(Op, Mask, Depth + 1); 2642 Mask <<= ShiftAmt; 2643 } else 2644 return; 2645 2646 UsefulBits &= Mask; 2647 } 2648 2649 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 2650 unsigned Depth) { 2651 uint64_t Imm = 2652 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2653 uint64_t MSB = 2654 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 2655 2656 APInt OpUsefulBits(UsefulBits); 2657 OpUsefulBits = 1; 2658 2659 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 2660 ResultUsefulBits.flipAllBits(); 2661 APInt Mask(UsefulBits.getBitWidth(), 0); 2662 2663 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 2664 2665 if (MSB >= Imm) { 2666 // The instruction is a BFXIL. 2667 uint64_t Width = MSB - Imm + 1; 2668 uint64_t LSB = Imm; 2669 2670 OpUsefulBits <<= Width; 2671 --OpUsefulBits; 2672 2673 if (Op.getOperand(1) == Orig) { 2674 // Copy the low bits from the result to bits starting from LSB. 2675 Mask = ResultUsefulBits & OpUsefulBits; 2676 Mask <<= LSB; 2677 } 2678 2679 if (Op.getOperand(0) == Orig) 2680 // Bits starting from LSB in the input contribute to the result. 2681 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2682 } else { 2683 // The instruction is a BFI. 2684 uint64_t Width = MSB + 1; 2685 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 2686 2687 OpUsefulBits <<= Width; 2688 --OpUsefulBits; 2689 OpUsefulBits <<= LSB; 2690 2691 if (Op.getOperand(1) == Orig) { 2692 // Copy the bits from the result to the zero bits. 2693 Mask = ResultUsefulBits & OpUsefulBits; 2694 Mask.lshrInPlace(LSB); 2695 } 2696 2697 if (Op.getOperand(0) == Orig) 2698 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2699 } 2700 2701 UsefulBits &= Mask; 2702 } 2703 2704 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2705 SDValue Orig, unsigned Depth) { 2706 2707 // Users of this node should have already been instruction selected 2708 // FIXME: Can we turn that into an assert? 
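// If the user has not been selected yet, or is an opcode not modelled in the
// switch below, we conservatively treat it as consuming every bit.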
2709 if (!UserNode->isMachineOpcode()) 2710 return; 2711 2712 switch (UserNode->getMachineOpcode()) { 2713 default: 2714 return; 2715 case AArch64::ANDSWri: 2716 case AArch64::ANDSXri: 2717 case AArch64::ANDWri: 2718 case AArch64::ANDXri: 2719 // We increment Depth only when we call the getUsefulBits 2720 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2721 Depth); 2722 case AArch64::UBFMWri: 2723 case AArch64::UBFMXri: 2724 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2725 2726 case AArch64::ORRWrs: 2727 case AArch64::ORRXrs: 2728 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig) 2729 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2730 Depth); 2731 return; 2732 case AArch64::BFMWri: 2733 case AArch64::BFMXri: 2734 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2735 2736 case AArch64::STRBBui: 2737 case AArch64::STURBBi: 2738 if (UserNode->getOperand(0) != Orig) 2739 return; 2740 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); 2741 return; 2742 2743 case AArch64::STRHHui: 2744 case AArch64::STURHHi: 2745 if (UserNode->getOperand(0) != Orig) 2746 return; 2747 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2748 return; 2749 } 2750 } 2751 2752 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2753 if (Depth >= SelectionDAG::MaxRecursionDepth) 2754 return; 2755 // Initialize UsefulBits 2756 if (!Depth) { 2757 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2758 // At the beginning, assume every produced bits is useful 2759 UsefulBits = APInt(Bitwidth, 0); 2760 UsefulBits.flipAllBits(); 2761 } 2762 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2763 2764 for (SDNode *Node : Op.getNode()->uses()) { 2765 // A use cannot produce useful bits 2766 APInt UsefulBitsForUse = APInt(UsefulBits); 2767 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 2768 UsersUsefulBits |= UsefulBitsForUse; 2769 } 2770 // UsefulBits contains the produced bits that are meaningful for the 2771 // current definition, thus a user cannot make a bit meaningful at 2772 // this point 2773 UsefulBits &= UsersUsefulBits; 2774 } 2775 2776 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 2777 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 2778 /// 0, return Op unchanged. 2779 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 2780 if (ShlAmount == 0) 2781 return Op; 2782 2783 EVT VT = Op.getValueType(); 2784 SDLoc dl(Op); 2785 unsigned BitWidth = VT.getSizeInBits(); 2786 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2787 2788 SDNode *ShiftNode; 2789 if (ShlAmount > 0) { 2790 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 2791 ShiftNode = CurDAG->getMachineNode( 2792 UBFMOpc, dl, VT, Op, 2793 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 2794 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 2795 } else { 2796 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 2797 assert(ShlAmount < 0 && "expected right shift"); 2798 int ShrAmount = -ShlAmount; 2799 ShiftNode = CurDAG->getMachineNode( 2800 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 2801 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 2802 } 2803 2804 return SDValue(ShiftNode, 0); 2805 } 2806 2807 // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)". 
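// e.g. (and (shl x, 3), 0x78) places bits [3:0] of x at bit 3 and is
// equivalent to "UBFIZ dst, x, #3, #4".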
2808 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, 2809 bool BiggerPattern, 2810 const uint64_t NonZeroBits, 2811 SDValue &Src, int &DstLSB, 2812 int &Width); 2813 2814 // For bit-field-positioning pattern "shl VAL, N)". 2815 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, 2816 bool BiggerPattern, 2817 const uint64_t NonZeroBits, 2818 SDValue &Src, int &DstLSB, 2819 int &Width); 2820 2821 /// Does this tree qualify as an attempt to move a bitfield into position, 2822 /// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N). 2823 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 2824 bool BiggerPattern, SDValue &Src, 2825 int &DstLSB, int &Width) { 2826 EVT VT = Op.getValueType(); 2827 unsigned BitWidth = VT.getSizeInBits(); 2828 (void)BitWidth; 2829 assert(BitWidth == 32 || BitWidth == 64); 2830 2831 KnownBits Known = CurDAG->computeKnownBits(Op); 2832 2833 // Non-zero in the sense that they're not provably zero, which is the key 2834 // point if we want to use this value 2835 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 2836 if (!isShiftedMask_64(NonZeroBits)) 2837 return false; 2838 2839 switch (Op.getOpcode()) { 2840 default: 2841 break; 2842 case ISD::AND: 2843 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern, 2844 NonZeroBits, Src, DstLSB, Width); 2845 case ISD::SHL: 2846 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern, 2847 NonZeroBits, Src, DstLSB, Width); 2848 } 2849 2850 return false; 2851 } 2852 2853 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, 2854 bool BiggerPattern, 2855 const uint64_t NonZeroBits, 2856 SDValue &Src, int &DstLSB, 2857 int &Width) { 2858 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); 2859 2860 EVT VT = Op.getValueType(); 2861 assert((VT == MVT::i32 || VT == MVT::i64) && 2862 "Caller guarantees VT is one of i32 or i64"); 2863 (void)VT; 2864 2865 uint64_t AndImm; 2866 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) 2867 return false; 2868 2869 // If (~AndImm & NonZeroBits) is not zero at POS, we know that 2870 // 1) (AndImm & (1 << POS) == 0) 2871 // 2) the result of AND is not zero at POS bit (according to NonZeroBits) 2872 // 2873 // 1) and 2) don't agree so something must be wrong (e.g., in 2874 // 'SelectionDAG::computeKnownBits') 2875 assert((~AndImm & NonZeroBits) == 0 && 2876 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)"); 2877 2878 SDValue AndOp0 = Op.getOperand(0); 2879 2880 uint64_t ShlImm; 2881 SDValue ShlOp0; 2882 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) { 2883 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'. 2884 ShlOp0 = AndOp0.getOperand(0); 2885 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND && 2886 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL, 2887 ShlImm)) { 2888 // For pattern "and(any_extend(shl(val, N)), shifted-mask)" 2889 2890 // ShlVal == shl(val, N), which is a left shift on a smaller type. 2891 SDValue ShlVal = AndOp0.getOperand(0); 2892 2893 // Since this is after type legalization and ShlVal is extended to MVT::i64, 2894 // expect VT to be MVT::i32. 2895 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32."); 2896 2897 // Widens 'val' to MVT::i64 as the source of bit field positioning. 
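// e.g. for (and (any_extend (shl (i32 val), N)), shifted-mask) the shift was
// performed on the narrow type, so 'val' is widened here and the positioning
// is later expressed on the 64-bit source.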
2898 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
2899 } else
2900 return false;
2901
2902 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
2903 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
2904 // AndOp0+AND.
2905 if (!BiggerPattern && !AndOp0.hasOneUse())
2906 return false;
2907
2908 DstLSB = llvm::countr_zero(NonZeroBits);
2909 Width = llvm::countr_one(NonZeroBits >> DstLSB);
2910
2911 // Bail out on large Width. This happens when no proper combining / constant
2912 // folding was performed.
2913 if (Width >= (int)VT.getSizeInBits()) {
2914 // If VT is i64, Width > 64 is not sensible since NonZeroBits is uint64_t, and
2915 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
2916 // "val".
2917 // If VT is i32, what Width >= 32 means:
2918 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
2919 // demands at least 'Width' bits (after dag-combiner). This together with
2920 // `any_extend` Op (undefined higher bits) indicates a missed combination
2921 // when lowering the 'and' IR instruction to a machine IR instruction.
2922 LLVM_DEBUG(
2923 dbgs()
2924 << "Found large Width in bit-field-positioning -- this indicates no "
2925 "proper combining / constant folding was performed\n");
2926 return false;
2927 }
2928
2929 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2930 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2931 // amount. BiggerPattern is true when this pattern is being matched for BFI,
2932 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2933 // which case it is not profitable to insert an extra shift.
2934 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
2935 return false;
2936
2937 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
2938 return true;
2939 }
2940
2941 // For the node (shl (and val, mask), N), returns true if the node is
2942 // equivalent to UBFIZ.
2943 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
2944 SDValue &Src, int &DstLSB,
2945 int &Width) {
2946 // The caller should have verified that Op is a left shift by a constant
2947 // amount; the asserts below check that.
2948 assert(Op.getOpcode() == ISD::SHL &&
2949 "Op.getNode() should be a SHL node to call this function");
2950 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
2951 "Op.getNode() should shift ShlImm to call this function");
2952
2953 uint64_t AndImm = 0;
2954 SDValue Op0 = Op.getOperand(0);
2955 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
2956 return false;
2957
2958 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
2959 if (isMask_64(ShiftedAndImm)) {
2960 // The low (64 - ShlImm) bits of AndImm must form a run of trailing ones;
2961 // any higher bits of AndImm are irrelevant because they correspond to bits
2962 // of the AND result that the left shift discards.
2963 //
2964 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
2965 // the bits of the AND result corresponding to {x,y,z} are shifted out, so
2966 // it is fine not to extract them.
2967 Width = llvm::countr_one(ShiftedAndImm); 2968 DstLSB = ShlImm; 2969 Src = Op0.getOperand(0); 2970 return true; 2971 } 2972 return false; 2973 } 2974 2975 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, 2976 bool BiggerPattern, 2977 const uint64_t NonZeroBits, 2978 SDValue &Src, int &DstLSB, 2979 int &Width) { 2980 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); 2981 2982 EVT VT = Op.getValueType(); 2983 assert((VT == MVT::i32 || VT == MVT::i64) && 2984 "Caller guarantees that type is i32 or i64"); 2985 (void)VT; 2986 2987 uint64_t ShlImm; 2988 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 2989 return false; 2990 2991 if (!BiggerPattern && !Op.hasOneUse()) 2992 return false; 2993 2994 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width)) 2995 return true; 2996 2997 DstLSB = llvm::countr_zero(NonZeroBits); 2998 Width = llvm::countr_one(NonZeroBits >> DstLSB); 2999 3000 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern) 3001 return false; 3002 3003 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB); 3004 return true; 3005 } 3006 3007 static bool isShiftedMask(uint64_t Mask, EVT VT) { 3008 assert(VT == MVT::i32 || VT == MVT::i64); 3009 if (VT == MVT::i32) 3010 return isShiftedMask_32(Mask); 3011 return isShiftedMask_64(Mask); 3012 } 3013 3014 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 3015 // inserted only sets known zero bits. 3016 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 3017 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 3018 3019 EVT VT = N->getValueType(0); 3020 if (VT != MVT::i32 && VT != MVT::i64) 3021 return false; 3022 3023 unsigned BitWidth = VT.getSizeInBits(); 3024 3025 uint64_t OrImm; 3026 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 3027 return false; 3028 3029 // Skip this transformation if the ORR immediate can be encoded in the ORR. 3030 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 3031 // performance neutral. 3032 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 3033 return false; 3034 3035 uint64_t MaskImm; 3036 SDValue And = N->getOperand(0); 3037 // Must be a single use AND with an immediate operand. 3038 if (!And.hasOneUse() || 3039 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 3040 return false; 3041 3042 // Compute the Known Zero for the AND as this allows us to catch more general 3043 // cases than just looking for AND with imm. 3044 KnownBits Known = CurDAG->computeKnownBits(And); 3045 3046 // Non-zero in the sense that they're not provably zero, which is the key 3047 // point if we want to use this value. 3048 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 3049 3050 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 3051 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 3052 return false; 3053 3054 // The bits being inserted must only set those bits that are known to be zero. 3055 if ((OrImm & NotKnownZero) != 0) { 3056 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 3057 // currently handle this case. 3058 return false; 3059 } 3060 3061 // BFI/BFXIL dst, src, #lsb, #width. 3062 int LSB = llvm::countr_one(NotKnownZero); 3063 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount(); 3064 3065 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 
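// e.g. for i32 with LSB = 8 and Width = 7: ImmR = (32 - 8) % 32 = 24 and
// ImmS = 6, i.e. "BFI Wd, Wn, #8, #7" is the alias of "BFM Wd, Wn, #24, #6".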
3066 unsigned ImmR = (BitWidth - LSB) % BitWidth; 3067 unsigned ImmS = Width - 1; 3068 3069 // If we're creating a BFI instruction avoid cases where we need more 3070 // instructions to materialize the BFI constant as compared to the original 3071 // ORR. A BFXIL will use the same constant as the original ORR, so the code 3072 // should be no worse in this case. 3073 bool IsBFI = LSB != 0; 3074 uint64_t BFIImm = OrImm >> LSB; 3075 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 3076 // We have a BFI instruction and we know the constant can't be materialized 3077 // with a ORR-immediate with the zero register. 3078 unsigned OrChunks = 0, BFIChunks = 0; 3079 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 3080 if (((OrImm >> Shift) & 0xFFFF) != 0) 3081 ++OrChunks; 3082 if (((BFIImm >> Shift) & 0xFFFF) != 0) 3083 ++BFIChunks; 3084 } 3085 if (BFIChunks > OrChunks) 3086 return false; 3087 } 3088 3089 // Materialize the constant to be inserted. 3090 SDLoc DL(N); 3091 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; 3092 SDNode *MOVI = CurDAG->getMachineNode( 3093 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); 3094 3095 // Create the BFI/BFXIL instruction. 3096 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), 3097 CurDAG->getTargetConstant(ImmR, DL, VT), 3098 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3099 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 3100 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3101 return true; 3102 } 3103 3104 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, 3105 SDValue &ShiftedOperand, 3106 uint64_t &EncodedShiftImm) { 3107 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR. 3108 if (!Dst.hasOneUse()) 3109 return false; 3110 3111 EVT VT = Dst.getValueType(); 3112 assert((VT == MVT::i32 || VT == MVT::i64) && 3113 "Caller should guarantee that VT is one of i32 or i64"); 3114 const unsigned SizeInBits = VT.getSizeInBits(); 3115 3116 SDLoc DL(Dst.getNode()); 3117 uint64_t AndImm, ShlImm; 3118 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) && 3119 isShiftedMask_64(AndImm)) { 3120 // Avoid transforming 'DstOp0' if it has other uses than the AND node. 3121 SDValue DstOp0 = Dst.getOperand(0); 3122 if (!DstOp0.hasOneUse()) 3123 return false; 3124 3125 // An example to illustrate the transformation 3126 // From: 3127 // lsr x8, x1, #1 3128 // and x8, x8, #0x3f80 3129 // bfxil x8, x1, #0, #7 3130 // To: 3131 // and x8, x23, #0x7f 3132 // ubfx x9, x23, #8, #7 3133 // orr x23, x8, x9, lsl #7 3134 // 3135 // The number of instructions remains the same, but ORR is faster than BFXIL 3136 // on many AArch64 processors (or as good as BFXIL if not faster). Besides, 3137 // the dependency chain is improved after the transformation. 3138 uint64_t SrlImm; 3139 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) { 3140 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm); 3141 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) { 3142 unsigned MaskWidth = 3143 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask); 3144 unsigned UBFMOpc = 3145 (VT == MVT::i32) ? 
AArch64::UBFMWri : AArch64::UBFMXri; 3146 SDNode *UBFMNode = CurDAG->getMachineNode( 3147 UBFMOpc, DL, VT, DstOp0.getOperand(0), 3148 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL, 3149 VT), 3150 CurDAG->getTargetConstant( 3151 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT)); 3152 ShiftedOperand = SDValue(UBFMNode, 0); 3153 EncodedShiftImm = AArch64_AM::getShifterImm( 3154 AArch64_AM::LSL, NumTrailingZeroInShiftedMask); 3155 return true; 3156 } 3157 } 3158 return false; 3159 } 3160 3161 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) { 3162 ShiftedOperand = Dst.getOperand(0); 3163 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm); 3164 return true; 3165 } 3166 3167 uint64_t SrlImm; 3168 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) { 3169 ShiftedOperand = Dst.getOperand(0); 3170 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm); 3171 return true; 3172 } 3173 return false; 3174 } 3175 3176 // Given an 'ISD::OR' node that is going to be selected as BFM, analyze 3177 // the operands and select it to AArch64::ORR with shifted registers if 3178 // that's more efficient. Returns true iff selection to AArch64::ORR happens. 3179 static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, 3180 SDValue Src, SDValue Dst, SelectionDAG *CurDAG, 3181 const bool BiggerPattern) { 3182 EVT VT = N->getValueType(0); 3183 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node"); 3184 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) || 3185 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) && 3186 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR"); 3187 assert((VT == MVT::i32 || VT == MVT::i64) && 3188 "Expect result type to be i32 or i64 since N is combinable to BFM"); 3189 SDLoc DL(N); 3190 3191 // Bail out if BFM simplifies away one node in BFM Dst. 3192 if (OrOpd1 != Dst) 3193 return false; 3194 3195 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs; 3196 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer 3197 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true. 3198 if (BiggerPattern) { 3199 uint64_t SrcAndImm; 3200 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) && 3201 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) { 3202 // OrOpd0 = AND Src, #Mask 3203 // So BFM simplifies away one AND node from Src and doesn't simplify away 3204 // nodes from Dst. If ORR with left-shifted operand also simplifies away 3205 // one node (from Rd), ORR is better since it has higher throughput and 3206 // smaller latency than BFM on many AArch64 processors (and for the rest 3207 // ORR is at least as good as BFM). 
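// Hedged illustration (register numbers are made up): for
//   or (and x1, #0xff), (shl x2, #8)
// the code below can emit
//   orr x0, x8, x9, lsl #8
// where x8 holds the AND result and x9 the unshifted x2, folding the shift
// into the ORR operand instead of forming a BFM.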
3208 SDValue ShiftedOperand; 3209 uint64_t EncodedShiftImm; 3210 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand, 3211 EncodedShiftImm)) { 3212 SDValue Ops[] = {OrOpd0, ShiftedOperand, 3213 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)}; 3214 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3215 return true; 3216 } 3217 } 3218 return false; 3219 } 3220 3221 assert((!BiggerPattern) && "BiggerPattern should be handled above"); 3222 3223 uint64_t ShlImm; 3224 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) { 3225 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) { 3226 SDValue Ops[] = { 3227 Dst, Src, 3228 CurDAG->getTargetConstant( 3229 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; 3230 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3231 return true; 3232 } 3233 3234 // Select the following pattern to left-shifted operand rather than BFI. 3235 // %val1 = op .. 3236 // %val2 = shl %val1, #imm 3237 // %res = or %val1, %val2 3238 // 3239 // If N is selected to be BFI, we know that 3240 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into 3241 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved) 3242 // 3243 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly. 3244 if (OrOpd0.getOperand(0) == OrOpd1) { 3245 SDValue Ops[] = { 3246 OrOpd1, OrOpd1, 3247 CurDAG->getTargetConstant( 3248 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; 3249 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3250 return true; 3251 } 3252 } 3253 3254 uint64_t SrlImm; 3255 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) { 3256 // Select the following pattern to right-shifted operand rather than BFXIL. 3257 // %val1 = op .. 3258 // %val2 = lshr %val1, #imm 3259 // %res = or %val1, %val2 3260 // 3261 // If N is selected to be BFXIL, we know that 3262 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into 3263 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved) 3264 // 3265 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly. 3266 if (OrOpd0.getOperand(0) == OrOpd1) { 3267 SDValue Ops[] = { 3268 OrOpd1, OrOpd1, 3269 CurDAG->getTargetConstant( 3270 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)}; 3271 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3272 return true; 3273 } 3274 } 3275 3276 return false; 3277 } 3278 3279 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, 3280 SelectionDAG *CurDAG) { 3281 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 3282 3283 EVT VT = N->getValueType(0); 3284 if (VT != MVT::i32 && VT != MVT::i64) 3285 return false; 3286 3287 unsigned BitWidth = VT.getSizeInBits(); 3288 3289 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 3290 // have the expected shape. Try to undo that. 
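// For instance (illustrative), simplify-demanded-bits may already have
// stripped an explicit AND mask whose bits it proved irrelevant, so the
// ignored low/high bit counts are recovered from UsefulBits below instead.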
3291
3292 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3293 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3294
3295 // Given an OR operation, check if we have the following pattern
3296 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3297 // isBitfieldExtractOp)
3298 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3299 // countTrailingZeros(mask2) == imm2 - imm + 1
3300 // f = d | c
3301 // if yes, replace the OR instruction with:
3302 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3303
3304 // OR is commutative, check all combinations of operand order and values of
3305 // BiggerPattern, i.e.
3306 // Opd0, Opd1, BiggerPattern=false
3307 // Opd1, Opd0, BiggerPattern=false
3308 // Opd0, Opd1, BiggerPattern=true
3309 // Opd1, Opd0, BiggerPattern=true
3310 // Several of these combinations may match, so check with BiggerPattern=false
3311 // first since that will produce better results by matching more instructions
3312 // and/or inserting fewer extra instructions.
3313 for (int I = 0; I < 4; ++I) {
3314
3315 SDValue Dst, Src;
3316 unsigned ImmR, ImmS;
3317 bool BiggerPattern = I / 2;
3318 SDValue OrOpd0Val = N->getOperand(I % 2);
3319 SDNode *OrOpd0 = OrOpd0Val.getNode();
3320 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3321 SDNode *OrOpd1 = OrOpd1Val.getNode();
3322
3323 unsigned BFXOpc;
3324 int DstLSB, Width;
3325 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3326 NumberOfIgnoredLowBits, BiggerPattern)) {
3327 // Check that the returned opcode is compatible with the pattern,
3328 // i.e., same type and zero extended (U and not S)
3329 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3330 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3331 continue;
3332
3333 // Compute the width of the bitfield insertion
3334 DstLSB = 0;
3335 Width = ImmS - ImmR + 1;
3336 // FIXME: This constraint only catches bitfield insertion; we may want to
3337 // widen the pattern if we want to handle the general bitfield move
3338 // case.
3339 if (Width <= 0)
3340 continue;
3341
3342 // If the mask on the insertee is correct, we have a BFXIL operation. We
3343 // can share the ImmR and ImmS values from the already-computed UBFM.
3344 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3345 BiggerPattern,
3346 Src, DstLSB, Width)) {
3347 ImmR = (BitWidth - DstLSB) % BitWidth;
3348 ImmS = Width - 1;
3349 } else
3350 continue;
3351
3352 // Check the second part of the pattern
3353 EVT VT = OrOpd1Val.getValueType();
3354 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3355
3356 // Compute the Known Zero for the candidate of the first operand.
3357 // This allows us to catch more general cases than just looking for
3358 // AND with imm. Indeed, simplify-demanded-bits may have removed
3359 // the AND instruction because it proves it was useless.
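// Illustrative numbers: with DstLSB = 8 and Width = 8 on an i32 OR,
// BitsToBeInserted below is 0x0000ff00, and the combine only proceeds if
// Known.Zero of the other operand covers all of those bits.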
3360 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 3361 3362 // Check if there is enough room for the second operand to appear 3363 // in the first one 3364 APInt BitsToBeInserted = 3365 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 3366 3367 if ((BitsToBeInserted & ~Known.Zero) != 0) 3368 continue; 3369 3370 // Set the first operand 3371 uint64_t Imm; 3372 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 3373 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 3374 // In that case, we can eliminate the AND 3375 Dst = OrOpd1->getOperand(0); 3376 else 3377 // Maybe the AND has been removed by simplify-demanded-bits 3378 // or is useful because it discards more bits 3379 Dst = OrOpd1Val; 3380 3381 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR 3382 // with shifted operand is more efficient. 3383 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG, 3384 BiggerPattern)) 3385 return true; 3386 3387 // both parts match 3388 SDLoc DL(N); 3389 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 3390 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3391 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 3392 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3393 return true; 3394 } 3395 3396 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 3397 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 3398 // mask (e.g., 0x000ffff0). 3399 uint64_t Mask0Imm, Mask1Imm; 3400 SDValue And0 = N->getOperand(0); 3401 SDValue And1 = N->getOperand(1); 3402 if (And0.hasOneUse() && And1.hasOneUse() && 3403 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 3404 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 3405 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 3406 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 3407 3408 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 3409 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 3410 // bits to be inserted. 3411 if (isShiftedMask(Mask0Imm, VT)) { 3412 std::swap(And0, And1); 3413 std::swap(Mask0Imm, Mask1Imm); 3414 } 3415 3416 SDValue Src = And1->getOperand(0); 3417 SDValue Dst = And0->getOperand(0); 3418 unsigned LSB = llvm::countr_zero(Mask1Imm); 3419 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount(); 3420 3421 // The BFXIL inserts the low-order bits from a source register, so right 3422 // shift the needed bits into place. 3423 SDLoc DL(N); 3424 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 3425 uint64_t LsrImm = LSB; 3426 if (Src->hasOneUse() && 3427 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) && 3428 (LsrImm + LSB) < BitWidth) { 3429 Src = Src->getOperand(0); 3430 LsrImm += LSB; 3431 } 3432 3433 SDNode *LSR = CurDAG->getMachineNode( 3434 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT), 3435 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 3436 3437 // BFXIL is an alias of BFM, so translate to BFM operands. 3438 unsigned ImmR = (BitWidth - LSB) % BitWidth; 3439 unsigned ImmS = Width - 1; 3440 3441 // Create the BFXIL instruction. 3442 SDValue Ops[] = {Dst, SDValue(LSR, 0), 3443 CurDAG->getTargetConstant(ImmR, DL, VT), 3444 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3445 unsigned Opc = (VT == MVT::i32) ? 
AArch64::BFMWri : AArch64::BFMXri; 3446 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3447 return true; 3448 } 3449 3450 return false; 3451 } 3452 3453 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 3454 if (N->getOpcode() != ISD::OR) 3455 return false; 3456 3457 APInt NUsefulBits; 3458 getUsefulBits(SDValue(N, 0), NUsefulBits); 3459 3460 // If all bits are not useful, just return UNDEF. 3461 if (!NUsefulBits) { 3462 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 3463 return true; 3464 } 3465 3466 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 3467 return true; 3468 3469 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 3470 } 3471 3472 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 3473 /// equivalent of a left shift by a constant amount followed by an and masking 3474 /// out a contiguous set of bits. 3475 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 3476 if (N->getOpcode() != ISD::AND) 3477 return false; 3478 3479 EVT VT = N->getValueType(0); 3480 if (VT != MVT::i32 && VT != MVT::i64) 3481 return false; 3482 3483 SDValue Op0; 3484 int DstLSB, Width; 3485 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 3486 Op0, DstLSB, Width)) 3487 return false; 3488 3489 // ImmR is the rotate right amount. 3490 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 3491 // ImmS is the most significant bit of the source to be moved. 3492 unsigned ImmS = Width - 1; 3493 3494 SDLoc DL(N); 3495 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 3496 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3497 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 3498 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3499 return true; 3500 } 3501 3502 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 3503 /// variable shift/rotate instructions. 3504 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 3505 EVT VT = N->getValueType(0); 3506 3507 unsigned Opc; 3508 switch (N->getOpcode()) { 3509 case ISD::ROTR: 3510 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 3511 break; 3512 case ISD::SHL: 3513 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 3514 break; 3515 case ISD::SRL: 3516 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 3517 break; 3518 case ISD::SRA: 3519 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 3520 break; 3521 default: 3522 return false; 3523 } 3524 3525 uint64_t Size; 3526 uint64_t Bits; 3527 if (VT == MVT::i32) { 3528 Bits = 5; 3529 Size = 32; 3530 } else if (VT == MVT::i64) { 3531 Bits = 6; 3532 Size = 64; 3533 } else 3534 return false; 3535 3536 SDValue ShiftAmt = N->getOperand(1); 3537 SDLoc DL(N); 3538 SDValue NewShiftAmt; 3539 3540 // Skip over an extend of the shift amount. 3541 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 3542 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 3543 ShiftAmt = ShiftAmt->getOperand(0); 3544 3545 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 3546 SDValue Add0 = ShiftAmt->getOperand(0); 3547 SDValue Add1 = ShiftAmt->getOperand(1); 3548 uint64_t Add0Imm; 3549 uint64_t Add1Imm; 3550 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) { 3551 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 3552 // to avoid the ADD/SUB. 
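// e.g. (srl x, (add y, 64)) on i64 can simply shift by y, because the
// LSLV/LSRV/ASRV/RORV instructions only consume the shift amount modulo the
// register size. (Illustrative example.)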
3553 NewShiftAmt = Add0; 3554 } else if (ShiftAmt->getOpcode() == ISD::SUB && 3555 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 3556 (Add0Imm % Size == 0)) { 3557 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X 3558 // to generate a NEG instead of a SUB from a constant. 3559 unsigned NegOpc; 3560 unsigned ZeroReg; 3561 EVT SubVT = ShiftAmt->getValueType(0); 3562 if (SubVT == MVT::i32) { 3563 NegOpc = AArch64::SUBWrr; 3564 ZeroReg = AArch64::WZR; 3565 } else { 3566 assert(SubVT == MVT::i64); 3567 NegOpc = AArch64::SUBXrr; 3568 ZeroReg = AArch64::XZR; 3569 } 3570 SDValue Zero = 3571 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 3572 MachineSDNode *Neg = 3573 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 3574 NewShiftAmt = SDValue(Neg, 0); 3575 } else if (ShiftAmt->getOpcode() == ISD::SUB && 3576 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) { 3577 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X 3578 // to generate a NOT instead of a SUB from a constant. 3579 unsigned NotOpc; 3580 unsigned ZeroReg; 3581 EVT SubVT = ShiftAmt->getValueType(0); 3582 if (SubVT == MVT::i32) { 3583 NotOpc = AArch64::ORNWrr; 3584 ZeroReg = AArch64::WZR; 3585 } else { 3586 assert(SubVT == MVT::i64); 3587 NotOpc = AArch64::ORNXrr; 3588 ZeroReg = AArch64::XZR; 3589 } 3590 SDValue Zero = 3591 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 3592 MachineSDNode *Not = 3593 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1); 3594 NewShiftAmt = SDValue(Not, 0); 3595 } else 3596 return false; 3597 } else { 3598 // If the shift amount is masked with an AND, check that the mask covers the 3599 // bits that are implicitly ANDed off by the above opcodes and if so, skip 3600 // the AND. 3601 uint64_t MaskImm; 3602 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && 3603 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) 3604 return false; 3605 3606 if ((unsigned)llvm::countr_one(MaskImm) < Bits) 3607 return false; 3608 3609 NewShiftAmt = ShiftAmt->getOperand(0); 3610 } 3611 3612 // Narrow/widen the shift amount to match the size of the shift operation. 3613 if (VT == MVT::i32) 3614 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 3615 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 3616 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 3617 MachineSDNode *Ext = CurDAG->getMachineNode( 3618 AArch64::SUBREG_TO_REG, DL, VT, 3619 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 3620 NewShiftAmt = SDValue(Ext, 0); 3621 } 3622 3623 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 3624 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3625 return true; 3626 } 3627 3628 bool 3629 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 3630 unsigned RegWidth) { 3631 APFloat FVal(0.0); 3632 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 3633 FVal = CN->getValueAPF(); 3634 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 3635 // Some otherwise illegal constants are allowed in this case. 
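// e.g. a power-of-two constant such as 2^40 cannot be materialized with an
// FMOV immediate and may instead be loaded from a constant pool (ADDlow +
// literal); we look through the load below to recover its value.
// (Illustrative constant.)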
3636 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3637 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3638 return false;
3639
3640 ConstantPoolSDNode *CN =
3641 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3642 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3643 } else
3644 return false;
3645
3646 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3647 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3648 // x-register.
3649 //
3650 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3651 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3652 // integers.
3653 bool IsExact;
3654
3655 // fbits is between 1 and 64 in the worst-case, which means the fmul
3656 // could have 2^64 as an actual operand. Need 65 bits of precision.
3657 APSInt IntVal(65, true);
3658 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3659
3660 // N.b. isPowerOf2 also checks for > 0.
3661 if (!IsExact || !IntVal.isPowerOf2()) return false;
3662 unsigned FBits = IntVal.logBase2();
3663
3664 // Checks above should have guaranteed that we haven't lost information in
3665 // finding FBits, but it must still be in range.
3666 if (FBits == 0 || FBits > RegWidth) return false;
3667
3668 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3669 return true;
3670 }
3671
3672 // Inspects a register string of the form op0:op1:CRn:CRm:op2, extracts the
3673 // fields of the string, converts them to integers, and combines these into a
3674 // single value to be used in the MRS/MSR instruction.
3675 static int getIntOperandFromRegisterString(StringRef RegString) {
3676 SmallVector<StringRef, 5> Fields;
3677 RegString.split(Fields, ':');
3678
3679 if (Fields.size() == 1)
3680 return -1;
3681
3682 assert(Fields.size() == 5
3683 && "Invalid number of fields in read register string");
3684
3685 SmallVector<int, 5> Ops;
3686 bool AllIntFields = true;
3687
3688 for (StringRef Field : Fields) {
3689 unsigned IntField;
3690 AllIntFields &= !Field.getAsInteger(10, IntField);
3691 Ops.push_back(IntField);
3692 }
3693
3694 assert(AllIntFields &&
3695 "Unexpected non-integer value in special register string.");
3696 (void)AllIntFields;
3697
3698 // Need to combine the integer fields of the string into a single value
3699 // based on the bit encoding of the MRS/MSR instruction.
3700 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3701 (Ops[3] << 3) | (Ops[4]);
3702 }
3703
3704 // Lower the read_register intrinsic to an MRS instruction node if the special
3705 // register string argument is either of the form detailed in the ACLE (the
3706 // form described in getIntOperandFromRegisterString) or is a named register
3707 // known by the MRS SysReg mapper.
3708 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3709 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3710 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3711 SDLoc DL(N);
3712
3713 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3714
3715 unsigned Opcode64Bit = AArch64::MRS;
3716 int Imm = getIntOperandFromRegisterString(RegString->getString());
3717 if (Imm == -1) {
3718 // No match. Use the sysreg mapper to map the remaining possible strings to
3719 // the value for the register to be used for the instruction operand.
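// (Illustrative note: a colon-separated string such as "2:3:0:4:0" would
// already have been packed above as (2<<14)|(3<<11)|(0<<7)|(4<<3)|0; reaching
// this point means the string names a system register instead.)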
3720 const auto *TheReg =
3721 AArch64SysReg::lookupSysRegByName(RegString->getString());
3722 if (TheReg && TheReg->Readable &&
3723 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3724 Imm = TheReg->Encoding;
3725 else
3726 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3727
3728 if (Imm == -1) {
3729 // Still no match; see if this is "pc", or give up.
3730 if (!ReadIs128Bit && RegString->getString() == "pc") {
3731 Opcode64Bit = AArch64::ADR;
3732 Imm = 0;
3733 } else {
3734 return false;
3735 }
3736 }
3737 }
3738
3739 SDValue InChain = N->getOperand(0);
3740 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3741 if (!ReadIs128Bit) {
3742 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3743 {SysRegImm, InChain});
3744 } else {
3745 SDNode *MRRS = CurDAG->getMachineNode(
3746 AArch64::MRRS, DL,
3747 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3748 {SysRegImm, InChain});
3749
3750 // Sysregs are not endian. The even register always contains the low half
3751 // of the register.
3752 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3753 SDValue(MRRS, 0));
3754 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3755 SDValue(MRRS, 0));
3756 SDValue OutChain = SDValue(MRRS, 1);
3757
3758 ReplaceUses(SDValue(N, 0), Lo);
3759 ReplaceUses(SDValue(N, 1), Hi);
3760 ReplaceUses(SDValue(N, 2), OutChain);
3761 }
3762 return true;
3763 }
3764
3765 // Lower the write_register intrinsic to an MSR instruction node if the special
3766 // register string argument is either of the form detailed in the ACLE (the
3767 // form described in getIntOperandFromRegisterString) or is a named register
3768 // known by the MSR SysReg mapper.
3769 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3770 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3771 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3772 SDLoc DL(N);
3773
3774 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
3775
3776 if (!WriteIs128Bit) {
3777 // Check if the register was one of those allowed as the pstatefield value
3778 // in the MSR (immediate) instruction. To accept the values allowed in the
3779 // pstatefield for the MSR (immediate) instruction, we also require that an
3780 // immediate value has been provided as an argument; we know this is the
3781 // case, as it has been ensured by semantic checking.
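// For example (illustrative), "msr spsel, #1" writes an immediate into a
// pstatefield and is selected through one of the MSRpstate immediate forms
// below rather than through the general MSR (register) path.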
3782 auto trySelectPState = [&](auto PMapper, unsigned State) {
3783 if (PMapper) {
3784 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
3785 "Expected a constant integer expression.");
3786 unsigned Reg = PMapper->Encoding;
3787 uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
3788 CurDAG->SelectNodeTo(
3789 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3790 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
3791 return true;
3792 }
3793 return false;
3794 };
3795
3796 if (trySelectPState(
3797 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
3798 AArch64::MSRpstateImm4))
3799 return true;
3800 if (trySelectPState(
3801 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
3802 AArch64::MSRpstateImm1))
3803 return true;
3804 }
3805
3806 int Imm = getIntOperandFromRegisterString(RegString->getString());
3807 if (Imm == -1) {
3808 // Use the sysreg mapper to attempt to map the remaining possible strings
3809 // to the value for the register to be used for the MSR (register)
3810 // instruction operand.
3811 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3812 if (TheReg && TheReg->Writeable &&
3813 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3814 Imm = TheReg->Encoding;
3815 else
3816 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3817
3818 if (Imm == -1)
3819 return false;
3820 }
3821
3822 SDValue InChain = N->getOperand(0);
3823 if (!WriteIs128Bit) {
3824 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
3825 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3826 N->getOperand(2), InChain);
3827 } else {
3828 // No endian swap. The lower half always goes into the even subreg, and the
3829 // higher half always into the odd subreg.
3830 SDNode *Pair = CurDAG->getMachineNode(
3831 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
3832 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
3833 MVT::i32),
3834 N->getOperand(2),
3835 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
3836 N->getOperand(3),
3837 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
3838
3839 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
3840 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3841 SDValue(Pair, 0), InChain);
3842 }
3843
3844 return true;
3845 }
3846
3847 /// We've got special pseudo-instructions for these.
3848 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3849 unsigned Opcode;
3850 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3851
3852 // Leave IR for LSE if subtarget supports it.
3853 if (Subtarget->hasLSE()) return false;
3854
3855 if (MemTy == MVT::i8)
3856 Opcode = AArch64::CMP_SWAP_8;
3857 else if (MemTy == MVT::i16)
3858 Opcode = AArch64::CMP_SWAP_16;
3859 else if (MemTy == MVT::i32)
3860 Opcode = AArch64::CMP_SWAP_32;
3861 else if (MemTy == MVT::i64)
3862 Opcode = AArch64::CMP_SWAP_64;
3863 else
3864 llvm_unreachable("Unknown AtomicCmpSwap type");
3865
3866 MVT RegTy = MemTy == MVT::i64 ?
MVT::i64 : MVT::i32; 3867 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3868 N->getOperand(0)}; 3869 SDNode *CmpSwap = CurDAG->getMachineNode( 3870 Opcode, SDLoc(N), 3871 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 3872 3873 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3874 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3875 3876 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3877 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3878 CurDAG->RemoveDeadNode(N); 3879 3880 return true; 3881 } 3882 3883 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, 3884 SDValue &Shift) { 3885 if (!isa<ConstantSDNode>(N)) 3886 return false; 3887 3888 SDLoc DL(N); 3889 uint64_t Val = cast<ConstantSDNode>(N) 3890 ->getAPIntValue() 3891 .trunc(VT.getFixedSizeInBits()) 3892 .getZExtValue(); 3893 3894 switch (VT.SimpleTy) { 3895 case MVT::i8: 3896 // All immediates are supported. 3897 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3898 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 3899 return true; 3900 case MVT::i16: 3901 case MVT::i32: 3902 case MVT::i64: 3903 // Support 8bit unsigned immediates. 3904 if (Val <= 255) { 3905 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3906 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 3907 return true; 3908 } 3909 // Support 16bit unsigned immediates that are a multiple of 256. 3910 if (Val <= 65280 && Val % 256 == 0) { 3911 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3912 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); 3913 return true; 3914 } 3915 break; 3916 default: 3917 break; 3918 } 3919 3920 return false; 3921 } 3922 3923 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, 3924 SDValue &Shift) { 3925 if (!isa<ConstantSDNode>(N)) 3926 return false; 3927 3928 SDLoc DL(N); 3929 int64_t Val = cast<ConstantSDNode>(N) 3930 ->getAPIntValue() 3931 .trunc(VT.getFixedSizeInBits()) 3932 .getSExtValue(); 3933 3934 switch (VT.SimpleTy) { 3935 case MVT::i8: 3936 // All immediates are supported. 3937 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3938 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 3939 return true; 3940 case MVT::i16: 3941 case MVT::i32: 3942 case MVT::i64: 3943 // Support 8bit signed immediates. 3944 if (Val >= -128 && Val <= 127) { 3945 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3946 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 3947 return true; 3948 } 3949 // Support 16bit signed immediates that are a multiple of 256. 
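// e.g. Val = -1024 is emitted as Imm = 0xFC (i.e. -4) together with
// Shift = 8. (Illustrative value.)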
3950 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { 3951 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3952 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32); 3953 return true; 3954 } 3955 break; 3956 default: 3957 break; 3958 } 3959 3960 return false; 3961 } 3962 3963 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { 3964 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3965 int64_t ImmVal = CNode->getSExtValue(); 3966 SDLoc DL(N); 3967 if (ImmVal >= -128 && ImmVal < 128) { 3968 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3969 return true; 3970 } 3971 } 3972 return false; 3973 } 3974 3975 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { 3976 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3977 uint64_t ImmVal = CNode->getZExtValue(); 3978 3979 switch (VT.SimpleTy) { 3980 case MVT::i8: 3981 ImmVal &= 0xFF; 3982 break; 3983 case MVT::i16: 3984 ImmVal &= 0xFFFF; 3985 break; 3986 case MVT::i32: 3987 ImmVal &= 0xFFFFFFFF; 3988 break; 3989 case MVT::i64: 3990 break; 3991 default: 3992 llvm_unreachable("Unexpected type"); 3993 } 3994 3995 if (ImmVal < 256) { 3996 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3997 return true; 3998 } 3999 } 4000 return false; 4001 } 4002 4003 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, 4004 bool Invert) { 4005 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 4006 uint64_t ImmVal = CNode->getZExtValue(); 4007 SDLoc DL(N); 4008 4009 if (Invert) 4010 ImmVal = ~ImmVal; 4011 4012 // Shift mask depending on type size. 4013 switch (VT.SimpleTy) { 4014 case MVT::i8: 4015 ImmVal &= 0xFF; 4016 ImmVal |= ImmVal << 8; 4017 ImmVal |= ImmVal << 16; 4018 ImmVal |= ImmVal << 32; 4019 break; 4020 case MVT::i16: 4021 ImmVal &= 0xFFFF; 4022 ImmVal |= ImmVal << 16; 4023 ImmVal |= ImmVal << 32; 4024 break; 4025 case MVT::i32: 4026 ImmVal &= 0xFFFFFFFF; 4027 ImmVal |= ImmVal << 32; 4028 break; 4029 case MVT::i64: 4030 break; 4031 default: 4032 llvm_unreachable("Unexpected type"); 4033 } 4034 4035 uint64_t encoding; 4036 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { 4037 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); 4038 return true; 4039 } 4040 } 4041 return false; 4042 } 4043 4044 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. 4045 // Rather than attempt to normalise everything we can sometimes saturate the 4046 // shift amount during selection. This function also allows for consistent 4047 // isel patterns by ensuring the resulting "Imm" node is of the i32 type 4048 // required by the instructions. 4049 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, 4050 uint64_t High, bool AllowSaturation, 4051 SDValue &Imm) { 4052 if (auto *CN = dyn_cast<ConstantSDNode>(N)) { 4053 uint64_t ImmVal = CN->getZExtValue(); 4054 4055 // Reject shift amounts that are too small. 4056 if (ImmVal < Low) 4057 return false; 4058 4059 // Reject or saturate shift amounts that are too big. 4060 if (ImmVal > High) { 4061 if (!AllowSaturation) 4062 return false; 4063 ImmVal = High; 4064 } 4065 4066 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 4067 return true; 4068 } 4069 4070 return false; 4071 } 4072 4073 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 4074 // tagp(FrameIndex, IRGstack, tag_offset): 4075 // since the offset between FrameIndex and IRGstack is a compile-time 4076 // constant, this can be lowered to a single ADDG instruction. 
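// Hypothetical shape of the result: TAGPstack %fi, 0, %irg.sp, #tag_offset,
// which is later materialized as an ADDG once the frame offset is known.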
4077 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 4078 return false; 4079 } 4080 4081 SDValue IRG_SP = N->getOperand(2); 4082 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 4083 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != 4084 Intrinsic::aarch64_irg_sp) { 4085 return false; 4086 } 4087 4088 const TargetLowering *TLI = getTargetLowering(); 4089 SDLoc DL(N); 4090 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 4091 SDValue FiOp = CurDAG->getTargetFrameIndex( 4092 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 4093 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 4094 4095 SDNode *Out = CurDAG->getMachineNode( 4096 AArch64::TAGPstack, DL, MVT::i64, 4097 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 4098 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 4099 ReplaceNode(N, Out); 4100 return true; 4101 } 4102 4103 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 4104 assert(isa<ConstantSDNode>(N->getOperand(3)) && 4105 "llvm.aarch64.tagp third argument must be an immediate"); 4106 if (trySelectStackSlotTagP(N)) 4107 return; 4108 // FIXME: above applies in any case when offset between Op1 and Op2 is a 4109 // compile-time constant, not just for stack allocations. 4110 4111 // General case for unrelated pointers in Op1 and Op2. 4112 SDLoc DL(N); 4113 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 4114 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 4115 {N->getOperand(1), N->getOperand(2)}); 4116 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 4117 {SDValue(N1, 0), N->getOperand(2)}); 4118 SDNode *N3 = CurDAG->getMachineNode( 4119 AArch64::ADDG, DL, MVT::i64, 4120 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 4121 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 4122 ReplaceNode(N, N3); 4123 } 4124 4125 bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) { 4126 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!"); 4127 4128 // Bail when not a "cast" like insert_subvector. 4129 if (cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() != 0) 4130 return false; 4131 if (!N->getOperand(0).isUndef()) 4132 return false; 4133 4134 // Bail when normal isel should do the job. 4135 EVT VT = N->getValueType(0); 4136 EVT InVT = N->getOperand(1).getValueType(); 4137 if (VT.isFixedLengthVector() || InVT.isScalableVector()) 4138 return false; 4139 if (InVT.getSizeInBits() <= 128) 4140 return false; 4141 4142 // NOTE: We can only get here when doing fixed length SVE code generation. 4143 // We do manual selection because the types involved are not linked to real 4144 // registers (despite being legal) and must be coerced into SVE registers. 4145 4146 assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && 4147 "Expected to insert into a packed scalable vector!"); 4148 4149 SDLoc DL(N); 4150 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 4151 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, 4152 N->getOperand(1), RC)); 4153 return true; 4154 } 4155 4156 bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) { 4157 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!"); 4158 4159 // Bail when not a "cast" like extract_subvector. 4160 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 0) 4161 return false; 4162 4163 // Bail when normal isel can do the job. 
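// e.g. extracting a 256-bit fixed-length value (say v8i32) from an nxv4i32
// operand is handled here as a plain register-class copy; extracts of 128
// bits or fewer are left to the normal patterns. (Illustrative types.)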
4164 EVT VT = N->getValueType(0); 4165 EVT InVT = N->getOperand(0).getValueType(); 4166 if (VT.isScalableVector() || InVT.isFixedLengthVector()) 4167 return false; 4168 if (VT.getSizeInBits() <= 128) 4169 return false; 4170 4171 // NOTE: We can only get here when doing fixed length SVE code generation. 4172 // We do manual selection because the types involved are not linked to real 4173 // registers (despite being legal) and must be coerced into SVE registers. 4174 4175 assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && 4176 "Expected to extract from a packed scalable vector!"); 4177 4178 SDLoc DL(N); 4179 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 4180 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, 4181 N->getOperand(0), RC)); 4182 return true; 4183 } 4184 4185 void AArch64DAGToDAGISel::Select(SDNode *Node) { 4186 // If we have a custom node, we already have selected! 4187 if (Node->isMachineOpcode()) { 4188 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 4189 Node->setNodeId(-1); 4190 return; 4191 } 4192 4193 // Few custom selection stuff. 4194 EVT VT = Node->getValueType(0); 4195 4196 switch (Node->getOpcode()) { 4197 default: 4198 break; 4199 4200 case ISD::ATOMIC_CMP_SWAP: 4201 if (SelectCMP_SWAP(Node)) 4202 return; 4203 break; 4204 4205 case ISD::READ_REGISTER: 4206 case AArch64ISD::MRRS: 4207 if (tryReadRegister(Node)) 4208 return; 4209 break; 4210 4211 case ISD::WRITE_REGISTER: 4212 case AArch64ISD::MSRR: 4213 if (tryWriteRegister(Node)) 4214 return; 4215 break; 4216 4217 case ISD::LOAD: { 4218 // Try to select as an indexed load. Fall through to normal processing 4219 // if we can't. 4220 if (tryIndexedLoad(Node)) 4221 return; 4222 break; 4223 } 4224 4225 case ISD::SRL: 4226 case ISD::AND: 4227 case ISD::SRA: 4228 case ISD::SIGN_EXTEND_INREG: 4229 if (tryBitfieldExtractOp(Node)) 4230 return; 4231 if (tryBitfieldInsertInZeroOp(Node)) 4232 return; 4233 [[fallthrough]]; 4234 case ISD::ROTR: 4235 case ISD::SHL: 4236 if (tryShiftAmountMod(Node)) 4237 return; 4238 break; 4239 4240 case ISD::SIGN_EXTEND: 4241 if (tryBitfieldExtractOpFromSExt(Node)) 4242 return; 4243 break; 4244 4245 case ISD::OR: 4246 if (tryBitfieldInsertOp(Node)) 4247 return; 4248 break; 4249 4250 case ISD::EXTRACT_SUBVECTOR: { 4251 if (trySelectCastScalableToFixedLengthVector(Node)) 4252 return; 4253 break; 4254 } 4255 4256 case ISD::INSERT_SUBVECTOR: { 4257 if (trySelectCastFixedLengthToScalableVector(Node)) 4258 return; 4259 break; 4260 } 4261 4262 case ISD::Constant: { 4263 // Materialize zero constants as copies from WZR/XZR. This allows 4264 // the coalescer to propagate these into other instructions. 4265 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 4266 if (ConstNode->isZero()) { 4267 if (VT == MVT::i32) { 4268 SDValue New = CurDAG->getCopyFromReg( 4269 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 4270 ReplaceNode(Node, New.getNode()); 4271 return; 4272 } else if (VT == MVT::i64) { 4273 SDValue New = CurDAG->getCopyFromReg( 4274 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 4275 ReplaceNode(Node, New.getNode()); 4276 return; 4277 } 4278 } 4279 break; 4280 } 4281 4282 case ISD::FrameIndex: { 4283 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 
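// e.g. frame index #1 first becomes "ADDXri <fi#1>, 0, 0" and, after frame
// index elimination, something like "add x0, sp, #16" (offset illustrative).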
4284 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 4285 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 4286 const TargetLowering *TLI = getTargetLowering(); 4287 SDValue TFI = CurDAG->getTargetFrameIndex( 4288 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 4289 SDLoc DL(Node); 4290 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 4291 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 4292 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 4293 return; 4294 } 4295 case ISD::INTRINSIC_W_CHAIN: { 4296 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 4297 switch (IntNo) { 4298 default: 4299 break; 4300 case Intrinsic::aarch64_ldaxp: 4301 case Intrinsic::aarch64_ldxp: { 4302 unsigned Op = 4303 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 4304 SDValue MemAddr = Node->getOperand(2); 4305 SDLoc DL(Node); 4306 SDValue Chain = Node->getOperand(0); 4307 4308 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 4309 MVT::Other, MemAddr, Chain); 4310 4311 // Transfer memoperands. 4312 MachineMemOperand *MemOp = 4313 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 4314 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4315 ReplaceNode(Node, Ld); 4316 return; 4317 } 4318 case Intrinsic::aarch64_stlxp: 4319 case Intrinsic::aarch64_stxp: { 4320 unsigned Op = 4321 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; 4322 SDLoc DL(Node); 4323 SDValue Chain = Node->getOperand(0); 4324 SDValue ValLo = Node->getOperand(2); 4325 SDValue ValHi = Node->getOperand(3); 4326 SDValue MemAddr = Node->getOperand(4); 4327 4328 // Place arguments in the right order. 4329 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 4330 4331 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 4332 // Transfer memoperands. 
4333 MachineMemOperand *MemOp = 4334 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 4335 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4336 4337 ReplaceNode(Node, St); 4338 return; 4339 } 4340 case Intrinsic::aarch64_neon_ld1x2: 4341 if (VT == MVT::v8i8) { 4342 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 4343 return; 4344 } else if (VT == MVT::v16i8) { 4345 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 4346 return; 4347 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4348 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 4349 return; 4350 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4351 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 4352 return; 4353 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4354 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 4355 return; 4356 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4357 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 4358 return; 4359 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4360 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 4361 return; 4362 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4363 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 4364 return; 4365 } 4366 break; 4367 case Intrinsic::aarch64_neon_ld1x3: 4368 if (VT == MVT::v8i8) { 4369 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 4370 return; 4371 } else if (VT == MVT::v16i8) { 4372 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 4373 return; 4374 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4375 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 4376 return; 4377 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4378 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 4379 return; 4380 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4381 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 4382 return; 4383 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4384 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 4385 return; 4386 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4387 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 4388 return; 4389 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4390 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 4391 return; 4392 } 4393 break; 4394 case Intrinsic::aarch64_neon_ld1x4: 4395 if (VT == MVT::v8i8) { 4396 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 4397 return; 4398 } else if (VT == MVT::v16i8) { 4399 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 4400 return; 4401 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4402 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 4403 return; 4404 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4405 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 4406 return; 4407 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4408 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 4409 return; 4410 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4411 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 4412 return; 4413 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4414 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 4415 return; 4416 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4417 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 4418 return; 4419 } 4420 break; 4421 case 
Intrinsic::aarch64_neon_ld2: 4422 if (VT == MVT::v8i8) { 4423 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 4424 return; 4425 } else if (VT == MVT::v16i8) { 4426 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 4427 return; 4428 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4429 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 4430 return; 4431 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4432 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 4433 return; 4434 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4435 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 4436 return; 4437 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4438 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 4439 return; 4440 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4441 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 4442 return; 4443 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4444 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 4445 return; 4446 } 4447 break; 4448 case Intrinsic::aarch64_neon_ld3: 4449 if (VT == MVT::v8i8) { 4450 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 4451 return; 4452 } else if (VT == MVT::v16i8) { 4453 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 4454 return; 4455 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4456 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 4457 return; 4458 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4459 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 4460 return; 4461 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4462 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 4463 return; 4464 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4465 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 4466 return; 4467 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4468 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 4469 return; 4470 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4471 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 4472 return; 4473 } 4474 break; 4475 case Intrinsic::aarch64_neon_ld4: 4476 if (VT == MVT::v8i8) { 4477 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 4478 return; 4479 } else if (VT == MVT::v16i8) { 4480 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 4481 return; 4482 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4483 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 4484 return; 4485 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4486 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 4487 return; 4488 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4489 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 4490 return; 4491 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4492 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 4493 return; 4494 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4495 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 4496 return; 4497 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4498 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 4499 return; 4500 } 4501 break; 4502 case Intrinsic::aarch64_neon_ld2r: 4503 if (VT == MVT::v8i8) { 4504 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 4505 return; 4506 } else if (VT == MVT::v16i8) { 4507 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 
4508 return; 4509 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4510 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 4511 return; 4512 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4513 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 4514 return; 4515 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4516 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 4517 return; 4518 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4519 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 4520 return; 4521 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4522 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 4523 return; 4524 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4525 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 4526 return; 4527 } 4528 break; 4529 case Intrinsic::aarch64_neon_ld3r: 4530 if (VT == MVT::v8i8) { 4531 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 4532 return; 4533 } else if (VT == MVT::v16i8) { 4534 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 4535 return; 4536 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4537 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 4538 return; 4539 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4540 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 4541 return; 4542 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4543 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 4544 return; 4545 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4546 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 4547 return; 4548 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4549 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 4550 return; 4551 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4552 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 4553 return; 4554 } 4555 break; 4556 case Intrinsic::aarch64_neon_ld4r: 4557 if (VT == MVT::v8i8) { 4558 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 4559 return; 4560 } else if (VT == MVT::v16i8) { 4561 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 4562 return; 4563 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4564 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 4565 return; 4566 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4567 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 4568 return; 4569 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4570 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 4571 return; 4572 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4573 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 4574 return; 4575 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4576 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 4577 return; 4578 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4579 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 4580 return; 4581 } 4582 break; 4583 case Intrinsic::aarch64_neon_ld2lane: 4584 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4585 SelectLoadLane(Node, 2, AArch64::LD2i8); 4586 return; 4587 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4588 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4589 SelectLoadLane(Node, 2, AArch64::LD2i16); 4590 return; 4591 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4592 VT == MVT::v2f32) { 4593 SelectLoadLane(Node, 2, AArch64::LD2i32); 4594 return; 4595 } else if (VT 
== MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4596 VT == MVT::v1f64) { 4597 SelectLoadLane(Node, 2, AArch64::LD2i64); 4598 return; 4599 } 4600 break; 4601 case Intrinsic::aarch64_neon_ld3lane: 4602 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4603 SelectLoadLane(Node, 3, AArch64::LD3i8); 4604 return; 4605 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4606 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4607 SelectLoadLane(Node, 3, AArch64::LD3i16); 4608 return; 4609 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4610 VT == MVT::v2f32) { 4611 SelectLoadLane(Node, 3, AArch64::LD3i32); 4612 return; 4613 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4614 VT == MVT::v1f64) { 4615 SelectLoadLane(Node, 3, AArch64::LD3i64); 4616 return; 4617 } 4618 break; 4619 case Intrinsic::aarch64_neon_ld4lane: 4620 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4621 SelectLoadLane(Node, 4, AArch64::LD4i8); 4622 return; 4623 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4624 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4625 SelectLoadLane(Node, 4, AArch64::LD4i16); 4626 return; 4627 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4628 VT == MVT::v2f32) { 4629 SelectLoadLane(Node, 4, AArch64::LD4i32); 4630 return; 4631 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4632 VT == MVT::v1f64) { 4633 SelectLoadLane(Node, 4, AArch64::LD4i64); 4634 return; 4635 } 4636 break; 4637 case Intrinsic::aarch64_ld64b: 4638 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); 4639 return; 4640 case Intrinsic::aarch64_sve_ld2_sret: { 4641 if (VT == MVT::nxv16i8) { 4642 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, 4643 true); 4644 return; 4645 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4646 VT == MVT::nxv8bf16) { 4647 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, 4648 true); 4649 return; 4650 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4651 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W, 4652 true); 4653 return; 4654 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4655 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D, 4656 true); 4657 return; 4658 } 4659 break; 4660 } 4661 case Intrinsic::aarch64_sve_ld1_pn_x2: { 4662 if (VT == MVT::nxv16i8) { 4663 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z); 4664 return; 4665 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4666 VT == MVT::nxv8bf16) { 4667 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z); 4668 return; 4669 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4670 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z); 4671 return; 4672 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4673 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z); 4674 return; 4675 } 4676 break; 4677 } 4678 case Intrinsic::aarch64_sve_ld1_pn_x4: { 4679 if (VT == MVT::nxv16i8) { 4680 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z); 4681 return; 4682 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4683 VT == MVT::nxv8bf16) { 4684 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z); 4685 return; 4686 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4687 SelectContiguousMultiVectorLoad(Node, 
4, 2, AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z); 4688 return; 4689 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4690 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z); 4691 return; 4692 } 4693 break; 4694 } 4695 case Intrinsic::aarch64_sve_ldnt1_pn_x2: { 4696 if (VT == MVT::nxv16i8) { 4697 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z); 4698 return; 4699 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4700 VT == MVT::nxv8bf16) { 4701 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z); 4702 return; 4703 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4704 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z); 4705 return; 4706 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4707 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z); 4708 return; 4709 } 4710 break; 4711 } 4712 case Intrinsic::aarch64_sve_ldnt1_pn_x4: { 4713 if (VT == MVT::nxv16i8) { 4714 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z); 4715 return; 4716 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4717 VT == MVT::nxv8bf16) { 4718 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z); 4719 return; 4720 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4721 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z); 4722 return; 4723 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4724 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z); 4725 return; 4726 } 4727 break; 4728 } 4729 case Intrinsic::aarch64_sve_ld3_sret: { 4730 if (VT == MVT::nxv16i8) { 4731 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, 4732 true); 4733 return; 4734 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4735 VT == MVT::nxv8bf16) { 4736 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H, 4737 true); 4738 return; 4739 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4740 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W, 4741 true); 4742 return; 4743 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4744 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D, 4745 true); 4746 return; 4747 } 4748 break; 4749 } 4750 case Intrinsic::aarch64_sve_ld4_sret: { 4751 if (VT == MVT::nxv16i8) { 4752 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B, 4753 true); 4754 return; 4755 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4756 VT == MVT::nxv8bf16) { 4757 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, 4758 true); 4759 return; 4760 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4761 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W, 4762 true); 4763 return; 4764 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4765 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D, 4766 true); 4767 return; 4768 } 4769 break; 4770 } 4771 case Intrinsic::aarch64_sme_read_hor_vg2: { 4772 if (VT == MVT::nxv16i8) { 4773 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0, 4774 AArch64::MOVA_2ZMXI_H_B); 4775 return; 4776 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4777 VT == MVT::nxv8bf16) { 4778 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0, 4779 AArch64::MOVA_2ZMXI_H_H); 4780 return; 4781 } else if (VT == MVT::nxv4i32 || VT == 
MVT::nxv4f32) { 4782 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0, 4783 AArch64::MOVA_2ZMXI_H_S); 4784 return; 4785 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4786 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0, 4787 AArch64::MOVA_2ZMXI_H_D); 4788 return; 4789 } 4790 break; 4791 } 4792 case Intrinsic::aarch64_sme_read_ver_vg2: { 4793 if (VT == MVT::nxv16i8) { 4794 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0, 4795 AArch64::MOVA_2ZMXI_V_B); 4796 return; 4797 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4798 VT == MVT::nxv8bf16) { 4799 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0, 4800 AArch64::MOVA_2ZMXI_V_H); 4801 return; 4802 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4803 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0, 4804 AArch64::MOVA_2ZMXI_V_S); 4805 return; 4806 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4807 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0, 4808 AArch64::MOVA_2ZMXI_V_D); 4809 return; 4810 } 4811 break; 4812 } 4813 case Intrinsic::aarch64_sme_read_hor_vg4: { 4814 if (VT == MVT::nxv16i8) { 4815 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0, 4816 AArch64::MOVA_4ZMXI_H_B); 4817 return; 4818 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4819 VT == MVT::nxv8bf16) { 4820 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0, 4821 AArch64::MOVA_4ZMXI_H_H); 4822 return; 4823 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4824 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0, 4825 AArch64::MOVA_4ZMXI_H_S); 4826 return; 4827 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4828 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0, 4829 AArch64::MOVA_4ZMXI_H_D); 4830 return; 4831 } 4832 break; 4833 } 4834 case Intrinsic::aarch64_sme_read_ver_vg4: { 4835 if (VT == MVT::nxv16i8) { 4836 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0, 4837 AArch64::MOVA_4ZMXI_V_B); 4838 return; 4839 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4840 VT == MVT::nxv8bf16) { 4841 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0, 4842 AArch64::MOVA_4ZMXI_V_H); 4843 return; 4844 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4845 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0, 4846 AArch64::MOVA_4ZMXI_V_S); 4847 return; 4848 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4849 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0, 4850 AArch64::MOVA_4ZMXI_V_D); 4851 return; 4852 } 4853 break; 4854 } 4855 case Intrinsic::aarch64_sme_read_vg1x2: { 4856 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA, 4857 AArch64::MOVA_VG2_2ZMXI); 4858 return; 4859 } 4860 case Intrinsic::aarch64_sme_read_vg1x4: { 4861 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA, 4862 AArch64::MOVA_VG4_4ZMXI); 4863 return; 4864 } 4865 case Intrinsic::swift_async_context_addr: { 4866 SDLoc DL(Node); 4867 SDValue Chain = Node->getOperand(0); 4868 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64); 4869 SDValue Res = SDValue( 4870 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP, 4871 CurDAG->getTargetConstant(8, DL, MVT::i32), 4872 CurDAG->getTargetConstant(0, DL, MVT::i32)), 4873 0); 4874 ReplaceUses(SDValue(Node, 0), Res); 4875 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1)); 4876 CurDAG->RemoveDeadNode(Node); 4877 4878 auto &MF = CurDAG->getMachineFunction(); 4879 MF.getFrameInfo().setFrameAddressIsTaken(true); 4880 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); 4881 return; 4882 } 4883 } 4884 } break; 4885 case ISD::INTRINSIC_WO_CHAIN: { 4886 
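// For INTRINSIC_WO_CHAIN nodes the intrinsic ID is operand 0; unlike the
// chained intrinsic cases above, there is no chain operand preceding it.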
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 4887 switch (IntNo) { 4888 default: 4889 break; 4890 case Intrinsic::aarch64_tagp: 4891 SelectTagP(Node); 4892 return; 4893 case Intrinsic::aarch64_neon_tbl2: 4894 SelectTable(Node, 2, 4895 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, 4896 false); 4897 return; 4898 case Intrinsic::aarch64_neon_tbl3: 4899 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 4900 : AArch64::TBLv16i8Three, 4901 false); 4902 return; 4903 case Intrinsic::aarch64_neon_tbl4: 4904 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 4905 : AArch64::TBLv16i8Four, 4906 false); 4907 return; 4908 case Intrinsic::aarch64_neon_tbx2: 4909 SelectTable(Node, 2, 4910 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, 4911 true); 4912 return; 4913 case Intrinsic::aarch64_neon_tbx3: 4914 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 4915 : AArch64::TBXv16i8Three, 4916 true); 4917 return; 4918 case Intrinsic::aarch64_neon_tbx4: 4919 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four 4920 : AArch64::TBXv16i8Four, 4921 true); 4922 return; 4923 case Intrinsic::aarch64_sve_srshl_single_x2: 4924 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 4925 Node->getValueType(0), 4926 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H, 4927 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D})) 4928 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 4929 return; 4930 case Intrinsic::aarch64_sve_srshl_single_x4: 4931 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 4932 Node->getValueType(0), 4933 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H, 4934 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D})) 4935 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 4936 return; 4937 case Intrinsic::aarch64_sve_urshl_single_x2: 4938 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 4939 Node->getValueType(0), 4940 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H, 4941 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D})) 4942 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 4943 return; 4944 case Intrinsic::aarch64_sve_urshl_single_x4: 4945 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 4946 Node->getValueType(0), 4947 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H, 4948 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D})) 4949 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 4950 return; 4951 case Intrinsic::aarch64_sve_srshl_x2: 4952 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 4953 Node->getValueType(0), 4954 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H, 4955 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D})) 4956 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 4957 return; 4958 case Intrinsic::aarch64_sve_srshl_x4: 4959 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 4960 Node->getValueType(0), 4961 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H, 4962 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D})) 4963 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 4964 return; 4965 case Intrinsic::aarch64_sve_urshl_x2: 4966 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 4967 Node->getValueType(0), 4968 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H, 4969 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D})) 4970 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 4971 return; 4972 case Intrinsic::aarch64_sve_urshl_x4: 4973 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 4974 
Node->getValueType(0), 4975 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H, 4976 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D})) 4977 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 4978 return; 4979 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2: 4980 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 4981 Node->getValueType(0), 4982 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H, 4983 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D})) 4984 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 4985 return; 4986 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4: 4987 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 4988 Node->getValueType(0), 4989 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H, 4990 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D})) 4991 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 4992 return; 4993 case Intrinsic::aarch64_sve_sqdmulh_vgx2: 4994 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 4995 Node->getValueType(0), 4996 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H, 4997 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D})) 4998 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 4999 return; 5000 case Intrinsic::aarch64_sve_sqdmulh_vgx4: 5001 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5002 Node->getValueType(0), 5003 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H, 5004 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D})) 5005 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5006 return; 5007 case Intrinsic::aarch64_sve_whilege_x2: 5008 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5009 Node->getValueType(0), 5010 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H, 5011 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D})) 5012 SelectWhilePair(Node, Op); 5013 return; 5014 case Intrinsic::aarch64_sve_whilegt_x2: 5015 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5016 Node->getValueType(0), 5017 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H, 5018 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D})) 5019 SelectWhilePair(Node, Op); 5020 return; 5021 case Intrinsic::aarch64_sve_whilehi_x2: 5022 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5023 Node->getValueType(0), 5024 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H, 5025 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D})) 5026 SelectWhilePair(Node, Op); 5027 return; 5028 case Intrinsic::aarch64_sve_whilehs_x2: 5029 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5030 Node->getValueType(0), 5031 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H, 5032 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D})) 5033 SelectWhilePair(Node, Op); 5034 return; 5035 case Intrinsic::aarch64_sve_whilele_x2: 5036 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5037 Node->getValueType(0), 5038 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H, 5039 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D})) 5040 SelectWhilePair(Node, Op); 5041 return; 5042 case Intrinsic::aarch64_sve_whilelo_x2: 5043 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5044 Node->getValueType(0), 5045 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H, 5046 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D})) 5047 SelectWhilePair(Node, Op); 5048 return; 5049 case Intrinsic::aarch64_sve_whilels_x2: 5050 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5051 Node->getValueType(0), 5052 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H, 5053 AArch64::WHILELS_2PXX_S, 
AArch64::WHILELS_2PXX_D})) 5054 SelectWhilePair(Node, Op); 5055 return; 5056 case Intrinsic::aarch64_sve_whilelt_x2: 5057 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 5058 Node->getValueType(0), 5059 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H, 5060 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D})) 5061 SelectWhilePair(Node, Op); 5062 return; 5063 case Intrinsic::aarch64_sve_smax_single_x2: 5064 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5065 Node->getValueType(0), 5066 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H, 5067 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D})) 5068 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5069 return; 5070 case Intrinsic::aarch64_sve_umax_single_x2: 5071 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5072 Node->getValueType(0), 5073 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H, 5074 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D})) 5075 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5076 return; 5077 case Intrinsic::aarch64_sve_fmax_single_x2: 5078 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5079 Node->getValueType(0), 5080 {0, AArch64::FMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_S, 5081 AArch64::FMAX_VG2_2ZZ_D})) 5082 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5083 return; 5084 case Intrinsic::aarch64_sve_smax_single_x4: 5085 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5086 Node->getValueType(0), 5087 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H, 5088 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D})) 5089 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5090 return; 5091 case Intrinsic::aarch64_sve_umax_single_x4: 5092 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5093 Node->getValueType(0), 5094 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H, 5095 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D})) 5096 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5097 return; 5098 case Intrinsic::aarch64_sve_fmax_single_x4: 5099 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5100 Node->getValueType(0), 5101 {0, AArch64::FMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_S, 5102 AArch64::FMAX_VG4_4ZZ_D})) 5103 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5104 return; 5105 case Intrinsic::aarch64_sve_smin_single_x2: 5106 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5107 Node->getValueType(0), 5108 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H, 5109 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D})) 5110 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5111 return; 5112 case Intrinsic::aarch64_sve_umin_single_x2: 5113 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5114 Node->getValueType(0), 5115 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H, 5116 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D})) 5117 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5118 return; 5119 case Intrinsic::aarch64_sve_fmin_single_x2: 5120 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5121 Node->getValueType(0), 5122 {0, AArch64::FMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_S, 5123 AArch64::FMIN_VG2_2ZZ_D})) 5124 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5125 return; 5126 case Intrinsic::aarch64_sve_smin_single_x4: 5127 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5128 Node->getValueType(0), 5129 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H, 5130 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D})) 5131 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5132 return; 5133 case Intrinsic::aarch64_sve_umin_single_x4: 
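// As with the preceding single-x4 cases: pick the UMIN 4ZZ opcode that
// matches the element type, then emit the four-register destructive form.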
5134 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5135 Node->getValueType(0), 5136 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H, 5137 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D})) 5138 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5139 return; 5140 case Intrinsic::aarch64_sve_fmin_single_x4: 5141 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5142 Node->getValueType(0), 5143 {0, AArch64::FMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_S, 5144 AArch64::FMIN_VG4_4ZZ_D})) 5145 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5146 return; 5147 case Intrinsic::aarch64_sve_smax_x2: 5148 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5149 Node->getValueType(0), 5150 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H, 5151 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D})) 5152 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5153 return; 5154 case Intrinsic::aarch64_sve_umax_x2: 5155 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5156 Node->getValueType(0), 5157 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H, 5158 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D})) 5159 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5160 return; 5161 case Intrinsic::aarch64_sve_fmax_x2: 5162 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5163 Node->getValueType(0), 5164 {0, AArch64::FMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_S, 5165 AArch64::FMAX_VG2_2Z2Z_D})) 5166 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5167 return; 5168 case Intrinsic::aarch64_sve_smax_x4: 5169 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5170 Node->getValueType(0), 5171 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H, 5172 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D})) 5173 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5174 return; 5175 case Intrinsic::aarch64_sve_umax_x4: 5176 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5177 Node->getValueType(0), 5178 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H, 5179 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D})) 5180 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5181 return; 5182 case Intrinsic::aarch64_sve_fmax_x4: 5183 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5184 Node->getValueType(0), 5185 {0, AArch64::FMAX_VG4_4Z4Z_H, AArch64::FMAX_VG4_4Z4Z_S, 5186 AArch64::FMAX_VG4_4Z4Z_D})) 5187 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5188 return; 5189 case Intrinsic::aarch64_sve_smin_x2: 5190 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5191 Node->getValueType(0), 5192 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H, 5193 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D})) 5194 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5195 return; 5196 case Intrinsic::aarch64_sve_umin_x2: 5197 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5198 Node->getValueType(0), 5199 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H, 5200 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D})) 5201 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5202 return; 5203 case Intrinsic::aarch64_sve_fmin_x2: 5204 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5205 Node->getValueType(0), 5206 {0, AArch64::FMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_S, 5207 AArch64::FMIN_VG2_2Z2Z_D})) 5208 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5209 return; 5210 case Intrinsic::aarch64_sve_smin_x4: 5211 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5212 Node->getValueType(0), 5213 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H, 5214 
AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D})) 5215 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5216 return; 5217 case Intrinsic::aarch64_sve_umin_x4: 5218 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5219 Node->getValueType(0), 5220 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H, 5221 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D})) 5222 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5223 return; 5224 case Intrinsic::aarch64_sve_fmin_x4: 5225 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5226 Node->getValueType(0), 5227 {0, AArch64::FMIN_VG4_4Z4Z_H, AArch64::FMIN_VG4_4Z4Z_S, 5228 AArch64::FMIN_VG4_4Z4Z_D})) 5229 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5230 return; 5231 case Intrinsic::aarch64_sve_fmaxnm_single_x2 : 5232 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5233 Node->getValueType(0), 5234 {0, AArch64::FMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_S, 5235 AArch64::FMAXNM_VG2_2ZZ_D})) 5236 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5237 return; 5238 case Intrinsic::aarch64_sve_fmaxnm_single_x4 : 5239 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5240 Node->getValueType(0), 5241 {0, AArch64::FMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_S, 5242 AArch64::FMAXNM_VG4_4ZZ_D})) 5243 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5244 return; 5245 case Intrinsic::aarch64_sve_fminnm_single_x2: 5246 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5247 Node->getValueType(0), 5248 {0, AArch64::FMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_S, 5249 AArch64::FMINNM_VG2_2ZZ_D})) 5250 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5251 return; 5252 case Intrinsic::aarch64_sve_fminnm_single_x4: 5253 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5254 Node->getValueType(0), 5255 {0, AArch64::FMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_S, 5256 AArch64::FMINNM_VG4_4ZZ_D})) 5257 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5258 return; 5259 case Intrinsic::aarch64_sve_fmaxnm_x2: 5260 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5261 Node->getValueType(0), 5262 {0, AArch64::FMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_S, 5263 AArch64::FMAXNM_VG2_2Z2Z_D})) 5264 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5265 return; 5266 case Intrinsic::aarch64_sve_fmaxnm_x4: 5267 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5268 Node->getValueType(0), 5269 {0, AArch64::FMAXNM_VG4_4Z4Z_H, AArch64::FMAXNM_VG4_4Z4Z_S, 5270 AArch64::FMAXNM_VG4_4Z4Z_D})) 5271 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5272 return; 5273 case Intrinsic::aarch64_sve_fminnm_x2: 5274 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5275 Node->getValueType(0), 5276 {0, AArch64::FMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_S, 5277 AArch64::FMINNM_VG2_2Z2Z_D})) 5278 SelectDestructiveMultiIntrinsic(Node, 2, true, Op); 5279 return; 5280 case Intrinsic::aarch64_sve_fminnm_x4: 5281 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5282 Node->getValueType(0), 5283 {0, AArch64::FMINNM_VG4_4Z4Z_H, AArch64::FMINNM_VG4_4Z4Z_S, 5284 AArch64::FMINNM_VG4_4Z4Z_D})) 5285 SelectDestructiveMultiIntrinsic(Node, 4, true, Op); 5286 return; 5287 case Intrinsic::aarch64_sve_fcvts_x2: 5288 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); 5289 return; 5290 case Intrinsic::aarch64_sve_scvtf_x2: 5291 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS); 5292 return; 5293 case Intrinsic::aarch64_sve_fcvtu_x2: 5294 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS); 5295 return; 5296 case 
Intrinsic::aarch64_sve_ucvtf_x2: 5297 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS); 5298 return; 5299 case Intrinsic::aarch64_sve_fcvts_x4: 5300 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS); 5301 return; 5302 case Intrinsic::aarch64_sve_scvtf_x4: 5303 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS); 5304 return; 5305 case Intrinsic::aarch64_sve_fcvtu_x4: 5306 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS); 5307 return; 5308 case Intrinsic::aarch64_sve_ucvtf_x4: 5309 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS); 5310 return; 5311 case Intrinsic::aarch64_sve_sclamp_single_x2: 5312 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5313 Node->getValueType(0), 5314 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H, 5315 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D})) 5316 SelectClamp(Node, 2, Op); 5317 return; 5318 case Intrinsic::aarch64_sve_uclamp_single_x2: 5319 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5320 Node->getValueType(0), 5321 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H, 5322 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D})) 5323 SelectClamp(Node, 2, Op); 5324 return; 5325 case Intrinsic::aarch64_sve_fclamp_single_x2: 5326 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5327 Node->getValueType(0), 5328 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S, 5329 AArch64::FCLAMP_VG2_2Z2Z_D})) 5330 SelectClamp(Node, 2, Op); 5331 return; 5332 case Intrinsic::aarch64_sve_sclamp_single_x4: 5333 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5334 Node->getValueType(0), 5335 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H, 5336 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D})) 5337 SelectClamp(Node, 4, Op); 5338 return; 5339 case Intrinsic::aarch64_sve_uclamp_single_x4: 5340 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5341 Node->getValueType(0), 5342 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H, 5343 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D})) 5344 SelectClamp(Node, 4, Op); 5345 return; 5346 case Intrinsic::aarch64_sve_fclamp_single_x4: 5347 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>( 5348 Node->getValueType(0), 5349 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S, 5350 AArch64::FCLAMP_VG4_4Z4Z_D})) 5351 SelectClamp(Node, 4, Op); 5352 return; 5353 case Intrinsic::aarch64_sve_add_single_x2: 5354 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5355 Node->getValueType(0), 5356 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H, 5357 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D})) 5358 SelectDestructiveMultiIntrinsic(Node, 2, false, Op); 5359 return; 5360 case Intrinsic::aarch64_sve_add_single_x4: 5361 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5362 Node->getValueType(0), 5363 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H, 5364 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D})) 5365 SelectDestructiveMultiIntrinsic(Node, 4, false, Op); 5366 return; 5367 case Intrinsic::aarch64_sve_zip_x2: 5368 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5369 Node->getValueType(0), 5370 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H, 5371 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D})) 5372 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 5373 return; 5374 case Intrinsic::aarch64_sve_zipq_x2: 5375 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, 5376 AArch64::ZIP_VG2_2ZZZ_Q); 5377 return; 5378 case Intrinsic::aarch64_sve_zip_x4: 5379 if (auto Op = 
SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5380 Node->getValueType(0), 5381 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H, 5382 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D})) 5383 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 5384 return; 5385 case Intrinsic::aarch64_sve_zipq_x4: 5386 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, 5387 AArch64::ZIP_VG4_4Z4Z_Q); 5388 return; 5389 case Intrinsic::aarch64_sve_uzp_x2: 5390 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5391 Node->getValueType(0), 5392 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H, 5393 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D})) 5394 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 5395 return; 5396 case Intrinsic::aarch64_sve_uzpq_x2: 5397 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, 5398 AArch64::UZP_VG2_2ZZZ_Q); 5399 return; 5400 case Intrinsic::aarch64_sve_uzp_x4: 5401 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5402 Node->getValueType(0), 5403 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H, 5404 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D})) 5405 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 5406 return; 5407 case Intrinsic::aarch64_sve_uzpq_x4: 5408 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, 5409 AArch64::UZP_VG4_4Z4Z_Q); 5410 return; 5411 case Intrinsic::aarch64_sve_sel_x2: 5412 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5413 Node->getValueType(0), 5414 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H, 5415 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D})) 5416 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true); 5417 return; 5418 case Intrinsic::aarch64_sve_sel_x4: 5419 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5420 Node->getValueType(0), 5421 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H, 5422 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D})) 5423 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true); 5424 return; 5425 case Intrinsic::aarch64_sve_frinta_x2: 5426 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S); 5427 return; 5428 case Intrinsic::aarch64_sve_frinta_x4: 5429 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S); 5430 return; 5431 case Intrinsic::aarch64_sve_frintm_x2: 5432 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S); 5433 return; 5434 case Intrinsic::aarch64_sve_frintm_x4: 5435 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S); 5436 return; 5437 case Intrinsic::aarch64_sve_frintn_x2: 5438 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S); 5439 return; 5440 case Intrinsic::aarch64_sve_frintn_x4: 5441 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S); 5442 return; 5443 case Intrinsic::aarch64_sve_frintp_x2: 5444 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S); 5445 return; 5446 case Intrinsic::aarch64_sve_frintp_x4: 5447 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S); 5448 return; 5449 case Intrinsic::aarch64_sve_sunpk_x2: 5450 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5451 Node->getValueType(0), 5452 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S, 5453 AArch64::SUNPK_VG2_2ZZ_D})) 5454 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); 5455 return; 5456 case Intrinsic::aarch64_sve_uunpk_x2: 5457 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5458 Node->getValueType(0), 5459 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S, 5460 AArch64::UUNPK_VG2_2ZZ_D})) 5461 SelectUnaryMultiIntrinsic(Node, 2, 
/*IsTupleInput=*/false, Op); 5462 return; 5463 case Intrinsic::aarch64_sve_sunpk_x4: 5464 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5465 Node->getValueType(0), 5466 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S, 5467 AArch64::SUNPK_VG4_4Z2Z_D})) 5468 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 5469 return; 5470 case Intrinsic::aarch64_sve_uunpk_x4: 5471 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( 5472 Node->getValueType(0), 5473 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S, 5474 AArch64::UUNPK_VG4_4Z2Z_D})) 5475 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); 5476 return; 5477 case Intrinsic::aarch64_sve_pext_x2: { 5478 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>( 5479 Node->getValueType(0), 5480 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S, 5481 AArch64::PEXT_2PCI_D})) 5482 SelectPExtPair(Node, Op); 5483 return; 5484 } 5485 } 5486 break; 5487 } 5488 case ISD::INTRINSIC_VOID: { 5489 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 5490 if (Node->getNumOperands() >= 3) 5491 VT = Node->getOperand(2)->getValueType(0); 5492 switch (IntNo) { 5493 default: 5494 break; 5495 case Intrinsic::aarch64_neon_st1x2: { 5496 if (VT == MVT::v8i8) { 5497 SelectStore(Node, 2, AArch64::ST1Twov8b); 5498 return; 5499 } else if (VT == MVT::v16i8) { 5500 SelectStore(Node, 2, AArch64::ST1Twov16b); 5501 return; 5502 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5503 VT == MVT::v4bf16) { 5504 SelectStore(Node, 2, AArch64::ST1Twov4h); 5505 return; 5506 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5507 VT == MVT::v8bf16) { 5508 SelectStore(Node, 2, AArch64::ST1Twov8h); 5509 return; 5510 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5511 SelectStore(Node, 2, AArch64::ST1Twov2s); 5512 return; 5513 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5514 SelectStore(Node, 2, AArch64::ST1Twov4s); 5515 return; 5516 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5517 SelectStore(Node, 2, AArch64::ST1Twov2d); 5518 return; 5519 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5520 SelectStore(Node, 2, AArch64::ST1Twov1d); 5521 return; 5522 } 5523 break; 5524 } 5525 case Intrinsic::aarch64_neon_st1x3: { 5526 if (VT == MVT::v8i8) { 5527 SelectStore(Node, 3, AArch64::ST1Threev8b); 5528 return; 5529 } else if (VT == MVT::v16i8) { 5530 SelectStore(Node, 3, AArch64::ST1Threev16b); 5531 return; 5532 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5533 VT == MVT::v4bf16) { 5534 SelectStore(Node, 3, AArch64::ST1Threev4h); 5535 return; 5536 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5537 VT == MVT::v8bf16) { 5538 SelectStore(Node, 3, AArch64::ST1Threev8h); 5539 return; 5540 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5541 SelectStore(Node, 3, AArch64::ST1Threev2s); 5542 return; 5543 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5544 SelectStore(Node, 3, AArch64::ST1Threev4s); 5545 return; 5546 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5547 SelectStore(Node, 3, AArch64::ST1Threev2d); 5548 return; 5549 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5550 SelectStore(Node, 3, AArch64::ST1Threev1d); 5551 return; 5552 } 5553 break; 5554 } 5555 case Intrinsic::aarch64_neon_st1x4: { 5556 if (VT == MVT::v8i8) { 5557 SelectStore(Node, 4, AArch64::ST1Fourv8b); 5558 return; 5559 } else if (VT == MVT::v16i8) { 5560 SelectStore(Node, 4, AArch64::ST1Fourv16b); 5561 return; 5562 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5563 VT == MVT::v4bf16) { 
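// 64-bit vectors with 16-bit elements (v4i16/v4f16/v4bf16) use the
// D-register four-vector ST1 form.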
5564 SelectStore(Node, 4, AArch64::ST1Fourv4h); 5565 return; 5566 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5567 VT == MVT::v8bf16) { 5568 SelectStore(Node, 4, AArch64::ST1Fourv8h); 5569 return; 5570 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5571 SelectStore(Node, 4, AArch64::ST1Fourv2s); 5572 return; 5573 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5574 SelectStore(Node, 4, AArch64::ST1Fourv4s); 5575 return; 5576 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5577 SelectStore(Node, 4, AArch64::ST1Fourv2d); 5578 return; 5579 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5580 SelectStore(Node, 4, AArch64::ST1Fourv1d); 5581 return; 5582 } 5583 break; 5584 } 5585 case Intrinsic::aarch64_neon_st2: { 5586 if (VT == MVT::v8i8) { 5587 SelectStore(Node, 2, AArch64::ST2Twov8b); 5588 return; 5589 } else if (VT == MVT::v16i8) { 5590 SelectStore(Node, 2, AArch64::ST2Twov16b); 5591 return; 5592 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5593 VT == MVT::v4bf16) { 5594 SelectStore(Node, 2, AArch64::ST2Twov4h); 5595 return; 5596 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5597 VT == MVT::v8bf16) { 5598 SelectStore(Node, 2, AArch64::ST2Twov8h); 5599 return; 5600 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5601 SelectStore(Node, 2, AArch64::ST2Twov2s); 5602 return; 5603 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5604 SelectStore(Node, 2, AArch64::ST2Twov4s); 5605 return; 5606 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5607 SelectStore(Node, 2, AArch64::ST2Twov2d); 5608 return; 5609 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5610 SelectStore(Node, 2, AArch64::ST1Twov1d); 5611 return; 5612 } 5613 break; 5614 } 5615 case Intrinsic::aarch64_neon_st3: { 5616 if (VT == MVT::v8i8) { 5617 SelectStore(Node, 3, AArch64::ST3Threev8b); 5618 return; 5619 } else if (VT == MVT::v16i8) { 5620 SelectStore(Node, 3, AArch64::ST3Threev16b); 5621 return; 5622 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5623 VT == MVT::v4bf16) { 5624 SelectStore(Node, 3, AArch64::ST3Threev4h); 5625 return; 5626 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5627 VT == MVT::v8bf16) { 5628 SelectStore(Node, 3, AArch64::ST3Threev8h); 5629 return; 5630 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5631 SelectStore(Node, 3, AArch64::ST3Threev2s); 5632 return; 5633 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5634 SelectStore(Node, 3, AArch64::ST3Threev4s); 5635 return; 5636 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5637 SelectStore(Node, 3, AArch64::ST3Threev2d); 5638 return; 5639 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5640 SelectStore(Node, 3, AArch64::ST1Threev1d); 5641 return; 5642 } 5643 break; 5644 } 5645 case Intrinsic::aarch64_neon_st4: { 5646 if (VT == MVT::v8i8) { 5647 SelectStore(Node, 4, AArch64::ST4Fourv8b); 5648 return; 5649 } else if (VT == MVT::v16i8) { 5650 SelectStore(Node, 4, AArch64::ST4Fourv16b); 5651 return; 5652 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 5653 VT == MVT::v4bf16) { 5654 SelectStore(Node, 4, AArch64::ST4Fourv4h); 5655 return; 5656 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 5657 VT == MVT::v8bf16) { 5658 SelectStore(Node, 4, AArch64::ST4Fourv8h); 5659 return; 5660 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5661 SelectStore(Node, 4, AArch64::ST4Fourv2s); 5662 return; 5663 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5664 SelectStore(Node, 4, AArch64::ST4Fourv4s); 5665 return; 5666 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5667 SelectStore(Node, 4, 
AArch64::ST4Fourv2d); 5668 return; 5669 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5670 SelectStore(Node, 4, AArch64::ST1Fourv1d); 5671 return; 5672 } 5673 break; 5674 } 5675 case Intrinsic::aarch64_neon_st2lane: { 5676 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5677 SelectStoreLane(Node, 2, AArch64::ST2i8); 5678 return; 5679 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5680 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5681 SelectStoreLane(Node, 2, AArch64::ST2i16); 5682 return; 5683 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5684 VT == MVT::v2f32) { 5685 SelectStoreLane(Node, 2, AArch64::ST2i32); 5686 return; 5687 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5688 VT == MVT::v1f64) { 5689 SelectStoreLane(Node, 2, AArch64::ST2i64); 5690 return; 5691 } 5692 break; 5693 } 5694 case Intrinsic::aarch64_neon_st3lane: { 5695 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5696 SelectStoreLane(Node, 3, AArch64::ST3i8); 5697 return; 5698 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5699 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5700 SelectStoreLane(Node, 3, AArch64::ST3i16); 5701 return; 5702 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5703 VT == MVT::v2f32) { 5704 SelectStoreLane(Node, 3, AArch64::ST3i32); 5705 return; 5706 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5707 VT == MVT::v1f64) { 5708 SelectStoreLane(Node, 3, AArch64::ST3i64); 5709 return; 5710 } 5711 break; 5712 } 5713 case Intrinsic::aarch64_neon_st4lane: { 5714 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5715 SelectStoreLane(Node, 4, AArch64::ST4i8); 5716 return; 5717 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5718 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5719 SelectStoreLane(Node, 4, AArch64::ST4i16); 5720 return; 5721 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5722 VT == MVT::v2f32) { 5723 SelectStoreLane(Node, 4, AArch64::ST4i32); 5724 return; 5725 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5726 VT == MVT::v1f64) { 5727 SelectStoreLane(Node, 4, AArch64::ST4i64); 5728 return; 5729 } 5730 break; 5731 } 5732 case Intrinsic::aarch64_sve_st2: { 5733 if (VT == MVT::nxv16i8) { 5734 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); 5735 return; 5736 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5737 VT == MVT::nxv8bf16) { 5738 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); 5739 return; 5740 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5741 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); 5742 return; 5743 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5744 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); 5745 return; 5746 } 5747 break; 5748 } 5749 case Intrinsic::aarch64_sve_st3: { 5750 if (VT == MVT::nxv16i8) { 5751 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); 5752 return; 5753 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5754 VT == MVT::nxv8bf16) { 5755 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); 5756 return; 5757 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5758 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); 5759 return; 5760 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5761 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, 
AArch64::ST3D_IMM); 5762 return; 5763 } 5764 break; 5765 } 5766 case Intrinsic::aarch64_sve_st4: { 5767 if (VT == MVT::nxv16i8) { 5768 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); 5769 return; 5770 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5771 VT == MVT::nxv8bf16) { 5772 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); 5773 return; 5774 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5775 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); 5776 return; 5777 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5778 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM); 5779 return; 5780 } 5781 break; 5782 } 5783 } 5784 break; 5785 } 5786 case AArch64ISD::LD2post: { 5787 if (VT == MVT::v8i8) { 5788 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 5789 return; 5790 } else if (VT == MVT::v16i8) { 5791 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 5792 return; 5793 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5794 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 5795 return; 5796 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5797 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 5798 return; 5799 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5800 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 5801 return; 5802 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5803 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 5804 return; 5805 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5806 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 5807 return; 5808 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5809 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 5810 return; 5811 } 5812 break; 5813 } 5814 case AArch64ISD::LD3post: { 5815 if (VT == MVT::v8i8) { 5816 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 5817 return; 5818 } else if (VT == MVT::v16i8) { 5819 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 5820 return; 5821 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5822 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 5823 return; 5824 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5825 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 5826 return; 5827 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5828 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 5829 return; 5830 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5831 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 5832 return; 5833 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5834 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 5835 return; 5836 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5837 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 5838 return; 5839 } 5840 break; 5841 } 5842 case AArch64ISD::LD4post: { 5843 if (VT == MVT::v8i8) { 5844 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 5845 return; 5846 } else if (VT == MVT::v16i8) { 5847 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 5848 return; 5849 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5850 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 5851 return; 5852 } else if (VT == 
MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5853 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 5854 return; 5855 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5856 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 5857 return; 5858 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5859 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 5860 return; 5861 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5862 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 5863 return; 5864 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5865 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 5866 return; 5867 } 5868 break; 5869 } 5870 case AArch64ISD::LD1x2post: { 5871 if (VT == MVT::v8i8) { 5872 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 5873 return; 5874 } else if (VT == MVT::v16i8) { 5875 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 5876 return; 5877 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5878 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 5879 return; 5880 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5881 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 5882 return; 5883 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5884 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 5885 return; 5886 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5887 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 5888 return; 5889 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5890 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 5891 return; 5892 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5893 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 5894 return; 5895 } 5896 break; 5897 } 5898 case AArch64ISD::LD1x3post: { 5899 if (VT == MVT::v8i8) { 5900 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 5901 return; 5902 } else if (VT == MVT::v16i8) { 5903 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 5904 return; 5905 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5906 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 5907 return; 5908 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5909 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 5910 return; 5911 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5912 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 5913 return; 5914 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5915 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 5916 return; 5917 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5918 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 5919 return; 5920 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5921 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 5922 return; 5923 } 5924 break; 5925 } 5926 case AArch64ISD::LD1x4post: { 5927 if (VT == MVT::v8i8) { 5928 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 5929 return; 5930 } else if (VT == MVT::v16i8) { 5931 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); 5932 return; 5933 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5934 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 5935 return; 5936 } else if (VT == 
MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5937 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 5938 return; 5939 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5940 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 5941 return; 5942 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5943 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 5944 return; 5945 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5946 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 5947 return; 5948 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5949 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 5950 return; 5951 } 5952 break; 5953 } 5954 case AArch64ISD::LD1DUPpost: { 5955 if (VT == MVT::v8i8) { 5956 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 5957 return; 5958 } else if (VT == MVT::v16i8) { 5959 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 5960 return; 5961 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5962 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 5963 return; 5964 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5965 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 5966 return; 5967 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5968 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 5969 return; 5970 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5971 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 5972 return; 5973 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5974 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 5975 return; 5976 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5977 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 5978 return; 5979 } 5980 break; 5981 } 5982 case AArch64ISD::LD2DUPpost: { 5983 if (VT == MVT::v8i8) { 5984 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 5985 return; 5986 } else if (VT == MVT::v16i8) { 5987 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 5988 return; 5989 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5990 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 5991 return; 5992 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5993 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 5994 return; 5995 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5996 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 5997 return; 5998 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5999 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 6000 return; 6001 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6002 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 6003 return; 6004 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6005 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 6006 return; 6007 } 6008 break; 6009 } 6010 case AArch64ISD::LD3DUPpost: { 6011 if (VT == MVT::v8i8) { 6012 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 6013 return; 6014 } else if (VT == MVT::v16i8) { 6015 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 6016 return; 6017 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6018 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); 6019 return; 6020 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 
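// 128-bit vectors with 16-bit elements use the Q-register replicating
// LD3R post-increment form.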
6021 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 6022 return; 6023 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6024 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 6025 return; 6026 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6027 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 6028 return; 6029 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6030 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 6031 return; 6032 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6033 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 6034 return; 6035 } 6036 break; 6037 } 6038 case AArch64ISD::LD4DUPpost: { 6039 if (VT == MVT::v8i8) { 6040 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 6041 return; 6042 } else if (VT == MVT::v16i8) { 6043 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 6044 return; 6045 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6046 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 6047 return; 6048 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6049 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 6050 return; 6051 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6052 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 6053 return; 6054 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6055 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 6056 return; 6057 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6058 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 6059 return; 6060 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6061 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 6062 return; 6063 } 6064 break; 6065 } 6066 case AArch64ISD::LD1LANEpost: { 6067 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 6068 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 6069 return; 6070 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 6071 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 6072 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 6073 return; 6074 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 6075 VT == MVT::v2f32) { 6076 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 6077 return; 6078 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 6079 VT == MVT::v1f64) { 6080 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); 6081 return; 6082 } 6083 break; 6084 } 6085 case AArch64ISD::LD2LANEpost: { 6086 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 6087 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); 6088 return; 6089 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 6090 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 6091 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); 6092 return; 6093 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 6094 VT == MVT::v2f32) { 6095 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); 6096 return; 6097 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 6098 VT == MVT::v1f64) { 6099 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); 6100 return; 6101 } 6102 break; 6103 } 6104 case AArch64ISD::LD3LANEpost: { 6105 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 6106 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); 6107 return; 6108 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 6109 VT == MVT::v8f16 || VT == MVT::v4bf16 
|| VT == MVT::v8bf16) { 6110 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); 6111 return; 6112 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 6113 VT == MVT::v2f32) { 6114 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); 6115 return; 6116 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 6117 VT == MVT::v1f64) { 6118 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); 6119 return; 6120 } 6121 break; 6122 } 6123 case AArch64ISD::LD4LANEpost: { 6124 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 6125 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); 6126 return; 6127 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 6128 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 6129 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); 6130 return; 6131 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 6132 VT == MVT::v2f32) { 6133 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); 6134 return; 6135 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 6136 VT == MVT::v1f64) { 6137 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); 6138 return; 6139 } 6140 break; 6141 } 6142 case AArch64ISD::ST2post: { 6143 VT = Node->getOperand(1).getValueType(); 6144 if (VT == MVT::v8i8) { 6145 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); 6146 return; 6147 } else if (VT == MVT::v16i8) { 6148 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); 6149 return; 6150 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6151 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); 6152 return; 6153 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6154 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); 6155 return; 6156 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6157 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); 6158 return; 6159 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6160 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); 6161 return; 6162 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6163 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); 6164 return; 6165 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6166 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 6167 return; 6168 } 6169 break; 6170 } 6171 case AArch64ISD::ST3post: { 6172 VT = Node->getOperand(1).getValueType(); 6173 if (VT == MVT::v8i8) { 6174 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); 6175 return; 6176 } else if (VT == MVT::v16i8) { 6177 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); 6178 return; 6179 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6180 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); 6181 return; 6182 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6183 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); 6184 return; 6185 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6186 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); 6187 return; 6188 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6189 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); 6190 return; 6191 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6192 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); 6193 return; 6194 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6195 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 6196 return; 6197 } 6198 break; 6199 } 6200 case AArch64ISD::ST4post: { 6201 VT = Node->getOperand(1).getValueType(); 6202 if (VT == MVT::v8i8) { 6203 SelectPostStore(Node, 4, 
AArch64::ST4Fourv8b_POST); 6204 return; 6205 } else if (VT == MVT::v16i8) { 6206 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); 6207 return; 6208 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6209 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); 6210 return; 6211 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6212 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); 6213 return; 6214 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6215 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); 6216 return; 6217 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6218 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); 6219 return; 6220 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6221 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); 6222 return; 6223 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6224 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 6225 return; 6226 } 6227 break; 6228 } 6229 case AArch64ISD::ST1x2post: { 6230 VT = Node->getOperand(1).getValueType(); 6231 if (VT == MVT::v8i8) { 6232 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); 6233 return; 6234 } else if (VT == MVT::v16i8) { 6235 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); 6236 return; 6237 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6238 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); 6239 return; 6240 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 6241 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); 6242 return; 6243 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6244 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); 6245 return; 6246 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6247 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); 6248 return; 6249 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6250 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 6251 return; 6252 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6253 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); 6254 return; 6255 } 6256 break; 6257 } 6258 case AArch64ISD::ST1x3post: { 6259 VT = Node->getOperand(1).getValueType(); 6260 if (VT == MVT::v8i8) { 6261 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); 6262 return; 6263 } else if (VT == MVT::v16i8) { 6264 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); 6265 return; 6266 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 6267 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); 6268 return; 6269 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) { 6270 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); 6271 return; 6272 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 6273 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); 6274 return; 6275 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 6276 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); 6277 return; 6278 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 6279 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 6280 return; 6281 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 6282 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); 6283 return; 6284 } 6285 break; 6286 } 6287 case AArch64ISD::ST1x4post: { 6288 VT = Node->getOperand(1).getValueType(); 6289 if (VT == MVT::v8i8) { 6290 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); 6291 return; 6292 } else if (VT == MVT::v16i8) { 6293 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); 6294 return; 6295 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 
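// 64-bit vectors with 16-bit elements use the D-register four-vector
// ST1 post-increment form.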
      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM,
                           AArch64::LD2W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated with a memory operation in \p
/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
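///
/// Illustrative sketch (editorial example, not exercised by the code itself):
/// for a structured SVE load such as AArch64ISD::SVE_LD2_MERGE_ZERO whose
/// governing predicate has type nxv4i1, the helper above computes
///   getPackedVectorTypeFromPredicateType(Ctx, MVT::nxv4i1, /*NumVec=*/2)
///     == nxv8i32   // 128 / 4 = 32-bit elements, 4 x 2 elements per block
/// and that is the EVT this function reports as the memory type.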
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
    return EVT();

  switch (cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue()) {
  default:
    return EVT();
  case Intrinsic::aarch64_sme_ldr:
  case Intrinsic::aarch64_sme_str:
    return MVT::nxv16i8;
  case Intrinsic::aarch64_sve_prf:
    // We are using an SVE prefetch intrinsic. Type must be inferred from the
    // width of the predicate.
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
  case Intrinsic::aarch64_sve_ld2_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_ld3_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_ld4_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
  }
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
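    // A matching frame index is selected as [FI, #0]; for SVE stack objects
    // the actual VL-scaled displacement is expected to be filled in later in
    // the backend, when the frame index is eliminated.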
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
      return true;
    }

    return false;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8-bit data does not come with an SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
    if (ImmOff % Size)
      return false;

    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
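  // For example (illustrative), with Scale == 1 a 16-bit access of the form
  // (add %base, (shl %idx, 1)) matches below and can be emitted with the
  // scaled register form [%base, %idx, lsl #1]; a shift amount other than
  // Scale cannot use this addressing mode and falls through to return false.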
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(*CurDAG, N);
}

bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
  EVT VT = N.getValueType();
  return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
}

bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                             SDValue &Base, SDValue &Offset,
                                             unsigned Scale) {
  // Try to untangle an ADD node into a 'reg + offset'
  if (N.getOpcode() == ISD::ADD)
    if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t ImmOff = C->getSExtValue();
      if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
        Base = N.getOperand(0);
        Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
        return true;
      }
    }

  // By default, just match reg + 0.
  Base = N;
  Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
  return true;
}
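
// Illustrative sketch of SelectSMETileSlice (hypothetical operand values):
// with Scale == 2 and MaxSize == 14, an index of the form (add %base, 8) is
// split into Base = %base and Offset = 4, while (add %base, 7) fails the
// modulo check above and is matched conservatively as
// Base = (add %base, 7), Offset = 0.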