//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"
#define PASS_NAME "AArch64 Instruction Selection"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  static char ID;

  AArch64DAGToDAGISel() = delete;

  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  bool tryMLAV64LaneV128(SDNode *N);
  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }
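
  // Illustrative note (not part of the original source): the
  // SelectAddrModeIndexedN helpers above match the "register plus scaled
  // unsigned 12-bit immediate" form, so e.g. SelectAddrModeIndexed32 accepts
  // [Xn, #imm] where imm is a multiple of 4 in [0, 16380], as used by
  // "ldr w0, [x1, #imm]". The SelectAddrModeUnscaledN helpers cover the
  // unscaled signed 9-bit offsets instead.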

  template <unsigned Size, unsigned Max>
  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
    // Test if there is an appropriate addressing mode and check if the
    // immediate fits.
    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (Found) {
      if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
        int64_t C = CI->getSExtValue();
        if (C <= Max)
          return true;
      }
    }

    // Otherwise, base only, materialize address in register.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  bool SelectExtractHigh(SDValue N, SDValue &Res) {
    if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
      N = N->getOperand(0);
    if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
        !isa<ConstantSDNode>(N->getOperand(1)))
      return false;
    EVT VT = N->getValueType(0);
    EVT LVT = N->getOperand(0).getValueType();
    unsigned Index = N->getConstantOperandVal(1);
    if (!VT.is64BitVector() || !LVT.is128BitVector() ||
        Index != VT.getVectorNumElements())
      return false;
    Res = N->getOperand(0);
    return true;
  }

  bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
    if (N.getOpcode() != AArch64ISD::VLSHR)
      return false;
    SDValue Op = N->getOperand(0);
    EVT VT = Op.getValueType();
    unsigned ShtAmt = N->getConstantOperandVal(1);
    if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
      return false;

    APInt Imm;
    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0)
                      << Op.getOperand(1).getConstantOperandVal(1));
    else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
             isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0));
    else
      return false;

    if (Imm != 1ULL << (ShtAmt - 1))
      return false;

    Res1 = Op.getOperand(0);
    Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
    return true;
  }

  bool SelectDupZeroOrUndef(SDValue N) {
    switch(N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    default:
      break;
    }

    return false;
  }

  bool SelectDupZero(SDValue N) {
    switch(N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    }

    return false;
  }

  template<MVT::SimpleValueType VT>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVECpyDupImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Invert = false>
  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    return SelectSVELogicalImm(N, VT, Imm, Invert);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    return SelectSVEArithImm(N, VT, Imm);
  }

  template <unsigned Low, unsigned High, bool AllowSaturation = false>
  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  }

  bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
    if (N->getOpcode() != ISD::SPLAT_VECTOR)
      return false;

    EVT EltVT = N->getValueType(0).getVectorElementType();
    return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
                             /* High */ EltVT.getFixedSizeInBits(),
                             /* AllowSaturation */ true, Imm);
  }

  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
  template<signed Min, signed Max, signed Scale, bool Shift>
  bool SelectCntImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    if (Shift)
      MulImm = 1LL << MulImm;

    if ((MulImm % std::abs(Scale)) != 0)
      return false;

    MulImm /= Scale;
    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  template <signed Max, signed Scale>
  bool SelectEXTImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();

    if (MulImm >= 0 && MulImm <= Max) {
      MulImm *= Scale;
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
    if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
      uint64_t C = CI->getZExtValue();
      Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
      return true;
    }
    return false;
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that element
  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);
  // Form a sequence of SVE registers for instructions using list of vectors,
  // e.g. structured loads and stores (ldN, stN).
  SDValue createZTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri,
                            bool IsIntr = false);
  void SelectWhilePair(SDNode *N, unsigned Opc);
  void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);

  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  /// SVE Reg+Imm addressing mode.
  template <int64_t Min, int64_t Max>
  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
                                SDValue &OffImm);
  /// SVE Reg+Reg address mode.
  template <unsigned Scale>
  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  }

  template <unsigned MaxIdx, unsigned Scale>
  bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
    return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
  }

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
                           const SDValue &OldBase, const SDValue &OldOffset,
                           unsigned Scale);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);
  bool tryHighFPExt(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);

  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);
  bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
                          SDValue &Offset, unsigned Scale = 1);

  bool SelectAllActivePredicate(SDValue N);
};
} // end anonymous namespace

char AArch64DAGToDAGISel::ID = 0;

INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so Imm will receive the 32-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if N is a constant operand.
// If so Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}
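
// Illustrative usage note (not part of the original source): the helpers above
// let matchers write, e.g.,
//   uint64_t Mask;
//   if (isOpcWithIntImmediate(N, ISD::AND, Mask)) { ... }
// to recognize an (and x, #Mask) node whose right operand is a constant.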

// isIntImmediateEq - This method tests to see if N is a constant operand that
// is equivalent to 'ImmExpected'.
#ifndef NDEBUG
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
  uint64_t Imm;
  if (!isIntImmediate(N.getNode(), Imm))
    return false;
  return Imm == ImmExpected;
}
#endif

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}
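
// Illustrative example (not from the original source): the constant 0x456000
// has its low 12 bits clear and 0x456000 >> 12 == 0x456 fits in 12 bits, so it
// is selected as Val = 0x456 with Shift = "LSL #12", i.e.
//   add x0, x1, #0x456, lsl #12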

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}

/// Determine whether it is worth it to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;
  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the addressing mode and save a cycle.
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
      isWorthFoldingSHL(V))
    return true;
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}
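
// Illustrative note (not from the original source): on subtargets with a fast
// LSL, folding a small left shift into the addressing mode turns
//   lsl x2, x2, #3
//   ldr x0, [x1, x2]
// into the single form
//   ldr x0, [x1, x2, lsl #3]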

/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
/// to select more shifted register
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
                                                       SDValue &Shift) {
  EVT VT = N.getValueType();
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  if (N->getOpcode() != ISD::AND || !N->hasOneUse())
    return false;
  SDValue LHS = N.getOperand(0);
  if (!LHS->hasOneUse())
    return false;

  unsigned LHSOpcode = LHS->getOpcode();
  if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
    return false;

  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
  if (!ShiftAmtNode)
    return false;

  uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHSC)
    return false;

  APInt AndMask = RHSC->getAPIntValue();
  unsigned LowZBits, MaskLen;
  if (!AndMask.isShiftedMask(LowZBits, MaskLen))
    return false;

  unsigned BitWidth = N.getValueSizeInBits();
  SDLoc DL(LHS);
  uint64_t NewShiftC;
  unsigned NewShiftOp;
  if (LHSOpcode == ISD::SHL) {
    // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
    // BitWidth != LowZBits + MaskLen doesn't match the pattern
    if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
      return false;

    NewShiftC = LowZBits - ShiftAmtC;
    NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
  } else {
    if (LowZBits == 0)
      return false;

    // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
    NewShiftC = LowZBits + ShiftAmtC;
    if (NewShiftC >= BitWidth)
      return false;

    // SRA needs all high bits
    if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
      return false;

    // SRL high bits can be 0 or 1
    if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
      return false;

    if (LHSOpcode == ISD::SRL)
      NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
    else
      NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
  }

  assert(NewShiftC < BitWidth && "Invalid shift amount");
  SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
  SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
  Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
                                       NewShiftAmt, BitWidthMinus1),
                0);
  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
  Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
  return true;
}
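
// Illustrative example (not from the original source), for 32-bit values:
//   (and (shl x, 2), 0xFFFFFFC0)
// has LowZBits = 6 and MaskLen = 26, so it is re-expressed as
//   (shl (srl x, 4), 6)
// where the outer shift can then be folded as a shifted-register operand,
// e.g. "add w0, w1, w2, lsl #6".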

/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to default of "LSL 0". The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
/// instructions do not. The AllowROR parameter specifies whether ROR is
/// supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
    return true;

  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFolding(N);
  }

  return false;
}

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}

// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
      DL->getOpcode() != AArch64ISD::DUPLANE32)
    return false;

  SDValue SV = DL->getOperand(0);
  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
    return false;

  SDValue EV = SV.getOperand(1);
  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
  LaneOp = EV.getOperand(0);

  return true;
}

// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
// high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
                             SDValue &LaneOp, int &LaneIdx) {

  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    std::swap(Op0, Op1);
    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
      return false;
  }
  StdOp = Op1;
  return true;
}

/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
/// is a lane in the upper half of a 128-bit vector. Recognize and select this
/// so that we don't emit unnecessary lane extracts.
bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
  int LaneIdx = -1; // Will hold the lane index.

  if (Op1.getOpcode() != ISD::MUL ||
      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                        LaneIdx)) {
    std::swap(Op0, Op1);
    if (Op1.getOpcode() != ISD::MUL ||
        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                          LaneIdx))
      return false;
  }

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };

  unsigned MLAOpc = ~0U;

  switch (N->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unrecognized MLA.");
  case MVT::v4i16:
    MLAOpc = AArch64::MLAv4i16_indexed;
    break;
  case MVT::v8i16:
    MLAOpc = AArch64::MLAv8i16_indexed;
    break;
  case MVT::v2i32:
    MLAOpc = AArch64::MLAv2i32_indexed;
    break;
  case MVT::v4i32:
    MLAOpc = AArch64::MLAv4i32_indexed;
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
  return true;
}

bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
  SDLoc dl(N);
  SDValue SMULLOp0;
  SDValue SMULLOp1;
  int LaneIdx;

  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
                        LaneIdx))
    return false;

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };

  unsigned SMULLOpc = ~0U;

  if (IntNo == Intrinsic::aarch64_neon_smull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::SMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::SMULLv2i32_indexed;
      break;
    }
  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::UMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::UMULLv2i32_indexed;
      break;
    }
  } else
    llvm_unreachable("Unrecognized intrinsic.");

  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
  return true;
}
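
// Illustrative note (not from the original source): these selections let a
// multiply-accumulate or widening multiply whose second operand lives in the
// high half of a 128-bit register use an indexed form directly, e.g.
//   mla v0.4s, v1.4s, v2.s[3]
// instead of first extracting the high lane into a separate D register.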

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                               dl, MVT::i32, N, SubReg);
  return SDValue(Node, 0);
}

// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template<signed Low, signed High, signed Scale>
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((MulImm % std::abs(Scale)) == 0) {
    int64_t RDVLImm = MulImm / Scale;
    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
    // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
    auto isDef32 = [](SDValue N) {
      unsigned Opc = N.getOpcode();
      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
             Opc != ISD::FREEZE;
    };
    if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
        isDef32(Reg))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);
}
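
// Illustrative example (not from the original source): a DAG of the form
// (add x1, (shl (sext_inreg x2, i32), 2)) can be selected with the extend and
// the shift folded into the arithmetic instruction:
//   add x0, x1, w2, sxtw #2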

/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
/// operand is used by instructions that have an SP operand.
bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
                                                  SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() != ISD::SHL)
    return false;

  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD)
    return false;
  ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 4)
    return false;

  Ext = AArch64_AM::UXTX;
  Reg = N.getOperand(0);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto *Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
      return false;
  }

  return true;
}
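
// Illustrative note (not from the original source): when the ADDlow is folded
// into the memory operation, a global access is emitted as the pair
//   adrp x8, var
//   ldr  w0, [x8, :lo12:var]
// rather than materializing the full address with a separate add of :lo12:var.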

/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
  // immediate selected here doesn't support labels/immediates, only base+offset.
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      if (IsSignedImm) {
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      } else {
        // unsigned Immediate
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Size);
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base, SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    if (GAN->getOffset() % Size == 0 &&
        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
      return true;
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}
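
// Illustrative example (not from the original source): for an 8-byte access
// the unsigned 12-bit immediate is scaled by 8, so offsets from 0 to 32760
// that are multiples of 8 can be selected directly, e.g.
//   ldr x0, [x1, #32760]
// while an offset such as #32768 falls back to materializing the address.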

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address. This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode. The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    // If the offset is valid as a scaled immediate, don't match here.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
        RHSC < (0x1000 << Log2_32(Size)))
      return false;
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
      return true;
    }
  }
  return false;
}

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  MachineSDNode *Node = CurDAG->getMachineNode(
      TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
  return SDValue(Node, 0);
}

/// Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFolding(N);
}

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD of LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}
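
// Illustrative example (not from the original source): 0x123000 is preferred
// here because it can be encoded as "add x1, x0, #0x123, lsl #12", whereas
// 0x30000 is not, since a single "movz x1, #0x3, lsl #16" already materializes
// it and the ADD form would save nothing.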

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out if RHS is a wide immediate, it cannot be selected into the
  // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
  // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
  // instructions like:
  //     MOV  X0, WideImmediate
  //     ADD  X1, BaseReg, X0
  //     LDR  X2, [X1, 0]
  // For such situation, using [BaseReg, XReg] addressing mode can save one
  // ADD/SUB:
  //     MOV  X0, WideImmediate
  //     LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
    unsigned Scale = Log2_32(Size);
    // Skip the immediate if it can be selected by the load/store addressing
    // mode. Also skip the immediate if it can be encoded by a single ADD
    // (SUB is also checked by using -ImmOff).
    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
                                         AArch64::ZPR3RegClassID,
                                         AArch64::ZPR4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}
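
// Illustrative note (not from the original source): for a two-vector table
// lookup such as "tbl v0.16b, { v1.16b, v2.16b }, v3.16b", createQTuple wraps
// the two sources in a REG_SEQUENCE over the QQ register class so the register
// allocator is forced to place them in consecutive Q registers.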

bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDLoc dl(N);
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});

  // Either way, we're replacing the node, so tell the caller that.
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
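
// Illustrative note (not from the original source): the pre- and post-indexed
// opcodes selected above correspond to the two writeback forms of the load,
// e.g.
//   ldr x0, [x1, #8]!   // pre-indexed:  update x1 by 8, then load
//   ldr x0, [x1], #8    // post-indexed: load, then update x1 by 8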
1590 SDValue LoadedVal = SDValue(Res, 1); 1591 if (InsertTo64) { 1592 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 1593 LoadedVal = 1594 SDValue(CurDAG->getMachineNode( 1595 AArch64::SUBREG_TO_REG, dl, MVT::i64, 1596 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, 1597 SubReg), 1598 0); 1599 } 1600 1601 ReplaceUses(SDValue(N, 0), LoadedVal); 1602 ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); 1603 ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); 1604 CurDAG->RemoveDeadNode(N); 1605 return true; 1606 } 1607 1608 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 1609 unsigned SubRegIdx) { 1610 SDLoc dl(N); 1611 EVT VT = N->getValueType(0); 1612 SDValue Chain = N->getOperand(0); 1613 1614 SDValue Ops[] = {N->getOperand(2), // Mem operand; 1615 Chain}; 1616 1617 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1618 1619 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1620 SDValue SuperReg = SDValue(Ld, 0); 1621 for (unsigned i = 0; i < NumVecs; ++i) 1622 ReplaceUses(SDValue(N, i), 1623 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1624 1625 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1626 1627 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one, 1628 // because it's too simple to have needed special treatment during lowering. 1629 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) { 1630 MachineMemOperand *MemOp = MemIntr->getMemOperand(); 1631 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 1632 } 1633 1634 CurDAG->RemoveDeadNode(N); 1635 } 1636 1637 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, 1638 unsigned Opc, unsigned SubRegIdx) { 1639 SDLoc dl(N); 1640 EVT VT = N->getValueType(0); 1641 SDValue Chain = N->getOperand(0); 1642 1643 SDValue Ops[] = {N->getOperand(1), // Mem operand 1644 N->getOperand(2), // Incremental 1645 Chain}; 1646 1647 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1648 MVT::Untyped, MVT::Other}; 1649 1650 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1651 1652 // Update uses of write back register 1653 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1654 1655 // Update uses of vector list 1656 SDValue SuperReg = SDValue(Ld, 1); 1657 if (NumVecs == 1) 1658 ReplaceUses(SDValue(N, 0), SuperReg); 1659 else 1660 for (unsigned i = 0; i < NumVecs; ++i) 1661 ReplaceUses(SDValue(N, i), 1662 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1663 1664 // Update the chain 1665 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1666 CurDAG->RemoveDeadNode(N); 1667 } 1668 1669 /// Optimize \param OldBase and \param OldOffset selecting the best addressing 1670 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the 1671 /// new Base and an SDValue representing the new offset. 1672 std::tuple<unsigned, SDValue, SDValue> 1673 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, 1674 unsigned Opc_ri, 1675 const SDValue &OldBase, 1676 const SDValue &OldOffset, 1677 unsigned Scale) { 1678 SDValue NewBase = OldBase; 1679 SDValue NewOffset = OldOffset; 1680 // Detect a possible Reg+Imm addressing mode. 1681 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>( 1682 N, OldBase, NewBase, NewOffset); 1683 1684 // Detect a possible reg+reg addressing mode, but only if we haven't already 1685 // detected a Reg+Imm one. 
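  // For example, with Scale == 2 (32-bit elements) a reg+imm form such as
  //   ld1w { z0.s }, p0/z, [x0, #1, mul vl]
  // is preferred when the offset is an in-range multiple of the vector
  // length, while a reg+reg form such as
  //   ld1w { z0.s }, p0/z, [x0, x1, lsl #2]
  // may be used otherwise.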
1686 const bool IsRegReg = 1687 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); 1688 1689 // Select the instruction. 1690 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); 1691 } 1692 1693 enum class SelectTypeKind { 1694 Int1 = 0, 1695 }; 1696 1697 /// This function selects an opcode from a list of opcodes, which is 1698 /// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit } 1699 /// element types, in this order. 1700 template <SelectTypeKind Kind> 1701 static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) { 1702 // Only match scalable vector VTs 1703 if (!VT.isScalableVector()) 1704 return 0; 1705 1706 EVT EltVT = VT.getVectorElementType(); 1707 switch (Kind) { 1708 case SelectTypeKind::Int1: 1709 if (EltVT != MVT::i1) 1710 return 0; 1711 break; 1712 } 1713 1714 unsigned Offset; 1715 switch (VT.getVectorMinNumElements()) { 1716 case 16: // 8-bit 1717 Offset = 0; 1718 break; 1719 case 8: // 16-bit 1720 Offset = 1; 1721 break; 1722 case 4: // 32-bit 1723 Offset = 2; 1724 break; 1725 case 2: // 64-bit 1726 Offset = 3; 1727 break; 1728 default: 1729 return 0; 1730 } 1731 1732 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset]; 1733 } 1734 1735 void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) { 1736 SDLoc DL(N); 1737 EVT VT = N->getValueType(0); 1738 1739 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; 1740 1741 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); 1742 SDValue SuperReg = SDValue(WhilePair, 0); 1743 1744 for (unsigned I = 0; I < 2; ++I) 1745 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( 1746 AArch64::psub0 + I, DL, VT, SuperReg)); 1747 1748 CurDAG->RemoveDeadNode(N); 1749 } 1750 1751 void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, 1752 unsigned Opcode) { 1753 EVT VT = N->getValueType(0); 1754 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1755 SDValue Ops = createZTuple(Regs); 1756 SDLoc DL(N); 1757 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops); 1758 SDValue SuperReg = SDValue(Intrinsic, 0); 1759 for (unsigned i = 0; i < NumVecs; ++i) 1760 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1761 AArch64::zsub0 + i, DL, VT, SuperReg)); 1762 1763 CurDAG->RemoveDeadNode(N); 1764 return; 1765 } 1766 1767 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, 1768 unsigned Scale, unsigned Opc_ri, 1769 unsigned Opc_rr, bool IsIntr) { 1770 assert(Scale < 4 && "Invalid scaling value."); 1771 SDLoc DL(N); 1772 EVT VT = N->getValueType(0); 1773 SDValue Chain = N->getOperand(0); 1774 1775 // Optimize addressing mode. 1776 SDValue Base, Offset; 1777 unsigned Opc; 1778 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1779 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2), 1780 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); 1781 1782 SDValue Ops[] = {N->getOperand(IsIntr ? 
2 : 1), // Predicate 1783 Base, // Memory operand 1784 Offset, Chain}; 1785 1786 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1787 1788 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1789 SDValue SuperReg = SDValue(Load, 0); 1790 for (unsigned i = 0; i < NumVecs; ++i) 1791 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1792 AArch64::zsub0 + i, DL, VT, SuperReg)); 1793 1794 // Copy chain 1795 unsigned ChainIdx = NumVecs; 1796 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1797 CurDAG->RemoveDeadNode(N); 1798 } 1799 1800 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, 1801 unsigned Opc) { 1802 SDLoc dl(N); 1803 EVT VT = N->getOperand(2)->getValueType(0); 1804 1805 // Form a REG_SEQUENCE to force register allocation. 1806 bool Is128Bit = VT.getSizeInBits() == 128; 1807 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1808 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1809 1810 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; 1811 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1812 1813 // Transfer memoperands. 1814 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1815 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1816 1817 ReplaceNode(N, St); 1818 } 1819 1820 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, 1821 unsigned Scale, unsigned Opc_rr, 1822 unsigned Opc_ri) { 1823 SDLoc dl(N); 1824 1825 // Form a REG_SEQUENCE to force register allocation. 1826 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1827 SDValue RegSeq = createZTuple(Regs); 1828 1829 // Optimize addressing mode. 1830 unsigned Opc; 1831 SDValue Offset, Base; 1832 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1833 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), 1834 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); 1835 1836 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate 1837 Base, // address 1838 Offset, // offset 1839 N->getOperand(0)}; // chain 1840 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1841 1842 ReplaceNode(N, St); 1843 } 1844 1845 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, 1846 SDValue &OffImm) { 1847 SDLoc dl(N); 1848 const DataLayout &DL = CurDAG->getDataLayout(); 1849 const TargetLowering *TLI = getTargetLowering(); 1850 1851 // Try to match it for the frame address 1852 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) { 1853 int FI = FINode->getIndex(); 1854 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1855 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1856 return true; 1857 } 1858 1859 return false; 1860 } 1861 1862 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 1863 unsigned Opc) { 1864 SDLoc dl(N); 1865 EVT VT = N->getOperand(2)->getValueType(0); 1866 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1867 MVT::Other}; // Type for the Chain 1868 1869 // Form a REG_SEQUENCE to force register allocation. 1870 bool Is128Bit = VT.getSizeInBits() == 128; 1871 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1872 SDValue RegSeq = Is128Bit ? 
createQTuple(Regs) : createDTuple(Regs); 1873 1874 SDValue Ops[] = {RegSeq, 1875 N->getOperand(NumVecs + 1), // base register 1876 N->getOperand(NumVecs + 2), // Incremental 1877 N->getOperand(0)}; // Chain 1878 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1879 1880 ReplaceNode(N, St); 1881 } 1882 1883 namespace { 1884 /// WidenVector - Given a value in the V64 register class, produce the 1885 /// equivalent value in the V128 register class. 1886 class WidenVector { 1887 SelectionDAG &DAG; 1888 1889 public: 1890 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 1891 1892 SDValue operator()(SDValue V64Reg) { 1893 EVT VT = V64Reg.getValueType(); 1894 unsigned NarrowSize = VT.getVectorNumElements(); 1895 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1896 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 1897 SDLoc DL(V64Reg); 1898 1899 SDValue Undef = 1900 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 1901 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 1902 } 1903 }; 1904 } // namespace 1905 1906 /// NarrowVector - Given a value in the V128 register class, produce the 1907 /// equivalent value in the V64 register class. 1908 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 1909 EVT VT = V128Reg.getValueType(); 1910 unsigned WideSize = VT.getVectorNumElements(); 1911 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1912 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 1913 1914 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 1915 V128Reg); 1916 } 1917 1918 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 1919 unsigned Opc) { 1920 SDLoc dl(N); 1921 EVT VT = N->getValueType(0); 1922 bool Narrow = VT.getSizeInBits() == 64; 1923 1924 // Form a REG_SEQUENCE to force register allocation. 1925 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1926 1927 if (Narrow) 1928 transform(Regs, Regs.begin(), 1929 WidenVector(*CurDAG)); 1930 1931 SDValue RegSeq = createQTuple(Regs); 1932 1933 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1934 1935 unsigned LaneNo = 1936 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1937 1938 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1939 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1940 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1941 SDValue SuperReg = SDValue(Ld, 0); 1942 1943 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1944 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1945 AArch64::qsub2, AArch64::qsub3 }; 1946 for (unsigned i = 0; i < NumVecs; ++i) { 1947 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 1948 if (Narrow) 1949 NV = NarrowVector(NV, *CurDAG); 1950 ReplaceUses(SDValue(N, i), NV); 1951 } 1952 1953 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1954 CurDAG->RemoveDeadNode(N); 1955 } 1956 1957 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 1958 unsigned Opc) { 1959 SDLoc dl(N); 1960 EVT VT = N->getValueType(0); 1961 bool Narrow = VT.getSizeInBits() == 64; 1962 1963 // Form a REG_SEQUENCE to force register allocation. 
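  // Structured lane loads (LD2/LD3/LD4 single structure) require the vector
  // list to live in consecutive 128-bit Q registers, e.g.
  //   ld2 { v0.s, v1.s }[1], [x0], x2
  // so 64-bit inputs are widened to the Q form here and the results are
  // narrowed back below.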
1964 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1965 1966 if (Narrow) 1967 transform(Regs, Regs.begin(), 1968 WidenVector(*CurDAG)); 1969 1970 SDValue RegSeq = createQTuple(Regs); 1971 1972 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1973 RegSeq->getValueType(0), MVT::Other}; 1974 1975 unsigned LaneNo = 1976 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1977 1978 SDValue Ops[] = {RegSeq, 1979 CurDAG->getTargetConstant(LaneNo, dl, 1980 MVT::i64), // Lane Number 1981 N->getOperand(NumVecs + 2), // Base register 1982 N->getOperand(NumVecs + 3), // Incremental 1983 N->getOperand(0)}; 1984 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1985 1986 // Update uses of the write back register 1987 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1988 1989 // Update uses of the vector list 1990 SDValue SuperReg = SDValue(Ld, 1); 1991 if (NumVecs == 1) { 1992 ReplaceUses(SDValue(N, 0), 1993 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 1994 } else { 1995 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1996 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1997 AArch64::qsub2, AArch64::qsub3 }; 1998 for (unsigned i = 0; i < NumVecs; ++i) { 1999 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 2000 SuperReg); 2001 if (Narrow) 2002 NV = NarrowVector(NV, *CurDAG); 2003 ReplaceUses(SDValue(N, i), NV); 2004 } 2005 } 2006 2007 // Update the Chain 2008 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 2009 CurDAG->RemoveDeadNode(N); 2010 } 2011 2012 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 2013 unsigned Opc) { 2014 SDLoc dl(N); 2015 EVT VT = N->getOperand(2)->getValueType(0); 2016 bool Narrow = VT.getSizeInBits() == 64; 2017 2018 // Form a REG_SEQUENCE to force register allocation. 2019 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 2020 2021 if (Narrow) 2022 transform(Regs, Regs.begin(), 2023 WidenVector(*CurDAG)); 2024 2025 SDValue RegSeq = createQTuple(Regs); 2026 2027 unsigned LaneNo = 2028 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 2029 2030 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2031 N->getOperand(NumVecs + 3), N->getOperand(0)}; 2032 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 2033 2034 // Transfer memoperands. 2035 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2036 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2037 2038 ReplaceNode(N, St); 2039 } 2040 2041 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 2042 unsigned Opc) { 2043 SDLoc dl(N); 2044 EVT VT = N->getOperand(2)->getValueType(0); 2045 bool Narrow = VT.getSizeInBits() == 64; 2046 2047 // Form a REG_SEQUENCE to force register allocation. 
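  // As with the lane loads above, the stored vectors must occupy consecutive
  // Q registers, and the selected node also produces the written-back base
  // address, e.g.
  //   st2 { v0.s, v1.s }[1], [x0], #8
  // for an immediate post-increment by the total transfer size (here two
  // 32-bit elements).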
2048 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 2049 2050 if (Narrow) 2051 transform(Regs, Regs.begin(), 2052 WidenVector(*CurDAG)); 2053 2054 SDValue RegSeq = createQTuple(Regs); 2055 2056 const EVT ResTys[] = {MVT::i64, // Type of the write back register 2057 MVT::Other}; 2058 2059 unsigned LaneNo = 2060 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 2061 2062 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 2063 N->getOperand(NumVecs + 2), // Base Register 2064 N->getOperand(NumVecs + 3), // Incremental 2065 N->getOperand(0)}; 2066 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2067 2068 // Transfer memoperands. 2069 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2070 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2071 2072 ReplaceNode(N, St); 2073 } 2074 2075 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 2076 unsigned &Opc, SDValue &Opd0, 2077 unsigned &LSB, unsigned &MSB, 2078 unsigned NumberOfIgnoredLowBits, 2079 bool BiggerPattern) { 2080 assert(N->getOpcode() == ISD::AND && 2081 "N must be a AND operation to call this function"); 2082 2083 EVT VT = N->getValueType(0); 2084 2085 // Here we can test the type of VT and return false when the type does not 2086 // match, but since it is done prior to that call in the current context 2087 // we turned that into an assert to avoid redundant code. 2088 assert((VT == MVT::i32 || VT == MVT::i64) && 2089 "Type checking must have been done before calling this function"); 2090 2091 // FIXME: simplify-demanded-bits in DAGCombine will probably have 2092 // changed the AND node to a 32-bit mask operation. We'll have to 2093 // undo that as part of the transform here if we want to catch all 2094 // the opportunities. 2095 // Currently the NumberOfIgnoredLowBits argument helps to recover 2096 // from these situations when matching bigger pattern (bitfield insert). 2097 2098 // For unsigned extracts, check for a shift right and mask 2099 uint64_t AndImm = 0; 2100 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) 2101 return false; 2102 2103 const SDNode *Op0 = N->getOperand(0).getNode(); 2104 2105 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 2106 // simplified. Try to undo that 2107 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 2108 2109 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 2110 if (AndImm & (AndImm + 1)) 2111 return false; 2112 2113 bool ClampMSB = false; 2114 uint64_t SrlImm = 0; 2115 // Handle the SRL + ANY_EXTEND case. 2116 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 2117 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 2118 // Extend the incoming operand of the SRL to 64-bit. 2119 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 2120 // Make sure to clamp the MSB so that we preserve the semantics of the 2121 // original operations. 2122 ClampMSB = true; 2123 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 2124 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 2125 SrlImm)) { 2126 // If the shift result was truncated, we can still combine them. 2127 Opd0 = Op0->getOperand(0).getOperand(0); 2128 2129 // Use the type of SRL node. 
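    // e.g. for (i32 (trunc (srl (i64 x), 16))) the extract is matched on the
    // i64 source, so the field indices are computed in 64 bits and a 64-bit
    // UBFM is produced even though N itself is i32.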
2130 VT = Opd0->getValueType(0); 2131 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 2132 Opd0 = Op0->getOperand(0); 2133 ClampMSB = (VT == MVT::i32); 2134 } else if (BiggerPattern) { 2135 // Let's pretend a 0 shift right has been performed. 2136 // The resulting code will be at least as good as the original one 2137 // plus it may expose more opportunities for bitfield insert pattern. 2138 // FIXME: Currently we limit this to the bigger pattern, because 2139 // some optimizations expect AND and not UBFM. 2140 Opd0 = N->getOperand(0); 2141 } else 2142 return false; 2143 2144 // Bail out on large immediates. This happens when no proper 2145 // combining/constant folding was performed. 2146 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { 2147 LLVM_DEBUG( 2148 (dbgs() << N 2149 << ": Found large shift immediate, this should not happen\n")); 2150 return false; 2151 } 2152 2153 LSB = SrlImm; 2154 MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm) 2155 : countTrailingOnes<uint64_t>(AndImm)) - 2156 1; 2157 if (ClampMSB) 2158 // Since we're moving the extend before the right shift operation, we need 2159 // to clamp the MSB to make sure we don't shift in undefined bits instead of 2160 // the zeros which would get shifted in with the original right shift 2161 // operation. 2162 MSB = MSB > 31 ? 31 : MSB; 2163 2164 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2165 return true; 2166 } 2167 2168 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, 2169 SDValue &Opd0, unsigned &Immr, 2170 unsigned &Imms) { 2171 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); 2172 2173 EVT VT = N->getValueType(0); 2174 unsigned BitWidth = VT.getSizeInBits(); 2175 assert((VT == MVT::i32 || VT == MVT::i64) && 2176 "Type checking must have been done before calling this function"); 2177 2178 SDValue Op = N->getOperand(0); 2179 if (Op->getOpcode() == ISD::TRUNCATE) { 2180 Op = Op->getOperand(0); 2181 VT = Op->getValueType(0); 2182 BitWidth = VT.getSizeInBits(); 2183 } 2184 2185 uint64_t ShiftImm; 2186 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && 2187 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2188 return false; 2189 2190 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 2191 if (ShiftImm + Width > BitWidth) 2192 return false; 2193 2194 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri; 2195 Opd0 = Op.getOperand(0); 2196 Immr = ShiftImm; 2197 Imms = ShiftImm + Width - 1; 2198 return true; 2199 } 2200 2201 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 2202 SDValue &Opd0, unsigned &LSB, 2203 unsigned &MSB) { 2204 // We are looking for the following pattern which basically extracts several 2205 // continuous bits from the source value and places it from the LSB of the 2206 // destination value, all other bits of the destination value or set to zero: 2207 // 2208 // Value2 = AND Value, MaskImm 2209 // SRL Value2, ShiftImm 2210 // 2211 // with MaskImm >> ShiftImm to search for the bit width. 
2212 // 2213 // This gets selected into a single UBFM: 2214 // 2215 // UBFM Value, ShiftImm, findLastSet(MaskImm) 2216 // 2217 2218 if (N->getOpcode() != ISD::SRL) 2219 return false; 2220 2221 uint64_t AndMask = 0; 2222 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) 2223 return false; 2224 2225 Opd0 = N->getOperand(0).getOperand(0); 2226 2227 uint64_t SrlImm = 0; 2228 if (!isIntImmediate(N->getOperand(1), SrlImm)) 2229 return false; 2230 2231 // Check whether we really have several bits extract here. 2232 if (!isMask_64(AndMask >> SrlImm)) 2233 return false; 2234 2235 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2236 LSB = SrlImm; 2237 MSB = findLastSet(AndMask, ZB_Undefined); 2238 return true; 2239 } 2240 2241 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 2242 unsigned &Immr, unsigned &Imms, 2243 bool BiggerPattern) { 2244 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 2245 "N must be a SHR/SRA operation to call this function"); 2246 2247 EVT VT = N->getValueType(0); 2248 2249 // Here we can test the type of VT and return false when the type does not 2250 // match, but since it is done prior to that call in the current context 2251 // we turned that into an assert to avoid redundant code. 2252 assert((VT == MVT::i32 || VT == MVT::i64) && 2253 "Type checking must have been done before calling this function"); 2254 2255 // Check for AND + SRL doing several bits extract. 2256 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 2257 return true; 2258 2259 // We're looking for a shift of a shift. 2260 uint64_t ShlImm = 0; 2261 uint64_t TruncBits = 0; 2262 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 2263 Opd0 = N->getOperand(0).getOperand(0); 2264 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 2265 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 2266 // We are looking for a shift of truncate. Truncate from i64 to i32 could 2267 // be considered as setting high 32 bits as zero. Our strategy here is to 2268 // always generate 64bit UBFM. This consistency will help the CSE pass 2269 // later find more redundancy. 2270 Opd0 = N->getOperand(0).getOperand(0); 2271 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 2272 VT = Opd0.getValueType(); 2273 assert(VT == MVT::i64 && "the promoted type should be i64"); 2274 } else if (BiggerPattern) { 2275 // Let's pretend a 0 shift left has been performed. 2276 // FIXME: Currently we limit this to the bigger pattern case, 2277 // because some optimizations expect AND and not UBFM 2278 Opd0 = N->getOperand(0); 2279 } else 2280 return false; 2281 2282 // Missing combines/constant folding may have left us with strange 2283 // constants. 2284 if (ShlImm >= VT.getSizeInBits()) { 2285 LLVM_DEBUG( 2286 (dbgs() << N 2287 << ": Found large shift immediate, this should not happen\n")); 2288 return false; 2289 } 2290 2291 uint64_t SrlImm = 0; 2292 if (!isIntImmediate(N->getOperand(1), SrlImm)) 2293 return false; 2294 2295 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 2296 "bad amount in shift node!"); 2297 int immr = SrlImm - ShlImm; 2298 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 2299 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 2300 // SRA requires a signed extraction 2301 if (VT == MVT::i32) 2302 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 2303 else 2304 Opc = N->getOpcode() == ISD::SRA ? 
AArch64::SBFMXri : AArch64::UBFMXri; 2305 return true; 2306 } 2307 2308 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 2309 assert(N->getOpcode() == ISD::SIGN_EXTEND); 2310 2311 EVT VT = N->getValueType(0); 2312 EVT NarrowVT = N->getOperand(0)->getValueType(0); 2313 if (VT != MVT::i64 || NarrowVT != MVT::i32) 2314 return false; 2315 2316 uint64_t ShiftImm; 2317 SDValue Op = N->getOperand(0); 2318 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2319 return false; 2320 2321 SDLoc dl(N); 2322 // Extend the incoming operand of the shift to 64-bits. 2323 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 2324 unsigned Immr = ShiftImm; 2325 unsigned Imms = NarrowVT.getSizeInBits() - 1; 2326 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2327 CurDAG->getTargetConstant(Imms, dl, VT)}; 2328 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 2329 return true; 2330 } 2331 2332 /// Try to form fcvtl2 instructions from a floating-point extend of a high-half 2333 /// extract of a subvector. 2334 bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) { 2335 assert(N->getOpcode() == ISD::FP_EXTEND); 2336 2337 // There are 2 forms of fcvtl2 - extend to double or extend to float. 2338 SDValue Extract = N->getOperand(0); 2339 EVT VT = N->getValueType(0); 2340 EVT NarrowVT = Extract.getValueType(); 2341 if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) && 2342 (VT != MVT::v4f32 || NarrowVT != MVT::v4f16)) 2343 return false; 2344 2345 // Optionally look past a bitcast. 2346 Extract = peekThroughBitcasts(Extract); 2347 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) 2348 return false; 2349 2350 // Match extract from start of high half index. 2351 // Example: v8i16 -> v4i16 means the extract must begin at index 4. 2352 unsigned ExtractIndex = Extract.getConstantOperandVal(1); 2353 if (ExtractIndex != Extract.getValueType().getVectorNumElements()) 2354 return false; 2355 2356 auto Opcode = VT == MVT::v2f64 ? 
                     AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
  CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
  return true;
}

static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                SDValue &Opd0, unsigned &Immr, unsigned &Imms,
                                unsigned NumberOfIgnoredLowBits = 0,
                                bool BiggerPattern = false) {
  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
    return false;

  switch (N->getOpcode()) {
  default:
    if (!N->isMachineOpcode())
      return false;
    break;
  case ISD::AND:
    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
                                      NumberOfIgnoredLowBits, BiggerPattern);
  case ISD::SRL:
  case ISD::SRA:
    return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);

  case ISD::SIGN_EXTEND_INREG:
    return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
  }

  unsigned NOpc = N->getMachineOpcode();
  switch (NOpc) {
  default:
    return false;
  case AArch64::SBFMWri:
  case AArch64::UBFMWri:
  case AArch64::SBFMXri:
  case AArch64::UBFMXri:
    Opc = NOpc;
    Opd0 = N->getOperand(0);
    Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
    Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
    return true;
  }
  // Unreachable
  return false;
}

bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
  unsigned Opc, Immr, Imms;
  SDValue Opd0;
  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
    return false;

  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  // If the bit extract operation is 64-bit but the original type is 32-bit, we
  // need to add one EXTRACT_SUBREG.
  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};

    SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
                                          MVT::i32, SDValue(BFM, 0), SubReg));
    return true;
  }

  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
                   CurDAG->getTargetConstant(Imms, dl, VT)};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}

/// Does DstMask form a complementary pair with the mask provided by
/// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
/// this asks whether DstMask zeroes precisely those bits that will be set by
/// the other half.
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
                              unsigned NumberOfIgnoredHighBits, EVT VT) {
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "i32 or i64 mask type expected!");
  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;

  APInt SignificantDstMask = APInt(BitWidth, DstMask);
  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);

  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
         (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
}

// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
2450 // E.g., looking for useful bit of x 2451 // 1. y = x & 0x7 2452 // 2. z = y >> 2 2453 // After #1, x useful bits are 0x7, then the useful bits of x, live through 2454 // y. 2455 // After #2, the useful bits of x are 0x4. 2456 // However, if x is used on an unpredicatable instruction, then all its bits 2457 // are useful. 2458 // E.g. 2459 // 1. y = x & 0x7 2460 // 2. z = y >> 2 2461 // 3. str x, [@x] 2462 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); 2463 2464 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, 2465 unsigned Depth) { 2466 uint64_t Imm = 2467 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2468 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); 2469 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); 2470 getUsefulBits(Op, UsefulBits, Depth + 1); 2471 } 2472 2473 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, 2474 uint64_t Imm, uint64_t MSB, 2475 unsigned Depth) { 2476 // inherit the bitwidth value 2477 APInt OpUsefulBits(UsefulBits); 2478 OpUsefulBits = 1; 2479 2480 if (MSB >= Imm) { 2481 OpUsefulBits <<= MSB - Imm + 1; 2482 --OpUsefulBits; 2483 // The interesting part will be in the lower part of the result 2484 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2485 // The interesting part was starting at Imm in the argument 2486 OpUsefulBits <<= Imm; 2487 } else { 2488 OpUsefulBits <<= MSB + 1; 2489 --OpUsefulBits; 2490 // The interesting part will be shifted in the result 2491 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; 2492 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2493 // The interesting part was at zero in the argument 2494 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); 2495 } 2496 2497 UsefulBits &= OpUsefulBits; 2498 } 2499 2500 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, 2501 unsigned Depth) { 2502 uint64_t Imm = 2503 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2504 uint64_t MSB = 2505 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2506 2507 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 2508 } 2509 2510 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, 2511 unsigned Depth) { 2512 uint64_t ShiftTypeAndValue = 2513 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2514 APInt Mask(UsefulBits); 2515 Mask.clearAllBits(); 2516 Mask.flipAllBits(); 2517 2518 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { 2519 // Shift Left 2520 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2521 Mask <<= ShiftAmt; 2522 getUsefulBits(Op, Mask, Depth + 1); 2523 Mask.lshrInPlace(ShiftAmt); 2524 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 2525 // Shift Right 2526 // We do not handle AArch64_AM::ASR, because the sign will change the 2527 // number of useful bits 2528 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2529 Mask.lshrInPlace(ShiftAmt); 2530 getUsefulBits(Op, Mask, Depth + 1); 2531 Mask <<= ShiftAmt; 2532 } else 2533 return; 2534 2535 UsefulBits &= Mask; 2536 } 2537 2538 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 2539 unsigned Depth) { 2540 uint64_t Imm = 2541 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2542 uint64_t MSB = 2543 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 2544 2545 APInt 
OpUsefulBits(UsefulBits); 2546 OpUsefulBits = 1; 2547 2548 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 2549 ResultUsefulBits.flipAllBits(); 2550 APInt Mask(UsefulBits.getBitWidth(), 0); 2551 2552 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 2553 2554 if (MSB >= Imm) { 2555 // The instruction is a BFXIL. 2556 uint64_t Width = MSB - Imm + 1; 2557 uint64_t LSB = Imm; 2558 2559 OpUsefulBits <<= Width; 2560 --OpUsefulBits; 2561 2562 if (Op.getOperand(1) == Orig) { 2563 // Copy the low bits from the result to bits starting from LSB. 2564 Mask = ResultUsefulBits & OpUsefulBits; 2565 Mask <<= LSB; 2566 } 2567 2568 if (Op.getOperand(0) == Orig) 2569 // Bits starting from LSB in the input contribute to the result. 2570 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2571 } else { 2572 // The instruction is a BFI. 2573 uint64_t Width = MSB + 1; 2574 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 2575 2576 OpUsefulBits <<= Width; 2577 --OpUsefulBits; 2578 OpUsefulBits <<= LSB; 2579 2580 if (Op.getOperand(1) == Orig) { 2581 // Copy the bits from the result to the zero bits. 2582 Mask = ResultUsefulBits & OpUsefulBits; 2583 Mask.lshrInPlace(LSB); 2584 } 2585 2586 if (Op.getOperand(0) == Orig) 2587 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2588 } 2589 2590 UsefulBits &= Mask; 2591 } 2592 2593 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2594 SDValue Orig, unsigned Depth) { 2595 2596 // Users of this node should have already been instruction selected 2597 // FIXME: Can we turn that into an assert? 2598 if (!UserNode->isMachineOpcode()) 2599 return; 2600 2601 switch (UserNode->getMachineOpcode()) { 2602 default: 2603 return; 2604 case AArch64::ANDSWri: 2605 case AArch64::ANDSXri: 2606 case AArch64::ANDWri: 2607 case AArch64::ANDXri: 2608 // We increment Depth only when we call the getUsefulBits 2609 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2610 Depth); 2611 case AArch64::UBFMWri: 2612 case AArch64::UBFMXri: 2613 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2614 2615 case AArch64::ORRWrs: 2616 case AArch64::ORRXrs: 2617 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig) 2618 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2619 Depth); 2620 return; 2621 case AArch64::BFMWri: 2622 case AArch64::BFMXri: 2623 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2624 2625 case AArch64::STRBBui: 2626 case AArch64::STURBBi: 2627 if (UserNode->getOperand(0) != Orig) 2628 return; 2629 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); 2630 return; 2631 2632 case AArch64::STRHHui: 2633 case AArch64::STURHHi: 2634 if (UserNode->getOperand(0) != Orig) 2635 return; 2636 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2637 return; 2638 } 2639 } 2640 2641 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2642 if (Depth >= SelectionDAG::MaxRecursionDepth) 2643 return; 2644 // Initialize UsefulBits 2645 if (!Depth) { 2646 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2647 // At the beginning, assume every produced bits is useful 2648 UsefulBits = APInt(Bitwidth, 0); 2649 UsefulBits.flipAllBits(); 2650 } 2651 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2652 2653 for (SDNode *Node : Op.getNode()->uses()) { 2654 // A use cannot produce useful bits 2655 APInt UsefulBitsForUse = APInt(UsefulBits); 2656 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 2657 UsersUsefulBits |= UsefulBitsForUse; 2658 } 2659 // UsefulBits 
contains the produced bits that are meaningful for the 2660 // current definition, thus a user cannot make a bit meaningful at 2661 // this point 2662 UsefulBits &= UsersUsefulBits; 2663 } 2664 2665 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 2666 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 2667 /// 0, return Op unchanged. 2668 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 2669 if (ShlAmount == 0) 2670 return Op; 2671 2672 EVT VT = Op.getValueType(); 2673 SDLoc dl(Op); 2674 unsigned BitWidth = VT.getSizeInBits(); 2675 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2676 2677 SDNode *ShiftNode; 2678 if (ShlAmount > 0) { 2679 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 2680 ShiftNode = CurDAG->getMachineNode( 2681 UBFMOpc, dl, VT, Op, 2682 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 2683 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 2684 } else { 2685 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 2686 assert(ShlAmount < 0 && "expected right shift"); 2687 int ShrAmount = -ShlAmount; 2688 ShiftNode = CurDAG->getMachineNode( 2689 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 2690 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 2691 } 2692 2693 return SDValue(ShiftNode, 0); 2694 } 2695 2696 // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)". 2697 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, 2698 bool BiggerPattern, 2699 const uint64_t NonZeroBits, 2700 SDValue &Src, int &DstLSB, 2701 int &Width); 2702 2703 // For bit-field-positioning pattern "shl VAL, N)". 2704 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, 2705 bool BiggerPattern, 2706 const uint64_t NonZeroBits, 2707 SDValue &Src, int &DstLSB, 2708 int &Width); 2709 2710 /// Does this tree qualify as an attempt to move a bitfield into position, 2711 /// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N). 
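/// For example, (and (shl x, 3), 0xf8) places the low 5 bits of x at bit 3
/// (DstLSB == 3, Width == 5), which can later be emitted as a UBFIZ or folded
/// into a BFI.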
2712 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 2713 bool BiggerPattern, SDValue &Src, 2714 int &DstLSB, int &Width) { 2715 EVT VT = Op.getValueType(); 2716 unsigned BitWidth = VT.getSizeInBits(); 2717 (void)BitWidth; 2718 assert(BitWidth == 32 || BitWidth == 64); 2719 2720 KnownBits Known = CurDAG->computeKnownBits(Op); 2721 2722 // Non-zero in the sense that they're not provably zero, which is the key 2723 // point if we want to use this value 2724 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 2725 if (!isShiftedMask_64(NonZeroBits)) 2726 return false; 2727 2728 switch (Op.getOpcode()) { 2729 default: 2730 break; 2731 case ISD::AND: 2732 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern, 2733 NonZeroBits, Src, DstLSB, Width); 2734 case ISD::SHL: 2735 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern, 2736 NonZeroBits, Src, DstLSB, Width); 2737 } 2738 2739 return false; 2740 } 2741 2742 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, 2743 bool BiggerPattern, 2744 const uint64_t NonZeroBits, 2745 SDValue &Src, int &DstLSB, 2746 int &Width) { 2747 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); 2748 2749 EVT VT = Op.getValueType(); 2750 assert((VT == MVT::i32 || VT == MVT::i64) && 2751 "Caller guarantees VT is one of i32 or i64"); 2752 (void)VT; 2753 2754 uint64_t AndImm; 2755 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) 2756 return false; 2757 2758 // If (~AndImm & NonZeroBits) is not zero at POS, we know that 2759 // 1) (AndImm & (1 << POS) == 0) 2760 // 2) the result of AND is not zero at POS bit (according to NonZeroBits) 2761 // 2762 // 1) and 2) don't agree so something must be wrong (e.g., in 2763 // 'SelectionDAG::computeKnownBits') 2764 assert((~AndImm & NonZeroBits) == 0 && 2765 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)"); 2766 2767 SDValue AndOp0 = Op.getOperand(0); 2768 2769 uint64_t ShlImm; 2770 SDValue ShlOp0; 2771 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) { 2772 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'. 2773 ShlOp0 = AndOp0.getOperand(0); 2774 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND && 2775 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL, 2776 ShlImm)) { 2777 // For pattern "and(any_extend(shl(val, N)), shifted-mask)" 2778 2779 // ShlVal == shl(val, N), which is a left shift on a smaller type. 2780 SDValue ShlVal = AndOp0.getOperand(0); 2781 2782 // Since this is after type legalization and ShlVal is extended to MVT::i64, 2783 // expect VT to be MVT::i32. 2784 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32."); 2785 2786 // Widens 'val' to MVT::i64 as the source of bit field positioning. 2787 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0)); 2788 } else 2789 return false; 2790 2791 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since 2792 // then we'll end up generating AndOp0+UBFIZ instead of just keeping 2793 // AndOp0+AND. 2794 if (!BiggerPattern && !AndOp0.hasOneUse()) 2795 return false; 2796 2797 DstLSB = countTrailingZeros(NonZeroBits); 2798 Width = countTrailingOnes(NonZeroBits >> DstLSB); 2799 2800 // Bail out on large Width. This happens when no proper combining / constant 2801 // folding was performed. 
  if (Width >= (int)VT.getSizeInBits()) {
    // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t,
    // and Width == 64 indicates a missed dag-combine from "(and val, AllOnes)"
    // to "val".
    // If VT is i32, Width >= 32 means:
    // -  For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
    //    demands at least 'Width' bits (after the dag-combiner). Together with
    //    the `any_extend` Op (whose higher bits are undefined), this indicates
    //    a missed combination when lowering the 'and' IR instruction to a
    //    machine IR instruction.
    LLVM_DEBUG(
        dbgs()
        << "Found large Width in bit-field-positioning -- this indicates no "
           "proper combining / constant folding was performed\n");
    return false;
  }

  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
  // amount. BiggerPattern is true when this pattern is being matched for BFI,
  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
  // which case it is not profitable to insert an extra shift.
  if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
    return false;

  Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
  return true;
}

// For node (shl (and val, mask), N), returns true if the node is equivalent to
// UBFIZ.
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
                                              SDValue &Src, int &DstLSB,
                                              int &Width) {
  // The caller should have verified that Op is a left shift by the constant
  // amount ShlImm; the asserts check that.
  assert(Op.getOpcode() == ISD::SHL &&
         "Op.getNode() should be a SHL node to call this function");
  assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
         "Op.getNode() should shift ShlImm to call this function");

  uint64_t AndImm = 0;
  SDValue Op0 = Op.getOperand(0);
  if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
    return false;

  const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
  if (isMask_64(ShiftedAndImm)) {
    // AndImm is a superset of (AllOnes >> ShlImm); in other words, the low
    // bits of AndImm form a mask of ones, and it may be prefixed with
    // arbitrary bits as long as those bits are shifted out.
    //
    // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
    // the AND result bits corresponding to x, y and z are shifted out, so it is
    // fine not to extract them.
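    //
    // For instance, (shl (and x, 0xff), 8) positions an 8-bit field at bit 8
    // and can be emitted as 'ubfiz xd, xn, #8, #8' rather than an AND plus a
    // shift.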
2856 Width = countTrailingOnes(ShiftedAndImm); 2857 DstLSB = ShlImm; 2858 Src = Op0.getOperand(0); 2859 return true; 2860 } 2861 return false; 2862 } 2863 2864 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, 2865 bool BiggerPattern, 2866 const uint64_t NonZeroBits, 2867 SDValue &Src, int &DstLSB, 2868 int &Width) { 2869 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); 2870 2871 EVT VT = Op.getValueType(); 2872 assert((VT == MVT::i32 || VT == MVT::i64) && 2873 "Caller guarantees that type is i32 or i64"); 2874 (void)VT; 2875 2876 uint64_t ShlImm; 2877 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 2878 return false; 2879 2880 if (!BiggerPattern && !Op.hasOneUse()) 2881 return false; 2882 2883 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width)) 2884 return true; 2885 2886 DstLSB = countTrailingZeros(NonZeroBits); 2887 Width = countTrailingOnes(NonZeroBits >> DstLSB); 2888 2889 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern) 2890 return false; 2891 2892 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB); 2893 return true; 2894 } 2895 2896 static bool isShiftedMask(uint64_t Mask, EVT VT) { 2897 assert(VT == MVT::i32 || VT == MVT::i64); 2898 if (VT == MVT::i32) 2899 return isShiftedMask_32(Mask); 2900 return isShiftedMask_64(Mask); 2901 } 2902 2903 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 2904 // inserted only sets known zero bits. 2905 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 2906 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2907 2908 EVT VT = N->getValueType(0); 2909 if (VT != MVT::i32 && VT != MVT::i64) 2910 return false; 2911 2912 unsigned BitWidth = VT.getSizeInBits(); 2913 2914 uint64_t OrImm; 2915 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 2916 return false; 2917 2918 // Skip this transformation if the ORR immediate can be encoded in the ORR. 2919 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 2920 // performance neutral. 2921 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 2922 return false; 2923 2924 uint64_t MaskImm; 2925 SDValue And = N->getOperand(0); 2926 // Must be a single use AND with an immediate operand. 2927 if (!And.hasOneUse() || 2928 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 2929 return false; 2930 2931 // Compute the Known Zero for the AND as this allows us to catch more general 2932 // cases than just looking for AND with imm. 2933 KnownBits Known = CurDAG->computeKnownBits(And); 2934 2935 // Non-zero in the sense that they're not provably zero, which is the key 2936 // point if we want to use this value. 2937 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 2938 2939 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 2940 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 2941 return false; 2942 2943 // The bits being inserted must only set those bits that are known to be zero. 2944 if ((OrImm & NotKnownZero) != 0) { 2945 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 2946 // currently handle this case. 2947 return false; 2948 } 2949 2950 // BFI/BFXIL dst, src, #lsb, #width. 2951 int LSB = countTrailingOnes(NotKnownZero); 2952 int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation(); 2953 2954 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 
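  // For example, a 32-bit insertion at LSB == 8 with Width == 4 gives
  // ImmR == (32 - 8) % 32 == 24 and ImmS == 3, i.e. 'bfi wd, wn, #8, #4' is
  // encoded as 'bfm wd, wn, #24, #3'.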
2955 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2956 unsigned ImmS = Width - 1; 2957 2958 // If we're creating a BFI instruction avoid cases where we need more 2959 // instructions to materialize the BFI constant as compared to the original 2960 // ORR. A BFXIL will use the same constant as the original ORR, so the code 2961 // should be no worse in this case. 2962 bool IsBFI = LSB != 0; 2963 uint64_t BFIImm = OrImm >> LSB; 2964 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 2965 // We have a BFI instruction and we know the constant can't be materialized 2966 // with a ORR-immediate with the zero register. 2967 unsigned OrChunks = 0, BFIChunks = 0; 2968 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 2969 if (((OrImm >> Shift) & 0xFFFF) != 0) 2970 ++OrChunks; 2971 if (((BFIImm >> Shift) & 0xFFFF) != 0) 2972 ++BFIChunks; 2973 } 2974 if (BFIChunks > OrChunks) 2975 return false; 2976 } 2977 2978 // Materialize the constant to be inserted. 2979 SDLoc DL(N); 2980 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; 2981 SDNode *MOVI = CurDAG->getMachineNode( 2982 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); 2983 2984 // Create the BFI/BFXIL instruction. 2985 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), 2986 CurDAG->getTargetConstant(ImmR, DL, VT), 2987 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2988 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2989 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2990 return true; 2991 } 2992 2993 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, 2994 SDValue &ShiftedOperand, 2995 uint64_t &EncodedShiftImm) { 2996 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR. 2997 if (!Dst.hasOneUse()) 2998 return false; 2999 3000 EVT VT = Dst.getValueType(); 3001 assert((VT == MVT::i32 || VT == MVT::i64) && 3002 "Caller should guarantee that VT is one of i32 or i64"); 3003 const unsigned SizeInBits = VT.getSizeInBits(); 3004 3005 SDLoc DL(Dst.getNode()); 3006 uint64_t AndImm, ShlImm; 3007 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) && 3008 isShiftedMask_64(AndImm)) { 3009 // Avoid transforming 'DstOp0' if it has other uses than the AND node. 3010 SDValue DstOp0 = Dst.getOperand(0); 3011 if (!DstOp0.hasOneUse()) 3012 return false; 3013 3014 // An example to illustrate the transformation 3015 // From: 3016 // lsr x8, x1, #1 3017 // and x8, x8, #0x3f80 3018 // bfxil x8, x1, #0, #7 3019 // To: 3020 // and x8, x23, #0x7f 3021 // ubfx x9, x23, #8, #7 3022 // orr x23, x8, x9, lsl #7 3023 // 3024 // The number of instructions remains the same, but ORR is faster than BFXIL 3025 // on many AArch64 processors (or as good as BFXIL if not faster). Besides, 3026 // the dependency chain is improved after the transformation. 3027 uint64_t SrlImm; 3028 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) { 3029 uint64_t NumTrailingZeroInShiftedMask = countTrailingZeros(AndImm); 3030 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) { 3031 unsigned MaskWidth = 3032 countTrailingOnes(AndImm >> NumTrailingZeroInShiftedMask); 3033 unsigned UBFMOpc = 3034 (VT == MVT::i32) ? 
AArch64::UBFMWri : AArch64::UBFMXri; 3035 SDNode *UBFMNode = CurDAG->getMachineNode( 3036 UBFMOpc, DL, VT, DstOp0.getOperand(0), 3037 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL, 3038 VT), 3039 CurDAG->getTargetConstant( 3040 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT)); 3041 ShiftedOperand = SDValue(UBFMNode, 0); 3042 EncodedShiftImm = AArch64_AM::getShifterImm( 3043 AArch64_AM::LSL, NumTrailingZeroInShiftedMask); 3044 return true; 3045 } 3046 } 3047 return false; 3048 } 3049 3050 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) { 3051 ShiftedOperand = Dst.getOperand(0); 3052 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm); 3053 return true; 3054 } 3055 3056 uint64_t SrlImm; 3057 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) { 3058 ShiftedOperand = Dst.getOperand(0); 3059 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm); 3060 return true; 3061 } 3062 return false; 3063 } 3064 3065 // Given an 'ISD::OR' node that is going to be selected as BFM, analyze 3066 // the operands and select it to AArch64::ORR with shifted registers if 3067 // that's more efficient. Returns true iff selection to AArch64::ORR happens. 3068 static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, 3069 SDValue Src, SDValue Dst, SelectionDAG *CurDAG, 3070 const bool BiggerPattern) { 3071 EVT VT = N->getValueType(0); 3072 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node"); 3073 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) || 3074 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) && 3075 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR"); 3076 assert((VT == MVT::i32 || VT == MVT::i64) && 3077 "Expect result type to be i32 or i64 since N is combinable to BFM"); 3078 SDLoc DL(N); 3079 3080 // Bail out if BFM simplifies away one node in BFM Dst. 3081 if (OrOpd1 != Dst) 3082 return false; 3083 3084 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs; 3085 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer 3086 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true. 3087 if (BiggerPattern) { 3088 uint64_t SrcAndImm; 3089 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) && 3090 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) { 3091 // OrOpd0 = AND Src, #Mask 3092 // So BFM simplifies away one AND node from Src and doesn't simplify away 3093 // nodes from Dst. If ORR with left-shifted operand also simplifies away 3094 // one node (from Rd), ORR is better since it has higher throughput and 3095 // smaller latency than BFM on many AArch64 processors (and for the rest 3096 // ORR is at least as good as BFM). 
3097 SDValue ShiftedOperand; 3098 uint64_t EncodedShiftImm; 3099 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand, 3100 EncodedShiftImm)) { 3101 SDValue Ops[] = {OrOpd0, ShiftedOperand, 3102 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)}; 3103 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3104 return true; 3105 } 3106 } 3107 return false; 3108 } 3109 3110 assert((!BiggerPattern) && "BiggerPattern should be handled above"); 3111 3112 uint64_t ShlImm; 3113 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) { 3114 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) { 3115 SDValue Ops[] = { 3116 Dst, Src, 3117 CurDAG->getTargetConstant( 3118 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; 3119 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3120 return true; 3121 } 3122 3123 // Select the following pattern to left-shifted operand rather than BFI. 3124 // %val1 = op .. 3125 // %val2 = shl %val1, #imm 3126 // %res = or %val1, %val2 3127 // 3128 // If N is selected to be BFI, we know that 3129 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into 3130 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved) 3131 // 3132 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly. 3133 if (OrOpd0.getOperand(0) == OrOpd1) { 3134 SDValue Ops[] = { 3135 OrOpd1, OrOpd1, 3136 CurDAG->getTargetConstant( 3137 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; 3138 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3139 return true; 3140 } 3141 } 3142 3143 uint64_t SrlImm; 3144 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) { 3145 // Select the following pattern to right-shifted operand rather than BFXIL. 3146 // %val1 = op .. 3147 // %val2 = lshr %val1, #imm 3148 // %res = or %val1, %val2 3149 // 3150 // If N is selected to be BFXIL, we know that 3151 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into 3152 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved) 3153 // 3154 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly. 3155 if (OrOpd0.getOperand(0) == OrOpd1) { 3156 SDValue Ops[] = { 3157 OrOpd1, OrOpd1, 3158 CurDAG->getTargetConstant( 3159 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)}; 3160 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); 3161 return true; 3162 } 3163 } 3164 3165 return false; 3166 } 3167 3168 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, 3169 SelectionDAG *CurDAG) { 3170 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 3171 3172 EVT VT = N->getValueType(0); 3173 if (VT != MVT::i32 && VT != MVT::i64) 3174 return false; 3175 3176 unsigned BitWidth = VT.getSizeInBits(); 3177 3178 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 3179 // have the expected shape. Try to undo that. 
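  // The numbers of low and high bits that every user is known to ignore
  // (computed below from UsefulBits) are used to relax the mask checks
  // accordingly.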
  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();

  // Given an OR operation, check if we have the following pattern
  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
  // isBitfieldExtractOp)
  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
  //                 countTrailingZeros(mask2) == imm2 - imm + 1
  // f = d | c
  // if yes, replace the OR instruction with:
  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2

  // OR is commutative, so check all combinations of operand order and values
  // of BiggerPattern, i.e.
  //     Opd0, Opd1, BiggerPattern=false
  //     Opd1, Opd0, BiggerPattern=false
  //     Opd0, Opd1, BiggerPattern=true
  //     Opd1, Opd0, BiggerPattern=true
  // Several of these combinations may match, so check with BiggerPattern=false
  // first since that will produce better results by matching more instructions
  // and/or inserting fewer extra instructions.
  for (int I = 0; I < 4; ++I) {

    SDValue Dst, Src;
    unsigned ImmR, ImmS;
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();

    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S).
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion.
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint is to catch bitfield insertion; we may want to
      // widen the pattern if we want to grab the general bitfield move case.
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
                                       BiggerPattern,
                                       Src, DstLSB, Width)) {
      ImmR = (BitWidth - DstLSB) % BitWidth;
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern.
    EVT VT = OrOpd1Val.getValueType();
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the known-zero bits for the candidate of the first (preserved)
    // operand. This catches more general cases than just looking for an AND
    // with an immediate: simplify-demanded-bits may have removed the AND
    // instruction because it proved it was useless.
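    // Illustrative example: if OrOpd1Val is "shl X, #16", its low 16 bits are
    // known zero, so a 16-bit field can be inserted at bit 0 below even though
    // no explicit AND mask is present.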
3249 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 3250 3251 // Check if there is enough room for the second operand to appear 3252 // in the first one 3253 APInt BitsToBeInserted = 3254 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 3255 3256 if ((BitsToBeInserted & ~Known.Zero) != 0) 3257 continue; 3258 3259 // Set the first operand 3260 uint64_t Imm; 3261 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 3262 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 3263 // In that case, we can eliminate the AND 3264 Dst = OrOpd1->getOperand(0); 3265 else 3266 // Maybe the AND has been removed by simplify-demanded-bits 3267 // or is useful because it discards more bits 3268 Dst = OrOpd1Val; 3269 3270 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR 3271 // with shifted operand is more efficient. 3272 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG, 3273 BiggerPattern)) 3274 return true; 3275 3276 // both parts match 3277 SDLoc DL(N); 3278 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 3279 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3280 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 3281 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3282 return true; 3283 } 3284 3285 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 3286 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 3287 // mask (e.g., 0x000ffff0). 3288 uint64_t Mask0Imm, Mask1Imm; 3289 SDValue And0 = N->getOperand(0); 3290 SDValue And1 = N->getOperand(1); 3291 if (And0.hasOneUse() && And1.hasOneUse() && 3292 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 3293 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 3294 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 3295 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 3296 3297 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 3298 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 3299 // bits to be inserted. 3300 if (isShiftedMask(Mask0Imm, VT)) { 3301 std::swap(And0, And1); 3302 std::swap(Mask0Imm, Mask1Imm); 3303 } 3304 3305 SDValue Src = And1->getOperand(0); 3306 SDValue Dst = And0->getOperand(0); 3307 unsigned LSB = countTrailingZeros(Mask1Imm); 3308 int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation(); 3309 3310 // The BFXIL inserts the low-order bits from a source register, so right 3311 // shift the needed bits into place. 3312 SDLoc DL(N); 3313 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 3314 uint64_t LsrImm = LSB; 3315 if (Src->hasOneUse() && 3316 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) && 3317 (LsrImm + LSB) < BitWidth) { 3318 Src = Src->getOperand(0); 3319 LsrImm += LSB; 3320 } 3321 3322 SDNode *LSR = CurDAG->getMachineNode( 3323 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT), 3324 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 3325 3326 // BFXIL is an alias of BFM, so translate to BFM operands. 3327 unsigned ImmR = (BitWidth - LSB) % BitWidth; 3328 unsigned ImmS = Width - 1; 3329 3330 // Create the BFXIL instruction. 3331 SDValue Ops[] = {Dst, SDValue(LSR, 0), 3332 CurDAG->getTargetConstant(ImmR, DL, VT), 3333 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3334 unsigned Opc = (VT == MVT::i32) ? 
AArch64::BFMWri : AArch64::BFMXri; 3335 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3336 return true; 3337 } 3338 3339 return false; 3340 } 3341 3342 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 3343 if (N->getOpcode() != ISD::OR) 3344 return false; 3345 3346 APInt NUsefulBits; 3347 getUsefulBits(SDValue(N, 0), NUsefulBits); 3348 3349 // If all bits are not useful, just return UNDEF. 3350 if (!NUsefulBits) { 3351 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 3352 return true; 3353 } 3354 3355 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 3356 return true; 3357 3358 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 3359 } 3360 3361 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 3362 /// equivalent of a left shift by a constant amount followed by an and masking 3363 /// out a contiguous set of bits. 3364 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 3365 if (N->getOpcode() != ISD::AND) 3366 return false; 3367 3368 EVT VT = N->getValueType(0); 3369 if (VT != MVT::i32 && VT != MVT::i64) 3370 return false; 3371 3372 SDValue Op0; 3373 int DstLSB, Width; 3374 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 3375 Op0, DstLSB, Width)) 3376 return false; 3377 3378 // ImmR is the rotate right amount. 3379 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 3380 // ImmS is the most significant bit of the source to be moved. 3381 unsigned ImmS = Width - 1; 3382 3383 SDLoc DL(N); 3384 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 3385 CurDAG->getTargetConstant(ImmS, DL, VT)}; 3386 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 3387 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3388 return true; 3389 } 3390 3391 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 3392 /// variable shift/rotate instructions. 3393 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 3394 EVT VT = N->getValueType(0); 3395 3396 unsigned Opc; 3397 switch (N->getOpcode()) { 3398 case ISD::ROTR: 3399 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 3400 break; 3401 case ISD::SHL: 3402 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 3403 break; 3404 case ISD::SRL: 3405 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 3406 break; 3407 case ISD::SRA: 3408 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 3409 break; 3410 default: 3411 return false; 3412 } 3413 3414 uint64_t Size; 3415 uint64_t Bits; 3416 if (VT == MVT::i32) { 3417 Bits = 5; 3418 Size = 32; 3419 } else if (VT == MVT::i64) { 3420 Bits = 6; 3421 Size = 64; 3422 } else 3423 return false; 3424 3425 SDValue ShiftAmt = N->getOperand(1); 3426 SDLoc DL(N); 3427 SDValue NewShiftAmt; 3428 3429 // Skip over an extend of the shift amount. 3430 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 3431 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 3432 ShiftAmt = ShiftAmt->getOperand(0); 3433 3434 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 3435 SDValue Add0 = ShiftAmt->getOperand(0); 3436 SDValue Add1 = ShiftAmt->getOperand(1); 3437 uint64_t Add0Imm; 3438 uint64_t Add1Imm; 3439 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) { 3440 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 3441 // to avoid the ADD/SUB. 
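      // Illustrative example: "(shl X, (add Y, 64))" on i64 is selected below
      // as "lslv Xd, X, Y", since the variable shift instructions only read
      // the low 6 (i64) or 5 (i32) bits of the shift amount anyway.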
3442 NewShiftAmt = Add0; 3443 } else if (ShiftAmt->getOpcode() == ISD::SUB && 3444 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 3445 (Add0Imm % Size == 0)) { 3446 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X 3447 // to generate a NEG instead of a SUB from a constant. 3448 unsigned NegOpc; 3449 unsigned ZeroReg; 3450 EVT SubVT = ShiftAmt->getValueType(0); 3451 if (SubVT == MVT::i32) { 3452 NegOpc = AArch64::SUBWrr; 3453 ZeroReg = AArch64::WZR; 3454 } else { 3455 assert(SubVT == MVT::i64); 3456 NegOpc = AArch64::SUBXrr; 3457 ZeroReg = AArch64::XZR; 3458 } 3459 SDValue Zero = 3460 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 3461 MachineSDNode *Neg = 3462 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 3463 NewShiftAmt = SDValue(Neg, 0); 3464 } else if (ShiftAmt->getOpcode() == ISD::SUB && 3465 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) { 3466 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X 3467 // to generate a NOT instead of a SUB from a constant. 3468 unsigned NotOpc; 3469 unsigned ZeroReg; 3470 EVT SubVT = ShiftAmt->getValueType(0); 3471 if (SubVT == MVT::i32) { 3472 NotOpc = AArch64::ORNWrr; 3473 ZeroReg = AArch64::WZR; 3474 } else { 3475 assert(SubVT == MVT::i64); 3476 NotOpc = AArch64::ORNXrr; 3477 ZeroReg = AArch64::XZR; 3478 } 3479 SDValue Zero = 3480 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 3481 MachineSDNode *Not = 3482 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1); 3483 NewShiftAmt = SDValue(Not, 0); 3484 } else 3485 return false; 3486 } else { 3487 // If the shift amount is masked with an AND, check that the mask covers the 3488 // bits that are implicitly ANDed off by the above opcodes and if so, skip 3489 // the AND. 3490 uint64_t MaskImm; 3491 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && 3492 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) 3493 return false; 3494 3495 if (countTrailingOnes(MaskImm) < Bits) 3496 return false; 3497 3498 NewShiftAmt = ShiftAmt->getOperand(0); 3499 } 3500 3501 // Narrow/widen the shift amount to match the size of the shift operation. 3502 if (VT == MVT::i32) 3503 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 3504 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 3505 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 3506 MachineSDNode *Ext = CurDAG->getMachineNode( 3507 AArch64::SUBREG_TO_REG, DL, VT, 3508 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 3509 NewShiftAmt = SDValue(Ext, 0); 3510 } 3511 3512 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 3513 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 3514 return true; 3515 } 3516 3517 bool 3518 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 3519 unsigned RegWidth) { 3520 APFloat FVal(0.0); 3521 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 3522 FVal = CN->getValueAPF(); 3523 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 3524 // Some otherwise illegal constants are allowed in this case. 
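    // Illustrative example: an f64 constant such as 65536.0 (2^16) cannot be
    // materialised with an FMOV immediate and is typically legalised into a
    // constant-pool load; it is still recognised here and yields FBits == 16.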
    if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
      return false;

    ConstantPoolSDNode *CN =
        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
  } else
    return false;

  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
  // x-register.
  //
  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
  // integers.
  bool IsExact;

  // fbits is between 1 and 64 in the worst case, which means the fmul
  // could have 2^64 as an actual operand. Need 65 bits of precision.
  APSInt IntVal(65, true);
  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);

  // N.b. isPowerOf2 also checks for > 0.
  if (!IsExact || !IntVal.isPowerOf2()) return false;
  unsigned FBits = IntVal.logBase2();

  // The checks above should have guaranteed that we haven't lost information
  // in finding FBits, but it must still be in range.
  if (FBits == 0 || FBits > RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
  return true;
}

// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
// integer values of the five fields, and combines them into a single value to
// be used as the immediate operand of the MRS/MSR instruction.
static int getIntOperandFromRegisterString(StringRef RegString) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() == 1)
    return -1;

  assert(Fields.size() == 5
         && "Invalid number of fields in read register string");

  SmallVector<int, 5> Ops;
  bool AllIntFields = true;

  for (StringRef Field : Fields) {
    unsigned IntField;
    AllIntFields &= !Field.getAsInteger(10, IntField);
    Ops.push_back(IntField);
  }

  assert(AllIntFields &&
         "Unexpected non-integer value in special register string.");
  (void)AllIntFields;

  // Need to combine the integer fields of the string into a single value
  // based on the bit encoding of the MRS/MSR instruction.
  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
         (Ops[3] << 3) | (Ops[4]);
}

// Lower the read_register intrinsic to an MRS instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;

  unsigned Opcode64Bit = AArch64::MRS;
  int Imm = getIntOperandFromRegisterString(RegString->getString());
  if (Imm == -1) {
    // No match; use the sysreg mapper to map the remaining possible strings
    // to the value for the register to be used for the instruction operand.
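    // Illustrative example: a name such as "tpidr_el0" can be resolved to its
    // sysreg encoding (op0=3, op1=3, CRn=13, CRm=0, op2=2) here, whereas the
    // equivalent colon form "3:3:13:0:2" was already handled above by
    // getIntOperandFromRegisterString.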
    const auto *TheReg =
        AArch64SysReg::lookupSysRegByName(RegString->getString());
    if (TheReg && TheReg->Readable &&
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());

    if (Imm == -1) {
      // Still no match; see if this is "pc", or give up.
      if (!ReadIs128Bit && RegString->getString() == "pc") {
        Opcode64Bit = AArch64::ADR;
        Imm = 0;
      } else {
        return false;
      }
    }
  }

  SDValue InChain = N->getOperand(0);
  SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
  if (!ReadIs128Bit) {
    CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
                         {SysRegImm, InChain});
  } else {
    SDNode *MRRS = CurDAG->getMachineNode(
        AArch64::MRRS, DL,
        {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
        {SysRegImm, InChain});

    // Sysregs are not endian-dependent: the even register always contains the
    // low half of the value.
    SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue OutChain = SDValue(MRRS, 1);

    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), OutChain);
  }
  return true;
}

// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;

  if (!WriteIs128Bit) {
    // Check if the register was one of those allowed as the pstatefield value
    // in the MSR (immediate) instruction. To accept the values allowed in the
    // pstatefield for the MSR (immediate) instruction, we also require that an
    // immediate value has been provided as an argument; we know that this is
    // the case because it has been ensured by semantic checking.
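    // Illustrative example: a pstate name such as "daifset" with a constant
    // argument is selected by trySelectPState below to the MSR (immediate)
    // form, e.g. "msr DAIFSet, #2", so no general-purpose register is needed.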
3671 auto trySelectPState = [&](auto PMapper, unsigned State) { 3672 if (PMapper) { 3673 assert(isa<ConstantSDNode>(N->getOperand(2)) && 3674 "Expected a constant integer expression."); 3675 unsigned Reg = PMapper->Encoding; 3676 uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 3677 CurDAG->SelectNodeTo( 3678 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3679 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0)); 3680 return true; 3681 } 3682 return false; 3683 }; 3684 3685 if (trySelectPState( 3686 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()), 3687 AArch64::MSRpstateImm4)) 3688 return true; 3689 if (trySelectPState( 3690 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()), 3691 AArch64::MSRpstateImm1)) 3692 return true; 3693 } 3694 3695 int Imm = getIntOperandFromRegisterString(RegString->getString()); 3696 if (Imm == -1) { 3697 // Use the sysreg mapper to attempt to map the remaining possible strings 3698 // to the value for the register to be used for the MSR (register) 3699 // instruction operand. 3700 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 3701 if (TheReg && TheReg->Writeable && 3702 TheReg->haveFeatures(Subtarget->getFeatureBits())) 3703 Imm = TheReg->Encoding; 3704 else 3705 Imm = AArch64SysReg::parseGenericRegister(RegString->getString()); 3706 3707 if (Imm == -1) 3708 return false; 3709 } 3710 3711 SDValue InChain = N->getOperand(0); 3712 if (!WriteIs128Bit) { 3713 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other, 3714 CurDAG->getTargetConstant(Imm, DL, MVT::i32), 3715 N->getOperand(2), InChain); 3716 } else { 3717 // No endian swap. The lower half always goes into the even subreg, and the 3718 // higher half always into the odd supreg. 3719 SDNode *Pair = CurDAG->getMachineNode( 3720 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */, 3721 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL, 3722 MVT::i32), 3723 N->getOperand(2), 3724 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32), 3725 N->getOperand(3), 3726 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)}); 3727 3728 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other, 3729 CurDAG->getTargetConstant(Imm, DL, MVT::i32), 3730 SDValue(Pair, 0), InChain); 3731 } 3732 3733 return true; 3734 } 3735 3736 /// We've got special pseudo-instructions for these 3737 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3738 unsigned Opcode; 3739 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3740 3741 // Leave IR for LSE if subtarget supports it. 3742 if (Subtarget->hasLSE()) return false; 3743 3744 if (MemTy == MVT::i8) 3745 Opcode = AArch64::CMP_SWAP_8; 3746 else if (MemTy == MVT::i16) 3747 Opcode = AArch64::CMP_SWAP_16; 3748 else if (MemTy == MVT::i32) 3749 Opcode = AArch64::CMP_SWAP_32; 3750 else if (MemTy == MVT::i64) 3751 Opcode = AArch64::CMP_SWAP_64; 3752 else 3753 llvm_unreachable("Unknown AtomicCmpSwap type"); 3754 3755 MVT RegTy = MemTy == MVT::i64 ? 
MVT::i64 : MVT::i32; 3756 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3757 N->getOperand(0)}; 3758 SDNode *CmpSwap = CurDAG->getMachineNode( 3759 Opcode, SDLoc(N), 3760 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 3761 3762 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3763 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3764 3765 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3766 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3767 CurDAG->RemoveDeadNode(N); 3768 3769 return true; 3770 } 3771 3772 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, 3773 SDValue &Shift) { 3774 if (!isa<ConstantSDNode>(N)) 3775 return false; 3776 3777 SDLoc DL(N); 3778 uint64_t Val = cast<ConstantSDNode>(N) 3779 ->getAPIntValue() 3780 .trunc(VT.getFixedSizeInBits()) 3781 .getZExtValue(); 3782 3783 switch (VT.SimpleTy) { 3784 case MVT::i8: 3785 // All immediates are supported. 3786 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3787 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 3788 return true; 3789 case MVT::i16: 3790 case MVT::i32: 3791 case MVT::i64: 3792 // Support 8bit unsigned immediates. 3793 if (Val <= 255) { 3794 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3795 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 3796 return true; 3797 } 3798 // Support 16bit unsigned immediates that are a multiple of 256. 3799 if (Val <= 65280 && Val % 256 == 0) { 3800 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3801 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); 3802 return true; 3803 } 3804 break; 3805 default: 3806 break; 3807 } 3808 3809 return false; 3810 } 3811 3812 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, 3813 SDValue &Shift) { 3814 if (!isa<ConstantSDNode>(N)) 3815 return false; 3816 3817 SDLoc DL(N); 3818 int64_t Val = cast<ConstantSDNode>(N) 3819 ->getAPIntValue() 3820 .trunc(VT.getFixedSizeInBits()) 3821 .getSExtValue(); 3822 3823 switch (VT.SimpleTy) { 3824 case MVT::i8: 3825 // All immediates are supported. 3826 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3827 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 3828 return true; 3829 case MVT::i16: 3830 case MVT::i32: 3831 case MVT::i64: 3832 // Support 8bit signed immediates. 3833 if (Val >= -128 && Val <= 127) { 3834 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3835 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 3836 return true; 3837 } 3838 // Support 16bit signed immediates that are a multiple of 256. 
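    // Illustrative example: Val == -1024 (0xFC00 as an i16) is encoded below
    // as Imm == 0xFC with Shift == 8, i.e. the "#-4, lsl #8" form.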
3839 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { 3840 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3841 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32); 3842 return true; 3843 } 3844 break; 3845 default: 3846 break; 3847 } 3848 3849 return false; 3850 } 3851 3852 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { 3853 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3854 int64_t ImmVal = CNode->getSExtValue(); 3855 SDLoc DL(N); 3856 if (ImmVal >= -128 && ImmVal < 128) { 3857 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3858 return true; 3859 } 3860 } 3861 return false; 3862 } 3863 3864 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { 3865 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3866 uint64_t ImmVal = CNode->getZExtValue(); 3867 3868 switch (VT.SimpleTy) { 3869 case MVT::i8: 3870 ImmVal &= 0xFF; 3871 break; 3872 case MVT::i16: 3873 ImmVal &= 0xFFFF; 3874 break; 3875 case MVT::i32: 3876 ImmVal &= 0xFFFFFFFF; 3877 break; 3878 case MVT::i64: 3879 break; 3880 default: 3881 llvm_unreachable("Unexpected type"); 3882 } 3883 3884 if (ImmVal < 256) { 3885 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3886 return true; 3887 } 3888 } 3889 return false; 3890 } 3891 3892 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, 3893 bool Invert) { 3894 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3895 uint64_t ImmVal = CNode->getZExtValue(); 3896 SDLoc DL(N); 3897 3898 if (Invert) 3899 ImmVal = ~ImmVal; 3900 3901 // Shift mask depending on type size. 3902 switch (VT.SimpleTy) { 3903 case MVT::i8: 3904 ImmVal &= 0xFF; 3905 ImmVal |= ImmVal << 8; 3906 ImmVal |= ImmVal << 16; 3907 ImmVal |= ImmVal << 32; 3908 break; 3909 case MVT::i16: 3910 ImmVal &= 0xFFFF; 3911 ImmVal |= ImmVal << 16; 3912 ImmVal |= ImmVal << 32; 3913 break; 3914 case MVT::i32: 3915 ImmVal &= 0xFFFFFFFF; 3916 ImmVal |= ImmVal << 32; 3917 break; 3918 case MVT::i64: 3919 break; 3920 default: 3921 llvm_unreachable("Unexpected type"); 3922 } 3923 3924 uint64_t encoding; 3925 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { 3926 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); 3927 return true; 3928 } 3929 } 3930 return false; 3931 } 3932 3933 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. 3934 // Rather than attempt to normalise everything we can sometimes saturate the 3935 // shift amount during selection. This function also allows for consistent 3936 // isel patterns by ensuring the resulting "Imm" node is of the i32 type 3937 // required by the instructions. 3938 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, 3939 uint64_t High, bool AllowSaturation, 3940 SDValue &Imm) { 3941 if (auto *CN = dyn_cast<ConstantSDNode>(N)) { 3942 uint64_t ImmVal = CN->getZExtValue(); 3943 3944 // Reject shift amounts that are too small. 3945 if (ImmVal < Low) 3946 return false; 3947 3948 // Reject or saturate shift amounts that are too big. 3949 if (ImmVal > High) { 3950 if (!AllowSaturation) 3951 return false; 3952 ImmVal = High; 3953 } 3954 3955 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3956 return true; 3957 } 3958 3959 return false; 3960 } 3961 3962 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 3963 // tagp(FrameIndex, IRGstack, tag_offset): 3964 // since the offset between FrameIndex and IRGstack is a compile-time 3965 // constant, this can be lowered to a single ADDG instruction. 
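  // Illustrative example: for "llvm.aarch64.tagp(%alloca, %irg_sp, 3)" the
  // TAGPstack pseudo built below is expected to become roughly
  // "addg xd, sp, #frame_offset, #3" once the frame index is resolved.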
3966 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 3967 return false; 3968 } 3969 3970 SDValue IRG_SP = N->getOperand(2); 3971 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 3972 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != 3973 Intrinsic::aarch64_irg_sp) { 3974 return false; 3975 } 3976 3977 const TargetLowering *TLI = getTargetLowering(); 3978 SDLoc DL(N); 3979 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 3980 SDValue FiOp = CurDAG->getTargetFrameIndex( 3981 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3982 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3983 3984 SDNode *Out = CurDAG->getMachineNode( 3985 AArch64::TAGPstack, DL, MVT::i64, 3986 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 3987 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3988 ReplaceNode(N, Out); 3989 return true; 3990 } 3991 3992 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 3993 assert(isa<ConstantSDNode>(N->getOperand(3)) && 3994 "llvm.aarch64.tagp third argument must be an immediate"); 3995 if (trySelectStackSlotTagP(N)) 3996 return; 3997 // FIXME: above applies in any case when offset between Op1 and Op2 is a 3998 // compile-time constant, not just for stack allocations. 3999 4000 // General case for unrelated pointers in Op1 and Op2. 4001 SDLoc DL(N); 4002 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 4003 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 4004 {N->getOperand(1), N->getOperand(2)}); 4005 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 4006 {SDValue(N1, 0), N->getOperand(2)}); 4007 SDNode *N3 = CurDAG->getMachineNode( 4008 AArch64::ADDG, DL, MVT::i64, 4009 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 4010 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 4011 ReplaceNode(N, N3); 4012 } 4013 4014 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length 4015 // vector types larger than NEON don't have a matching SubRegIndex. 4016 static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) { 4017 assert(V.getValueType().isScalableVector() && 4018 V.getValueType().getSizeInBits().getKnownMinValue() == 4019 AArch64::SVEBitsPerBlock && 4020 "Expected to extract from a packed scalable vector!"); 4021 assert(VT.isFixedLengthVector() && 4022 "Expected to extract a fixed length vector!"); 4023 4024 SDLoc DL(V); 4025 switch (VT.getSizeInBits()) { 4026 case 64: { 4027 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); 4028 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg); 4029 } 4030 case 128: { 4031 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32); 4032 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg); 4033 } 4034 default: { 4035 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 4036 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 4037 } 4038 } 4039 } 4040 4041 // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length 4042 // vector types larger than NEON don't have a matching SubRegIndex. 
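// For example, a 256-bit fixed-length vector such as v8i32 has no dsub/zsub
// subregister index, so the default case below falls back to
// COPY_TO_REGCLASS into the ZPR register class.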
4043 static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) { 4044 assert(VT.isScalableVector() && 4045 VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && 4046 "Expected to insert into a packed scalable vector!"); 4047 assert(V.getValueType().isFixedLengthVector() && 4048 "Expected to insert a fixed length vector!"); 4049 4050 SDLoc DL(V); 4051 switch (V.getValueType().getSizeInBits()) { 4052 case 64: { 4053 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); 4054 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 4055 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, 4056 SDValue(Container, 0), V, SubReg); 4057 } 4058 case 128: { 4059 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32); 4060 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 4061 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, 4062 SDValue(Container, 0), V, SubReg); 4063 } 4064 default: { 4065 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 4066 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 4067 } 4068 } 4069 } 4070 4071 void AArch64DAGToDAGISel::Select(SDNode *Node) { 4072 // If we have a custom node, we already have selected! 4073 if (Node->isMachineOpcode()) { 4074 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 4075 Node->setNodeId(-1); 4076 return; 4077 } 4078 4079 // Few custom selection stuff. 4080 EVT VT = Node->getValueType(0); 4081 4082 switch (Node->getOpcode()) { 4083 default: 4084 break; 4085 4086 case ISD::ATOMIC_CMP_SWAP: 4087 if (SelectCMP_SWAP(Node)) 4088 return; 4089 break; 4090 4091 case ISD::READ_REGISTER: 4092 case AArch64ISD::MRRS: 4093 if (tryReadRegister(Node)) 4094 return; 4095 break; 4096 4097 case ISD::WRITE_REGISTER: 4098 case AArch64ISD::MSRR: 4099 if (tryWriteRegister(Node)) 4100 return; 4101 break; 4102 4103 case ISD::ADD: 4104 if (tryMLAV64LaneV128(Node)) 4105 return; 4106 break; 4107 4108 case ISD::LOAD: { 4109 // Try to select as an indexed load. Fall through to normal processing 4110 // if we can't. 4111 if (tryIndexedLoad(Node)) 4112 return; 4113 break; 4114 } 4115 4116 case ISD::SRL: 4117 case ISD::AND: 4118 case ISD::SRA: 4119 case ISD::SIGN_EXTEND_INREG: 4120 if (tryBitfieldExtractOp(Node)) 4121 return; 4122 if (tryBitfieldInsertInZeroOp(Node)) 4123 return; 4124 [[fallthrough]]; 4125 case ISD::ROTR: 4126 case ISD::SHL: 4127 if (tryShiftAmountMod(Node)) 4128 return; 4129 break; 4130 4131 case ISD::SIGN_EXTEND: 4132 if (tryBitfieldExtractOpFromSExt(Node)) 4133 return; 4134 break; 4135 4136 case ISD::FP_EXTEND: 4137 if (tryHighFPExt(Node)) 4138 return; 4139 break; 4140 4141 case ISD::OR: 4142 if (tryBitfieldInsertOp(Node)) 4143 return; 4144 break; 4145 4146 case ISD::EXTRACT_SUBVECTOR: { 4147 // Bail when not a "cast" like extract_subvector. 4148 if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0) 4149 break; 4150 4151 // Bail when normal isel can do the job. 4152 EVT InVT = Node->getOperand(0).getValueType(); 4153 if (VT.isScalableVector() || InVT.isFixedLengthVector()) 4154 break; 4155 4156 // NOTE: We can only get here when doing fixed length SVE code generation. 4157 // We do manual selection because the types involved are not linked to real 4158 // registers (despite being legal) and must be coerced into SVE registers. 
4159 // 4160 // NOTE: If the above changes, be aware that selection will still not work 4161 // because the td definition of extract_vector does not support extracting 4162 // a fixed length vector from a scalable vector. 4163 4164 ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0))); 4165 return; 4166 } 4167 4168 case ISD::INSERT_SUBVECTOR: { 4169 // Bail when not a "cast" like insert_subvector. 4170 if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0) 4171 break; 4172 if (!Node->getOperand(0).isUndef()) 4173 break; 4174 4175 // Bail when normal isel should do the job. 4176 EVT InVT = Node->getOperand(1).getValueType(); 4177 if (VT.isFixedLengthVector() || InVT.isScalableVector()) 4178 break; 4179 4180 // NOTE: We can only get here when doing fixed length SVE code generation. 4181 // We do manual selection because the types involved are not linked to real 4182 // registers (despite being legal) and must be coerced into SVE registers. 4183 // 4184 // NOTE: If the above changes, be aware that selection will still not work 4185 // because the td definition of insert_vector does not support inserting a 4186 // fixed length vector into a scalable vector. 4187 4188 ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1))); 4189 return; 4190 } 4191 4192 case ISD::Constant: { 4193 // Materialize zero constants as copies from WZR/XZR. This allows 4194 // the coalescer to propagate these into other instructions. 4195 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 4196 if (ConstNode->isZero()) { 4197 if (VT == MVT::i32) { 4198 SDValue New = CurDAG->getCopyFromReg( 4199 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 4200 ReplaceNode(Node, New.getNode()); 4201 return; 4202 } else if (VT == MVT::i64) { 4203 SDValue New = CurDAG->getCopyFromReg( 4204 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 4205 ReplaceNode(Node, New.getNode()); 4206 return; 4207 } 4208 } 4209 break; 4210 } 4211 4212 case ISD::FrameIndex: { 4213 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 4214 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 4215 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 4216 const TargetLowering *TLI = getTargetLowering(); 4217 SDValue TFI = CurDAG->getTargetFrameIndex( 4218 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 4219 SDLoc DL(Node); 4220 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 4221 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 4222 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 4223 return; 4224 } 4225 case ISD::INTRINSIC_W_CHAIN: { 4226 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 4227 switch (IntNo) { 4228 default: 4229 break; 4230 case Intrinsic::aarch64_ldaxp: 4231 case Intrinsic::aarch64_ldxp: { 4232 unsigned Op = 4233 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 4234 SDValue MemAddr = Node->getOperand(2); 4235 SDLoc DL(Node); 4236 SDValue Chain = Node->getOperand(0); 4237 4238 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 4239 MVT::Other, MemAddr, Chain); 4240 4241 // Transfer memoperands. 4242 MachineMemOperand *MemOp = 4243 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 4244 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4245 ReplaceNode(Node, Ld); 4246 return; 4247 } 4248 case Intrinsic::aarch64_stlxp: 4249 case Intrinsic::aarch64_stxp: { 4250 unsigned Op = 4251 IntNo == Intrinsic::aarch64_stlxp ? 
AArch64::STLXPX : AArch64::STXPX; 4252 SDLoc DL(Node); 4253 SDValue Chain = Node->getOperand(0); 4254 SDValue ValLo = Node->getOperand(2); 4255 SDValue ValHi = Node->getOperand(3); 4256 SDValue MemAddr = Node->getOperand(4); 4257 4258 // Place arguments in the right order. 4259 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 4260 4261 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 4262 // Transfer memoperands. 4263 MachineMemOperand *MemOp = 4264 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 4265 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4266 4267 ReplaceNode(Node, St); 4268 return; 4269 } 4270 case Intrinsic::aarch64_neon_ld1x2: 4271 if (VT == MVT::v8i8) { 4272 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 4273 return; 4274 } else if (VT == MVT::v16i8) { 4275 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 4276 return; 4277 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4278 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 4279 return; 4280 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4281 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 4282 return; 4283 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4284 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 4285 return; 4286 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4287 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 4288 return; 4289 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4290 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 4291 return; 4292 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4293 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 4294 return; 4295 } 4296 break; 4297 case Intrinsic::aarch64_neon_ld1x3: 4298 if (VT == MVT::v8i8) { 4299 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 4300 return; 4301 } else if (VT == MVT::v16i8) { 4302 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 4303 return; 4304 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4305 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 4306 return; 4307 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4308 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 4309 return; 4310 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4311 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 4312 return; 4313 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4314 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 4315 return; 4316 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4317 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 4318 return; 4319 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4320 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 4321 return; 4322 } 4323 break; 4324 case Intrinsic::aarch64_neon_ld1x4: 4325 if (VT == MVT::v8i8) { 4326 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 4327 return; 4328 } else if (VT == MVT::v16i8) { 4329 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 4330 return; 4331 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4332 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 4333 return; 4334 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4335 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 4336 return; 4337 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4338 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 4339 
return; 4340 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4341 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 4342 return; 4343 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4344 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 4345 return; 4346 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4347 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 4348 return; 4349 } 4350 break; 4351 case Intrinsic::aarch64_neon_ld2: 4352 if (VT == MVT::v8i8) { 4353 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 4354 return; 4355 } else if (VT == MVT::v16i8) { 4356 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 4357 return; 4358 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4359 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 4360 return; 4361 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4362 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 4363 return; 4364 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4365 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 4366 return; 4367 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4368 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 4369 return; 4370 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4371 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 4372 return; 4373 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4374 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 4375 return; 4376 } 4377 break; 4378 case Intrinsic::aarch64_neon_ld3: 4379 if (VT == MVT::v8i8) { 4380 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 4381 return; 4382 } else if (VT == MVT::v16i8) { 4383 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 4384 return; 4385 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4386 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 4387 return; 4388 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4389 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 4390 return; 4391 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4392 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 4393 return; 4394 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4395 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 4396 return; 4397 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4398 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 4399 return; 4400 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4401 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 4402 return; 4403 } 4404 break; 4405 case Intrinsic::aarch64_neon_ld4: 4406 if (VT == MVT::v8i8) { 4407 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 4408 return; 4409 } else if (VT == MVT::v16i8) { 4410 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 4411 return; 4412 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4413 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 4414 return; 4415 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4416 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 4417 return; 4418 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4419 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 4420 return; 4421 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4422 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 4423 return; 4424 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4425 SelectLoad(Node, 4, 
AArch64::LD1Fourv1d, AArch64::dsub0); 4426 return; 4427 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4428 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 4429 return; 4430 } 4431 break; 4432 case Intrinsic::aarch64_neon_ld2r: 4433 if (VT == MVT::v8i8) { 4434 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 4435 return; 4436 } else if (VT == MVT::v16i8) { 4437 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 4438 return; 4439 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4440 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 4441 return; 4442 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4443 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 4444 return; 4445 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4446 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 4447 return; 4448 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4449 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 4450 return; 4451 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4452 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 4453 return; 4454 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4455 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 4456 return; 4457 } 4458 break; 4459 case Intrinsic::aarch64_neon_ld3r: 4460 if (VT == MVT::v8i8) { 4461 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 4462 return; 4463 } else if (VT == MVT::v16i8) { 4464 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 4465 return; 4466 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4467 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 4468 return; 4469 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4470 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 4471 return; 4472 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4473 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 4474 return; 4475 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4476 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 4477 return; 4478 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4479 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 4480 return; 4481 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4482 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 4483 return; 4484 } 4485 break; 4486 case Intrinsic::aarch64_neon_ld4r: 4487 if (VT == MVT::v8i8) { 4488 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 4489 return; 4490 } else if (VT == MVT::v16i8) { 4491 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 4492 return; 4493 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4494 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 4495 return; 4496 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4497 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 4498 return; 4499 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4500 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 4501 return; 4502 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4503 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 4504 return; 4505 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4506 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 4507 return; 4508 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4509 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 4510 return; 4511 } 4512 break; 4513 case Intrinsic::aarch64_neon_ld2lane: 4514 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 
4515 SelectLoadLane(Node, 2, AArch64::LD2i8); 4516 return; 4517 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4518 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4519 SelectLoadLane(Node, 2, AArch64::LD2i16); 4520 return; 4521 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4522 VT == MVT::v2f32) { 4523 SelectLoadLane(Node, 2, AArch64::LD2i32); 4524 return; 4525 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4526 VT == MVT::v1f64) { 4527 SelectLoadLane(Node, 2, AArch64::LD2i64); 4528 return; 4529 } 4530 break; 4531 case Intrinsic::aarch64_neon_ld3lane: 4532 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4533 SelectLoadLane(Node, 3, AArch64::LD3i8); 4534 return; 4535 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4536 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4537 SelectLoadLane(Node, 3, AArch64::LD3i16); 4538 return; 4539 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4540 VT == MVT::v2f32) { 4541 SelectLoadLane(Node, 3, AArch64::LD3i32); 4542 return; 4543 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4544 VT == MVT::v1f64) { 4545 SelectLoadLane(Node, 3, AArch64::LD3i64); 4546 return; 4547 } 4548 break; 4549 case Intrinsic::aarch64_neon_ld4lane: 4550 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4551 SelectLoadLane(Node, 4, AArch64::LD4i8); 4552 return; 4553 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4554 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4555 SelectLoadLane(Node, 4, AArch64::LD4i16); 4556 return; 4557 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4558 VT == MVT::v2f32) { 4559 SelectLoadLane(Node, 4, AArch64::LD4i32); 4560 return; 4561 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4562 VT == MVT::v1f64) { 4563 SelectLoadLane(Node, 4, AArch64::LD4i64); 4564 return; 4565 } 4566 break; 4567 case Intrinsic::aarch64_ld64b: 4568 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); 4569 return; 4570 case Intrinsic::aarch64_sve_ld2_sret: { 4571 if (VT == MVT::nxv16i8) { 4572 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, 4573 true); 4574 return; 4575 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4576 VT == MVT::nxv8bf16) { 4577 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, 4578 true); 4579 return; 4580 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4581 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W, 4582 true); 4583 return; 4584 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4585 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D, 4586 true); 4587 return; 4588 } 4589 break; 4590 } 4591 case Intrinsic::aarch64_sve_ld3_sret: { 4592 if (VT == MVT::nxv16i8) { 4593 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, 4594 true); 4595 return; 4596 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4597 VT == MVT::nxv8bf16) { 4598 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H, 4599 true); 4600 return; 4601 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4602 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W, 4603 true); 4604 return; 4605 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4606 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D, 4607 true); 4608 return; 4609 } 4610 break; 4611 } 4612 case Intrinsic::aarch64_sve_ld4_sret: { 4613 if (VT 
== MVT::nxv16i8) { 4614 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B, 4615 true); 4616 return; 4617 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4618 VT == MVT::nxv8bf16) { 4619 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, 4620 true); 4621 return; 4622 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4623 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W, 4624 true); 4625 return; 4626 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4627 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D, 4628 true); 4629 return; 4630 } 4631 break; 4632 } 4633 case Intrinsic::swift_async_context_addr: { 4634 SDLoc DL(Node); 4635 SDValue Chain = Node->getOperand(0); 4636 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64); 4637 SDValue Res = SDValue( 4638 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP, 4639 CurDAG->getTargetConstant(8, DL, MVT::i32), 4640 CurDAG->getTargetConstant(0, DL, MVT::i32)), 4641 0); 4642 ReplaceUses(SDValue(Node, 0), Res); 4643 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1)); 4644 CurDAG->RemoveDeadNode(Node); 4645 4646 auto &MF = CurDAG->getMachineFunction(); 4647 MF.getFrameInfo().setFrameAddressIsTaken(true); 4648 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); 4649 return; 4650 } 4651 } 4652 } break; 4653 case ISD::INTRINSIC_WO_CHAIN: { 4654 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 4655 switch (IntNo) { 4656 default: 4657 break; 4658 case Intrinsic::aarch64_tagp: 4659 SelectTagP(Node); 4660 return; 4661 case Intrinsic::aarch64_neon_tbl2: 4662 SelectTable(Node, 2, 4663 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, 4664 false); 4665 return; 4666 case Intrinsic::aarch64_neon_tbl3: 4667 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 4668 : AArch64::TBLv16i8Three, 4669 false); 4670 return; 4671 case Intrinsic::aarch64_neon_tbl4: 4672 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 4673 : AArch64::TBLv16i8Four, 4674 false); 4675 return; 4676 case Intrinsic::aarch64_neon_tbx2: 4677 SelectTable(Node, 2, 4678 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, 4679 true); 4680 return; 4681 case Intrinsic::aarch64_neon_tbx3: 4682 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 4683 : AArch64::TBXv16i8Three, 4684 true); 4685 return; 4686 case Intrinsic::aarch64_neon_tbx4: 4687 SelectTable(Node, 4, VT == MVT::v8i8 ? 
AArch64::TBXv8i8Four 4688 : AArch64::TBXv16i8Four, 4689 true); 4690 return; 4691 case Intrinsic::aarch64_neon_smull: 4692 case Intrinsic::aarch64_neon_umull: 4693 if (tryMULLV64LaneV128(IntNo, Node)) 4694 return; 4695 break; 4696 case Intrinsic::aarch64_sve_whilege_x2: 4697 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 4698 Node->getValueType(0), 4699 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H, 4700 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D})) 4701 SelectWhilePair(Node, Op); 4702 return; 4703 case Intrinsic::aarch64_sve_whilegt_x2: 4704 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 4705 Node->getValueType(0), 4706 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H, 4707 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D})) 4708 SelectWhilePair(Node, Op); 4709 return; 4710 case Intrinsic::aarch64_sve_whilehi_x2: 4711 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 4712 Node->getValueType(0), 4713 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H, 4714 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D})) 4715 SelectWhilePair(Node, Op); 4716 return; 4717 case Intrinsic::aarch64_sve_whilehs_x2: 4718 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 4719 Node->getValueType(0), 4720 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H, 4721 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D})) 4722 SelectWhilePair(Node, Op); 4723 return; 4724 case Intrinsic::aarch64_sve_whilele_x2: 4725 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 4726 Node->getValueType(0), 4727 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H, 4728 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D})) 4729 SelectWhilePair(Node, Op); 4730 return; 4731 case Intrinsic::aarch64_sve_whilelo_x2: 4732 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 4733 Node->getValueType(0), 4734 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H, 4735 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D})) 4736 SelectWhilePair(Node, Op); 4737 return; 4738 case Intrinsic::aarch64_sve_whilels_x2: 4739 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 4740 Node->getValueType(0), 4741 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H, 4742 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D})) 4743 SelectWhilePair(Node, Op); 4744 return; 4745 case Intrinsic::aarch64_sve_whilelt_x2: 4746 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>( 4747 Node->getValueType(0), 4748 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H, 4749 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D})) 4750 SelectWhilePair(Node, Op); 4751 return; 4752 case Intrinsic::aarch64_sve_fcvts_x2: 4753 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); 4754 return; 4755 case Intrinsic::aarch64_sve_scvtf_x2: 4756 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS); 4757 return; 4758 case Intrinsic::aarch64_sve_fcvtu_x2: 4759 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS); 4760 return; 4761 case Intrinsic::aarch64_sve_ucvtf_x2: 4762 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS); 4763 return; 4764 case Intrinsic::aarch64_sve_fcvts_x4: 4765 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS); 4766 return; 4767 case Intrinsic::aarch64_sve_scvtf_x4: 4768 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS); 4769 return; 4770 case Intrinsic::aarch64_sve_fcvtu_x4: 4771 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS); 4772 return; 4773 case Intrinsic::aarch64_sve_ucvtf_x4: 4774 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS); 4775 return; 4776 } 4777 break; 4778 } 4779 case ISD::INTRINSIC_VOID: { 
4780 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 4781 if (Node->getNumOperands() >= 3) 4782 VT = Node->getOperand(2)->getValueType(0); 4783 switch (IntNo) { 4784 default: 4785 break; 4786 case Intrinsic::aarch64_neon_st1x2: { 4787 if (VT == MVT::v8i8) { 4788 SelectStore(Node, 2, AArch64::ST1Twov8b); 4789 return; 4790 } else if (VT == MVT::v16i8) { 4791 SelectStore(Node, 2, AArch64::ST1Twov16b); 4792 return; 4793 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4794 VT == MVT::v4bf16) { 4795 SelectStore(Node, 2, AArch64::ST1Twov4h); 4796 return; 4797 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4798 VT == MVT::v8bf16) { 4799 SelectStore(Node, 2, AArch64::ST1Twov8h); 4800 return; 4801 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4802 SelectStore(Node, 2, AArch64::ST1Twov2s); 4803 return; 4804 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4805 SelectStore(Node, 2, AArch64::ST1Twov4s); 4806 return; 4807 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4808 SelectStore(Node, 2, AArch64::ST1Twov2d); 4809 return; 4810 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4811 SelectStore(Node, 2, AArch64::ST1Twov1d); 4812 return; 4813 } 4814 break; 4815 } 4816 case Intrinsic::aarch64_neon_st1x3: { 4817 if (VT == MVT::v8i8) { 4818 SelectStore(Node, 3, AArch64::ST1Threev8b); 4819 return; 4820 } else if (VT == MVT::v16i8) { 4821 SelectStore(Node, 3, AArch64::ST1Threev16b); 4822 return; 4823 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4824 VT == MVT::v4bf16) { 4825 SelectStore(Node, 3, AArch64::ST1Threev4h); 4826 return; 4827 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4828 VT == MVT::v8bf16) { 4829 SelectStore(Node, 3, AArch64::ST1Threev8h); 4830 return; 4831 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4832 SelectStore(Node, 3, AArch64::ST1Threev2s); 4833 return; 4834 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4835 SelectStore(Node, 3, AArch64::ST1Threev4s); 4836 return; 4837 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4838 SelectStore(Node, 3, AArch64::ST1Threev2d); 4839 return; 4840 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4841 SelectStore(Node, 3, AArch64::ST1Threev1d); 4842 return; 4843 } 4844 break; 4845 } 4846 case Intrinsic::aarch64_neon_st1x4: { 4847 if (VT == MVT::v8i8) { 4848 SelectStore(Node, 4, AArch64::ST1Fourv8b); 4849 return; 4850 } else if (VT == MVT::v16i8) { 4851 SelectStore(Node, 4, AArch64::ST1Fourv16b); 4852 return; 4853 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4854 VT == MVT::v4bf16) { 4855 SelectStore(Node, 4, AArch64::ST1Fourv4h); 4856 return; 4857 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4858 VT == MVT::v8bf16) { 4859 SelectStore(Node, 4, AArch64::ST1Fourv8h); 4860 return; 4861 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4862 SelectStore(Node, 4, AArch64::ST1Fourv2s); 4863 return; 4864 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4865 SelectStore(Node, 4, AArch64::ST1Fourv4s); 4866 return; 4867 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4868 SelectStore(Node, 4, AArch64::ST1Fourv2d); 4869 return; 4870 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4871 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4872 return; 4873 } 4874 break; 4875 } 4876 case Intrinsic::aarch64_neon_st2: { 4877 if (VT == MVT::v8i8) { 4878 SelectStore(Node, 2, AArch64::ST2Twov8b); 4879 return; 4880 } else if (VT == MVT::v16i8) { 4881 SelectStore(Node, 2, AArch64::ST2Twov16b); 4882 return; 4883 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4884 VT == MVT::v4bf16) { 
4885 SelectStore(Node, 2, AArch64::ST2Twov4h); 4886 return; 4887 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4888 VT == MVT::v8bf16) { 4889 SelectStore(Node, 2, AArch64::ST2Twov8h); 4890 return; 4891 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4892 SelectStore(Node, 2, AArch64::ST2Twov2s); 4893 return; 4894 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4895 SelectStore(Node, 2, AArch64::ST2Twov4s); 4896 return; 4897 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4898 SelectStore(Node, 2, AArch64::ST2Twov2d); 4899 return; 4900 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4901 SelectStore(Node, 2, AArch64::ST1Twov1d); 4902 return; 4903 } 4904 break; 4905 } 4906 case Intrinsic::aarch64_neon_st3: { 4907 if (VT == MVT::v8i8) { 4908 SelectStore(Node, 3, AArch64::ST3Threev8b); 4909 return; 4910 } else if (VT == MVT::v16i8) { 4911 SelectStore(Node, 3, AArch64::ST3Threev16b); 4912 return; 4913 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4914 VT == MVT::v4bf16) { 4915 SelectStore(Node, 3, AArch64::ST3Threev4h); 4916 return; 4917 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4918 VT == MVT::v8bf16) { 4919 SelectStore(Node, 3, AArch64::ST3Threev8h); 4920 return; 4921 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4922 SelectStore(Node, 3, AArch64::ST3Threev2s); 4923 return; 4924 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4925 SelectStore(Node, 3, AArch64::ST3Threev4s); 4926 return; 4927 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4928 SelectStore(Node, 3, AArch64::ST3Threev2d); 4929 return; 4930 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4931 SelectStore(Node, 3, AArch64::ST1Threev1d); 4932 return; 4933 } 4934 break; 4935 } 4936 case Intrinsic::aarch64_neon_st4: { 4937 if (VT == MVT::v8i8) { 4938 SelectStore(Node, 4, AArch64::ST4Fourv8b); 4939 return; 4940 } else if (VT == MVT::v16i8) { 4941 SelectStore(Node, 4, AArch64::ST4Fourv16b); 4942 return; 4943 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4944 VT == MVT::v4bf16) { 4945 SelectStore(Node, 4, AArch64::ST4Fourv4h); 4946 return; 4947 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4948 VT == MVT::v8bf16) { 4949 SelectStore(Node, 4, AArch64::ST4Fourv8h); 4950 return; 4951 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4952 SelectStore(Node, 4, AArch64::ST4Fourv2s); 4953 return; 4954 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4955 SelectStore(Node, 4, AArch64::ST4Fourv4s); 4956 return; 4957 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4958 SelectStore(Node, 4, AArch64::ST4Fourv2d); 4959 return; 4960 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4961 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4962 return; 4963 } 4964 break; 4965 } 4966 case Intrinsic::aarch64_neon_st2lane: { 4967 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4968 SelectStoreLane(Node, 2, AArch64::ST2i8); 4969 return; 4970 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4971 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4972 SelectStoreLane(Node, 2, AArch64::ST2i16); 4973 return; 4974 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4975 VT == MVT::v2f32) { 4976 SelectStoreLane(Node, 2, AArch64::ST2i32); 4977 return; 4978 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4979 VT == MVT::v1f64) { 4980 SelectStoreLane(Node, 2, AArch64::ST2i64); 4981 return; 4982 } 4983 break; 4984 } 4985 case Intrinsic::aarch64_neon_st3lane: { 4986 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4987 SelectStoreLane(Node, 3, 
AArch64::ST3i8); 4988 return; 4989 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4990 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4991 SelectStoreLane(Node, 3, AArch64::ST3i16); 4992 return; 4993 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4994 VT == MVT::v2f32) { 4995 SelectStoreLane(Node, 3, AArch64::ST3i32); 4996 return; 4997 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4998 VT == MVT::v1f64) { 4999 SelectStoreLane(Node, 3, AArch64::ST3i64); 5000 return; 5001 } 5002 break; 5003 } 5004 case Intrinsic::aarch64_neon_st4lane: { 5005 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5006 SelectStoreLane(Node, 4, AArch64::ST4i8); 5007 return; 5008 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5009 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5010 SelectStoreLane(Node, 4, AArch64::ST4i16); 5011 return; 5012 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5013 VT == MVT::v2f32) { 5014 SelectStoreLane(Node, 4, AArch64::ST4i32); 5015 return; 5016 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5017 VT == MVT::v1f64) { 5018 SelectStoreLane(Node, 4, AArch64::ST4i64); 5019 return; 5020 } 5021 break; 5022 } 5023 case Intrinsic::aarch64_sve_st2: { 5024 if (VT == MVT::nxv16i8) { 5025 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); 5026 return; 5027 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5028 VT == MVT::nxv8bf16) { 5029 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); 5030 return; 5031 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5032 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); 5033 return; 5034 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5035 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); 5036 return; 5037 } 5038 break; 5039 } 5040 case Intrinsic::aarch64_sve_st3: { 5041 if (VT == MVT::nxv16i8) { 5042 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); 5043 return; 5044 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5045 VT == MVT::nxv8bf16) { 5046 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); 5047 return; 5048 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5049 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); 5050 return; 5051 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5052 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM); 5053 return; 5054 } 5055 break; 5056 } 5057 case Intrinsic::aarch64_sve_st4: { 5058 if (VT == MVT::nxv16i8) { 5059 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); 5060 return; 5061 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5062 VT == MVT::nxv8bf16) { 5063 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); 5064 return; 5065 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5066 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); 5067 return; 5068 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5069 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM); 5070 return; 5071 } 5072 break; 5073 } 5074 } 5075 break; 5076 } 5077 case AArch64ISD::LD2post: { 5078 if (VT == MVT::v8i8) { 5079 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 5080 return; 5081 } else if (VT == MVT::v16i8) { 5082 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 5083 return; 5084 } else 
if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5085 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 5086 return; 5087 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5088 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 5089 return; 5090 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5091 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 5092 return; 5093 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5094 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 5095 return; 5096 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5097 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 5098 return; 5099 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5100 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 5101 return; 5102 } 5103 break; 5104 } 5105 case AArch64ISD::LD3post: { 5106 if (VT == MVT::v8i8) { 5107 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 5108 return; 5109 } else if (VT == MVT::v16i8) { 5110 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 5111 return; 5112 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5113 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 5114 return; 5115 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5116 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 5117 return; 5118 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5119 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 5120 return; 5121 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5122 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 5123 return; 5124 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5125 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 5126 return; 5127 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5128 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 5129 return; 5130 } 5131 break; 5132 } 5133 case AArch64ISD::LD4post: { 5134 if (VT == MVT::v8i8) { 5135 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 5136 return; 5137 } else if (VT == MVT::v16i8) { 5138 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 5139 return; 5140 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5141 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 5142 return; 5143 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5144 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 5145 return; 5146 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5147 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 5148 return; 5149 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5150 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 5151 return; 5152 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5153 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 5154 return; 5155 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5156 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 5157 return; 5158 } 5159 break; 5160 } 5161 case AArch64ISD::LD1x2post: { 5162 if (VT == MVT::v8i8) { 5163 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 5164 return; 5165 } else if (VT == MVT::v16i8) { 5166 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 5167 return; 5168 } else if (VT 
== MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5169 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 5170 return; 5171 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5172 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 5173 return; 5174 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5175 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 5176 return; 5177 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5178 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 5179 return; 5180 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5181 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 5182 return; 5183 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5184 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 5185 return; 5186 } 5187 break; 5188 } 5189 case AArch64ISD::LD1x3post: { 5190 if (VT == MVT::v8i8) { 5191 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 5192 return; 5193 } else if (VT == MVT::v16i8) { 5194 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 5195 return; 5196 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5197 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 5198 return; 5199 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5200 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 5201 return; 5202 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5203 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 5204 return; 5205 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5206 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 5207 return; 5208 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5209 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 5210 return; 5211 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5212 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 5213 return; 5214 } 5215 break; 5216 } 5217 case AArch64ISD::LD1x4post: { 5218 if (VT == MVT::v8i8) { 5219 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 5220 return; 5221 } else if (VT == MVT::v16i8) { 5222 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); 5223 return; 5224 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5225 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 5226 return; 5227 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5228 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 5229 return; 5230 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5231 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 5232 return; 5233 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5234 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 5235 return; 5236 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5237 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 5238 return; 5239 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5240 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 5241 return; 5242 } 5243 break; 5244 } 5245 case AArch64ISD::LD1DUPpost: { 5246 if (VT == MVT::v8i8) { 5247 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 5248 return; 5249 } else if (VT == MVT::v16i8) { 5250 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 5251 return; 5252 } else if (VT == 
MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5253 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 5254 return; 5255 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5256 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 5257 return; 5258 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5259 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 5260 return; 5261 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5262 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 5263 return; 5264 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5265 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 5266 return; 5267 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5268 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 5269 return; 5270 } 5271 break; 5272 } 5273 case AArch64ISD::LD2DUPpost: { 5274 if (VT == MVT::v8i8) { 5275 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 5276 return; 5277 } else if (VT == MVT::v16i8) { 5278 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 5279 return; 5280 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5281 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 5282 return; 5283 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5284 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 5285 return; 5286 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5287 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 5288 return; 5289 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5290 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 5291 return; 5292 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5293 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 5294 return; 5295 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5296 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 5297 return; 5298 } 5299 break; 5300 } 5301 case AArch64ISD::LD3DUPpost: { 5302 if (VT == MVT::v8i8) { 5303 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 5304 return; 5305 } else if (VT == MVT::v16i8) { 5306 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 5307 return; 5308 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5309 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); 5310 return; 5311 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5312 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 5313 return; 5314 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5315 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 5316 return; 5317 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5318 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 5319 return; 5320 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5321 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 5322 return; 5323 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5324 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 5325 return; 5326 } 5327 break; 5328 } 5329 case AArch64ISD::LD4DUPpost: { 5330 if (VT == MVT::v8i8) { 5331 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 5332 return; 5333 } else if (VT == MVT::v16i8) { 5334 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 5335 return; 5336 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5337 
SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 5338 return; 5339 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5340 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 5341 return; 5342 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5343 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 5344 return; 5345 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5346 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 5347 return; 5348 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5349 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 5350 return; 5351 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5352 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 5353 return; 5354 } 5355 break; 5356 } 5357 case AArch64ISD::LD1LANEpost: { 5358 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5359 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 5360 return; 5361 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5362 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5363 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 5364 return; 5365 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5366 VT == MVT::v2f32) { 5367 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 5368 return; 5369 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5370 VT == MVT::v1f64) { 5371 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); 5372 return; 5373 } 5374 break; 5375 } 5376 case AArch64ISD::LD2LANEpost: { 5377 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5378 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); 5379 return; 5380 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5381 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5382 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); 5383 return; 5384 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5385 VT == MVT::v2f32) { 5386 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); 5387 return; 5388 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5389 VT == MVT::v1f64) { 5390 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); 5391 return; 5392 } 5393 break; 5394 } 5395 case AArch64ISD::LD3LANEpost: { 5396 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5397 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); 5398 return; 5399 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5400 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5401 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); 5402 return; 5403 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5404 VT == MVT::v2f32) { 5405 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); 5406 return; 5407 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5408 VT == MVT::v1f64) { 5409 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); 5410 return; 5411 } 5412 break; 5413 } 5414 case AArch64ISD::LD4LANEpost: { 5415 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5416 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); 5417 return; 5418 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5419 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5420 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); 5421 return; 5422 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5423 VT == MVT::v2f32) { 5424 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); 5425 return; 5426 } else 
if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5427 VT == MVT::v1f64) { 5428 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); 5429 return; 5430 } 5431 break; 5432 } 5433 case AArch64ISD::ST2post: { 5434 VT = Node->getOperand(1).getValueType(); 5435 if (VT == MVT::v8i8) { 5436 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); 5437 return; 5438 } else if (VT == MVT::v16i8) { 5439 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); 5440 return; 5441 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5442 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); 5443 return; 5444 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5445 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); 5446 return; 5447 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5448 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); 5449 return; 5450 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5451 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); 5452 return; 5453 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5454 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); 5455 return; 5456 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5457 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 5458 return; 5459 } 5460 break; 5461 } 5462 case AArch64ISD::ST3post: { 5463 VT = Node->getOperand(1).getValueType(); 5464 if (VT == MVT::v8i8) { 5465 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); 5466 return; 5467 } else if (VT == MVT::v16i8) { 5468 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); 5469 return; 5470 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5471 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); 5472 return; 5473 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5474 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); 5475 return; 5476 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5477 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); 5478 return; 5479 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5480 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); 5481 return; 5482 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5483 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); 5484 return; 5485 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5486 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 5487 return; 5488 } 5489 break; 5490 } 5491 case AArch64ISD::ST4post: { 5492 VT = Node->getOperand(1).getValueType(); 5493 if (VT == MVT::v8i8) { 5494 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); 5495 return; 5496 } else if (VT == MVT::v16i8) { 5497 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); 5498 return; 5499 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5500 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); 5501 return; 5502 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5503 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); 5504 return; 5505 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5506 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); 5507 return; 5508 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5509 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); 5510 return; 5511 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5512 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); 5513 return; 5514 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5515 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 5516 return; 5517 } 5518 break; 5519 } 5520 case AArch64ISD::ST1x2post: { 5521 VT = 
Node->getOperand(1).getValueType(); 5522 if (VT == MVT::v8i8) { 5523 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); 5524 return; 5525 } else if (VT == MVT::v16i8) { 5526 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); 5527 return; 5528 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5529 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); 5530 return; 5531 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5532 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); 5533 return; 5534 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5535 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); 5536 return; 5537 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5538 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); 5539 return; 5540 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5541 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 5542 return; 5543 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5544 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); 5545 return; 5546 } 5547 break; 5548 } 5549 case AArch64ISD::ST1x3post: { 5550 VT = Node->getOperand(1).getValueType(); 5551 if (VT == MVT::v8i8) { 5552 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); 5553 return; 5554 } else if (VT == MVT::v16i8) { 5555 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); 5556 return; 5557 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5558 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); 5559 return; 5560 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) { 5561 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); 5562 return; 5563 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5564 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); 5565 return; 5566 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5567 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); 5568 return; 5569 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5570 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 5571 return; 5572 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5573 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); 5574 return; 5575 } 5576 break; 5577 } 5578 case AArch64ISD::ST1x4post: { 5579 VT = Node->getOperand(1).getValueType(); 5580 if (VT == MVT::v8i8) { 5581 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); 5582 return; 5583 } else if (VT == MVT::v16i8) { 5584 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); 5585 return; 5586 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 5587 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); 5588 return; 5589 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 5590 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); 5591 return; 5592 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 5593 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); 5594 return; 5595 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 5596 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); 5597 return; 5598 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 5599 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 5600 return; 5601 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 5602 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); 5603 return; 5604 } 5605 break; 5606 } 5607 case AArch64ISD::ST2LANEpost: { 5608 VT = Node->getOperand(1).getValueType(); 5609 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5610 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); 5611 return; 5612 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == 
MVT::v4f16 || 5613 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5614 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); 5615 return; 5616 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5617 VT == MVT::v2f32) { 5618 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); 5619 return; 5620 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5621 VT == MVT::v1f64) { 5622 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); 5623 return; 5624 } 5625 break; 5626 } 5627 case AArch64ISD::ST3LANEpost: { 5628 VT = Node->getOperand(1).getValueType(); 5629 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5630 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); 5631 return; 5632 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5633 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5634 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); 5635 return; 5636 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5637 VT == MVT::v2f32) { 5638 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); 5639 return; 5640 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5641 VT == MVT::v1f64) { 5642 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); 5643 return; 5644 } 5645 break; 5646 } 5647 case AArch64ISD::ST4LANEpost: { 5648 VT = Node->getOperand(1).getValueType(); 5649 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5650 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); 5651 return; 5652 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5653 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5654 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); 5655 return; 5656 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5657 VT == MVT::v2f32) { 5658 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); 5659 return; 5660 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5661 VT == MVT::v1f64) { 5662 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); 5663 return; 5664 } 5665 break; 5666 } 5667 case AArch64ISD::SVE_LD2_MERGE_ZERO: { 5668 if (VT == MVT::nxv16i8) { 5669 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B); 5670 return; 5671 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5672 VT == MVT::nxv8bf16) { 5673 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H); 5674 return; 5675 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5676 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W); 5677 return; 5678 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5679 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D); 5680 return; 5681 } 5682 break; 5683 } 5684 case AArch64ISD::SVE_LD3_MERGE_ZERO: { 5685 if (VT == MVT::nxv16i8) { 5686 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B); 5687 return; 5688 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5689 VT == MVT::nxv8bf16) { 5690 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H); 5691 return; 5692 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5693 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W); 5694 return; 5695 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5696 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D); 5697 return; 5698 } 5699 break; 5700 } 5701 case AArch64ISD::SVE_LD4_MERGE_ZERO: { 5702 if (VT == MVT::nxv16i8) { 5703 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B); 5704 return; 5705 } 
else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated with a memory operation in \p
/// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
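  // For example (illustrative): an SVE_LD2_MERGE_ZERO node whose governing
  // predicate is nxv4i1 maps to nxv8i32 below, since 128 / 4 gives 32-bit
  // elements and the two structure registers double the element count.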
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
    return EVT();

  switch (cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue()) {
  default:
    return EVT();
  case Intrinsic::aarch64_sme_ldr:
  case Intrinsic::aarch64_sme_str:
    return MVT::nxv16i8;
  case Intrinsic::aarch64_sve_prf:
    // We are using an SVE prefetch intrinsic. Type must be inferred from the
    // width of the predicate.
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
  case Intrinsic::aarch64_sve_ld2_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_ld3_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_ld4_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
  }
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max,
/// where Root is the memory access using N for its address.
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
      return true;
    }

    return false;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8 bit data does not come with the SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
    if (ImmOff % Size)
      return false;

    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(*CurDAG, N);
}

bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                             SDValue &Base, SDValue &Offset,
                                             unsigned Scale) {
  if (N.getOpcode() != ISD::ADD) {
    Base = N;
    Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();

    if ((ImmOff < 0 || ImmOff > MaxSize) || (ImmOff % Scale != 0))
      return false;

    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
    return true;
  }

  return false;
}