1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the AArch64 target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64MachineFunctionInfo.h" 14 #include "AArch64TargetMachine.h" 15 #include "MCTargetDesc/AArch64AddressingModes.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/CodeGen/SelectionDAGISel.h" 18 #include "llvm/IR/Function.h" // To access function attributes. 19 #include "llvm/IR/GlobalValue.h" 20 #include "llvm/IR/Intrinsics.h" 21 #include "llvm/IR/IntrinsicsAArch64.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/ErrorHandling.h" 24 #include "llvm/Support/KnownBits.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "aarch64-isel" 31 32 //===--------------------------------------------------------------------===// 33 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine 34 /// instructions for SelectionDAG operations. 35 /// 36 namespace { 37 38 class AArch64DAGToDAGISel : public SelectionDAGISel { 39 40 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 41 /// make the right decision when generating code for different targets. 42 const AArch64Subtarget *Subtarget; 43 44 public: 45 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, 46 CodeGenOpt::Level OptLevel) 47 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {} 48 49 StringRef getPassName() const override { 50 return "AArch64 Instruction Selection"; 51 } 52 53 bool runOnMachineFunction(MachineFunction &MF) override { 54 Subtarget = &MF.getSubtarget<AArch64Subtarget>(); 55 return SelectionDAGISel::runOnMachineFunction(MF); 56 } 57 58 void Select(SDNode *Node) override; 59 60 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 61 /// inline asm expressions. 62 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 63 unsigned ConstraintID, 64 std::vector<SDValue> &OutOps) override; 65 66 template <signed Low, signed High, signed Scale> 67 bool SelectRDVLImm(SDValue N, SDValue &Imm); 68 69 bool tryMLAV64LaneV128(SDNode *N); 70 bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N); 71 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); 72 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift); 73 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 74 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 75 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 76 return SelectShiftedRegister(N, false, Reg, Shift); 77 } 78 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 79 return SelectShiftedRegister(N, true, Reg, Shift); 80 } 81 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { 82 return SelectAddrModeIndexed7S(N, 1, Base, OffImm); 83 } 84 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { 85 return SelectAddrModeIndexed7S(N, 2, Base, OffImm); 86 } 87 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { 88 return SelectAddrModeIndexed7S(N, 4, Base, OffImm); 89 } 90 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { 91 return SelectAddrModeIndexed7S(N, 8, Base, OffImm); 92 } 93 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { 94 return SelectAddrModeIndexed7S(N, 16, Base, OffImm); 95 } 96 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) { 97 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm); 98 } 99 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) { 100 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm); 101 } 102 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { 103 return SelectAddrModeIndexed(N, 1, Base, OffImm); 104 } 105 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { 106 return SelectAddrModeIndexed(N, 2, Base, OffImm); 107 } 108 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { 109 return SelectAddrModeIndexed(N, 4, Base, OffImm); 110 } 111 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { 112 return SelectAddrModeIndexed(N, 8, Base, OffImm); 113 } 114 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { 115 return SelectAddrModeIndexed(N, 16, Base, OffImm); 116 } 117 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { 118 return SelectAddrModeUnscaled(N, 1, Base, OffImm); 119 } 120 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { 121 return SelectAddrModeUnscaled(N, 2, Base, OffImm); 122 } 123 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { 124 return SelectAddrModeUnscaled(N, 4, Base, OffImm); 125 } 126 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { 127 return SelectAddrModeUnscaled(N, 8, Base, OffImm); 128 } 129 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { 130 return SelectAddrModeUnscaled(N, 16, Base, OffImm); 131 } 132 template <unsigned Size, unsigned Max> 133 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) { 134 // Test if there is an appropriate addressing mode and check if the 135 // immediate fits. 136 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm); 137 if (Found) { 138 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) { 139 int64_t C = CI->getSExtValue(); 140 if (C <= Max) 141 return true; 142 } 143 } 144 145 // Otherwise, base only, materialize address in register. 146 Base = N; 147 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); 148 return true; 149 } 150 151 template<int Width> 152 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, 153 SDValue &SignExtend, SDValue &DoShift) { 154 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 155 } 156 157 template<int Width> 158 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, 159 SDValue &SignExtend, SDValue &DoShift) { 160 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 161 } 162 163 bool SelectExtractHigh(SDValue N, SDValue &Res) { 164 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST) 165 N = N->getOperand(0); 166 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR || 167 !isa<ConstantSDNode>(N->getOperand(1))) 168 return false; 169 EVT VT = N->getValueType(0); 170 EVT LVT = N->getOperand(0).getValueType(); 171 unsigned Index = N->getConstantOperandVal(1); 172 if (!VT.is64BitVector() || !LVT.is128BitVector() || 173 Index != VT.getVectorNumElements()) 174 return false; 175 Res = N->getOperand(0); 176 return true; 177 } 178 179 bool SelectDupZeroOrUndef(SDValue N) { 180 switch(N->getOpcode()) { 181 case ISD::UNDEF: 182 return true; 183 case AArch64ISD::DUP: 184 case ISD::SPLAT_VECTOR: { 185 auto Opnd0 = N->getOperand(0); 186 if (auto CN = dyn_cast<ConstantSDNode>(Opnd0)) 187 if (CN->isZero()) 188 return true; 189 if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0)) 190 if (CN->isZero()) 191 return true; 192 break; 193 } 194 default: 195 break; 196 } 197 198 return false; 199 } 200 201 bool SelectDupZero(SDValue N) { 202 switch(N->getOpcode()) { 203 case AArch64ISD::DUP: 204 case ISD::SPLAT_VECTOR: { 205 auto Opnd0 = N->getOperand(0); 206 if (auto CN = dyn_cast<ConstantSDNode>(Opnd0)) 207 if (CN->isZero()) 208 return true; 209 if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0)) 210 if (CN->isZero()) 211 return true; 212 break; 213 } 214 } 215 216 return false; 217 } 218 219 template<MVT::SimpleValueType VT> 220 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) { 221 return SelectSVEAddSubImm(N, VT, Imm, Shift); 222 } 223 224 template <MVT::SimpleValueType VT> 225 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) { 226 return SelectSVECpyDupImm(N, VT, Imm, Shift); 227 } 228 229 template <MVT::SimpleValueType VT, bool Invert = false> 230 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) { 231 return SelectSVELogicalImm(N, VT, Imm, Invert); 232 } 233 234 template <MVT::SimpleValueType VT> 235 bool SelectSVEArithImm(SDValue N, SDValue &Imm) { 236 return SelectSVEArithImm(N, VT, Imm); 237 } 238 239 template <unsigned Low, unsigned High, bool AllowSaturation = false> 240 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { 241 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); 242 } 243 244 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) { 245 if (N->getOpcode() != ISD::SPLAT_VECTOR) 246 return false; 247 248 EVT EltVT = N->getValueType(0).getVectorElementType(); 249 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1, 250 /* High */ EltVT.getFixedSizeInBits(), 251 /* AllowSaturation */ true, Imm); 252 } 253 254 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 255 template<signed Min, signed Max, signed Scale, bool Shift> 256 bool SelectCntImm(SDValue N, SDValue &Imm) { 257 if (!isa<ConstantSDNode>(N)) 258 return false; 259 260 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 261 if (Shift) 262 MulImm = 1LL << MulImm; 263 264 if ((MulImm % std::abs(Scale)) != 0) 265 return false; 266 267 MulImm /= Scale; 268 if ((MulImm >= Min) && (MulImm <= Max)) { 269 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); 270 return true; 271 } 272 273 return false; 274 } 275 276 template <signed Max, signed Scale> 277 bool SelectEXTImm(SDValue N, SDValue &Imm) { 278 if (!isa<ConstantSDNode>(N)) 279 return false; 280 281 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 282 283 if (MulImm >= 0 && MulImm <= Max) { 284 MulImm *= Scale; 285 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); 286 return true; 287 } 288 289 return false; 290 } 291 292 template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) { 293 if (auto *CI = dyn_cast<ConstantSDNode>(N)) { 294 uint64_t C = CI->getZExtValue(); 295 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other); 296 return true; 297 } 298 return false; 299 } 300 301 /// Form sequences of consecutive 64/128-bit registers for use in NEON 302 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have 303 /// between 1 and 4 elements. If it contains a single element that is returned 304 /// unchanged; otherwise a REG_SEQUENCE value is returned. 305 SDValue createDTuple(ArrayRef<SDValue> Vecs); 306 SDValue createQTuple(ArrayRef<SDValue> Vecs); 307 // Form a sequence of SVE registers for instructions using list of vectors, 308 // e.g. structured loads and stores (ldN, stN). 309 SDValue createZTuple(ArrayRef<SDValue> Vecs); 310 311 /// Generic helper for the createDTuple/createQTuple 312 /// functions. Those should almost always be called instead. 313 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[], 314 const unsigned SubRegs[]); 315 316 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); 317 318 bool tryIndexedLoad(SDNode *N); 319 320 bool trySelectStackSlotTagP(SDNode *N); 321 void SelectTagP(SDNode *N); 322 323 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 324 unsigned SubRegIdx); 325 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 326 unsigned SubRegIdx); 327 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 328 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 329 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, 330 unsigned Opc_rr, unsigned Opc_ri, 331 bool IsIntr = false); 332 333 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); 334 /// SVE Reg+Imm addressing mode. 335 template <int64_t Min, int64_t Max> 336 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base, 337 SDValue &OffImm); 338 /// SVE Reg+Reg address mode. 339 template <unsigned Scale> 340 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) { 341 return SelectSVERegRegAddrMode(N, Scale, Base, Offset); 342 } 343 344 template <unsigned Scale> 345 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { 346 return SelectSMETileSlice(N, Scale, Vector, Offset); 347 } 348 349 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); 350 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); 351 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 352 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 353 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale, 354 unsigned Opc_rr, unsigned Opc_ri); 355 std::tuple<unsigned, SDValue, SDValue> 356 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri, 357 const SDValue &OldBase, const SDValue &OldOffset, 358 unsigned Scale); 359 360 bool tryBitfieldExtractOp(SDNode *N); 361 bool tryBitfieldExtractOpFromSExt(SDNode *N); 362 bool tryBitfieldInsertOp(SDNode *N); 363 bool tryBitfieldInsertInZeroOp(SDNode *N); 364 bool tryShiftAmountMod(SDNode *N); 365 bool tryHighFPExt(SDNode *N); 366 367 bool tryReadRegister(SDNode *N); 368 bool tryWriteRegister(SDNode *N); 369 370 // Include the pieces autogenerated from the target description. 371 #include "AArch64GenDAGISel.inc" 372 373 private: 374 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, 375 SDValue &Shift); 376 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, 377 SDValue &OffImm) { 378 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm); 379 } 380 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW, 381 unsigned Size, SDValue &Base, 382 SDValue &OffImm); 383 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, 384 SDValue &OffImm); 385 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, 386 SDValue &OffImm); 387 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, 388 SDValue &Offset, SDValue &SignExtend, 389 SDValue &DoShift); 390 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, 391 SDValue &Offset, SDValue &SignExtend, 392 SDValue &DoShift); 393 bool isWorthFolding(SDValue V) const; 394 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, 395 SDValue &Offset, SDValue &SignExtend); 396 397 template<unsigned RegWidth> 398 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { 399 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); 400 } 401 402 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); 403 404 bool SelectCMP_SWAP(SDNode *N); 405 406 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); 407 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); 408 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert); 409 410 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); 411 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High, 412 bool AllowSaturation, SDValue &Imm); 413 414 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm); 415 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, 416 SDValue &Offset); 417 bool SelectSMETileSlice(SDValue N, unsigned Scale, SDValue &Vector, 418 SDValue &Offset); 419 420 bool SelectAllActivePredicate(SDValue N); 421 }; 422 } // end anonymous namespace 423 424 /// isIntImmediate - This method tests to see if the node is a constant 425 /// operand. If so Imm will receive the 32-bit value. 426 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { 427 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { 428 Imm = C->getZExtValue(); 429 return true; 430 } 431 return false; 432 } 433 434 // isIntImmediate - This method tests to see if a constant operand. 435 // If so Imm will receive the value. 436 static bool isIntImmediate(SDValue N, uint64_t &Imm) { 437 return isIntImmediate(N.getNode(), Imm); 438 } 439 440 // isOpcWithIntImmediate - This method tests to see if the node is a specific 441 // opcode and that it has a immediate integer right operand. 442 // If so Imm will receive the 32 bit value. 443 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, 444 uint64_t &Imm) { 445 return N->getOpcode() == Opc && 446 isIntImmediate(N->getOperand(1).getNode(), Imm); 447 } 448 449 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( 450 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 451 switch(ConstraintID) { 452 default: 453 llvm_unreachable("Unexpected asm memory constraint"); 454 case InlineAsm::Constraint_m: 455 case InlineAsm::Constraint_o: 456 case InlineAsm::Constraint_Q: 457 // We need to make sure that this one operand does not end up in XZR, thus 458 // require the address to be in a PointerRegClass register. 459 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); 460 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); 461 SDLoc dl(Op); 462 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); 463 SDValue NewOp = 464 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 465 dl, Op.getValueType(), 466 Op, RC), 0); 467 OutOps.push_back(NewOp); 468 return false; 469 } 470 return true; 471 } 472 473 /// SelectArithImmed - Select an immediate value that can be represented as 474 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 475 /// Val set to the 12-bit value and Shift set to the shifter operand. 476 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, 477 SDValue &Shift) { 478 // This function is called from the addsub_shifted_imm ComplexPattern, 479 // which lists [imm] as the list of opcode it's interested in, however 480 // we still need to check whether the operand is actually an immediate 481 // here because the ComplexPattern opcode list is only used in 482 // root-level opcode matching. 483 if (!isa<ConstantSDNode>(N.getNode())) 484 return false; 485 486 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 487 unsigned ShiftAmt; 488 489 if (Immed >> 12 == 0) { 490 ShiftAmt = 0; 491 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 492 ShiftAmt = 12; 493 Immed = Immed >> 12; 494 } else 495 return false; 496 497 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 498 SDLoc dl(N); 499 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); 500 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); 501 return true; 502 } 503 504 /// SelectNegArithImmed - As above, but negates the value before trying to 505 /// select it. 506 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, 507 SDValue &Shift) { 508 // This function is called from the addsub_shifted_imm ComplexPattern, 509 // which lists [imm] as the list of opcode it's interested in, however 510 // we still need to check whether the operand is actually an immediate 511 // here because the ComplexPattern opcode list is only used in 512 // root-level opcode matching. 513 if (!isa<ConstantSDNode>(N.getNode())) 514 return false; 515 516 // The immediate operand must be a 24-bit zero-extended immediate. 517 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 518 519 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 520 // have the opposite effect on the C flag, so this pattern mustn't match under 521 // those circumstances. 522 if (Immed == 0) 523 return false; 524 525 if (N.getValueType() == MVT::i32) 526 Immed = ~((uint32_t)Immed) + 1; 527 else 528 Immed = ~Immed + 1ULL; 529 if (Immed & 0xFFFFFFFFFF000000ULL) 530 return false; 531 532 Immed &= 0xFFFFFFULL; 533 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, 534 Shift); 535 } 536 537 /// getShiftTypeForNode - Translate a shift node to the corresponding 538 /// ShiftType value. 539 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { 540 switch (N.getOpcode()) { 541 default: 542 return AArch64_AM::InvalidShiftExtend; 543 case ISD::SHL: 544 return AArch64_AM::LSL; 545 case ISD::SRL: 546 return AArch64_AM::LSR; 547 case ISD::SRA: 548 return AArch64_AM::ASR; 549 case ISD::ROTR: 550 return AArch64_AM::ROR; 551 } 552 } 553 554 /// Determine whether it is worth it to fold SHL into the addressing 555 /// mode. 556 static bool isWorthFoldingSHL(SDValue V) { 557 assert(V.getOpcode() == ISD::SHL && "invalid opcode"); 558 // It is worth folding logical shift of up to three places. 559 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1)); 560 if (!CSD) 561 return false; 562 unsigned ShiftVal = CSD->getZExtValue(); 563 if (ShiftVal > 3) 564 return false; 565 566 // Check if this particular node is reused in any non-memory related 567 // operation. If yes, do not try to fold this node into the address 568 // computation, since the computation will be kept. 569 const SDNode *Node = V.getNode(); 570 for (SDNode *UI : Node->uses()) 571 if (!isa<MemSDNode>(*UI)) 572 for (SDNode *UII : UI->uses()) 573 if (!isa<MemSDNode>(*UII)) 574 return false; 575 return true; 576 } 577 578 /// Determine whether it is worth to fold V into an extended register. 579 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { 580 // Trivial if we are optimizing for code size or if there is only 581 // one use of the value. 582 if (CurDAG->shouldOptForSize() || V.hasOneUse()) 583 return true; 584 // If a subtarget has a fastpath LSL we can fold a logical shift into 585 // the addressing mode and save a cycle. 586 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL && 587 isWorthFoldingSHL(V)) 588 return true; 589 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) { 590 const SDValue LHS = V.getOperand(0); 591 const SDValue RHS = V.getOperand(1); 592 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) 593 return true; 594 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) 595 return true; 596 } 597 598 // It hurts otherwise, since the value will be reused. 599 return false; 600 } 601 602 /// SelectShiftedRegister - Select a "shifted register" operand. If the value 603 /// is not shifted, set the Shift operand to default of "LSL 0". The logical 604 /// instructions allow the shifted register to be rotated, but the arithmetic 605 /// instructions do not. The AllowROR parameter specifies whether ROR is 606 /// supported. 607 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, 608 SDValue &Reg, SDValue &Shift) { 609 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); 610 if (ShType == AArch64_AM::InvalidShiftExtend) 611 return false; 612 if (!AllowROR && ShType == AArch64_AM::ROR) 613 return false; 614 615 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 616 unsigned BitSize = N.getValueSizeInBits(); 617 unsigned Val = RHS->getZExtValue() & (BitSize - 1); 618 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); 619 620 Reg = N.getOperand(0); 621 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); 622 return isWorthFolding(N); 623 } 624 625 return false; 626 } 627 628 /// getExtendTypeForNode - Translate an extend node to the corresponding 629 /// ExtendType value. 630 static AArch64_AM::ShiftExtendType 631 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { 632 if (N.getOpcode() == ISD::SIGN_EXTEND || 633 N.getOpcode() == ISD::SIGN_EXTEND_INREG) { 634 EVT SrcVT; 635 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) 636 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); 637 else 638 SrcVT = N.getOperand(0).getValueType(); 639 640 if (!IsLoadStore && SrcVT == MVT::i8) 641 return AArch64_AM::SXTB; 642 else if (!IsLoadStore && SrcVT == MVT::i16) 643 return AArch64_AM::SXTH; 644 else if (SrcVT == MVT::i32) 645 return AArch64_AM::SXTW; 646 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 647 648 return AArch64_AM::InvalidShiftExtend; 649 } else if (N.getOpcode() == ISD::ZERO_EXTEND || 650 N.getOpcode() == ISD::ANY_EXTEND) { 651 EVT SrcVT = N.getOperand(0).getValueType(); 652 if (!IsLoadStore && SrcVT == MVT::i8) 653 return AArch64_AM::UXTB; 654 else if (!IsLoadStore && SrcVT == MVT::i16) 655 return AArch64_AM::UXTH; 656 else if (SrcVT == MVT::i32) 657 return AArch64_AM::UXTW; 658 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 659 660 return AArch64_AM::InvalidShiftExtend; 661 } else if (N.getOpcode() == ISD::AND) { 662 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 663 if (!CSD) 664 return AArch64_AM::InvalidShiftExtend; 665 uint64_t AndMask = CSD->getZExtValue(); 666 667 switch (AndMask) { 668 default: 669 return AArch64_AM::InvalidShiftExtend; 670 case 0xFF: 671 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 672 case 0xFFFF: 673 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 674 case 0xFFFFFFFF: 675 return AArch64_AM::UXTW; 676 } 677 } 678 679 return AArch64_AM::InvalidShiftExtend; 680 } 681 682 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. 683 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { 684 if (DL->getOpcode() != AArch64ISD::DUPLANE16 && 685 DL->getOpcode() != AArch64ISD::DUPLANE32) 686 return false; 687 688 SDValue SV = DL->getOperand(0); 689 if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) 690 return false; 691 692 SDValue EV = SV.getOperand(1); 693 if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) 694 return false; 695 696 ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); 697 ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); 698 LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); 699 LaneOp = EV.getOperand(0); 700 701 return true; 702 } 703 704 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a 705 // high lane extract. 706 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, 707 SDValue &LaneOp, int &LaneIdx) { 708 709 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { 710 std::swap(Op0, Op1); 711 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) 712 return false; 713 } 714 StdOp = Op1; 715 return true; 716 } 717 718 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand 719 /// is a lane in the upper half of a 128-bit vector. Recognize and select this 720 /// so that we don't emit unnecessary lane extracts. 721 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) { 722 SDLoc dl(N); 723 SDValue Op0 = N->getOperand(0); 724 SDValue Op1 = N->getOperand(1); 725 SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. 726 SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. 727 int LaneIdx = -1; // Will hold the lane index. 728 729 if (Op1.getOpcode() != ISD::MUL || 730 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 731 LaneIdx)) { 732 std::swap(Op0, Op1); 733 if (Op1.getOpcode() != ISD::MUL || 734 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 735 LaneIdx)) 736 return false; 737 } 738 739 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); 740 741 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; 742 743 unsigned MLAOpc = ~0U; 744 745 switch (N->getSimpleValueType(0).SimpleTy) { 746 default: 747 llvm_unreachable("Unrecognized MLA."); 748 case MVT::v4i16: 749 MLAOpc = AArch64::MLAv4i16_indexed; 750 break; 751 case MVT::v8i16: 752 MLAOpc = AArch64::MLAv8i16_indexed; 753 break; 754 case MVT::v2i32: 755 MLAOpc = AArch64::MLAv2i32_indexed; 756 break; 757 case MVT::v4i32: 758 MLAOpc = AArch64::MLAv4i32_indexed; 759 break; 760 } 761 762 ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops)); 763 return true; 764 } 765 766 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) { 767 SDLoc dl(N); 768 SDValue SMULLOp0; 769 SDValue SMULLOp1; 770 int LaneIdx; 771 772 if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, 773 LaneIdx)) 774 return false; 775 776 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); 777 778 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; 779 780 unsigned SMULLOpc = ~0U; 781 782 if (IntNo == Intrinsic::aarch64_neon_smull) { 783 switch (N->getSimpleValueType(0).SimpleTy) { 784 default: 785 llvm_unreachable("Unrecognized SMULL."); 786 case MVT::v4i32: 787 SMULLOpc = AArch64::SMULLv4i16_indexed; 788 break; 789 case MVT::v2i64: 790 SMULLOpc = AArch64::SMULLv2i32_indexed; 791 break; 792 } 793 } else if (IntNo == Intrinsic::aarch64_neon_umull) { 794 switch (N->getSimpleValueType(0).SimpleTy) { 795 default: 796 llvm_unreachable("Unrecognized SMULL."); 797 case MVT::v4i32: 798 SMULLOpc = AArch64::UMULLv4i16_indexed; 799 break; 800 case MVT::v2i64: 801 SMULLOpc = AArch64::UMULLv2i32_indexed; 802 break; 803 } 804 } else 805 llvm_unreachable("Unrecognized intrinsic."); 806 807 ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops)); 808 return true; 809 } 810 811 /// Instructions that accept extend modifiers like UXTW expect the register 812 /// being extended to be a GPR32, but the incoming DAG might be acting on a 813 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if 814 /// this is the case. 815 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { 816 if (N.getValueType() == MVT::i32) 817 return N; 818 819 SDLoc dl(N); 820 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 821 MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 822 dl, MVT::i32, N, SubReg); 823 return SDValue(Node, 0); 824 } 825 826 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 827 template<signed Low, signed High, signed Scale> 828 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) { 829 if (!isa<ConstantSDNode>(N)) 830 return false; 831 832 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 833 if ((MulImm % std::abs(Scale)) == 0) { 834 int64_t RDVLImm = MulImm / Scale; 835 if ((RDVLImm >= Low) && (RDVLImm <= High)) { 836 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32); 837 return true; 838 } 839 } 840 841 return false; 842 } 843 844 /// SelectArithExtendedRegister - Select a "extended register" operand. This 845 /// operand folds in an extend followed by an optional left shift. 846 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, 847 SDValue &Shift) { 848 unsigned ShiftVal = 0; 849 AArch64_AM::ShiftExtendType Ext; 850 851 if (N.getOpcode() == ISD::SHL) { 852 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 853 if (!CSD) 854 return false; 855 ShiftVal = CSD->getZExtValue(); 856 if (ShiftVal > 4) 857 return false; 858 859 Ext = getExtendTypeForNode(N.getOperand(0)); 860 if (Ext == AArch64_AM::InvalidShiftExtend) 861 return false; 862 863 Reg = N.getOperand(0).getOperand(0); 864 } else { 865 Ext = getExtendTypeForNode(N); 866 if (Ext == AArch64_AM::InvalidShiftExtend) 867 return false; 868 869 Reg = N.getOperand(0); 870 871 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the 872 // isDef32 as a heuristic for when the operand is likely to be a 32bit def. 873 auto isDef32 = [](SDValue N) { 874 unsigned Opc = N.getOpcode(); 875 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG && 876 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext && 877 Opc != ISD::AssertZext && Opc != ISD::AssertAlign && 878 Opc != ISD::FREEZE; 879 }; 880 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 && 881 isDef32(Reg)) 882 return false; 883 } 884 885 // AArch64 mandates that the RHS of the operation must use the smallest 886 // register class that could contain the size being extended from. Thus, 887 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though 888 // there might not be an actual 32-bit value in the program. We can 889 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. 890 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); 891 Reg = narrowIfNeeded(CurDAG, Reg); 892 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), 893 MVT::i32); 894 return isWorthFolding(N); 895 } 896 897 /// SelectArithUXTXRegister - Select a "UXTX register" operand. This 898 /// operand is refered by the instructions have SP operand 899 bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg, 900 SDValue &Shift) { 901 unsigned ShiftVal = 0; 902 AArch64_AM::ShiftExtendType Ext; 903 904 if (N.getOpcode() != ISD::SHL) 905 return false; 906 907 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 908 if (!CSD) 909 return false; 910 ShiftVal = CSD->getZExtValue(); 911 if (ShiftVal > 4) 912 return false; 913 914 Ext = AArch64_AM::UXTX; 915 Reg = N.getOperand(0); 916 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), 917 MVT::i32); 918 return isWorthFolding(N); 919 } 920 921 /// If there's a use of this ADDlow that's not itself a load/store then we'll 922 /// need to create a real ADD instruction from it anyway and there's no point in 923 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's 924 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding 925 /// leads to duplicated ADRP instructions. 926 static bool isWorthFoldingADDlow(SDValue N) { 927 for (auto Use : N->uses()) { 928 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && 929 Use->getOpcode() != ISD::ATOMIC_LOAD && 930 Use->getOpcode() != ISD::ATOMIC_STORE) 931 return false; 932 933 // ldar and stlr have much more restrictive addressing modes (just a 934 // register). 935 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering())) 936 return false; 937 } 938 939 return true; 940 } 941 942 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit 943 /// immediate" address. The "Size" argument is the size in bytes of the memory 944 /// reference, which determines the scale. 945 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, 946 unsigned BW, unsigned Size, 947 SDValue &Base, 948 SDValue &OffImm) { 949 SDLoc dl(N); 950 const DataLayout &DL = CurDAG->getDataLayout(); 951 const TargetLowering *TLI = getTargetLowering(); 952 if (N.getOpcode() == ISD::FrameIndex) { 953 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 954 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 955 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 956 return true; 957 } 958 959 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed 960 // selected here doesn't support labels/immediates, only base+offset. 961 if (CurDAG->isBaseWithConstantOffset(N)) { 962 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 963 if (IsSignedImm) { 964 int64_t RHSC = RHS->getSExtValue(); 965 unsigned Scale = Log2_32(Size); 966 int64_t Range = 0x1LL << (BW - 1); 967 968 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && 969 RHSC < (Range << Scale)) { 970 Base = N.getOperand(0); 971 if (Base.getOpcode() == ISD::FrameIndex) { 972 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 973 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 974 } 975 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 976 return true; 977 } 978 } else { 979 // unsigned Immediate 980 uint64_t RHSC = RHS->getZExtValue(); 981 unsigned Scale = Log2_32(Size); 982 uint64_t Range = 0x1ULL << BW; 983 984 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { 985 Base = N.getOperand(0); 986 if (Base.getOpcode() == ISD::FrameIndex) { 987 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 988 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 989 } 990 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 991 return true; 992 } 993 } 994 } 995 } 996 // Base only. The address will be materialized into a register before 997 // the memory is accessed. 998 // add x0, Xbase, #offset 999 // stp x1, x2, [x0] 1000 Base = N; 1001 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1002 return true; 1003 } 1004 1005 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit 1006 /// immediate" address. The "Size" argument is the size in bytes of the memory 1007 /// reference, which determines the scale. 1008 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, 1009 SDValue &Base, SDValue &OffImm) { 1010 SDLoc dl(N); 1011 const DataLayout &DL = CurDAG->getDataLayout(); 1012 const TargetLowering *TLI = getTargetLowering(); 1013 if (N.getOpcode() == ISD::FrameIndex) { 1014 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1015 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1016 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1017 return true; 1018 } 1019 1020 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { 1021 GlobalAddressSDNode *GAN = 1022 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); 1023 Base = N.getOperand(0); 1024 OffImm = N.getOperand(1); 1025 if (!GAN) 1026 return true; 1027 1028 if (GAN->getOffset() % Size == 0 && 1029 GAN->getGlobal()->getPointerAlignment(DL) >= Size) 1030 return true; 1031 } 1032 1033 if (CurDAG->isBaseWithConstantOffset(N)) { 1034 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1035 int64_t RHSC = (int64_t)RHS->getZExtValue(); 1036 unsigned Scale = Log2_32(Size); 1037 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 1038 Base = N.getOperand(0); 1039 if (Base.getOpcode() == ISD::FrameIndex) { 1040 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1041 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1042 } 1043 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 1044 return true; 1045 } 1046 } 1047 } 1048 1049 // Before falling back to our general case, check if the unscaled 1050 // instructions can handle this. If so, that's preferable. 1051 if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) 1052 return false; 1053 1054 // Base only. The address will be materialized into a register before 1055 // the memory is accessed. 1056 // add x0, Xbase, #offset 1057 // ldr x0, [x0] 1058 Base = N; 1059 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1060 return true; 1061 } 1062 1063 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit 1064 /// immediate" address. This should only match when there is an offset that 1065 /// is not valid for a scaled immediate addressing mode. The "Size" argument 1066 /// is the size in bytes of the memory reference, which is needed here to know 1067 /// what is valid for a scaled immediate. 1068 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, 1069 SDValue &Base, 1070 SDValue &OffImm) { 1071 if (!CurDAG->isBaseWithConstantOffset(N)) 1072 return false; 1073 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1074 int64_t RHSC = RHS->getSExtValue(); 1075 // If the offset is valid as a scaled immediate, don't match here. 1076 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && 1077 RHSC < (0x1000 << Log2_32(Size))) 1078 return false; 1079 if (RHSC >= -256 && RHSC < 256) { 1080 Base = N.getOperand(0); 1081 if (Base.getOpcode() == ISD::FrameIndex) { 1082 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1083 const TargetLowering *TLI = getTargetLowering(); 1084 Base = CurDAG->getTargetFrameIndex( 1085 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1086 } 1087 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); 1088 return true; 1089 } 1090 } 1091 return false; 1092 } 1093 1094 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { 1095 SDLoc dl(N); 1096 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 1097 SDValue ImpDef = SDValue( 1098 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); 1099 MachineSDNode *Node = CurDAG->getMachineNode( 1100 TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg); 1101 return SDValue(Node, 0); 1102 } 1103 1104 /// Check if the given SHL node (\p N), can be used to form an 1105 /// extended register for an addressing mode. 1106 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, 1107 bool WantExtend, SDValue &Offset, 1108 SDValue &SignExtend) { 1109 assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); 1110 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1111 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) 1112 return false; 1113 1114 SDLoc dl(N); 1115 if (WantExtend) { 1116 AArch64_AM::ShiftExtendType Ext = 1117 getExtendTypeForNode(N.getOperand(0), true); 1118 if (Ext == AArch64_AM::InvalidShiftExtend) 1119 return false; 1120 1121 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); 1122 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1123 MVT::i32); 1124 } else { 1125 Offset = N.getOperand(0); 1126 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); 1127 } 1128 1129 unsigned LegalShiftVal = Log2_32(Size); 1130 unsigned ShiftVal = CSD->getZExtValue(); 1131 1132 if (ShiftVal != 0 && ShiftVal != LegalShiftVal) 1133 return false; 1134 1135 return isWorthFolding(N); 1136 } 1137 1138 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, 1139 SDValue &Base, SDValue &Offset, 1140 SDValue &SignExtend, 1141 SDValue &DoShift) { 1142 if (N.getOpcode() != ISD::ADD) 1143 return false; 1144 SDValue LHS = N.getOperand(0); 1145 SDValue RHS = N.getOperand(1); 1146 SDLoc dl(N); 1147 1148 // We don't want to match immediate adds here, because they are better lowered 1149 // to the register-immediate addressing modes. 1150 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) 1151 return false; 1152 1153 // Check if this particular node is reused in any non-memory related 1154 // operation. If yes, do not try to fold this node into the address 1155 // computation, since the computation will be kept. 1156 const SDNode *Node = N.getNode(); 1157 for (SDNode *UI : Node->uses()) { 1158 if (!isa<MemSDNode>(*UI)) 1159 return false; 1160 } 1161 1162 // Remember if it is worth folding N when it produces extended register. 1163 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 1164 1165 // Try to match a shifted extend on the RHS. 1166 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1167 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { 1168 Base = LHS; 1169 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1170 return true; 1171 } 1172 1173 // Try to match a shifted extend on the LHS. 1174 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1175 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { 1176 Base = RHS; 1177 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1178 return true; 1179 } 1180 1181 // There was no shift, whatever else we find. 1182 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); 1183 1184 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; 1185 // Try to match an unshifted extend on the LHS. 1186 if (IsExtendedRegisterWorthFolding && 1187 (Ext = getExtendTypeForNode(LHS, true)) != 1188 AArch64_AM::InvalidShiftExtend) { 1189 Base = RHS; 1190 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); 1191 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1192 MVT::i32); 1193 if (isWorthFolding(LHS)) 1194 return true; 1195 } 1196 1197 // Try to match an unshifted extend on the RHS. 1198 if (IsExtendedRegisterWorthFolding && 1199 (Ext = getExtendTypeForNode(RHS, true)) != 1200 AArch64_AM::InvalidShiftExtend) { 1201 Base = LHS; 1202 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); 1203 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1204 MVT::i32); 1205 if (isWorthFolding(RHS)) 1206 return true; 1207 } 1208 1209 return false; 1210 } 1211 1212 // Check if the given immediate is preferred by ADD. If an immediate can be 1213 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be 1214 // encoded by one MOVZ, return true. 1215 static bool isPreferredADD(int64_t ImmOff) { 1216 // Constant in [0x0, 0xfff] can be encoded in ADD. 1217 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) 1218 return true; 1219 // Check if it can be encoded in an "ADD LSL #12". 1220 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) 1221 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant. 1222 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && 1223 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; 1224 return false; 1225 } 1226 1227 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, 1228 SDValue &Base, SDValue &Offset, 1229 SDValue &SignExtend, 1230 SDValue &DoShift) { 1231 if (N.getOpcode() != ISD::ADD) 1232 return false; 1233 SDValue LHS = N.getOperand(0); 1234 SDValue RHS = N.getOperand(1); 1235 SDLoc DL(N); 1236 1237 // Check if this particular node is reused in any non-memory related 1238 // operation. If yes, do not try to fold this node into the address 1239 // computation, since the computation will be kept. 1240 const SDNode *Node = N.getNode(); 1241 for (SDNode *UI : Node->uses()) { 1242 if (!isa<MemSDNode>(*UI)) 1243 return false; 1244 } 1245 1246 // Watch out if RHS is a wide immediate, it can not be selected into 1247 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into 1248 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate 1249 // instructions like: 1250 // MOV X0, WideImmediate 1251 // ADD X1, BaseReg, X0 1252 // LDR X2, [X1, 0] 1253 // For such situation, using [BaseReg, XReg] addressing mode can save one 1254 // ADD/SUB: 1255 // MOV X0, WideImmediate 1256 // LDR X2, [BaseReg, X0] 1257 if (isa<ConstantSDNode>(RHS)) { 1258 int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); 1259 unsigned Scale = Log2_32(Size); 1260 // Skip the immediate can be selected by load/store addressing mode. 1261 // Also skip the immediate can be encoded by a single ADD (SUB is also 1262 // checked by using -ImmOff). 1263 if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || 1264 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) 1265 return false; 1266 1267 SDValue Ops[] = { RHS }; 1268 SDNode *MOVI = 1269 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); 1270 SDValue MOVIV = SDValue(MOVI, 0); 1271 // This ADD of two X register will be selected into [Reg+Reg] mode. 1272 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); 1273 } 1274 1275 // Remember if it is worth folding N when it produces extended register. 1276 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 1277 1278 // Try to match a shifted extend on the RHS. 1279 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1280 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { 1281 Base = LHS; 1282 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1283 return true; 1284 } 1285 1286 // Try to match a shifted extend on the LHS. 1287 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1288 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { 1289 Base = RHS; 1290 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1291 return true; 1292 } 1293 1294 // Match any non-shifted, non-extend, non-immediate add expression. 1295 Base = LHS; 1296 Offset = RHS; 1297 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); 1298 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); 1299 // Reg1 + Reg2 is free: no check needed. 1300 return true; 1301 } 1302 1303 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { 1304 static const unsigned RegClassIDs[] = { 1305 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; 1306 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, 1307 AArch64::dsub2, AArch64::dsub3}; 1308 1309 return createTuple(Regs, RegClassIDs, SubRegs); 1310 } 1311 1312 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { 1313 static const unsigned RegClassIDs[] = { 1314 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; 1315 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, 1316 AArch64::qsub2, AArch64::qsub3}; 1317 1318 return createTuple(Regs, RegClassIDs, SubRegs); 1319 } 1320 1321 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) { 1322 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID, 1323 AArch64::ZPR3RegClassID, 1324 AArch64::ZPR4RegClassID}; 1325 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, 1326 AArch64::zsub2, AArch64::zsub3}; 1327 1328 return createTuple(Regs, RegClassIDs, SubRegs); 1329 } 1330 1331 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, 1332 const unsigned RegClassIDs[], 1333 const unsigned SubRegs[]) { 1334 // There's no special register-class for a vector-list of 1 element: it's just 1335 // a vector. 1336 if (Regs.size() == 1) 1337 return Regs[0]; 1338 1339 assert(Regs.size() >= 2 && Regs.size() <= 4); 1340 1341 SDLoc DL(Regs[0]); 1342 1343 SmallVector<SDValue, 4> Ops; 1344 1345 // First operand of REG_SEQUENCE is the desired RegClass. 1346 Ops.push_back( 1347 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); 1348 1349 // Then we get pairs of source & subregister-position for the components. 1350 for (unsigned i = 0; i < Regs.size(); ++i) { 1351 Ops.push_back(Regs[i]); 1352 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); 1353 } 1354 1355 SDNode *N = 1356 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 1357 return SDValue(N, 0); 1358 } 1359 1360 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, 1361 bool isExt) { 1362 SDLoc dl(N); 1363 EVT VT = N->getValueType(0); 1364 1365 unsigned ExtOff = isExt; 1366 1367 // Form a REG_SEQUENCE to force register allocation. 1368 unsigned Vec0Off = ExtOff + 1; 1369 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, 1370 N->op_begin() + Vec0Off + NumVecs); 1371 SDValue RegSeq = createQTuple(Regs); 1372 1373 SmallVector<SDValue, 6> Ops; 1374 if (isExt) 1375 Ops.push_back(N->getOperand(1)); 1376 Ops.push_back(RegSeq); 1377 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); 1378 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 1379 } 1380 1381 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { 1382 LoadSDNode *LD = cast<LoadSDNode>(N); 1383 if (LD->isUnindexed()) 1384 return false; 1385 EVT VT = LD->getMemoryVT(); 1386 EVT DstVT = N->getValueType(0); 1387 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1388 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; 1389 1390 // We're not doing validity checking here. That was done when checking 1391 // if we should mark the load as indexed or not. We're just selecting 1392 // the right instruction. 1393 unsigned Opcode = 0; 1394 1395 ISD::LoadExtType ExtType = LD->getExtensionType(); 1396 bool InsertTo64 = false; 1397 if (VT == MVT::i64) 1398 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; 1399 else if (VT == MVT::i32) { 1400 if (ExtType == ISD::NON_EXTLOAD) 1401 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1402 else if (ExtType == ISD::SEXTLOAD) 1403 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; 1404 else { 1405 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1406 InsertTo64 = true; 1407 // The result of the load is only i32. It's the subreg_to_reg that makes 1408 // it into an i64. 1409 DstVT = MVT::i32; 1410 } 1411 } else if (VT == MVT::i16) { 1412 if (ExtType == ISD::SEXTLOAD) { 1413 if (DstVT == MVT::i64) 1414 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; 1415 else 1416 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; 1417 } else { 1418 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; 1419 InsertTo64 = DstVT == MVT::i64; 1420 // The result of the load is only i32. It's the subreg_to_reg that makes 1421 // it into an i64. 1422 DstVT = MVT::i32; 1423 } 1424 } else if (VT == MVT::i8) { 1425 if (ExtType == ISD::SEXTLOAD) { 1426 if (DstVT == MVT::i64) 1427 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; 1428 else 1429 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; 1430 } else { 1431 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; 1432 InsertTo64 = DstVT == MVT::i64; 1433 // The result of the load is only i32. It's the subreg_to_reg that makes 1434 // it into an i64. 1435 DstVT = MVT::i32; 1436 } 1437 } else if (VT == MVT::f16) { 1438 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1439 } else if (VT == MVT::bf16) { 1440 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1441 } else if (VT == MVT::f32) { 1442 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; 1443 } else if (VT == MVT::f64 || VT.is64BitVector()) { 1444 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; 1445 } else if (VT.is128BitVector()) { 1446 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; 1447 } else 1448 return false; 1449 SDValue Chain = LD->getChain(); 1450 SDValue Base = LD->getBasePtr(); 1451 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); 1452 int OffsetVal = (int)OffsetOp->getZExtValue(); 1453 SDLoc dl(N); 1454 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); 1455 SDValue Ops[] = { Base, Offset, Chain }; 1456 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, 1457 MVT::Other, Ops); 1458 1459 // Transfer memoperands. 1460 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1461 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp}); 1462 1463 // Either way, we're replacing the node, so tell the caller that. 1464 SDValue LoadedVal = SDValue(Res, 1); 1465 if (InsertTo64) { 1466 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 1467 LoadedVal = 1468 SDValue(CurDAG->getMachineNode( 1469 AArch64::SUBREG_TO_REG, dl, MVT::i64, 1470 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, 1471 SubReg), 1472 0); 1473 } 1474 1475 ReplaceUses(SDValue(N, 0), LoadedVal); 1476 ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); 1477 ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); 1478 CurDAG->RemoveDeadNode(N); 1479 return true; 1480 } 1481 1482 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 1483 unsigned SubRegIdx) { 1484 SDLoc dl(N); 1485 EVT VT = N->getValueType(0); 1486 SDValue Chain = N->getOperand(0); 1487 1488 SDValue Ops[] = {N->getOperand(2), // Mem operand; 1489 Chain}; 1490 1491 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1492 1493 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1494 SDValue SuperReg = SDValue(Ld, 0); 1495 for (unsigned i = 0; i < NumVecs; ++i) 1496 ReplaceUses(SDValue(N, i), 1497 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1498 1499 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1500 1501 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one, 1502 // because it's too simple to have needed special treatment during lowering. 1503 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) { 1504 MachineMemOperand *MemOp = MemIntr->getMemOperand(); 1505 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 1506 } 1507 1508 CurDAG->RemoveDeadNode(N); 1509 } 1510 1511 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, 1512 unsigned Opc, unsigned SubRegIdx) { 1513 SDLoc dl(N); 1514 EVT VT = N->getValueType(0); 1515 SDValue Chain = N->getOperand(0); 1516 1517 SDValue Ops[] = {N->getOperand(1), // Mem operand 1518 N->getOperand(2), // Incremental 1519 Chain}; 1520 1521 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1522 MVT::Untyped, MVT::Other}; 1523 1524 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1525 1526 // Update uses of write back register 1527 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1528 1529 // Update uses of vector list 1530 SDValue SuperReg = SDValue(Ld, 1); 1531 if (NumVecs == 1) 1532 ReplaceUses(SDValue(N, 0), SuperReg); 1533 else 1534 for (unsigned i = 0; i < NumVecs; ++i) 1535 ReplaceUses(SDValue(N, i), 1536 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1537 1538 // Update the chain 1539 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1540 CurDAG->RemoveDeadNode(N); 1541 } 1542 1543 /// Optimize \param OldBase and \param OldOffset selecting the best addressing 1544 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the 1545 /// new Base and an SDValue representing the new offset. 1546 std::tuple<unsigned, SDValue, SDValue> 1547 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, 1548 unsigned Opc_ri, 1549 const SDValue &OldBase, 1550 const SDValue &OldOffset, 1551 unsigned Scale) { 1552 SDValue NewBase = OldBase; 1553 SDValue NewOffset = OldOffset; 1554 // Detect a possible Reg+Imm addressing mode. 1555 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>( 1556 N, OldBase, NewBase, NewOffset); 1557 1558 // Detect a possible reg+reg addressing mode, but only if we haven't already 1559 // detected a Reg+Imm one. 1560 const bool IsRegReg = 1561 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); 1562 1563 // Select the instruction. 1564 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); 1565 } 1566 1567 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, 1568 unsigned Scale, unsigned Opc_ri, 1569 unsigned Opc_rr, bool IsIntr) { 1570 assert(Scale < 4 && "Invalid scaling value."); 1571 SDLoc DL(N); 1572 EVT VT = N->getValueType(0); 1573 SDValue Chain = N->getOperand(0); 1574 1575 // Optimize addressing mode. 1576 SDValue Base, Offset; 1577 unsigned Opc; 1578 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1579 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2), 1580 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); 1581 1582 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate 1583 Base, // Memory operand 1584 Offset, Chain}; 1585 1586 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1587 1588 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1589 SDValue SuperReg = SDValue(Load, 0); 1590 for (unsigned i = 0; i < NumVecs; ++i) 1591 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1592 AArch64::zsub0 + i, DL, VT, SuperReg)); 1593 1594 // Copy chain 1595 unsigned ChainIdx = NumVecs; 1596 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1597 CurDAG->RemoveDeadNode(N); 1598 } 1599 1600 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, 1601 unsigned Opc) { 1602 SDLoc dl(N); 1603 EVT VT = N->getOperand(2)->getValueType(0); 1604 1605 // Form a REG_SEQUENCE to force register allocation. 1606 bool Is128Bit = VT.getSizeInBits() == 128; 1607 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1608 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1609 1610 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; 1611 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1612 1613 // Transfer memoperands. 1614 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1615 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1616 1617 ReplaceNode(N, St); 1618 } 1619 1620 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, 1621 unsigned Scale, unsigned Opc_rr, 1622 unsigned Opc_ri) { 1623 SDLoc dl(N); 1624 1625 // Form a REG_SEQUENCE to force register allocation. 1626 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1627 SDValue RegSeq = createZTuple(Regs); 1628 1629 // Optimize addressing mode. 1630 unsigned Opc; 1631 SDValue Offset, Base; 1632 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1633 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), 1634 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); 1635 1636 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate 1637 Base, // address 1638 Offset, // offset 1639 N->getOperand(0)}; // chain 1640 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1641 1642 ReplaceNode(N, St); 1643 } 1644 1645 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, 1646 SDValue &OffImm) { 1647 SDLoc dl(N); 1648 const DataLayout &DL = CurDAG->getDataLayout(); 1649 const TargetLowering *TLI = getTargetLowering(); 1650 1651 // Try to match it for the frame address 1652 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) { 1653 int FI = FINode->getIndex(); 1654 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1655 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1656 return true; 1657 } 1658 1659 return false; 1660 } 1661 1662 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 1663 unsigned Opc) { 1664 SDLoc dl(N); 1665 EVT VT = N->getOperand(2)->getValueType(0); 1666 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1667 MVT::Other}; // Type for the Chain 1668 1669 // Form a REG_SEQUENCE to force register allocation. 1670 bool Is128Bit = VT.getSizeInBits() == 128; 1671 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1672 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1673 1674 SDValue Ops[] = {RegSeq, 1675 N->getOperand(NumVecs + 1), // base register 1676 N->getOperand(NumVecs + 2), // Incremental 1677 N->getOperand(0)}; // Chain 1678 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1679 1680 ReplaceNode(N, St); 1681 } 1682 1683 namespace { 1684 /// WidenVector - Given a value in the V64 register class, produce the 1685 /// equivalent value in the V128 register class. 1686 class WidenVector { 1687 SelectionDAG &DAG; 1688 1689 public: 1690 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 1691 1692 SDValue operator()(SDValue V64Reg) { 1693 EVT VT = V64Reg.getValueType(); 1694 unsigned NarrowSize = VT.getVectorNumElements(); 1695 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1696 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 1697 SDLoc DL(V64Reg); 1698 1699 SDValue Undef = 1700 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 1701 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 1702 } 1703 }; 1704 } // namespace 1705 1706 /// NarrowVector - Given a value in the V128 register class, produce the 1707 /// equivalent value in the V64 register class. 1708 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 1709 EVT VT = V128Reg.getValueType(); 1710 unsigned WideSize = VT.getVectorNumElements(); 1711 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1712 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 1713 1714 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 1715 V128Reg); 1716 } 1717 1718 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 1719 unsigned Opc) { 1720 SDLoc dl(N); 1721 EVT VT = N->getValueType(0); 1722 bool Narrow = VT.getSizeInBits() == 64; 1723 1724 // Form a REG_SEQUENCE to force register allocation. 1725 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1726 1727 if (Narrow) 1728 transform(Regs, Regs.begin(), 1729 WidenVector(*CurDAG)); 1730 1731 SDValue RegSeq = createQTuple(Regs); 1732 1733 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1734 1735 unsigned LaneNo = 1736 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1737 1738 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1739 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1740 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1741 SDValue SuperReg = SDValue(Ld, 0); 1742 1743 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1744 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1745 AArch64::qsub2, AArch64::qsub3 }; 1746 for (unsigned i = 0; i < NumVecs; ++i) { 1747 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 1748 if (Narrow) 1749 NV = NarrowVector(NV, *CurDAG); 1750 ReplaceUses(SDValue(N, i), NV); 1751 } 1752 1753 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1754 CurDAG->RemoveDeadNode(N); 1755 } 1756 1757 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 1758 unsigned Opc) { 1759 SDLoc dl(N); 1760 EVT VT = N->getValueType(0); 1761 bool Narrow = VT.getSizeInBits() == 64; 1762 1763 // Form a REG_SEQUENCE to force register allocation. 1764 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1765 1766 if (Narrow) 1767 transform(Regs, Regs.begin(), 1768 WidenVector(*CurDAG)); 1769 1770 SDValue RegSeq = createQTuple(Regs); 1771 1772 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1773 RegSeq->getValueType(0), MVT::Other}; 1774 1775 unsigned LaneNo = 1776 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1777 1778 SDValue Ops[] = {RegSeq, 1779 CurDAG->getTargetConstant(LaneNo, dl, 1780 MVT::i64), // Lane Number 1781 N->getOperand(NumVecs + 2), // Base register 1782 N->getOperand(NumVecs + 3), // Incremental 1783 N->getOperand(0)}; 1784 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1785 1786 // Update uses of the write back register 1787 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1788 1789 // Update uses of the vector list 1790 SDValue SuperReg = SDValue(Ld, 1); 1791 if (NumVecs == 1) { 1792 ReplaceUses(SDValue(N, 0), 1793 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 1794 } else { 1795 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1796 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1797 AArch64::qsub2, AArch64::qsub3 }; 1798 for (unsigned i = 0; i < NumVecs; ++i) { 1799 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 1800 SuperReg); 1801 if (Narrow) 1802 NV = NarrowVector(NV, *CurDAG); 1803 ReplaceUses(SDValue(N, i), NV); 1804 } 1805 } 1806 1807 // Update the Chain 1808 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1809 CurDAG->RemoveDeadNode(N); 1810 } 1811 1812 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 1813 unsigned Opc) { 1814 SDLoc dl(N); 1815 EVT VT = N->getOperand(2)->getValueType(0); 1816 bool Narrow = VT.getSizeInBits() == 64; 1817 1818 // Form a REG_SEQUENCE to force register allocation. 1819 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1820 1821 if (Narrow) 1822 transform(Regs, Regs.begin(), 1823 WidenVector(*CurDAG)); 1824 1825 SDValue RegSeq = createQTuple(Regs); 1826 1827 unsigned LaneNo = 1828 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1829 1830 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1831 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1832 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 1833 1834 // Transfer memoperands. 1835 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1836 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1837 1838 ReplaceNode(N, St); 1839 } 1840 1841 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 1842 unsigned Opc) { 1843 SDLoc dl(N); 1844 EVT VT = N->getOperand(2)->getValueType(0); 1845 bool Narrow = VT.getSizeInBits() == 64; 1846 1847 // Form a REG_SEQUENCE to force register allocation. 1848 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1849 1850 if (Narrow) 1851 transform(Regs, Regs.begin(), 1852 WidenVector(*CurDAG)); 1853 1854 SDValue RegSeq = createQTuple(Regs); 1855 1856 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1857 MVT::Other}; 1858 1859 unsigned LaneNo = 1860 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1861 1862 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1863 N->getOperand(NumVecs + 2), // Base Register 1864 N->getOperand(NumVecs + 3), // Incremental 1865 N->getOperand(0)}; 1866 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1867 1868 // Transfer memoperands. 1869 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1870 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1871 1872 ReplaceNode(N, St); 1873 } 1874 1875 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 1876 unsigned &Opc, SDValue &Opd0, 1877 unsigned &LSB, unsigned &MSB, 1878 unsigned NumberOfIgnoredLowBits, 1879 bool BiggerPattern) { 1880 assert(N->getOpcode() == ISD::AND && 1881 "N must be a AND operation to call this function"); 1882 1883 EVT VT = N->getValueType(0); 1884 1885 // Here we can test the type of VT and return false when the type does not 1886 // match, but since it is done prior to that call in the current context 1887 // we turned that into an assert to avoid redundant code. 1888 assert((VT == MVT::i32 || VT == MVT::i64) && 1889 "Type checking must have been done before calling this function"); 1890 1891 // FIXME: simplify-demanded-bits in DAGCombine will probably have 1892 // changed the AND node to a 32-bit mask operation. We'll have to 1893 // undo that as part of the transform here if we want to catch all 1894 // the opportunities. 1895 // Currently the NumberOfIgnoredLowBits argument helps to recover 1896 // form these situations when matching bigger pattern (bitfield insert). 1897 1898 // For unsigned extracts, check for a shift right and mask 1899 uint64_t AndImm = 0; 1900 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) 1901 return false; 1902 1903 const SDNode *Op0 = N->getOperand(0).getNode(); 1904 1905 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 1906 // simplified. Try to undo that 1907 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 1908 1909 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 1910 if (AndImm & (AndImm + 1)) 1911 return false; 1912 1913 bool ClampMSB = false; 1914 uint64_t SrlImm = 0; 1915 // Handle the SRL + ANY_EXTEND case. 1916 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 1917 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 1918 // Extend the incoming operand of the SRL to 64-bit. 1919 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 1920 // Make sure to clamp the MSB so that we preserve the semantics of the 1921 // original operations. 1922 ClampMSB = true; 1923 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 1924 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 1925 SrlImm)) { 1926 // If the shift result was truncated, we can still combine them. 1927 Opd0 = Op0->getOperand(0).getOperand(0); 1928 1929 // Use the type of SRL node. 1930 VT = Opd0->getValueType(0); 1931 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 1932 Opd0 = Op0->getOperand(0); 1933 ClampMSB = (VT == MVT::i32); 1934 } else if (BiggerPattern) { 1935 // Let's pretend a 0 shift right has been performed. 1936 // The resulting code will be at least as good as the original one 1937 // plus it may expose more opportunities for bitfield insert pattern. 1938 // FIXME: Currently we limit this to the bigger pattern, because 1939 // some optimizations expect AND and not UBFM. 1940 Opd0 = N->getOperand(0); 1941 } else 1942 return false; 1943 1944 // Bail out on large immediates. This happens when no proper 1945 // combining/constant folding was performed. 1946 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { 1947 LLVM_DEBUG( 1948 (dbgs() << N 1949 << ": Found large shift immediate, this should not happen\n")); 1950 return false; 1951 } 1952 1953 LSB = SrlImm; 1954 MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm) 1955 : countTrailingOnes<uint64_t>(AndImm)) - 1956 1; 1957 if (ClampMSB) 1958 // Since we're moving the extend before the right shift operation, we need 1959 // to clamp the MSB to make sure we don't shift in undefined bits instead of 1960 // the zeros which would get shifted in with the original right shift 1961 // operation. 1962 MSB = MSB > 31 ? 31 : MSB; 1963 1964 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 1965 return true; 1966 } 1967 1968 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, 1969 SDValue &Opd0, unsigned &Immr, 1970 unsigned &Imms) { 1971 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); 1972 1973 EVT VT = N->getValueType(0); 1974 unsigned BitWidth = VT.getSizeInBits(); 1975 assert((VT == MVT::i32 || VT == MVT::i64) && 1976 "Type checking must have been done before calling this function"); 1977 1978 SDValue Op = N->getOperand(0); 1979 if (Op->getOpcode() == ISD::TRUNCATE) { 1980 Op = Op->getOperand(0); 1981 VT = Op->getValueType(0); 1982 BitWidth = VT.getSizeInBits(); 1983 } 1984 1985 uint64_t ShiftImm; 1986 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && 1987 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 1988 return false; 1989 1990 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1991 if (ShiftImm + Width > BitWidth) 1992 return false; 1993 1994 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri; 1995 Opd0 = Op.getOperand(0); 1996 Immr = ShiftImm; 1997 Imms = ShiftImm + Width - 1; 1998 return true; 1999 } 2000 2001 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 2002 SDValue &Opd0, unsigned &LSB, 2003 unsigned &MSB) { 2004 // We are looking for the following pattern which basically extracts several 2005 // continuous bits from the source value and places it from the LSB of the 2006 // destination value, all other bits of the destination value or set to zero: 2007 // 2008 // Value2 = AND Value, MaskImm 2009 // SRL Value2, ShiftImm 2010 // 2011 // with MaskImm >> ShiftImm to search for the bit width. 2012 // 2013 // This gets selected into a single UBFM: 2014 // 2015 // UBFM Value, ShiftImm, BitWide + SrlImm -1 2016 // 2017 2018 if (N->getOpcode() != ISD::SRL) 2019 return false; 2020 2021 uint64_t AndMask = 0; 2022 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) 2023 return false; 2024 2025 Opd0 = N->getOperand(0).getOperand(0); 2026 2027 uint64_t SrlImm = 0; 2028 if (!isIntImmediate(N->getOperand(1), SrlImm)) 2029 return false; 2030 2031 // Check whether we really have several bits extract here. 2032 unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm)); 2033 if (BitWide && isMask_64(AndMask >> SrlImm)) { 2034 if (N->getValueType(0) == MVT::i32) 2035 Opc = AArch64::UBFMWri; 2036 else 2037 Opc = AArch64::UBFMXri; 2038 2039 LSB = SrlImm; 2040 MSB = BitWide + SrlImm - 1; 2041 return true; 2042 } 2043 2044 return false; 2045 } 2046 2047 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 2048 unsigned &Immr, unsigned &Imms, 2049 bool BiggerPattern) { 2050 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 2051 "N must be a SHR/SRA operation to call this function"); 2052 2053 EVT VT = N->getValueType(0); 2054 2055 // Here we can test the type of VT and return false when the type does not 2056 // match, but since it is done prior to that call in the current context 2057 // we turned that into an assert to avoid redundant code. 2058 assert((VT == MVT::i32 || VT == MVT::i64) && 2059 "Type checking must have been done before calling this function"); 2060 2061 // Check for AND + SRL doing several bits extract. 2062 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 2063 return true; 2064 2065 // We're looking for a shift of a shift. 2066 uint64_t ShlImm = 0; 2067 uint64_t TruncBits = 0; 2068 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 2069 Opd0 = N->getOperand(0).getOperand(0); 2070 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 2071 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 2072 // We are looking for a shift of truncate. Truncate from i64 to i32 could 2073 // be considered as setting high 32 bits as zero. Our strategy here is to 2074 // always generate 64bit UBFM. This consistency will help the CSE pass 2075 // later find more redundancy. 2076 Opd0 = N->getOperand(0).getOperand(0); 2077 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 2078 VT = Opd0.getValueType(); 2079 assert(VT == MVT::i64 && "the promoted type should be i64"); 2080 } else if (BiggerPattern) { 2081 // Let's pretend a 0 shift left has been performed. 2082 // FIXME: Currently we limit this to the bigger pattern case, 2083 // because some optimizations expect AND and not UBFM 2084 Opd0 = N->getOperand(0); 2085 } else 2086 return false; 2087 2088 // Missing combines/constant folding may have left us with strange 2089 // constants. 2090 if (ShlImm >= VT.getSizeInBits()) { 2091 LLVM_DEBUG( 2092 (dbgs() << N 2093 << ": Found large shift immediate, this should not happen\n")); 2094 return false; 2095 } 2096 2097 uint64_t SrlImm = 0; 2098 if (!isIntImmediate(N->getOperand(1), SrlImm)) 2099 return false; 2100 2101 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 2102 "bad amount in shift node!"); 2103 int immr = SrlImm - ShlImm; 2104 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 2105 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 2106 // SRA requires a signed extraction 2107 if (VT == MVT::i32) 2108 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 2109 else 2110 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; 2111 return true; 2112 } 2113 2114 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 2115 assert(N->getOpcode() == ISD::SIGN_EXTEND); 2116 2117 EVT VT = N->getValueType(0); 2118 EVT NarrowVT = N->getOperand(0)->getValueType(0); 2119 if (VT != MVT::i64 || NarrowVT != MVT::i32) 2120 return false; 2121 2122 uint64_t ShiftImm; 2123 SDValue Op = N->getOperand(0); 2124 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2125 return false; 2126 2127 SDLoc dl(N); 2128 // Extend the incoming operand of the shift to 64-bits. 2129 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 2130 unsigned Immr = ShiftImm; 2131 unsigned Imms = NarrowVT.getSizeInBits() - 1; 2132 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2133 CurDAG->getTargetConstant(Imms, dl, VT)}; 2134 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 2135 return true; 2136 } 2137 2138 /// Try to form fcvtl2 instructions from a floating-point extend of a high-half 2139 /// extract of a subvector. 2140 bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) { 2141 assert(N->getOpcode() == ISD::FP_EXTEND); 2142 2143 // There are 2 forms of fcvtl2 - extend to double or extend to float. 2144 SDValue Extract = N->getOperand(0); 2145 EVT VT = N->getValueType(0); 2146 EVT NarrowVT = Extract.getValueType(); 2147 if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) && 2148 (VT != MVT::v4f32 || NarrowVT != MVT::v4f16)) 2149 return false; 2150 2151 // Optionally look past a bitcast. 2152 Extract = peekThroughBitcasts(Extract); 2153 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) 2154 return false; 2155 2156 // Match extract from start of high half index. 2157 // Example: v8i16 -> v4i16 means the extract must begin at index 4. 2158 unsigned ExtractIndex = Extract.getConstantOperandVal(1); 2159 if (ExtractIndex != Extract.getValueType().getVectorNumElements()) 2160 return false; 2161 2162 auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16; 2163 CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0)); 2164 return true; 2165 } 2166 2167 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 2168 SDValue &Opd0, unsigned &Immr, unsigned &Imms, 2169 unsigned NumberOfIgnoredLowBits = 0, 2170 bool BiggerPattern = false) { 2171 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 2172 return false; 2173 2174 switch (N->getOpcode()) { 2175 default: 2176 if (!N->isMachineOpcode()) 2177 return false; 2178 break; 2179 case ISD::AND: 2180 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, 2181 NumberOfIgnoredLowBits, BiggerPattern); 2182 case ISD::SRL: 2183 case ISD::SRA: 2184 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); 2185 2186 case ISD::SIGN_EXTEND_INREG: 2187 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); 2188 } 2189 2190 unsigned NOpc = N->getMachineOpcode(); 2191 switch (NOpc) { 2192 default: 2193 return false; 2194 case AArch64::SBFMWri: 2195 case AArch64::UBFMWri: 2196 case AArch64::SBFMXri: 2197 case AArch64::UBFMXri: 2198 Opc = NOpc; 2199 Opd0 = N->getOperand(0); 2200 Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 2201 Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 2202 return true; 2203 } 2204 // Unreachable 2205 return false; 2206 } 2207 2208 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { 2209 unsigned Opc, Immr, Imms; 2210 SDValue Opd0; 2211 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) 2212 return false; 2213 2214 EVT VT = N->getValueType(0); 2215 SDLoc dl(N); 2216 2217 // If the bit extract operation is 64bit but the original type is 32bit, we 2218 // need to add one EXTRACT_SUBREG. 2219 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { 2220 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), 2221 CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; 2222 2223 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); 2224 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 2225 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, 2226 MVT::i32, SDValue(BFM, 0), SubReg)); 2227 return true; 2228 } 2229 2230 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2231 CurDAG->getTargetConstant(Imms, dl, VT)}; 2232 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2233 return true; 2234 } 2235 2236 /// Does DstMask form a complementary pair with the mask provided by 2237 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, 2238 /// this asks whether DstMask zeroes precisely those bits that will be set by 2239 /// the other half. 2240 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, 2241 unsigned NumberOfIgnoredHighBits, EVT VT) { 2242 assert((VT == MVT::i32 || VT == MVT::i64) && 2243 "i32 or i64 mask type expected!"); 2244 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; 2245 2246 APInt SignificantDstMask = APInt(BitWidth, DstMask); 2247 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); 2248 2249 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && 2250 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes(); 2251 } 2252 2253 // Look for bits that will be useful for later uses. 2254 // A bit is consider useless as soon as it is dropped and never used 2255 // before it as been dropped. 2256 // E.g., looking for useful bit of x 2257 // 1. y = x & 0x7 2258 // 2. z = y >> 2 2259 // After #1, x useful bits are 0x7, then the useful bits of x, live through 2260 // y. 2261 // After #2, the useful bits of x are 0x4. 2262 // However, if x is used on an unpredicatable instruction, then all its bits 2263 // are useful. 2264 // E.g. 2265 // 1. y = x & 0x7 2266 // 2. z = y >> 2 2267 // 3. str x, [@x] 2268 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); 2269 2270 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, 2271 unsigned Depth) { 2272 uint64_t Imm = 2273 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2274 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); 2275 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); 2276 getUsefulBits(Op, UsefulBits, Depth + 1); 2277 } 2278 2279 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, 2280 uint64_t Imm, uint64_t MSB, 2281 unsigned Depth) { 2282 // inherit the bitwidth value 2283 APInt OpUsefulBits(UsefulBits); 2284 OpUsefulBits = 1; 2285 2286 if (MSB >= Imm) { 2287 OpUsefulBits <<= MSB - Imm + 1; 2288 --OpUsefulBits; 2289 // The interesting part will be in the lower part of the result 2290 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2291 // The interesting part was starting at Imm in the argument 2292 OpUsefulBits <<= Imm; 2293 } else { 2294 OpUsefulBits <<= MSB + 1; 2295 --OpUsefulBits; 2296 // The interesting part will be shifted in the result 2297 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; 2298 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2299 // The interesting part was at zero in the argument 2300 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); 2301 } 2302 2303 UsefulBits &= OpUsefulBits; 2304 } 2305 2306 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, 2307 unsigned Depth) { 2308 uint64_t Imm = 2309 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2310 uint64_t MSB = 2311 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2312 2313 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 2314 } 2315 2316 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, 2317 unsigned Depth) { 2318 uint64_t ShiftTypeAndValue = 2319 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2320 APInt Mask(UsefulBits); 2321 Mask.clearAllBits(); 2322 Mask.flipAllBits(); 2323 2324 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { 2325 // Shift Left 2326 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2327 Mask <<= ShiftAmt; 2328 getUsefulBits(Op, Mask, Depth + 1); 2329 Mask.lshrInPlace(ShiftAmt); 2330 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 2331 // Shift Right 2332 // We do not handle AArch64_AM::ASR, because the sign will change the 2333 // number of useful bits 2334 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2335 Mask.lshrInPlace(ShiftAmt); 2336 getUsefulBits(Op, Mask, Depth + 1); 2337 Mask <<= ShiftAmt; 2338 } else 2339 return; 2340 2341 UsefulBits &= Mask; 2342 } 2343 2344 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 2345 unsigned Depth) { 2346 uint64_t Imm = 2347 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2348 uint64_t MSB = 2349 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 2350 2351 APInt OpUsefulBits(UsefulBits); 2352 OpUsefulBits = 1; 2353 2354 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 2355 ResultUsefulBits.flipAllBits(); 2356 APInt Mask(UsefulBits.getBitWidth(), 0); 2357 2358 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 2359 2360 if (MSB >= Imm) { 2361 // The instruction is a BFXIL. 2362 uint64_t Width = MSB - Imm + 1; 2363 uint64_t LSB = Imm; 2364 2365 OpUsefulBits <<= Width; 2366 --OpUsefulBits; 2367 2368 if (Op.getOperand(1) == Orig) { 2369 // Copy the low bits from the result to bits starting from LSB. 2370 Mask = ResultUsefulBits & OpUsefulBits; 2371 Mask <<= LSB; 2372 } 2373 2374 if (Op.getOperand(0) == Orig) 2375 // Bits starting from LSB in the input contribute to the result. 2376 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2377 } else { 2378 // The instruction is a BFI. 2379 uint64_t Width = MSB + 1; 2380 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 2381 2382 OpUsefulBits <<= Width; 2383 --OpUsefulBits; 2384 OpUsefulBits <<= LSB; 2385 2386 if (Op.getOperand(1) == Orig) { 2387 // Copy the bits from the result to the zero bits. 2388 Mask = ResultUsefulBits & OpUsefulBits; 2389 Mask.lshrInPlace(LSB); 2390 } 2391 2392 if (Op.getOperand(0) == Orig) 2393 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2394 } 2395 2396 UsefulBits &= Mask; 2397 } 2398 2399 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2400 SDValue Orig, unsigned Depth) { 2401 2402 // Users of this node should have already been instruction selected 2403 // FIXME: Can we turn that into an assert? 2404 if (!UserNode->isMachineOpcode()) 2405 return; 2406 2407 switch (UserNode->getMachineOpcode()) { 2408 default: 2409 return; 2410 case AArch64::ANDSWri: 2411 case AArch64::ANDSXri: 2412 case AArch64::ANDWri: 2413 case AArch64::ANDXri: 2414 // We increment Depth only when we call the getUsefulBits 2415 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2416 Depth); 2417 case AArch64::UBFMWri: 2418 case AArch64::UBFMXri: 2419 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2420 2421 case AArch64::ORRWrs: 2422 case AArch64::ORRXrs: 2423 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig) 2424 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2425 Depth); 2426 return; 2427 case AArch64::BFMWri: 2428 case AArch64::BFMXri: 2429 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2430 2431 case AArch64::STRBBui: 2432 case AArch64::STURBBi: 2433 if (UserNode->getOperand(0) != Orig) 2434 return; 2435 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); 2436 return; 2437 2438 case AArch64::STRHHui: 2439 case AArch64::STURHHi: 2440 if (UserNode->getOperand(0) != Orig) 2441 return; 2442 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2443 return; 2444 } 2445 } 2446 2447 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2448 if (Depth >= SelectionDAG::MaxRecursionDepth) 2449 return; 2450 // Initialize UsefulBits 2451 if (!Depth) { 2452 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2453 // At the beginning, assume every produced bits is useful 2454 UsefulBits = APInt(Bitwidth, 0); 2455 UsefulBits.flipAllBits(); 2456 } 2457 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2458 2459 for (SDNode *Node : Op.getNode()->uses()) { 2460 // A use cannot produce useful bits 2461 APInt UsefulBitsForUse = APInt(UsefulBits); 2462 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 2463 UsersUsefulBits |= UsefulBitsForUse; 2464 } 2465 // UsefulBits contains the produced bits that are meaningful for the 2466 // current definition, thus a user cannot make a bit meaningful at 2467 // this point 2468 UsefulBits &= UsersUsefulBits; 2469 } 2470 2471 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 2472 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 2473 /// 0, return Op unchanged. 2474 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 2475 if (ShlAmount == 0) 2476 return Op; 2477 2478 EVT VT = Op.getValueType(); 2479 SDLoc dl(Op); 2480 unsigned BitWidth = VT.getSizeInBits(); 2481 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2482 2483 SDNode *ShiftNode; 2484 if (ShlAmount > 0) { 2485 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 2486 ShiftNode = CurDAG->getMachineNode( 2487 UBFMOpc, dl, VT, Op, 2488 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 2489 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 2490 } else { 2491 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 2492 assert(ShlAmount < 0 && "expected right shift"); 2493 int ShrAmount = -ShlAmount; 2494 ShiftNode = CurDAG->getMachineNode( 2495 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 2496 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 2497 } 2498 2499 return SDValue(ShiftNode, 0); 2500 } 2501 2502 /// Does this tree qualify as an attempt to move a bitfield into position, 2503 /// essentially "(and (shl VAL, N), Mask)". 2504 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 2505 bool BiggerPattern, 2506 SDValue &Src, int &ShiftAmount, 2507 int &MaskWidth) { 2508 EVT VT = Op.getValueType(); 2509 unsigned BitWidth = VT.getSizeInBits(); 2510 (void)BitWidth; 2511 assert(BitWidth == 32 || BitWidth == 64); 2512 2513 KnownBits Known = CurDAG->computeKnownBits(Op); 2514 2515 // Non-zero in the sense that they're not provably zero, which is the key 2516 // point if we want to use this value 2517 uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 2518 2519 // Discard a constant AND mask if present. It's safe because the node will 2520 // already have been factored into the computeKnownBits calculation above. 2521 uint64_t AndImm; 2522 if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { 2523 assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0); 2524 Op = Op.getOperand(0); 2525 } 2526 2527 // Don't match if the SHL has more than one use, since then we'll end up 2528 // generating SHL+UBFIZ instead of just keeping SHL+AND. 2529 if (!BiggerPattern && !Op.hasOneUse()) 2530 return false; 2531 2532 uint64_t ShlImm; 2533 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 2534 return false; 2535 Op = Op.getOperand(0); 2536 2537 if (!isShiftedMask_64(NonZeroBits)) 2538 return false; 2539 2540 ShiftAmount = countTrailingZeros(NonZeroBits); 2541 MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); 2542 2543 // BFI encompasses sufficiently many nodes that it's worth inserting an extra 2544 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL 2545 // amount. BiggerPattern is true when this pattern is being matched for BFI, 2546 // BiggerPattern is false when this pattern is being matched for UBFIZ, in 2547 // which case it is not profitable to insert an extra shift. 2548 if (ShlImm - ShiftAmount != 0 && !BiggerPattern) 2549 return false; 2550 Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); 2551 2552 return true; 2553 } 2554 2555 static bool isShiftedMask(uint64_t Mask, EVT VT) { 2556 assert(VT == MVT::i32 || VT == MVT::i64); 2557 if (VT == MVT::i32) 2558 return isShiftedMask_32(Mask); 2559 return isShiftedMask_64(Mask); 2560 } 2561 2562 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 2563 // inserted only sets known zero bits. 2564 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 2565 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2566 2567 EVT VT = N->getValueType(0); 2568 if (VT != MVT::i32 && VT != MVT::i64) 2569 return false; 2570 2571 unsigned BitWidth = VT.getSizeInBits(); 2572 2573 uint64_t OrImm; 2574 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 2575 return false; 2576 2577 // Skip this transformation if the ORR immediate can be encoded in the ORR. 2578 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 2579 // performance neutral. 2580 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 2581 return false; 2582 2583 uint64_t MaskImm; 2584 SDValue And = N->getOperand(0); 2585 // Must be a single use AND with an immediate operand. 2586 if (!And.hasOneUse() || 2587 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 2588 return false; 2589 2590 // Compute the Known Zero for the AND as this allows us to catch more general 2591 // cases than just looking for AND with imm. 2592 KnownBits Known = CurDAG->computeKnownBits(And); 2593 2594 // Non-zero in the sense that they're not provably zero, which is the key 2595 // point if we want to use this value. 2596 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 2597 2598 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 2599 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 2600 return false; 2601 2602 // The bits being inserted must only set those bits that are known to be zero. 2603 if ((OrImm & NotKnownZero) != 0) { 2604 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 2605 // currently handle this case. 2606 return false; 2607 } 2608 2609 // BFI/BFXIL dst, src, #lsb, #width. 2610 int LSB = countTrailingOnes(NotKnownZero); 2611 int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation(); 2612 2613 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 2614 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2615 unsigned ImmS = Width - 1; 2616 2617 // If we're creating a BFI instruction avoid cases where we need more 2618 // instructions to materialize the BFI constant as compared to the original 2619 // ORR. A BFXIL will use the same constant as the original ORR, so the code 2620 // should be no worse in this case. 2621 bool IsBFI = LSB != 0; 2622 uint64_t BFIImm = OrImm >> LSB; 2623 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 2624 // We have a BFI instruction and we know the constant can't be materialized 2625 // with a ORR-immediate with the zero register. 2626 unsigned OrChunks = 0, BFIChunks = 0; 2627 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 2628 if (((OrImm >> Shift) & 0xFFFF) != 0) 2629 ++OrChunks; 2630 if (((BFIImm >> Shift) & 0xFFFF) != 0) 2631 ++BFIChunks; 2632 } 2633 if (BFIChunks > OrChunks) 2634 return false; 2635 } 2636 2637 // Materialize the constant to be inserted. 2638 SDLoc DL(N); 2639 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; 2640 SDNode *MOVI = CurDAG->getMachineNode( 2641 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); 2642 2643 // Create the BFI/BFXIL instruction. 2644 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), 2645 CurDAG->getTargetConstant(ImmR, DL, VT), 2646 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2647 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2648 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2649 return true; 2650 } 2651 2652 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, 2653 SelectionDAG *CurDAG) { 2654 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2655 2656 EVT VT = N->getValueType(0); 2657 if (VT != MVT::i32 && VT != MVT::i64) 2658 return false; 2659 2660 unsigned BitWidth = VT.getSizeInBits(); 2661 2662 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 2663 // have the expected shape. Try to undo that. 2664 2665 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); 2666 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); 2667 2668 // Given a OR operation, check if we have the following pattern 2669 // ubfm c, b, imm, imm2 (or something that does the same jobs, see 2670 // isBitfieldExtractOp) 2671 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and 2672 // countTrailingZeros(mask2) == imm2 - imm + 1 2673 // f = d | c 2674 // if yes, replace the OR instruction with: 2675 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2 2676 2677 // OR is commutative, check all combinations of operand order and values of 2678 // BiggerPattern, i.e. 2679 // Opd0, Opd1, BiggerPattern=false 2680 // Opd1, Opd0, BiggerPattern=false 2681 // Opd0, Opd1, BiggerPattern=true 2682 // Opd1, Opd0, BiggerPattern=true 2683 // Several of these combinations may match, so check with BiggerPattern=false 2684 // first since that will produce better results by matching more instructions 2685 // and/or inserting fewer extra instructions. 2686 for (int I = 0; I < 4; ++I) { 2687 2688 SDValue Dst, Src; 2689 unsigned ImmR, ImmS; 2690 bool BiggerPattern = I / 2; 2691 SDValue OrOpd0Val = N->getOperand(I % 2); 2692 SDNode *OrOpd0 = OrOpd0Val.getNode(); 2693 SDValue OrOpd1Val = N->getOperand((I + 1) % 2); 2694 SDNode *OrOpd1 = OrOpd1Val.getNode(); 2695 2696 unsigned BFXOpc; 2697 int DstLSB, Width; 2698 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, 2699 NumberOfIgnoredLowBits, BiggerPattern)) { 2700 // Check that the returned opcode is compatible with the pattern, 2701 // i.e., same type and zero extended (U and not S) 2702 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || 2703 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) 2704 continue; 2705 2706 // Compute the width of the bitfield insertion 2707 DstLSB = 0; 2708 Width = ImmS - ImmR + 1; 2709 // FIXME: This constraint is to catch bitfield insertion we may 2710 // want to widen the pattern if we want to grab general bitfied 2711 // move case 2712 if (Width <= 0) 2713 continue; 2714 2715 // If the mask on the insertee is correct, we have a BFXIL operation. We 2716 // can share the ImmR and ImmS values from the already-computed UBFM. 2717 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, 2718 BiggerPattern, 2719 Src, DstLSB, Width)) { 2720 ImmR = (BitWidth - DstLSB) % BitWidth; 2721 ImmS = Width - 1; 2722 } else 2723 continue; 2724 2725 // Check the second part of the pattern 2726 EVT VT = OrOpd1Val.getValueType(); 2727 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); 2728 2729 // Compute the Known Zero for the candidate of the first operand. 2730 // This allows to catch more general case than just looking for 2731 // AND with imm. Indeed, simplify-demanded-bits may have removed 2732 // the AND instruction because it proves it was useless. 2733 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 2734 2735 // Check if there is enough room for the second operand to appear 2736 // in the first one 2737 APInt BitsToBeInserted = 2738 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 2739 2740 if ((BitsToBeInserted & ~Known.Zero) != 0) 2741 continue; 2742 2743 // Set the first operand 2744 uint64_t Imm; 2745 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 2746 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 2747 // In that case, we can eliminate the AND 2748 Dst = OrOpd1->getOperand(0); 2749 else 2750 // Maybe the AND has been removed by simplify-demanded-bits 2751 // or is useful because it discards more bits 2752 Dst = OrOpd1Val; 2753 2754 // both parts match 2755 SDLoc DL(N); 2756 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 2757 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2758 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2759 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2760 return true; 2761 } 2762 2763 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 2764 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 2765 // mask (e.g., 0x000ffff0). 2766 uint64_t Mask0Imm, Mask1Imm; 2767 SDValue And0 = N->getOperand(0); 2768 SDValue And1 = N->getOperand(1); 2769 if (And0.hasOneUse() && And1.hasOneUse() && 2770 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 2771 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 2772 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 2773 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 2774 2775 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 2776 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 2777 // bits to be inserted. 2778 if (isShiftedMask(Mask0Imm, VT)) { 2779 std::swap(And0, And1); 2780 std::swap(Mask0Imm, Mask1Imm); 2781 } 2782 2783 SDValue Src = And1->getOperand(0); 2784 SDValue Dst = And0->getOperand(0); 2785 unsigned LSB = countTrailingZeros(Mask1Imm); 2786 int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation(); 2787 2788 // The BFXIL inserts the low-order bits from a source register, so right 2789 // shift the needed bits into place. 2790 SDLoc DL(N); 2791 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2792 uint64_t LsrImm = LSB; 2793 if (Src->hasOneUse() && 2794 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) && 2795 (LsrImm + LSB) < BitWidth) { 2796 Src = Src->getOperand(0); 2797 LsrImm += LSB; 2798 } 2799 2800 SDNode *LSR = CurDAG->getMachineNode( 2801 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT), 2802 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 2803 2804 // BFXIL is an alias of BFM, so translate to BFM operands. 2805 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2806 unsigned ImmS = Width - 1; 2807 2808 // Create the BFXIL instruction. 2809 SDValue Ops[] = {Dst, SDValue(LSR, 0), 2810 CurDAG->getTargetConstant(ImmR, DL, VT), 2811 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2812 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2813 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2814 return true; 2815 } 2816 2817 return false; 2818 } 2819 2820 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 2821 if (N->getOpcode() != ISD::OR) 2822 return false; 2823 2824 APInt NUsefulBits; 2825 getUsefulBits(SDValue(N, 0), NUsefulBits); 2826 2827 // If all bits are not useful, just return UNDEF. 2828 if (!NUsefulBits) { 2829 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 2830 return true; 2831 } 2832 2833 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 2834 return true; 2835 2836 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 2837 } 2838 2839 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 2840 /// equivalent of a left shift by a constant amount followed by an and masking 2841 /// out a contiguous set of bits. 2842 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 2843 if (N->getOpcode() != ISD::AND) 2844 return false; 2845 2846 EVT VT = N->getValueType(0); 2847 if (VT != MVT::i32 && VT != MVT::i64) 2848 return false; 2849 2850 SDValue Op0; 2851 int DstLSB, Width; 2852 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 2853 Op0, DstLSB, Width)) 2854 return false; 2855 2856 // ImmR is the rotate right amount. 2857 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 2858 // ImmS is the most significant bit of the source to be moved. 2859 unsigned ImmS = Width - 1; 2860 2861 SDLoc DL(N); 2862 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 2863 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2864 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2865 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2866 return true; 2867 } 2868 2869 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 2870 /// variable shift/rotate instructions. 2871 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 2872 EVT VT = N->getValueType(0); 2873 2874 unsigned Opc; 2875 switch (N->getOpcode()) { 2876 case ISD::ROTR: 2877 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 2878 break; 2879 case ISD::SHL: 2880 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 2881 break; 2882 case ISD::SRL: 2883 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 2884 break; 2885 case ISD::SRA: 2886 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 2887 break; 2888 default: 2889 return false; 2890 } 2891 2892 uint64_t Size; 2893 uint64_t Bits; 2894 if (VT == MVT::i32) { 2895 Bits = 5; 2896 Size = 32; 2897 } else if (VT == MVT::i64) { 2898 Bits = 6; 2899 Size = 64; 2900 } else 2901 return false; 2902 2903 SDValue ShiftAmt = N->getOperand(1); 2904 SDLoc DL(N); 2905 SDValue NewShiftAmt; 2906 2907 // Skip over an extend of the shift amount. 2908 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 2909 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 2910 ShiftAmt = ShiftAmt->getOperand(0); 2911 2912 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 2913 SDValue Add0 = ShiftAmt->getOperand(0); 2914 SDValue Add1 = ShiftAmt->getOperand(1); 2915 uint64_t Add0Imm; 2916 uint64_t Add1Imm; 2917 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) { 2918 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 2919 // to avoid the ADD/SUB. 2920 NewShiftAmt = Add0; 2921 } else if (ShiftAmt->getOpcode() == ISD::SUB && 2922 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 2923 (Add0Imm % Size == 0)) { 2924 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X 2925 // to generate a NEG instead of a SUB from a constant. 2926 unsigned NegOpc; 2927 unsigned ZeroReg; 2928 EVT SubVT = ShiftAmt->getValueType(0); 2929 if (SubVT == MVT::i32) { 2930 NegOpc = AArch64::SUBWrr; 2931 ZeroReg = AArch64::WZR; 2932 } else { 2933 assert(SubVT == MVT::i64); 2934 NegOpc = AArch64::SUBXrr; 2935 ZeroReg = AArch64::XZR; 2936 } 2937 SDValue Zero = 2938 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 2939 MachineSDNode *Neg = 2940 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 2941 NewShiftAmt = SDValue(Neg, 0); 2942 } else if (ShiftAmt->getOpcode() == ISD::SUB && 2943 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) { 2944 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X 2945 // to generate a NOT instead of a SUB from a constant. 2946 unsigned NotOpc; 2947 unsigned ZeroReg; 2948 EVT SubVT = ShiftAmt->getValueType(0); 2949 if (SubVT == MVT::i32) { 2950 NotOpc = AArch64::ORNWrr; 2951 ZeroReg = AArch64::WZR; 2952 } else { 2953 assert(SubVT == MVT::i64); 2954 NotOpc = AArch64::ORNXrr; 2955 ZeroReg = AArch64::XZR; 2956 } 2957 SDValue Zero = 2958 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 2959 MachineSDNode *Not = 2960 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1); 2961 NewShiftAmt = SDValue(Not, 0); 2962 } else 2963 return false; 2964 } else { 2965 // If the shift amount is masked with an AND, check that the mask covers the 2966 // bits that are implicitly ANDed off by the above opcodes and if so, skip 2967 // the AND. 2968 uint64_t MaskImm; 2969 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && 2970 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) 2971 return false; 2972 2973 if (countTrailingOnes(MaskImm) < Bits) 2974 return false; 2975 2976 NewShiftAmt = ShiftAmt->getOperand(0); 2977 } 2978 2979 // Narrow/widen the shift amount to match the size of the shift operation. 2980 if (VT == MVT::i32) 2981 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 2982 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 2983 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 2984 MachineSDNode *Ext = CurDAG->getMachineNode( 2985 AArch64::SUBREG_TO_REG, DL, VT, 2986 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 2987 NewShiftAmt = SDValue(Ext, 0); 2988 } 2989 2990 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 2991 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2992 return true; 2993 } 2994 2995 bool 2996 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 2997 unsigned RegWidth) { 2998 APFloat FVal(0.0); 2999 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 3000 FVal = CN->getValueAPF(); 3001 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 3002 // Some otherwise illegal constants are allowed in this case. 3003 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || 3004 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) 3005 return false; 3006 3007 ConstantPoolSDNode *CN = 3008 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); 3009 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); 3010 } else 3011 return false; 3012 3013 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits 3014 // is between 1 and 32 for a destination w-register, or 1 and 64 for an 3015 // x-register. 3016 // 3017 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we 3018 // want THIS_NODE to be 2^fbits. This is much easier to deal with using 3019 // integers. 3020 bool IsExact; 3021 3022 // fbits is between 1 and 64 in the worst-case, which means the fmul 3023 // could have 2^64 as an actual operand. Need 65 bits of precision. 3024 APSInt IntVal(65, true); 3025 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); 3026 3027 // N.b. isPowerOf2 also checks for > 0. 3028 if (!IsExact || !IntVal.isPowerOf2()) return false; 3029 unsigned FBits = IntVal.logBase2(); 3030 3031 // Checks above should have guaranteed that we haven't lost information in 3032 // finding FBits, but it must still be in range. 3033 if (FBits == 0 || FBits > RegWidth) return false; 3034 3035 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32); 3036 return true; 3037 } 3038 3039 // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields 3040 // of the string and obtains the integer values from them and combines these 3041 // into a single value to be used in the MRS/MSR instruction. 3042 static int getIntOperandFromRegisterString(StringRef RegString) { 3043 SmallVector<StringRef, 5> Fields; 3044 RegString.split(Fields, ':'); 3045 3046 if (Fields.size() == 1) 3047 return -1; 3048 3049 assert(Fields.size() == 5 3050 && "Invalid number of fields in read register string"); 3051 3052 SmallVector<int, 5> Ops; 3053 bool AllIntFields = true; 3054 3055 for (StringRef Field : Fields) { 3056 unsigned IntField; 3057 AllIntFields &= !Field.getAsInteger(10, IntField); 3058 Ops.push_back(IntField); 3059 } 3060 3061 assert(AllIntFields && 3062 "Unexpected non-integer value in special register string."); 3063 (void)AllIntFields; 3064 3065 // Need to combine the integer fields of the string into a single value 3066 // based on the bit encoding of MRS/MSR instruction. 3067 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | 3068 (Ops[3] << 3) | (Ops[4]); 3069 } 3070 3071 // Lower the read_register intrinsic to an MRS instruction node if the special 3072 // register string argument is either of the form detailed in the ALCE (the 3073 // form described in getIntOperandsFromRegsterString) or is a named register 3074 // known by the MRS SysReg mapper. 3075 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) { 3076 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1)); 3077 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0)); 3078 SDLoc DL(N); 3079 3080 int Reg = getIntOperandFromRegisterString(RegString->getString()); 3081 if (Reg != -1) { 3082 ReplaceNode(N, CurDAG->getMachineNode( 3083 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other, 3084 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3085 N->getOperand(0))); 3086 return true; 3087 } 3088 3089 // Use the sysreg mapper to map the remaining possible strings to the 3090 // value for the register to be used for the instruction operand. 3091 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 3092 if (TheReg && TheReg->Readable && 3093 TheReg->haveFeatures(Subtarget->getFeatureBits())) 3094 Reg = TheReg->Encoding; 3095 else 3096 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 3097 3098 if (Reg != -1) { 3099 ReplaceNode(N, CurDAG->getMachineNode( 3100 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other, 3101 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3102 N->getOperand(0))); 3103 return true; 3104 } 3105 3106 if (RegString->getString() == "pc") { 3107 ReplaceNode(N, CurDAG->getMachineNode( 3108 AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other, 3109 CurDAG->getTargetConstant(0, DL, MVT::i32), 3110 N->getOperand(0))); 3111 return true; 3112 } 3113 3114 return false; 3115 } 3116 3117 // Lower the write_register intrinsic to an MSR instruction node if the special 3118 // register string argument is either of the form detailed in the ALCE (the 3119 // form described in getIntOperandsFromRegsterString) or is a named register 3120 // known by the MSR SysReg mapper. 3121 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { 3122 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1)); 3123 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0)); 3124 SDLoc DL(N); 3125 3126 int Reg = getIntOperandFromRegisterString(RegString->getString()); 3127 if (Reg != -1) { 3128 ReplaceNode( 3129 N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, 3130 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3131 N->getOperand(2), N->getOperand(0))); 3132 return true; 3133 } 3134 3135 // Check if the register was one of those allowed as the pstatefield value in 3136 // the MSR (immediate) instruction. To accept the values allowed in the 3137 // pstatefield for the MSR (immediate) instruction, we also require that an 3138 // immediate value has been provided as an argument, we know that this is 3139 // the case as it has been ensured by semantic checking. 3140 auto PMapper = AArch64PState::lookupPStateByName(RegString->getString()); 3141 if (PMapper) { 3142 assert (isa<ConstantSDNode>(N->getOperand(2)) 3143 && "Expected a constant integer expression."); 3144 unsigned Reg = PMapper->Encoding; 3145 uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 3146 unsigned State; 3147 if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) { 3148 assert(Immed < 2 && "Bad imm"); 3149 State = AArch64::MSRpstateImm1; 3150 } else { 3151 assert(Immed < 16 && "Bad imm"); 3152 State = AArch64::MSRpstateImm4; 3153 } 3154 ReplaceNode(N, CurDAG->getMachineNode( 3155 State, DL, MVT::Other, 3156 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3157 CurDAG->getTargetConstant(Immed, DL, MVT::i16), 3158 N->getOperand(0))); 3159 return true; 3160 } 3161 3162 // Use the sysreg mapper to attempt to map the remaining possible strings 3163 // to the value for the register to be used for the MSR (register) 3164 // instruction operand. 3165 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 3166 if (TheReg && TheReg->Writeable && 3167 TheReg->haveFeatures(Subtarget->getFeatureBits())) 3168 Reg = TheReg->Encoding; 3169 else 3170 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 3171 if (Reg != -1) { 3172 ReplaceNode(N, CurDAG->getMachineNode( 3173 AArch64::MSR, DL, MVT::Other, 3174 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3175 N->getOperand(2), N->getOperand(0))); 3176 return true; 3177 } 3178 3179 return false; 3180 } 3181 3182 /// We've got special pseudo-instructions for these 3183 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3184 unsigned Opcode; 3185 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3186 3187 // Leave IR for LSE if subtarget supports it. 3188 if (Subtarget->hasLSE()) return false; 3189 3190 if (MemTy == MVT::i8) 3191 Opcode = AArch64::CMP_SWAP_8; 3192 else if (MemTy == MVT::i16) 3193 Opcode = AArch64::CMP_SWAP_16; 3194 else if (MemTy == MVT::i32) 3195 Opcode = AArch64::CMP_SWAP_32; 3196 else if (MemTy == MVT::i64) 3197 Opcode = AArch64::CMP_SWAP_64; 3198 else 3199 llvm_unreachable("Unknown AtomicCmpSwap type"); 3200 3201 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32; 3202 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3203 N->getOperand(0)}; 3204 SDNode *CmpSwap = CurDAG->getMachineNode( 3205 Opcode, SDLoc(N), 3206 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 3207 3208 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3209 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3210 3211 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3212 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3213 CurDAG->RemoveDeadNode(N); 3214 3215 return true; 3216 } 3217 3218 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, 3219 SDValue &Shift) { 3220 if (!isa<ConstantSDNode>(N)) 3221 return false; 3222 3223 SDLoc DL(N); 3224 uint64_t Val = cast<ConstantSDNode>(N) 3225 ->getAPIntValue() 3226 .trunc(VT.getFixedSizeInBits()) 3227 .getZExtValue(); 3228 3229 switch (VT.SimpleTy) { 3230 case MVT::i8: 3231 // All immediates are supported. 3232 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3233 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 3234 return true; 3235 case MVT::i16: 3236 case MVT::i32: 3237 case MVT::i64: 3238 // Support 8bit unsigned immediates. 3239 if (Val <= 255) { 3240 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3241 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 3242 return true; 3243 } 3244 // Support 16bit unsigned immediates that are a multiple of 256. 3245 if (Val <= 65280 && Val % 256 == 0) { 3246 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3247 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); 3248 return true; 3249 } 3250 break; 3251 default: 3252 break; 3253 } 3254 3255 return false; 3256 } 3257 3258 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, 3259 SDValue &Shift) { 3260 if (!isa<ConstantSDNode>(N)) 3261 return false; 3262 3263 SDLoc DL(N); 3264 int64_t Val = cast<ConstantSDNode>(N) 3265 ->getAPIntValue() 3266 .trunc(VT.getFixedSizeInBits()) 3267 .getSExtValue(); 3268 3269 switch (VT.SimpleTy) { 3270 case MVT::i8: 3271 // All immediates are supported. 3272 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3273 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 3274 return true; 3275 case MVT::i16: 3276 case MVT::i32: 3277 case MVT::i64: 3278 // Support 8bit signed immediates. 3279 if (Val >= -128 && Val <= 127) { 3280 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3281 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 3282 return true; 3283 } 3284 // Support 16bit signed immediates that are a multiple of 256. 3285 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { 3286 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3287 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32); 3288 return true; 3289 } 3290 break; 3291 default: 3292 break; 3293 } 3294 3295 return false; 3296 } 3297 3298 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { 3299 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3300 int64_t ImmVal = CNode->getSExtValue(); 3301 SDLoc DL(N); 3302 if (ImmVal >= -128 && ImmVal < 128) { 3303 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3304 return true; 3305 } 3306 } 3307 return false; 3308 } 3309 3310 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { 3311 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3312 uint64_t ImmVal = CNode->getZExtValue(); 3313 3314 switch (VT.SimpleTy) { 3315 case MVT::i8: 3316 ImmVal &= 0xFF; 3317 break; 3318 case MVT::i16: 3319 ImmVal &= 0xFFFF; 3320 break; 3321 case MVT::i32: 3322 ImmVal &= 0xFFFFFFFF; 3323 break; 3324 case MVT::i64: 3325 break; 3326 default: 3327 llvm_unreachable("Unexpected type"); 3328 } 3329 3330 if (ImmVal < 256) { 3331 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3332 return true; 3333 } 3334 } 3335 return false; 3336 } 3337 3338 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, 3339 bool Invert) { 3340 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3341 uint64_t ImmVal = CNode->getZExtValue(); 3342 SDLoc DL(N); 3343 3344 if (Invert) 3345 ImmVal = ~ImmVal; 3346 3347 // Shift mask depending on type size. 3348 switch (VT.SimpleTy) { 3349 case MVT::i8: 3350 ImmVal &= 0xFF; 3351 ImmVal |= ImmVal << 8; 3352 ImmVal |= ImmVal << 16; 3353 ImmVal |= ImmVal << 32; 3354 break; 3355 case MVT::i16: 3356 ImmVal &= 0xFFFF; 3357 ImmVal |= ImmVal << 16; 3358 ImmVal |= ImmVal << 32; 3359 break; 3360 case MVT::i32: 3361 ImmVal &= 0xFFFFFFFF; 3362 ImmVal |= ImmVal << 32; 3363 break; 3364 case MVT::i64: 3365 break; 3366 default: 3367 llvm_unreachable("Unexpected type"); 3368 } 3369 3370 uint64_t encoding; 3371 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { 3372 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); 3373 return true; 3374 } 3375 } 3376 return false; 3377 } 3378 3379 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. 3380 // Rather than attempt to normalise everything we can sometimes saturate the 3381 // shift amount during selection. This function also allows for consistent 3382 // isel patterns by ensuring the resulting "Imm" node is of the i32 type 3383 // required by the instructions. 3384 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, 3385 uint64_t High, bool AllowSaturation, 3386 SDValue &Imm) { 3387 if (auto *CN = dyn_cast<ConstantSDNode>(N)) { 3388 uint64_t ImmVal = CN->getZExtValue(); 3389 3390 // Reject shift amounts that are too small. 3391 if (ImmVal < Low) 3392 return false; 3393 3394 // Reject or saturate shift amounts that are too big. 3395 if (ImmVal > High) { 3396 if (!AllowSaturation) 3397 return false; 3398 ImmVal = High; 3399 } 3400 3401 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3402 return true; 3403 } 3404 3405 return false; 3406 } 3407 3408 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 3409 // tagp(FrameIndex, IRGstack, tag_offset): 3410 // since the offset between FrameIndex and IRGstack is a compile-time 3411 // constant, this can be lowered to a single ADDG instruction. 3412 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 3413 return false; 3414 } 3415 3416 SDValue IRG_SP = N->getOperand(2); 3417 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 3418 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != 3419 Intrinsic::aarch64_irg_sp) { 3420 return false; 3421 } 3422 3423 const TargetLowering *TLI = getTargetLowering(); 3424 SDLoc DL(N); 3425 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 3426 SDValue FiOp = CurDAG->getTargetFrameIndex( 3427 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3428 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3429 3430 SDNode *Out = CurDAG->getMachineNode( 3431 AArch64::TAGPstack, DL, MVT::i64, 3432 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 3433 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3434 ReplaceNode(N, Out); 3435 return true; 3436 } 3437 3438 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 3439 assert(isa<ConstantSDNode>(N->getOperand(3)) && 3440 "llvm.aarch64.tagp third argument must be an immediate"); 3441 if (trySelectStackSlotTagP(N)) 3442 return; 3443 // FIXME: above applies in any case when offset between Op1 and Op2 is a 3444 // compile-time constant, not just for stack allocations. 3445 3446 // General case for unrelated pointers in Op1 and Op2. 3447 SDLoc DL(N); 3448 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3449 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 3450 {N->getOperand(1), N->getOperand(2)}); 3451 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 3452 {SDValue(N1, 0), N->getOperand(2)}); 3453 SDNode *N3 = CurDAG->getMachineNode( 3454 AArch64::ADDG, DL, MVT::i64, 3455 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 3456 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3457 ReplaceNode(N, N3); 3458 } 3459 3460 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length 3461 // vector types larger than NEON don't have a matching SubRegIndex. 3462 static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) { 3463 assert(V.getValueType().isScalableVector() && 3464 V.getValueType().getSizeInBits().getKnownMinSize() == 3465 AArch64::SVEBitsPerBlock && 3466 "Expected to extract from a packed scalable vector!"); 3467 assert(VT.isFixedLengthVector() && 3468 "Expected to extract a fixed length vector!"); 3469 3470 SDLoc DL(V); 3471 switch (VT.getSizeInBits()) { 3472 case 64: { 3473 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); 3474 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg); 3475 } 3476 case 128: { 3477 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32); 3478 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg); 3479 } 3480 default: { 3481 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 3482 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 3483 } 3484 } 3485 } 3486 3487 // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length 3488 // vector types larger than NEON don't have a matching SubRegIndex. 3489 static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) { 3490 assert(VT.isScalableVector() && 3491 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock && 3492 "Expected to insert into a packed scalable vector!"); 3493 assert(V.getValueType().isFixedLengthVector() && 3494 "Expected to insert a fixed length vector!"); 3495 3496 SDLoc DL(V); 3497 switch (V.getValueType().getSizeInBits()) { 3498 case 64: { 3499 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); 3500 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 3501 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, 3502 SDValue(Container, 0), V, SubReg); 3503 } 3504 case 128: { 3505 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32); 3506 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 3507 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, 3508 SDValue(Container, 0), V, SubReg); 3509 } 3510 default: { 3511 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 3512 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 3513 } 3514 } 3515 } 3516 3517 void AArch64DAGToDAGISel::Select(SDNode *Node) { 3518 // If we have a custom node, we already have selected! 3519 if (Node->isMachineOpcode()) { 3520 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 3521 Node->setNodeId(-1); 3522 return; 3523 } 3524 3525 // Few custom selection stuff. 3526 EVT VT = Node->getValueType(0); 3527 3528 switch (Node->getOpcode()) { 3529 default: 3530 break; 3531 3532 case ISD::ATOMIC_CMP_SWAP: 3533 if (SelectCMP_SWAP(Node)) 3534 return; 3535 break; 3536 3537 case ISD::READ_REGISTER: 3538 if (tryReadRegister(Node)) 3539 return; 3540 break; 3541 3542 case ISD::WRITE_REGISTER: 3543 if (tryWriteRegister(Node)) 3544 return; 3545 break; 3546 3547 case ISD::ADD: 3548 if (tryMLAV64LaneV128(Node)) 3549 return; 3550 break; 3551 3552 case ISD::LOAD: { 3553 // Try to select as an indexed load. Fall through to normal processing 3554 // if we can't. 3555 if (tryIndexedLoad(Node)) 3556 return; 3557 break; 3558 } 3559 3560 case ISD::SRL: 3561 case ISD::AND: 3562 case ISD::SRA: 3563 case ISD::SIGN_EXTEND_INREG: 3564 if (tryBitfieldExtractOp(Node)) 3565 return; 3566 if (tryBitfieldInsertInZeroOp(Node)) 3567 return; 3568 LLVM_FALLTHROUGH; 3569 case ISD::ROTR: 3570 case ISD::SHL: 3571 if (tryShiftAmountMod(Node)) 3572 return; 3573 break; 3574 3575 case ISD::SIGN_EXTEND: 3576 if (tryBitfieldExtractOpFromSExt(Node)) 3577 return; 3578 break; 3579 3580 case ISD::FP_EXTEND: 3581 if (tryHighFPExt(Node)) 3582 return; 3583 break; 3584 3585 case ISD::OR: 3586 if (tryBitfieldInsertOp(Node)) 3587 return; 3588 break; 3589 3590 case ISD::EXTRACT_SUBVECTOR: { 3591 // Bail when not a "cast" like extract_subvector. 3592 if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0) 3593 break; 3594 3595 // Bail when normal isel can do the job. 3596 EVT InVT = Node->getOperand(0).getValueType(); 3597 if (VT.isScalableVector() || InVT.isFixedLengthVector()) 3598 break; 3599 3600 // NOTE: We can only get here when doing fixed length SVE code generation. 3601 // We do manual selection because the types involved are not linked to real 3602 // registers (despite being legal) and must be coerced into SVE registers. 3603 // 3604 // NOTE: If the above changes, be aware that selection will still not work 3605 // because the td definition of extract_vector does not support extracting 3606 // a fixed length vector from a scalable vector. 3607 3608 ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0))); 3609 return; 3610 } 3611 3612 case ISD::INSERT_SUBVECTOR: { 3613 // Bail when not a "cast" like insert_subvector. 3614 if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0) 3615 break; 3616 if (!Node->getOperand(0).isUndef()) 3617 break; 3618 3619 // Bail when normal isel should do the job. 3620 EVT InVT = Node->getOperand(1).getValueType(); 3621 if (VT.isFixedLengthVector() || InVT.isScalableVector()) 3622 break; 3623 3624 // NOTE: We can only get here when doing fixed length SVE code generation. 3625 // We do manual selection because the types involved are not linked to real 3626 // registers (despite being legal) and must be coerced into SVE registers. 3627 // 3628 // NOTE: If the above changes, be aware that selection will still not work 3629 // because the td definition of insert_vector does not support inserting a 3630 // fixed length vector into a scalable vector. 3631 3632 ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1))); 3633 return; 3634 } 3635 3636 case ISD::Constant: { 3637 // Materialize zero constants as copies from WZR/XZR. This allows 3638 // the coalescer to propagate these into other instructions. 3639 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 3640 if (ConstNode->isZero()) { 3641 if (VT == MVT::i32) { 3642 SDValue New = CurDAG->getCopyFromReg( 3643 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 3644 ReplaceNode(Node, New.getNode()); 3645 return; 3646 } else if (VT == MVT::i64) { 3647 SDValue New = CurDAG->getCopyFromReg( 3648 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 3649 ReplaceNode(Node, New.getNode()); 3650 return; 3651 } 3652 } 3653 break; 3654 } 3655 3656 case ISD::FrameIndex: { 3657 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 3658 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 3659 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 3660 const TargetLowering *TLI = getTargetLowering(); 3661 SDValue TFI = CurDAG->getTargetFrameIndex( 3662 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3663 SDLoc DL(Node); 3664 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 3665 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 3666 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 3667 return; 3668 } 3669 case ISD::INTRINSIC_W_CHAIN: { 3670 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 3671 switch (IntNo) { 3672 default: 3673 break; 3674 case Intrinsic::aarch64_ldaxp: 3675 case Intrinsic::aarch64_ldxp: { 3676 unsigned Op = 3677 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 3678 SDValue MemAddr = Node->getOperand(2); 3679 SDLoc DL(Node); 3680 SDValue Chain = Node->getOperand(0); 3681 3682 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 3683 MVT::Other, MemAddr, Chain); 3684 3685 // Transfer memoperands. 3686 MachineMemOperand *MemOp = 3687 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3688 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 3689 ReplaceNode(Node, Ld); 3690 return; 3691 } 3692 case Intrinsic::aarch64_stlxp: 3693 case Intrinsic::aarch64_stxp: { 3694 unsigned Op = 3695 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; 3696 SDLoc DL(Node); 3697 SDValue Chain = Node->getOperand(0); 3698 SDValue ValLo = Node->getOperand(2); 3699 SDValue ValHi = Node->getOperand(3); 3700 SDValue MemAddr = Node->getOperand(4); 3701 3702 // Place arguments in the right order. 3703 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 3704 3705 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 3706 // Transfer memoperands. 3707 MachineMemOperand *MemOp = 3708 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3709 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 3710 3711 ReplaceNode(Node, St); 3712 return; 3713 } 3714 case Intrinsic::aarch64_neon_ld1x2: 3715 if (VT == MVT::v8i8) { 3716 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 3717 return; 3718 } else if (VT == MVT::v16i8) { 3719 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 3720 return; 3721 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3722 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 3723 return; 3724 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3725 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 3726 return; 3727 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3728 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 3729 return; 3730 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3731 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 3732 return; 3733 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3734 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3735 return; 3736 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3737 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 3738 return; 3739 } 3740 break; 3741 case Intrinsic::aarch64_neon_ld1x3: 3742 if (VT == MVT::v8i8) { 3743 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 3744 return; 3745 } else if (VT == MVT::v16i8) { 3746 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 3747 return; 3748 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3749 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 3750 return; 3751 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3752 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 3753 return; 3754 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3755 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 3756 return; 3757 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3758 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 3759 return; 3760 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3761 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3762 return; 3763 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3764 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 3765 return; 3766 } 3767 break; 3768 case Intrinsic::aarch64_neon_ld1x4: 3769 if (VT == MVT::v8i8) { 3770 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 3771 return; 3772 } else if (VT == MVT::v16i8) { 3773 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 3774 return; 3775 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3776 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 3777 return; 3778 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3779 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 3780 return; 3781 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3782 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 3783 return; 3784 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3785 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 3786 return; 3787 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3788 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3789 return; 3790 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3791 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 3792 return; 3793 } 3794 break; 3795 case Intrinsic::aarch64_neon_ld2: 3796 if (VT == MVT::v8i8) { 3797 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 3798 return; 3799 } else if (VT == MVT::v16i8) { 3800 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 3801 return; 3802 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3803 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 3804 return; 3805 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3806 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 3807 return; 3808 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3809 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 3810 return; 3811 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3812 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 3813 return; 3814 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3815 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3816 return; 3817 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3818 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 3819 return; 3820 } 3821 break; 3822 case Intrinsic::aarch64_neon_ld3: 3823 if (VT == MVT::v8i8) { 3824 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 3825 return; 3826 } else if (VT == MVT::v16i8) { 3827 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 3828 return; 3829 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3830 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 3831 return; 3832 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3833 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 3834 return; 3835 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3836 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 3837 return; 3838 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3839 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 3840 return; 3841 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3842 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3843 return; 3844 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3845 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 3846 return; 3847 } 3848 break; 3849 case Intrinsic::aarch64_neon_ld4: 3850 if (VT == MVT::v8i8) { 3851 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 3852 return; 3853 } else if (VT == MVT::v16i8) { 3854 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 3855 return; 3856 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3857 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 3858 return; 3859 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3860 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 3861 return; 3862 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3863 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 3864 return; 3865 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3866 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 3867 return; 3868 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3869 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3870 return; 3871 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3872 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 3873 return; 3874 } 3875 break; 3876 case Intrinsic::aarch64_neon_ld2r: 3877 if (VT == MVT::v8i8) { 3878 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 3879 return; 3880 } else if (VT == MVT::v16i8) { 3881 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 3882 return; 3883 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3884 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 3885 return; 3886 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3887 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 3888 return; 3889 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3890 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 3891 return; 3892 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3893 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 3894 return; 3895 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3896 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 3897 return; 3898 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3899 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 3900 return; 3901 } 3902 break; 3903 case Intrinsic::aarch64_neon_ld3r: 3904 if (VT == MVT::v8i8) { 3905 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 3906 return; 3907 } else if (VT == MVT::v16i8) { 3908 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 3909 return; 3910 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3911 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 3912 return; 3913 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3914 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 3915 return; 3916 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3917 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 3918 return; 3919 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3920 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 3921 return; 3922 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3923 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 3924 return; 3925 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3926 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 3927 return; 3928 } 3929 break; 3930 case Intrinsic::aarch64_neon_ld4r: 3931 if (VT == MVT::v8i8) { 3932 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 3933 return; 3934 } else if (VT == MVT::v16i8) { 3935 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 3936 return; 3937 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3938 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 3939 return; 3940 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3941 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 3942 return; 3943 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3944 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 3945 return; 3946 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3947 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 3948 return; 3949 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3950 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 3951 return; 3952 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3953 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 3954 return; 3955 } 3956 break; 3957 case Intrinsic::aarch64_neon_ld2lane: 3958 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3959 SelectLoadLane(Node, 2, AArch64::LD2i8); 3960 return; 3961 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3962 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3963 SelectLoadLane(Node, 2, AArch64::LD2i16); 3964 return; 3965 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3966 VT == MVT::v2f32) { 3967 SelectLoadLane(Node, 2, AArch64::LD2i32); 3968 return; 3969 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3970 VT == MVT::v1f64) { 3971 SelectLoadLane(Node, 2, AArch64::LD2i64); 3972 return; 3973 } 3974 break; 3975 case Intrinsic::aarch64_neon_ld3lane: 3976 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3977 SelectLoadLane(Node, 3, AArch64::LD3i8); 3978 return; 3979 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3980 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3981 SelectLoadLane(Node, 3, AArch64::LD3i16); 3982 return; 3983 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3984 VT == MVT::v2f32) { 3985 SelectLoadLane(Node, 3, AArch64::LD3i32); 3986 return; 3987 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3988 VT == MVT::v1f64) { 3989 SelectLoadLane(Node, 3, AArch64::LD3i64); 3990 return; 3991 } 3992 break; 3993 case Intrinsic::aarch64_neon_ld4lane: 3994 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3995 SelectLoadLane(Node, 4, AArch64::LD4i8); 3996 return; 3997 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3998 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3999 SelectLoadLane(Node, 4, AArch64::LD4i16); 4000 return; 4001 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4002 VT == MVT::v2f32) { 4003 SelectLoadLane(Node, 4, AArch64::LD4i32); 4004 return; 4005 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4006 VT == MVT::v1f64) { 4007 SelectLoadLane(Node, 4, AArch64::LD4i64); 4008 return; 4009 } 4010 break; 4011 case Intrinsic::aarch64_ld64b: 4012 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); 4013 return; 4014 case Intrinsic::aarch64_sve_ld2_sret: { 4015 if (VT == MVT::nxv16i8) { 4016 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, 4017 true); 4018 return; 4019 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4020 VT == MVT::nxv8bf16) { 4021 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, 4022 true); 4023 return; 4024 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4025 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W, 4026 true); 4027 return; 4028 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4029 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D, 4030 true); 4031 return; 4032 } 4033 break; 4034 } 4035 case Intrinsic::aarch64_sve_ld3_sret: { 4036 if (VT == MVT::nxv16i8) { 4037 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, 4038 true); 4039 return; 4040 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4041 VT == MVT::nxv8bf16) { 4042 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H, 4043 true); 4044 return; 4045 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4046 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W, 4047 true); 4048 return; 4049 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4050 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D, 4051 true); 4052 return; 4053 } 4054 break; 4055 } 4056 case Intrinsic::aarch64_sve_ld4_sret: { 4057 if (VT == MVT::nxv16i8) { 4058 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B, 4059 true); 4060 return; 4061 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4062 VT == MVT::nxv8bf16) { 4063 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, 4064 true); 4065 return; 4066 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4067 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W, 4068 true); 4069 return; 4070 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4071 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D, 4072 true); 4073 return; 4074 } 4075 break; 4076 } 4077 case Intrinsic::swift_async_context_addr: { 4078 SDLoc DL(Node); 4079 SDValue Chain = Node->getOperand(0); 4080 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64); 4081 SDValue Res = SDValue( 4082 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP, 4083 CurDAG->getTargetConstant(8, DL, MVT::i32), 4084 CurDAG->getTargetConstant(0, DL, MVT::i32)), 4085 0); 4086 ReplaceUses(SDValue(Node, 0), Res); 4087 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1)); 4088 CurDAG->RemoveDeadNode(Node); 4089 4090 auto &MF = CurDAG->getMachineFunction(); 4091 MF.getFrameInfo().setFrameAddressIsTaken(true); 4092 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); 4093 return; 4094 } 4095 } 4096 } break; 4097 case ISD::INTRINSIC_WO_CHAIN: { 4098 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 4099 switch (IntNo) { 4100 default: 4101 break; 4102 case Intrinsic::aarch64_tagp: 4103 SelectTagP(Node); 4104 return; 4105 case Intrinsic::aarch64_neon_tbl2: 4106 SelectTable(Node, 2, 4107 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, 4108 false); 4109 return; 4110 case Intrinsic::aarch64_neon_tbl3: 4111 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 4112 : AArch64::TBLv16i8Three, 4113 false); 4114 return; 4115 case Intrinsic::aarch64_neon_tbl4: 4116 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 4117 : AArch64::TBLv16i8Four, 4118 false); 4119 return; 4120 case Intrinsic::aarch64_neon_tbx2: 4121 SelectTable(Node, 2, 4122 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, 4123 true); 4124 return; 4125 case Intrinsic::aarch64_neon_tbx3: 4126 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 4127 : AArch64::TBXv16i8Three, 4128 true); 4129 return; 4130 case Intrinsic::aarch64_neon_tbx4: 4131 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four 4132 : AArch64::TBXv16i8Four, 4133 true); 4134 return; 4135 case Intrinsic::aarch64_neon_smull: 4136 case Intrinsic::aarch64_neon_umull: 4137 if (tryMULLV64LaneV128(IntNo, Node)) 4138 return; 4139 break; 4140 } 4141 break; 4142 } 4143 case ISD::INTRINSIC_VOID: { 4144 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 4145 if (Node->getNumOperands() >= 3) 4146 VT = Node->getOperand(2)->getValueType(0); 4147 switch (IntNo) { 4148 default: 4149 break; 4150 case Intrinsic::aarch64_neon_st1x2: { 4151 if (VT == MVT::v8i8) { 4152 SelectStore(Node, 2, AArch64::ST1Twov8b); 4153 return; 4154 } else if (VT == MVT::v16i8) { 4155 SelectStore(Node, 2, AArch64::ST1Twov16b); 4156 return; 4157 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4158 VT == MVT::v4bf16) { 4159 SelectStore(Node, 2, AArch64::ST1Twov4h); 4160 return; 4161 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4162 VT == MVT::v8bf16) { 4163 SelectStore(Node, 2, AArch64::ST1Twov8h); 4164 return; 4165 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4166 SelectStore(Node, 2, AArch64::ST1Twov2s); 4167 return; 4168 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4169 SelectStore(Node, 2, AArch64::ST1Twov4s); 4170 return; 4171 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4172 SelectStore(Node, 2, AArch64::ST1Twov2d); 4173 return; 4174 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4175 SelectStore(Node, 2, AArch64::ST1Twov1d); 4176 return; 4177 } 4178 break; 4179 } 4180 case Intrinsic::aarch64_neon_st1x3: { 4181 if (VT == MVT::v8i8) { 4182 SelectStore(Node, 3, AArch64::ST1Threev8b); 4183 return; 4184 } else if (VT == MVT::v16i8) { 4185 SelectStore(Node, 3, AArch64::ST1Threev16b); 4186 return; 4187 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4188 VT == MVT::v4bf16) { 4189 SelectStore(Node, 3, AArch64::ST1Threev4h); 4190 return; 4191 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4192 VT == MVT::v8bf16) { 4193 SelectStore(Node, 3, AArch64::ST1Threev8h); 4194 return; 4195 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4196 SelectStore(Node, 3, AArch64::ST1Threev2s); 4197 return; 4198 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4199 SelectStore(Node, 3, AArch64::ST1Threev4s); 4200 return; 4201 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4202 SelectStore(Node, 3, AArch64::ST1Threev2d); 4203 return; 4204 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4205 SelectStore(Node, 3, AArch64::ST1Threev1d); 4206 return; 4207 } 4208 break; 4209 } 4210 case Intrinsic::aarch64_neon_st1x4: { 4211 if (VT == MVT::v8i8) { 4212 SelectStore(Node, 4, AArch64::ST1Fourv8b); 4213 return; 4214 } else if (VT == MVT::v16i8) { 4215 SelectStore(Node, 4, AArch64::ST1Fourv16b); 4216 return; 4217 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4218 VT == MVT::v4bf16) { 4219 SelectStore(Node, 4, AArch64::ST1Fourv4h); 4220 return; 4221 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4222 VT == MVT::v8bf16) { 4223 SelectStore(Node, 4, AArch64::ST1Fourv8h); 4224 return; 4225 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4226 SelectStore(Node, 4, AArch64::ST1Fourv2s); 4227 return; 4228 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4229 SelectStore(Node, 4, AArch64::ST1Fourv4s); 4230 return; 4231 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4232 SelectStore(Node, 4, AArch64::ST1Fourv2d); 4233 return; 4234 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4235 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4236 return; 4237 } 4238 break; 4239 } 4240 case Intrinsic::aarch64_neon_st2: { 4241 if (VT == MVT::v8i8) { 4242 SelectStore(Node, 2, AArch64::ST2Twov8b); 4243 return; 4244 } else if (VT == MVT::v16i8) { 4245 SelectStore(Node, 2, AArch64::ST2Twov16b); 4246 return; 4247 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4248 VT == MVT::v4bf16) { 4249 SelectStore(Node, 2, AArch64::ST2Twov4h); 4250 return; 4251 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4252 VT == MVT::v8bf16) { 4253 SelectStore(Node, 2, AArch64::ST2Twov8h); 4254 return; 4255 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4256 SelectStore(Node, 2, AArch64::ST2Twov2s); 4257 return; 4258 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4259 SelectStore(Node, 2, AArch64::ST2Twov4s); 4260 return; 4261 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4262 SelectStore(Node, 2, AArch64::ST2Twov2d); 4263 return; 4264 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4265 SelectStore(Node, 2, AArch64::ST1Twov1d); 4266 return; 4267 } 4268 break; 4269 } 4270 case Intrinsic::aarch64_neon_st3: { 4271 if (VT == MVT::v8i8) { 4272 SelectStore(Node, 3, AArch64::ST3Threev8b); 4273 return; 4274 } else if (VT == MVT::v16i8) { 4275 SelectStore(Node, 3, AArch64::ST3Threev16b); 4276 return; 4277 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4278 VT == MVT::v4bf16) { 4279 SelectStore(Node, 3, AArch64::ST3Threev4h); 4280 return; 4281 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4282 VT == MVT::v8bf16) { 4283 SelectStore(Node, 3, AArch64::ST3Threev8h); 4284 return; 4285 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4286 SelectStore(Node, 3, AArch64::ST3Threev2s); 4287 return; 4288 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4289 SelectStore(Node, 3, AArch64::ST3Threev4s); 4290 return; 4291 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4292 SelectStore(Node, 3, AArch64::ST3Threev2d); 4293 return; 4294 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4295 SelectStore(Node, 3, AArch64::ST1Threev1d); 4296 return; 4297 } 4298 break; 4299 } 4300 case Intrinsic::aarch64_neon_st4: { 4301 if (VT == MVT::v8i8) { 4302 SelectStore(Node, 4, AArch64::ST4Fourv8b); 4303 return; 4304 } else if (VT == MVT::v16i8) { 4305 SelectStore(Node, 4, AArch64::ST4Fourv16b); 4306 return; 4307 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4308 VT == MVT::v4bf16) { 4309 SelectStore(Node, 4, AArch64::ST4Fourv4h); 4310 return; 4311 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4312 VT == MVT::v8bf16) { 4313 SelectStore(Node, 4, AArch64::ST4Fourv8h); 4314 return; 4315 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4316 SelectStore(Node, 4, AArch64::ST4Fourv2s); 4317 return; 4318 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4319 SelectStore(Node, 4, AArch64::ST4Fourv4s); 4320 return; 4321 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4322 SelectStore(Node, 4, AArch64::ST4Fourv2d); 4323 return; 4324 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4325 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4326 return; 4327 } 4328 break; 4329 } 4330 case Intrinsic::aarch64_neon_st2lane: { 4331 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4332 SelectStoreLane(Node, 2, AArch64::ST2i8); 4333 return; 4334 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4335 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4336 SelectStoreLane(Node, 2, AArch64::ST2i16); 4337 return; 4338 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4339 VT == MVT::v2f32) { 4340 SelectStoreLane(Node, 2, AArch64::ST2i32); 4341 return; 4342 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4343 VT == MVT::v1f64) { 4344 SelectStoreLane(Node, 2, AArch64::ST2i64); 4345 return; 4346 } 4347 break; 4348 } 4349 case Intrinsic::aarch64_neon_st3lane: { 4350 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4351 SelectStoreLane(Node, 3, AArch64::ST3i8); 4352 return; 4353 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4354 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4355 SelectStoreLane(Node, 3, AArch64::ST3i16); 4356 return; 4357 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4358 VT == MVT::v2f32) { 4359 SelectStoreLane(Node, 3, AArch64::ST3i32); 4360 return; 4361 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4362 VT == MVT::v1f64) { 4363 SelectStoreLane(Node, 3, AArch64::ST3i64); 4364 return; 4365 } 4366 break; 4367 } 4368 case Intrinsic::aarch64_neon_st4lane: { 4369 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4370 SelectStoreLane(Node, 4, AArch64::ST4i8); 4371 return; 4372 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4373 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4374 SelectStoreLane(Node, 4, AArch64::ST4i16); 4375 return; 4376 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4377 VT == MVT::v2f32) { 4378 SelectStoreLane(Node, 4, AArch64::ST4i32); 4379 return; 4380 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4381 VT == MVT::v1f64) { 4382 SelectStoreLane(Node, 4, AArch64::ST4i64); 4383 return; 4384 } 4385 break; 4386 } 4387 case Intrinsic::aarch64_sve_st2: { 4388 if (VT == MVT::nxv16i8) { 4389 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); 4390 return; 4391 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4392 VT == MVT::nxv8bf16) { 4393 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); 4394 return; 4395 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4396 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); 4397 return; 4398 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4399 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); 4400 return; 4401 } 4402 break; 4403 } 4404 case Intrinsic::aarch64_sve_st3: { 4405 if (VT == MVT::nxv16i8) { 4406 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); 4407 return; 4408 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4409 VT == MVT::nxv8bf16) { 4410 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); 4411 return; 4412 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4413 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); 4414 return; 4415 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4416 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM); 4417 return; 4418 } 4419 break; 4420 } 4421 case Intrinsic::aarch64_sve_st4: { 4422 if (VT == MVT::nxv16i8) { 4423 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); 4424 return; 4425 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4426 VT == MVT::nxv8bf16) { 4427 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); 4428 return; 4429 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4430 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); 4431 return; 4432 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4433 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM); 4434 return; 4435 } 4436 break; 4437 } 4438 } 4439 break; 4440 } 4441 case AArch64ISD::LD2post: { 4442 if (VT == MVT::v8i8) { 4443 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 4444 return; 4445 } else if (VT == MVT::v16i8) { 4446 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 4447 return; 4448 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4449 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 4450 return; 4451 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4452 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 4453 return; 4454 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4455 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 4456 return; 4457 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4458 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 4459 return; 4460 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4461 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4462 return; 4463 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4464 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 4465 return; 4466 } 4467 break; 4468 } 4469 case AArch64ISD::LD3post: { 4470 if (VT == MVT::v8i8) { 4471 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 4472 return; 4473 } else if (VT == MVT::v16i8) { 4474 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 4475 return; 4476 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4477 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 4478 return; 4479 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4480 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 4481 return; 4482 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4483 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 4484 return; 4485 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4486 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 4487 return; 4488 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4489 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4490 return; 4491 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4492 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 4493 return; 4494 } 4495 break; 4496 } 4497 case AArch64ISD::LD4post: { 4498 if (VT == MVT::v8i8) { 4499 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 4500 return; 4501 } else if (VT == MVT::v16i8) { 4502 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 4503 return; 4504 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4505 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 4506 return; 4507 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4508 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 4509 return; 4510 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4511 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 4512 return; 4513 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4514 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 4515 return; 4516 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4517 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4518 return; 4519 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4520 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 4521 return; 4522 } 4523 break; 4524 } 4525 case AArch64ISD::LD1x2post: { 4526 if (VT == MVT::v8i8) { 4527 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 4528 return; 4529 } else if (VT == MVT::v16i8) { 4530 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 4531 return; 4532 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4533 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 4534 return; 4535 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4536 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 4537 return; 4538 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4539 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 4540 return; 4541 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4542 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 4543 return; 4544 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4545 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4546 return; 4547 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4548 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 4549 return; 4550 } 4551 break; 4552 } 4553 case AArch64ISD::LD1x3post: { 4554 if (VT == MVT::v8i8) { 4555 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 4556 return; 4557 } else if (VT == MVT::v16i8) { 4558 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 4559 return; 4560 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4561 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 4562 return; 4563 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4564 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 4565 return; 4566 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4567 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 4568 return; 4569 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4570 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 4571 return; 4572 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4573 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4574 return; 4575 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4576 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 4577 return; 4578 } 4579 break; 4580 } 4581 case AArch64ISD::LD1x4post: { 4582 if (VT == MVT::v8i8) { 4583 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 4584 return; 4585 } else if (VT == MVT::v16i8) { 4586 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); 4587 return; 4588 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4589 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 4590 return; 4591 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4592 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 4593 return; 4594 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4595 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 4596 return; 4597 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4598 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 4599 return; 4600 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4601 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4602 return; 4603 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4604 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 4605 return; 4606 } 4607 break; 4608 } 4609 case AArch64ISD::LD1DUPpost: { 4610 if (VT == MVT::v8i8) { 4611 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 4612 return; 4613 } else if (VT == MVT::v16i8) { 4614 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 4615 return; 4616 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4617 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 4618 return; 4619 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4620 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 4621 return; 4622 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4623 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 4624 return; 4625 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4626 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 4627 return; 4628 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4629 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 4630 return; 4631 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4632 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 4633 return; 4634 } 4635 break; 4636 } 4637 case AArch64ISD::LD2DUPpost: { 4638 if (VT == MVT::v8i8) { 4639 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 4640 return; 4641 } else if (VT == MVT::v16i8) { 4642 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 4643 return; 4644 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4645 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 4646 return; 4647 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4648 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 4649 return; 4650 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4651 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 4652 return; 4653 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4654 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 4655 return; 4656 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4657 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 4658 return; 4659 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4660 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 4661 return; 4662 } 4663 break; 4664 } 4665 case AArch64ISD::LD3DUPpost: { 4666 if (VT == MVT::v8i8) { 4667 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 4668 return; 4669 } else if (VT == MVT::v16i8) { 4670 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 4671 return; 4672 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4673 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); 4674 return; 4675 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4676 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 4677 return; 4678 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4679 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 4680 return; 4681 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4682 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 4683 return; 4684 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4685 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 4686 return; 4687 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4688 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 4689 return; 4690 } 4691 break; 4692 } 4693 case AArch64ISD::LD4DUPpost: { 4694 if (VT == MVT::v8i8) { 4695 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 4696 return; 4697 } else if (VT == MVT::v16i8) { 4698 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 4699 return; 4700 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4701 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 4702 return; 4703 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4704 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 4705 return; 4706 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4707 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 4708 return; 4709 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4710 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 4711 return; 4712 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4713 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 4714 return; 4715 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4716 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 4717 return; 4718 } 4719 break; 4720 } 4721 case AArch64ISD::LD1LANEpost: { 4722 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4723 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 4724 return; 4725 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4726 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4727 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 4728 return; 4729 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4730 VT == MVT::v2f32) { 4731 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 4732 return; 4733 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4734 VT == MVT::v1f64) { 4735 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); 4736 return; 4737 } 4738 break; 4739 } 4740 case AArch64ISD::LD2LANEpost: { 4741 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4742 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); 4743 return; 4744 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4745 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4746 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); 4747 return; 4748 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4749 VT == MVT::v2f32) { 4750 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); 4751 return; 4752 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4753 VT == MVT::v1f64) { 4754 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); 4755 return; 4756 } 4757 break; 4758 } 4759 case AArch64ISD::LD3LANEpost: { 4760 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4761 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); 4762 return; 4763 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4764 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4765 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); 4766 return; 4767 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4768 VT == MVT::v2f32) { 4769 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); 4770 return; 4771 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4772 VT == MVT::v1f64) { 4773 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); 4774 return; 4775 } 4776 break; 4777 } 4778 case AArch64ISD::LD4LANEpost: { 4779 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4780 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); 4781 return; 4782 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4783 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4784 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); 4785 return; 4786 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4787 VT == MVT::v2f32) { 4788 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); 4789 return; 4790 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4791 VT == MVT::v1f64) { 4792 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); 4793 return; 4794 } 4795 break; 4796 } 4797 case AArch64ISD::ST2post: { 4798 VT = Node->getOperand(1).getValueType(); 4799 if (VT == MVT::v8i8) { 4800 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); 4801 return; 4802 } else if (VT == MVT::v16i8) { 4803 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); 4804 return; 4805 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4806 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); 4807 return; 4808 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4809 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); 4810 return; 4811 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4812 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); 4813 return; 4814 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4815 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); 4816 return; 4817 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4818 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); 4819 return; 4820 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4821 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4822 return; 4823 } 4824 break; 4825 } 4826 case AArch64ISD::ST3post: { 4827 VT = Node->getOperand(1).getValueType(); 4828 if (VT == MVT::v8i8) { 4829 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); 4830 return; 4831 } else if (VT == MVT::v16i8) { 4832 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); 4833 return; 4834 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4835 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); 4836 return; 4837 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4838 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); 4839 return; 4840 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4841 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); 4842 return; 4843 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4844 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); 4845 return; 4846 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4847 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); 4848 return; 4849 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4850 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4851 return; 4852 } 4853 break; 4854 } 4855 case AArch64ISD::ST4post: { 4856 VT = Node->getOperand(1).getValueType(); 4857 if (VT == MVT::v8i8) { 4858 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); 4859 return; 4860 } else if (VT == MVT::v16i8) { 4861 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); 4862 return; 4863 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4864 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); 4865 return; 4866 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4867 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); 4868 return; 4869 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4870 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); 4871 return; 4872 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4873 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); 4874 return; 4875 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4876 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); 4877 return; 4878 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4879 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 4880 return; 4881 } 4882 break; 4883 } 4884 case AArch64ISD::ST1x2post: { 4885 VT = Node->getOperand(1).getValueType(); 4886 if (VT == MVT::v8i8) { 4887 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); 4888 return; 4889 } else if (VT == MVT::v16i8) { 4890 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); 4891 return; 4892 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4893 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); 4894 return; 4895 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4896 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); 4897 return; 4898 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4899 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); 4900 return; 4901 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4902 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); 4903 return; 4904 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4905 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4906 return; 4907 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4908 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); 4909 return; 4910 } 4911 break; 4912 } 4913 case AArch64ISD::ST1x3post: { 4914 VT = Node->getOperand(1).getValueType(); 4915 if (VT == MVT::v8i8) { 4916 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); 4917 return; 4918 } else if (VT == MVT::v16i8) { 4919 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); 4920 return; 4921 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4922 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); 4923 return; 4924 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) { 4925 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); 4926 return; 4927 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4928 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); 4929 return; 4930 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4931 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); 4932 return; 4933 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4934 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4935 return; 4936 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4937 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); 4938 return; 4939 } 4940 break; 4941 } 4942 case AArch64ISD::ST1x4post: { 4943 VT = Node->getOperand(1).getValueType(); 4944 if (VT == MVT::v8i8) { 4945 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); 4946 return; 4947 } else if (VT == MVT::v16i8) { 4948 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); 4949 return; 4950 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4951 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); 4952 return; 4953 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4954 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); 4955 return; 4956 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4957 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); 4958 return; 4959 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4960 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); 4961 return; 4962 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4963 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 4964 return; 4965 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4966 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); 4967 return; 4968 } 4969 break; 4970 } 4971 case AArch64ISD::ST2LANEpost: { 4972 VT = Node->getOperand(1).getValueType(); 4973 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4974 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); 4975 return; 4976 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4977 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4978 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); 4979 return; 4980 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4981 VT == MVT::v2f32) { 4982 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); 4983 return; 4984 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4985 VT == MVT::v1f64) { 4986 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); 4987 return; 4988 } 4989 break; 4990 } 4991 case AArch64ISD::ST3LANEpost: { 4992 VT = Node->getOperand(1).getValueType(); 4993 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4994 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); 4995 return; 4996 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4997 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4998 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); 4999 return; 5000 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5001 VT == MVT::v2f32) { 5002 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); 5003 return; 5004 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5005 VT == MVT::v1f64) { 5006 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); 5007 return; 5008 } 5009 break; 5010 } 5011 case AArch64ISD::ST4LANEpost: { 5012 VT = Node->getOperand(1).getValueType(); 5013 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 5014 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); 5015 return; 5016 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 5017 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 5018 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); 5019 return; 5020 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 5021 VT == MVT::v2f32) { 5022 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); 5023 return; 5024 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 5025 VT == MVT::v1f64) { 5026 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); 5027 return; 5028 } 5029 break; 5030 } 5031 case AArch64ISD::SVE_LD2_MERGE_ZERO: { 5032 if (VT == MVT::nxv16i8) { 5033 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B); 5034 return; 5035 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5036 VT == MVT::nxv8bf16) { 5037 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H); 5038 return; 5039 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5040 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W); 5041 return; 5042 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5043 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D); 5044 return; 5045 } 5046 break; 5047 } 5048 case AArch64ISD::SVE_LD3_MERGE_ZERO: { 5049 if (VT == MVT::nxv16i8) { 5050 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B); 5051 return; 5052 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5053 VT == MVT::nxv8bf16) { 5054 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H); 5055 return; 5056 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5057 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W); 5058 return; 5059 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5060 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D); 5061 return; 5062 } 5063 break; 5064 } 5065 case AArch64ISD::SVE_LD4_MERGE_ZERO: { 5066 if (VT == MVT::nxv16i8) { 5067 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B); 5068 return; 5069 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 5070 VT == MVT::nxv8bf16) { 5071 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H); 5072 return; 5073 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 5074 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W); 5075 return; 5076 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 5077 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D); 5078 return; 5079 } 5080 break; 5081 } 5082 } 5083 5084 // Select the default instruction 5085 SelectCode(Node); 5086 } 5087 5088 /// createAArch64ISelDag - This pass converts a legalized DAG into a 5089 /// AArch64-specific DAG, ready for instruction scheduling. 5090 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM, 5091 CodeGenOpt::Level OptLevel) { 5092 return new AArch64DAGToDAGISel(TM, OptLevel); 5093 } 5094 5095 /// When \p PredVT is a scalable vector predicate in the form 5096 /// MVT::nx<M>xi1, it builds the correspondent scalable vector of 5097 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting 5098 /// structured vectors (NumVec >1), the output data type is 5099 /// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input 5100 /// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid 5101 /// EVT. 5102 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, 5103 unsigned NumVec) { 5104 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors."); 5105 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1) 5106 return EVT(); 5107 5108 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 && 5109 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1) 5110 return EVT(); 5111 5112 ElementCount EC = PredVT.getVectorElementCount(); 5113 EVT ScalarVT = 5114 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue()); 5115 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec); 5116 5117 return MemVT; 5118 } 5119 5120 /// Return the EVT of the data associated to a memory operation in \p 5121 /// Root. If such EVT cannot be retrived, it returns an invalid EVT. 5122 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { 5123 if (isa<MemSDNode>(Root)) 5124 return cast<MemSDNode>(Root)->getMemoryVT(); 5125 5126 if (isa<MemIntrinsicSDNode>(Root)) 5127 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT(); 5128 5129 const unsigned Opcode = Root->getOpcode(); 5130 // For custom ISD nodes, we have to look at them individually to extract the 5131 // type of the data moved to/from memory. 5132 switch (Opcode) { 5133 case AArch64ISD::LD1_MERGE_ZERO: 5134 case AArch64ISD::LD1S_MERGE_ZERO: 5135 case AArch64ISD::LDNF1_MERGE_ZERO: 5136 case AArch64ISD::LDNF1S_MERGE_ZERO: 5137 return cast<VTSDNode>(Root->getOperand(3))->getVT(); 5138 case AArch64ISD::ST1_PRED: 5139 return cast<VTSDNode>(Root->getOperand(4))->getVT(); 5140 case AArch64ISD::SVE_LD2_MERGE_ZERO: 5141 return getPackedVectorTypeFromPredicateType( 5142 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2); 5143 case AArch64ISD::SVE_LD3_MERGE_ZERO: 5144 return getPackedVectorTypeFromPredicateType( 5145 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3); 5146 case AArch64ISD::SVE_LD4_MERGE_ZERO: 5147 return getPackedVectorTypeFromPredicateType( 5148 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4); 5149 default: 5150 break; 5151 } 5152 5153 if (Opcode != ISD::INTRINSIC_VOID) 5154 return EVT(); 5155 5156 const unsigned IntNo = 5157 cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue(); 5158 if (IntNo == Intrinsic::aarch64_sme_ldr || 5159 IntNo == Intrinsic::aarch64_sme_str) 5160 return MVT::nxv16i8; 5161 5162 if (IntNo != Intrinsic::aarch64_sve_prf) 5163 return EVT(); 5164 5165 // We are using an SVE prefetch intrinsic. Type must be inferred 5166 // from the width of the predicate. 5167 return getPackedVectorTypeFromPredicateType( 5168 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1); 5169 } 5170 5171 /// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode: 5172 /// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max 5173 /// where Root is the memory access using N for its address. 5174 template <int64_t Min, int64_t Max> 5175 bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, 5176 SDValue &Base, 5177 SDValue &OffImm) { 5178 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root); 5179 const DataLayout &DL = CurDAG->getDataLayout(); 5180 const MachineFrameInfo &MFI = MF->getFrameInfo(); 5181 5182 if (N.getOpcode() == ISD::FrameIndex) { 5183 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 5184 // We can only encode VL scaled offsets, so only fold in frame indexes 5185 // referencing SVE objects. 5186 if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) { 5187 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 5188 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); 5189 return true; 5190 } 5191 5192 return false; 5193 } 5194 5195 if (MemVT == EVT()) 5196 return false; 5197 5198 if (N.getOpcode() != ISD::ADD) 5199 return false; 5200 5201 SDValue VScale = N.getOperand(1); 5202 if (VScale.getOpcode() != ISD::VSCALE) 5203 return false; 5204 5205 TypeSize TS = MemVT.getSizeInBits(); 5206 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8; 5207 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue(); 5208 5209 if ((MulImm % MemWidthBytes) != 0) 5210 return false; 5211 5212 int64_t Offset = MulImm / MemWidthBytes; 5213 if (Offset < Min || Offset > Max) 5214 return false; 5215 5216 Base = N.getOperand(0); 5217 if (Base.getOpcode() == ISD::FrameIndex) { 5218 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 5219 // We can only encode VL scaled offsets, so only fold in frame indexes 5220 // referencing SVE objects. 5221 if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) 5222 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 5223 } 5224 5225 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64); 5226 return true; 5227 } 5228 5229 /// Select register plus register addressing mode for SVE, with scaled 5230 /// offset. 5231 bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale, 5232 SDValue &Base, 5233 SDValue &Offset) { 5234 if (N.getOpcode() != ISD::ADD) 5235 return false; 5236 5237 // Process an ADD node. 5238 const SDValue LHS = N.getOperand(0); 5239 const SDValue RHS = N.getOperand(1); 5240 5241 // 8 bit data does not come with the SHL node, so it is treated 5242 // separately. 5243 if (Scale == 0) { 5244 Base = LHS; 5245 Offset = RHS; 5246 return true; 5247 } 5248 5249 if (auto C = dyn_cast<ConstantSDNode>(RHS)) { 5250 int64_t ImmOff = C->getSExtValue(); 5251 unsigned Size = 1 << Scale; 5252 5253 // To use the reg+reg addressing mode, the immediate must be a multiple of 5254 // the vector element's byte size. 5255 if (ImmOff % Size) 5256 return false; 5257 5258 SDLoc DL(N); 5259 Base = LHS; 5260 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64); 5261 SDValue Ops[] = {Offset}; 5262 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); 5263 Offset = SDValue(MI, 0); 5264 return true; 5265 } 5266 5267 // Check if the RHS is a shift node with a constant. 5268 if (RHS.getOpcode() != ISD::SHL) 5269 return false; 5270 5271 const SDValue ShiftRHS = RHS.getOperand(1); 5272 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS)) 5273 if (C->getZExtValue() == Scale) { 5274 Base = LHS; 5275 Offset = RHS.getOperand(0); 5276 return true; 5277 } 5278 5279 return false; 5280 } 5281 5282 bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) { 5283 const AArch64TargetLowering *TLI = 5284 static_cast<const AArch64TargetLowering *>(getTargetLowering()); 5285 5286 return TLI->isAllActivePredicate(*CurDAG, N); 5287 } 5288 5289 bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned Scale, 5290 SDValue &Base, SDValue &Offset) { 5291 if (N.getOpcode() != ISD::ADD) { 5292 Base = N; 5293 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); 5294 return true; 5295 } 5296 5297 // Process an ADD node. 5298 const SDValue LHS = N.getOperand(0); 5299 const SDValue RHS = N.getOperand(1); 5300 5301 if (auto C = dyn_cast<ConstantSDNode>(RHS)) { 5302 int64_t ImmOff = C->getSExtValue(); 5303 unsigned MaxSize = (1 << Scale) - 1; 5304 5305 if (ImmOff < 0 || ImmOff > MaxSize) 5306 return false; 5307 5308 Base = LHS; 5309 Offset = CurDAG->getTargetConstant(ImmOff, SDLoc(N), MVT::i64); 5310 return true; 5311 } 5312 5313 return false; 5314 } 5315