1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the AArch64 target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64TargetMachine.h" 14 #include "MCTargetDesc/AArch64AddressingModes.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/CodeGen/SelectionDAGISel.h" 17 #include "llvm/IR/Function.h" // To access function attributes. 18 #include "llvm/IR/GlobalValue.h" 19 #include "llvm/IR/Intrinsics.h" 20 #include "llvm/Support/Debug.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/KnownBits.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "aarch64-isel" 29 30 //===--------------------------------------------------------------------===// 31 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine 32 /// instructions for SelectionDAG operations. 33 /// 34 namespace { 35 36 class AArch64DAGToDAGISel : public SelectionDAGISel { 37 38 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 39 /// make the right decision when generating code for different targets. 40 const AArch64Subtarget *Subtarget; 41 42 bool ForCodeSize; 43 44 public: 45 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, 46 CodeGenOpt::Level OptLevel) 47 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), 48 ForCodeSize(false) {} 49 50 StringRef getPassName() const override { 51 return "AArch64 Instruction Selection"; 52 } 53 54 bool runOnMachineFunction(MachineFunction &MF) override { 55 ForCodeSize = MF.getFunction().hasOptSize(); 56 Subtarget = &MF.getSubtarget<AArch64Subtarget>(); 57 return SelectionDAGISel::runOnMachineFunction(MF); 58 } 59 60 void Select(SDNode *Node) override; 61 62 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 63 /// inline asm expressions. 
64 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 65 unsigned ConstraintID, 66 std::vector<SDValue> &OutOps) override; 67 68 bool tryMLAV64LaneV128(SDNode *N); 69 bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N); 70 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); 71 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 72 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 73 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 74 return SelectShiftedRegister(N, false, Reg, Shift); 75 } 76 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 77 return SelectShiftedRegister(N, true, Reg, Shift); 78 } 79 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { 80 return SelectAddrModeIndexed7S(N, 1, Base, OffImm); 81 } 82 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { 83 return SelectAddrModeIndexed7S(N, 2, Base, OffImm); 84 } 85 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { 86 return SelectAddrModeIndexed7S(N, 4, Base, OffImm); 87 } 88 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { 89 return SelectAddrModeIndexed7S(N, 8, Base, OffImm); 90 } 91 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { 92 return SelectAddrModeIndexed7S(N, 16, Base, OffImm); 93 } 94 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) { 95 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm); 96 } 97 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) { 98 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm); 99 } 100 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { 101 return SelectAddrModeIndexed(N, 1, Base, OffImm); 102 } 103 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { 104 return SelectAddrModeIndexed(N, 2, Base, OffImm); 105 } 106 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { 107 return SelectAddrModeIndexed(N, 4, Base, OffImm); 108 } 109 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { 110 return SelectAddrModeIndexed(N, 8, Base, OffImm); 111 } 112 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { 113 return SelectAddrModeIndexed(N, 16, Base, OffImm); 114 } 115 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { 116 return SelectAddrModeUnscaled(N, 1, Base, OffImm); 117 } 118 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { 119 return SelectAddrModeUnscaled(N, 2, Base, OffImm); 120 } 121 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { 122 return SelectAddrModeUnscaled(N, 4, Base, OffImm); 123 } 124 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { 125 return SelectAddrModeUnscaled(N, 8, Base, OffImm); 126 } 127 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { 128 return SelectAddrModeUnscaled(N, 16, Base, OffImm); 129 } 130 131 template<int Width> 132 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, 133 SDValue &SignExtend, SDValue &DoShift) { 134 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 135 } 136 137 template<int Width> 138 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, 139 SDValue &SignExtend, SDValue &DoShift) { 140 return SelectAddrModeXRO(N, Width / 8, Base, Offset, 
SignExtend, DoShift); 141 } 142 143 144 /// Form sequences of consecutive 64/128-bit registers for use in NEON 145 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have 146 /// between 1 and 4 elements. If it contains a single element that is returned 147 /// unchanged; otherwise a REG_SEQUENCE value is returned. 148 SDValue createDTuple(ArrayRef<SDValue> Vecs); 149 SDValue createQTuple(ArrayRef<SDValue> Vecs); 150 151 /// Generic helper for the createDTuple/createQTuple 152 /// functions. Those should almost always be called instead. 153 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[], 154 const unsigned SubRegs[]); 155 156 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); 157 158 bool tryIndexedLoad(SDNode *N); 159 160 bool trySelectStackSlotTagP(SDNode *N); 161 void SelectTagP(SDNode *N); 162 163 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 164 unsigned SubRegIdx); 165 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 166 unsigned SubRegIdx); 167 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 168 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 169 170 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); 171 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); 172 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 173 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 174 175 bool tryBitfieldExtractOp(SDNode *N); 176 bool tryBitfieldExtractOpFromSExt(SDNode *N); 177 bool tryBitfieldInsertOp(SDNode *N); 178 bool tryBitfieldInsertInZeroOp(SDNode *N); 179 bool tryShiftAmountMod(SDNode *N); 180 181 bool tryReadRegister(SDNode *N); 182 bool tryWriteRegister(SDNode *N); 183 184 // Include the pieces autogenerated from the target description. 185 #include "AArch64GenDAGISel.inc" 186 187 private: 188 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, 189 SDValue &Shift); 190 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, 191 SDValue &OffImm) { 192 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm); 193 } 194 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW, 195 unsigned Size, SDValue &Base, 196 SDValue &OffImm); 197 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, 198 SDValue &OffImm); 199 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, 200 SDValue &OffImm); 201 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, 202 SDValue &Offset, SDValue &SignExtend, 203 SDValue &DoShift); 204 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, 205 SDValue &Offset, SDValue &SignExtend, 206 SDValue &DoShift); 207 bool isWorthFolding(SDValue V) const; 208 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, 209 SDValue &Offset, SDValue &SignExtend); 210 211 template<unsigned RegWidth> 212 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { 213 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); 214 } 215 216 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); 217 218 bool SelectCMP_SWAP(SDNode *N); 219 220 }; 221 } // end anonymous namespace 222 223 /// isIntImmediate - This method tests to see if the node is a constant 224 /// operand. If so Imm will receive the 32-bit value. 
225 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { 226 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { 227 Imm = C->getZExtValue(); 228 return true; 229 } 230 return false; 231 } 232 233 // isIntImmediate - This method tests to see if the value is a constant operand. 234 // If so Imm will receive the value. 235 static bool isIntImmediate(SDValue N, uint64_t &Imm) { 236 return isIntImmediate(N.getNode(), Imm); 237 } 238 239 // isOpcWithIntImmediate - This method tests to see if the node is a specific 240 // opcode and that it has an immediate integer right operand. 241 // If so Imm will receive the 32-bit value. 242 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, 243 uint64_t &Imm) { 244 return N->getOpcode() == Opc && 245 isIntImmediate(N->getOperand(1).getNode(), Imm); 246 } 247 248 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( 249 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 250 switch(ConstraintID) { 251 default: 252 llvm_unreachable("Unexpected asm memory constraint"); 253 case InlineAsm::Constraint_i: 254 case InlineAsm::Constraint_m: 255 case InlineAsm::Constraint_Q: 256 // We need to make sure that this one operand does not end up in XZR, thus 257 // require the address to be in a PointerRegClass register. 258 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); 259 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); 260 SDLoc dl(Op); 261 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); 262 SDValue NewOp = 263 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 264 dl, Op.getValueType(), 265 Op, RC), 0); 266 OutOps.push_back(NewOp); 267 return false; 268 } 269 return true; 270 } 271 272 /// SelectArithImmed - Select an immediate value that can be represented as 273 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 274 /// Val set to the 12-bit value and Shift set to the shifter operand. 275 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, 276 SDValue &Shift) { 277 // This function is called from the addsub_shifted_imm ComplexPattern, 278 // which lists [imm] as the list of opcodes it is interested in; however, 279 // we still need to check whether the operand is actually an immediate 280 // here because the ComplexPattern opcode list is only used in 281 // root-level opcode matching. 282 if (!isa<ConstantSDNode>(N.getNode())) 283 return false; 284 285 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 286 unsigned ShiftAmt; 287 288 if (Immed >> 12 == 0) { 289 ShiftAmt = 0; 290 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 291 ShiftAmt = 12; 292 Immed = Immed >> 12; 293 } else 294 return false; 295 296 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 297 SDLoc dl(N); 298 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); 299 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); 300 return true; 301 } 302 303 /// SelectNegArithImmed - As above, but negates the value before trying to 304 /// select it. 305 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, 306 SDValue &Shift) { 307 // This function is called from the addsub_shifted_imm ComplexPattern, 308 // which lists [imm] as the list of opcodes it is interested in; however, 309 // we still need to check whether the operand is actually an immediate 310 // here because the ComplexPattern opcode list is only used in 311 // root-level opcode matching.
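// For example, an i32 immediate of -4096 (0xfffff000) negates to 0x1000,
// which SelectArithImmed below then encodes as #1, LSL #12.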
312 if (!isa<ConstantSDNode>(N.getNode())) 313 return false; 314 315 // The immediate operand must be a 24-bit zero-extended immediate. 316 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 317 318 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 319 // have the opposite effect on the C flag, so this pattern mustn't match under 320 // those circumstances. 321 if (Immed == 0) 322 return false; 323 324 if (N.getValueType() == MVT::i32) 325 Immed = ~((uint32_t)Immed) + 1; 326 else 327 Immed = ~Immed + 1ULL; 328 if (Immed & 0xFFFFFFFFFF000000ULL) 329 return false; 330 331 Immed &= 0xFFFFFFULL; 332 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, 333 Shift); 334 } 335 336 /// getShiftTypeForNode - Translate a shift node to the corresponding 337 /// ShiftType value. 338 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { 339 switch (N.getOpcode()) { 340 default: 341 return AArch64_AM::InvalidShiftExtend; 342 case ISD::SHL: 343 return AArch64_AM::LSL; 344 case ISD::SRL: 345 return AArch64_AM::LSR; 346 case ISD::SRA: 347 return AArch64_AM::ASR; 348 case ISD::ROTR: 349 return AArch64_AM::ROR; 350 } 351 } 352 353 /// Determine whether it is worth it to fold SHL into the addressing 354 /// mode. 355 static bool isWorthFoldingSHL(SDValue V) { 356 assert(V.getOpcode() == ISD::SHL && "invalid opcode"); 357 // It is worth folding logical shift of up to three places. 358 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1)); 359 if (!CSD) 360 return false; 361 unsigned ShiftVal = CSD->getZExtValue(); 362 if (ShiftVal > 3) 363 return false; 364 365 // Check if this particular node is reused in any non-memory related 366 // operation. If yes, do not try to fold this node into the address 367 // computation, since the computation will be kept. 368 const SDNode *Node = V.getNode(); 369 for (SDNode *UI : Node->uses()) 370 if (!isa<MemSDNode>(*UI)) 371 for (SDNode *UII : UI->uses()) 372 if (!isa<MemSDNode>(*UII)) 373 return false; 374 return true; 375 } 376 377 /// Determine whether it is worth to fold V into an extended register. 378 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { 379 // Trivial if we are optimizing for code size or if there is only 380 // one use of the value. 381 if (ForCodeSize || V.hasOneUse()) 382 return true; 383 // If a subtarget has a fastpath LSL we can fold a logical shift into 384 // the addressing mode and save a cycle. 385 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL && 386 isWorthFoldingSHL(V)) 387 return true; 388 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) { 389 const SDValue LHS = V.getOperand(0); 390 const SDValue RHS = V.getOperand(1); 391 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) 392 return true; 393 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) 394 return true; 395 } 396 397 // It hurts otherwise, since the value will be reused. 398 return false; 399 } 400 401 /// SelectShiftedRegister - Select a "shifted register" operand. If the value 402 /// is not shifted, set the Shift operand to default of "LSL 0". The logical 403 /// instructions allow the shifted register to be rotated, but the arithmetic 404 /// instructions do not. The AllowROR parameter specifies whether ROR is 405 /// supported. 
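/// For example, (or x0, (shl x1, #4)) can be selected as a single
/// ORR Xd, Xn, Xm, LSL #4, avoiding a separate shift instruction.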
406 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, 407 SDValue &Reg, SDValue &Shift) { 408 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); 409 if (ShType == AArch64_AM::InvalidShiftExtend) 410 return false; 411 if (!AllowROR && ShType == AArch64_AM::ROR) 412 return false; 413 414 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 415 unsigned BitSize = N.getValueSizeInBits(); 416 unsigned Val = RHS->getZExtValue() & (BitSize - 1); 417 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); 418 419 Reg = N.getOperand(0); 420 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); 421 return isWorthFolding(N); 422 } 423 424 return false; 425 } 426 427 /// getExtendTypeForNode - Translate an extend node to the corresponding 428 /// ExtendType value. 429 static AArch64_AM::ShiftExtendType 430 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { 431 if (N.getOpcode() == ISD::SIGN_EXTEND || 432 N.getOpcode() == ISD::SIGN_EXTEND_INREG) { 433 EVT SrcVT; 434 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) 435 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); 436 else 437 SrcVT = N.getOperand(0).getValueType(); 438 439 if (!IsLoadStore && SrcVT == MVT::i8) 440 return AArch64_AM::SXTB; 441 else if (!IsLoadStore && SrcVT == MVT::i16) 442 return AArch64_AM::SXTH; 443 else if (SrcVT == MVT::i32) 444 return AArch64_AM::SXTW; 445 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 446 447 return AArch64_AM::InvalidShiftExtend; 448 } else if (N.getOpcode() == ISD::ZERO_EXTEND || 449 N.getOpcode() == ISD::ANY_EXTEND) { 450 EVT SrcVT = N.getOperand(0).getValueType(); 451 if (!IsLoadStore && SrcVT == MVT::i8) 452 return AArch64_AM::UXTB; 453 else if (!IsLoadStore && SrcVT == MVT::i16) 454 return AArch64_AM::UXTH; 455 else if (SrcVT == MVT::i32) 456 return AArch64_AM::UXTW; 457 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 458 459 return AArch64_AM::InvalidShiftExtend; 460 } else if (N.getOpcode() == ISD::AND) { 461 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 462 if (!CSD) 463 return AArch64_AM::InvalidShiftExtend; 464 uint64_t AndMask = CSD->getZExtValue(); 465 466 switch (AndMask) { 467 default: 468 return AArch64_AM::InvalidShiftExtend; 469 case 0xFF: 470 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 471 case 0xFFFF: 472 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 473 case 0xFFFFFFFF: 474 return AArch64_AM::UXTW; 475 } 476 } 477 478 return AArch64_AM::InvalidShiftExtend; 479 } 480 481 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. 482 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { 483 if (DL->getOpcode() != AArch64ISD::DUPLANE16 && 484 DL->getOpcode() != AArch64ISD::DUPLANE32) 485 return false; 486 487 SDValue SV = DL->getOperand(0); 488 if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) 489 return false; 490 491 SDValue EV = SV.getOperand(1); 492 if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) 493 return false; 494 495 ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); 496 ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); 497 LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); 498 LaneOp = EV.getOperand(0); 499 500 return true; 501 } 502 503 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a 504 // high lane extract. 
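// Both operand orders are tried (the multiply operands are commutative); on
// success, LaneOp/LaneIdx describe the high-lane operand and StdOp is the
// remaining operand.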
505 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, 506 SDValue &LaneOp, int &LaneIdx) { 507 508 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { 509 std::swap(Op0, Op1); 510 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) 511 return false; 512 } 513 StdOp = Op1; 514 return true; 515 } 516 517 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand 518 /// is a lane in the upper half of a 128-bit vector. Recognize and select this 519 /// so that we don't emit unnecessary lane extracts. 520 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) { 521 SDLoc dl(N); 522 SDValue Op0 = N->getOperand(0); 523 SDValue Op1 = N->getOperand(1); 524 SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. 525 SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. 526 int LaneIdx = -1; // Will hold the lane index. 527 528 if (Op1.getOpcode() != ISD::MUL || 529 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 530 LaneIdx)) { 531 std::swap(Op0, Op1); 532 if (Op1.getOpcode() != ISD::MUL || 533 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 534 LaneIdx)) 535 return false; 536 } 537 538 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); 539 540 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; 541 542 unsigned MLAOpc = ~0U; 543 544 switch (N->getSimpleValueType(0).SimpleTy) { 545 default: 546 llvm_unreachable("Unrecognized MLA."); 547 case MVT::v4i16: 548 MLAOpc = AArch64::MLAv4i16_indexed; 549 break; 550 case MVT::v8i16: 551 MLAOpc = AArch64::MLAv8i16_indexed; 552 break; 553 case MVT::v2i32: 554 MLAOpc = AArch64::MLAv2i32_indexed; 555 break; 556 case MVT::v4i32: 557 MLAOpc = AArch64::MLAv4i32_indexed; 558 break; 559 } 560 561 ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops)); 562 return true; 563 } 564 565 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) { 566 SDLoc dl(N); 567 SDValue SMULLOp0; 568 SDValue SMULLOp1; 569 int LaneIdx; 570 571 if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, 572 LaneIdx)) 573 return false; 574 575 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); 576 577 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; 578 579 unsigned SMULLOpc = ~0U; 580 581 if (IntNo == Intrinsic::aarch64_neon_smull) { 582 switch (N->getSimpleValueType(0).SimpleTy) { 583 default: 584 llvm_unreachable("Unrecognized SMULL."); 585 case MVT::v4i32: 586 SMULLOpc = AArch64::SMULLv4i16_indexed; 587 break; 588 case MVT::v2i64: 589 SMULLOpc = AArch64::SMULLv2i32_indexed; 590 break; 591 } 592 } else if (IntNo == Intrinsic::aarch64_neon_umull) { 593 switch (N->getSimpleValueType(0).SimpleTy) { 594 default: 595 llvm_unreachable("Unrecognized SMULL."); 596 case MVT::v4i32: 597 SMULLOpc = AArch64::UMULLv4i16_indexed; 598 break; 599 case MVT::v2i64: 600 SMULLOpc = AArch64::UMULLv2i32_indexed; 601 break; 602 } 603 } else 604 llvm_unreachable("Unrecognized intrinsic."); 605 606 ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops)); 607 return true; 608 } 609 610 /// Instructions that accept extend modifiers like UXTW expect the register 611 /// being extended to be a GPR32, but the incoming DAG might be acting on a 612 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if 613 /// this is the case. 
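/// For example, when (and x1, 0xffff) is folded as the UXTH operand of an add,
/// the instruction form is ADD Xd, Xn, Wm, UXTH, so the i64 value feeding the
/// extend must be presented as its 32-bit sub_32 sub-register.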
614 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { 615 if (N.getValueType() == MVT::i32) 616 return N; 617 618 SDLoc dl(N); 619 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 620 MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 621 dl, MVT::i32, N, SubReg); 622 return SDValue(Node, 0); 623 } 624 625 626 /// SelectArithExtendedRegister - Select an "extended register" operand. This 627 /// operand folds in an extend followed by an optional left shift. 628 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, 629 SDValue &Shift) { 630 unsigned ShiftVal = 0; 631 AArch64_AM::ShiftExtendType Ext; 632 633 if (N.getOpcode() == ISD::SHL) { 634 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 635 if (!CSD) 636 return false; 637 ShiftVal = CSD->getZExtValue(); 638 if (ShiftVal > 4) 639 return false; 640 641 Ext = getExtendTypeForNode(N.getOperand(0)); 642 if (Ext == AArch64_AM::InvalidShiftExtend) 643 return false; 644 645 Reg = N.getOperand(0).getOperand(0); 646 } else { 647 Ext = getExtendTypeForNode(N); 648 if (Ext == AArch64_AM::InvalidShiftExtend) 649 return false; 650 651 Reg = N.getOperand(0); 652 653 // Don't match if free 32-bit -> 64-bit zext can be used instead. 654 if (Ext == AArch64_AM::UXTW && 655 Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode())) 656 return false; 657 } 658 659 // AArch64 mandates that the RHS of the operation must use the smallest 660 // register class that could contain the size being extended from. Thus, 661 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though 662 // there might not be an actual 32-bit value in the program. We can 663 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here. 664 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); 665 Reg = narrowIfNeeded(CurDAG, Reg); 666 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), 667 MVT::i32); 668 return isWorthFolding(N); 669 } 670 671 /// If there's a use of this ADDlow that's not itself a load/store then we'll 672 /// need to create a real ADD instruction from it anyway and there's no point in 673 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's 674 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding 675 /// leads to duplicated ADRP instructions. 676 static bool isWorthFoldingADDlow(SDValue N) { 677 for (auto Use : N->uses()) { 678 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && 679 Use->getOpcode() != ISD::ATOMIC_LOAD && 680 Use->getOpcode() != ISD::ATOMIC_STORE) 681 return false; 682 683 // ldar and stlr have much more restrictive addressing modes (just a 684 // register). 685 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering())) 686 return false; 687 } 688 689 return true; 690 } 691 692 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit 693 /// immediate" address. The "Size" argument is the size in bytes of the memory 694 /// reference, which determines the scale.
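/// For example, with a signed 7-bit immediate and Size == 16 (the
/// load/store-pair style encodings), the accepted byte offsets are the
/// multiples of 16 in [-1024, 1008].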
695 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, 696 unsigned BW, unsigned Size, 697 SDValue &Base, 698 SDValue &OffImm) { 699 SDLoc dl(N); 700 const DataLayout &DL = CurDAG->getDataLayout(); 701 const TargetLowering *TLI = getTargetLowering(); 702 if (N.getOpcode() == ISD::FrameIndex) { 703 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 704 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 705 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 706 return true; 707 } 708 709 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed 710 // selected here doesn't support labels/immediates, only base+offset. 711 if (CurDAG->isBaseWithConstantOffset(N)) { 712 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 713 if (IsSignedImm) { 714 int64_t RHSC = RHS->getSExtValue(); 715 unsigned Scale = Log2_32(Size); 716 int64_t Range = 0x1LL << (BW - 1); 717 718 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && 719 RHSC < (Range << Scale)) { 720 Base = N.getOperand(0); 721 if (Base.getOpcode() == ISD::FrameIndex) { 722 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 723 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 724 } 725 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 726 return true; 727 } 728 } else { 729 // unsigned Immediate 730 uint64_t RHSC = RHS->getZExtValue(); 731 unsigned Scale = Log2_32(Size); 732 uint64_t Range = 0x1ULL << BW; 733 734 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { 735 Base = N.getOperand(0); 736 if (Base.getOpcode() == ISD::FrameIndex) { 737 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 738 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 739 } 740 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 741 return true; 742 } 743 } 744 } 745 } 746 // Base only. The address will be materialized into a register before 747 // the memory is accessed. 748 // add x0, Xbase, #offset 749 // stp x1, x2, [x0] 750 Base = N; 751 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 752 return true; 753 } 754 755 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit 756 /// immediate" address. The "Size" argument is the size in bytes of the memory 757 /// reference, which determines the scale. 
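/// For example, with Size == 8 the 12-bit immediate is scaled by 8, so byte
/// offsets 0, 8, ..., 32760 can be folded directly into the load or store.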
758 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, 759 SDValue &Base, SDValue &OffImm) { 760 SDLoc dl(N); 761 const DataLayout &DL = CurDAG->getDataLayout(); 762 const TargetLowering *TLI = getTargetLowering(); 763 if (N.getOpcode() == ISD::FrameIndex) { 764 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 765 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 766 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 767 return true; 768 } 769 770 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { 771 GlobalAddressSDNode *GAN = 772 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); 773 Base = N.getOperand(0); 774 OffImm = N.getOperand(1); 775 if (!GAN) 776 return true; 777 778 if (GAN->getOffset() % Size == 0) { 779 const GlobalValue *GV = GAN->getGlobal(); 780 unsigned Alignment = GV->getAlignment(); 781 Type *Ty = GV->getValueType(); 782 if (Alignment == 0 && Ty->isSized()) 783 Alignment = DL.getABITypeAlignment(Ty); 784 785 if (Alignment >= Size) 786 return true; 787 } 788 } 789 790 if (CurDAG->isBaseWithConstantOffset(N)) { 791 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 792 int64_t RHSC = (int64_t)RHS->getZExtValue(); 793 unsigned Scale = Log2_32(Size); 794 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 795 Base = N.getOperand(0); 796 if (Base.getOpcode() == ISD::FrameIndex) { 797 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 798 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 799 } 800 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 801 return true; 802 } 803 } 804 } 805 806 // Before falling back to our general case, check if the unscaled 807 // instructions can handle this. If so, that's preferable. 808 if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) 809 return false; 810 811 // Base only. The address will be materialized into a register before 812 // the memory is accessed. 813 // add x0, Xbase, #offset 814 // ldr x0, [x0] 815 Base = N; 816 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 817 return true; 818 } 819 820 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit 821 /// immediate" address. This should only match when there is an offset that 822 /// is not valid for a scaled immediate addressing mode. The "Size" argument 823 /// is the size in bytes of the memory reference, which is needed here to know 824 /// what is valid for a scaled immediate. 825 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, 826 SDValue &Base, 827 SDValue &OffImm) { 828 if (!CurDAG->isBaseWithConstantOffset(N)) 829 return false; 830 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 831 int64_t RHSC = RHS->getSExtValue(); 832 // If the offset is valid as a scaled immediate, don't match here. 
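// For example, with Size == 4 an offset of 17 is not a multiple of 4, so it
// cannot use the scaled form but does fit the unscaled LDUR/STUR range of
// [-256, 255].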
833 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && 834 RHSC < (0x1000 << Log2_32(Size))) 835 return false; 836 if (RHSC >= -256 && RHSC < 256) { 837 Base = N.getOperand(0); 838 if (Base.getOpcode() == ISD::FrameIndex) { 839 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 840 const TargetLowering *TLI = getTargetLowering(); 841 Base = CurDAG->getTargetFrameIndex( 842 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 843 } 844 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); 845 return true; 846 } 847 } 848 return false; 849 } 850 851 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { 852 SDLoc dl(N); 853 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 854 SDValue ImpDef = SDValue( 855 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); 856 MachineSDNode *Node = CurDAG->getMachineNode( 857 TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg); 858 return SDValue(Node, 0); 859 } 860 861 /// Check if the given SHL node (\p N), can be used to form an 862 /// extended register for an addressing mode. 863 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, 864 bool WantExtend, SDValue &Offset, 865 SDValue &SignExtend) { 866 assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); 867 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 868 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) 869 return false; 870 871 SDLoc dl(N); 872 if (WantExtend) { 873 AArch64_AM::ShiftExtendType Ext = 874 getExtendTypeForNode(N.getOperand(0), true); 875 if (Ext == AArch64_AM::InvalidShiftExtend) 876 return false; 877 878 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); 879 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 880 MVT::i32); 881 } else { 882 Offset = N.getOperand(0); 883 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); 884 } 885 886 unsigned LegalShiftVal = Log2_32(Size); 887 unsigned ShiftVal = CSD->getZExtValue(); 888 889 if (ShiftVal != 0 && ShiftVal != LegalShiftVal) 890 return false; 891 892 return isWorthFolding(N); 893 } 894 895 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, 896 SDValue &Base, SDValue &Offset, 897 SDValue &SignExtend, 898 SDValue &DoShift) { 899 if (N.getOpcode() != ISD::ADD) 900 return false; 901 SDValue LHS = N.getOperand(0); 902 SDValue RHS = N.getOperand(1); 903 SDLoc dl(N); 904 905 // We don't want to match immediate adds here, because they are better lowered 906 // to the register-immediate addressing modes. 907 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) 908 return false; 909 910 // Check if this particular node is reused in any non-memory related 911 // operation. If yes, do not try to fold this node into the address 912 // computation, since the computation will be kept. 913 const SDNode *Node = N.getNode(); 914 for (SDNode *UI : Node->uses()) { 915 if (!isa<MemSDNode>(*UI)) 916 return false; 917 } 918 919 // Remember if it is worth folding N when it produces extended register. 920 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 921 922 // Try to match a shifted extend on the RHS. 923 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 924 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { 925 Base = LHS; 926 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 927 return true; 928 } 929 930 // Try to match a shifted extend on the LHS. 
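// e.g. (add Base, (shl (sext i32 Index), #3)) for an 8-byte access becomes
// the register-offset form [Xn, Wm, SXTW #3].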
931 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 932 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { 933 Base = RHS; 934 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 935 return true; 936 } 937 938 // There was no shift, whatever else we find. 939 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); 940 941 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; 942 // Try to match an unshifted extend on the LHS. 943 if (IsExtendedRegisterWorthFolding && 944 (Ext = getExtendTypeForNode(LHS, true)) != 945 AArch64_AM::InvalidShiftExtend) { 946 Base = RHS; 947 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); 948 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 949 MVT::i32); 950 if (isWorthFolding(LHS)) 951 return true; 952 } 953 954 // Try to match an unshifted extend on the RHS. 955 if (IsExtendedRegisterWorthFolding && 956 (Ext = getExtendTypeForNode(RHS, true)) != 957 AArch64_AM::InvalidShiftExtend) { 958 Base = LHS; 959 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); 960 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 961 MVT::i32); 962 if (isWorthFolding(RHS)) 963 return true; 964 } 965 966 return false; 967 } 968 969 // Check if the given immediate is preferred by ADD. If an immediate can be 970 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be 971 // encoded by one MOVZ, return true. 972 static bool isPreferredADD(int64_t ImmOff) { 973 // Constant in [0x0, 0xfff] can be encoded in ADD. 974 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) 975 return true; 976 // Check if it can be encoded in an "ADD LSL #12". 977 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) 978 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant. 979 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && 980 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; 981 return false; 982 } 983 984 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, 985 SDValue &Base, SDValue &Offset, 986 SDValue &SignExtend, 987 SDValue &DoShift) { 988 if (N.getOpcode() != ISD::ADD) 989 return false; 990 SDValue LHS = N.getOperand(0); 991 SDValue RHS = N.getOperand(1); 992 SDLoc DL(N); 993 994 // Check if this particular node is reused in any non-memory related 995 // operation. If yes, do not try to fold this node into the address 996 // computation, since the computation will be kept. 997 const SDNode *Node = N.getNode(); 998 for (SDNode *UI : Node->uses()) { 999 if (!isa<MemSDNode>(*UI)) 1000 return false; 1001 } 1002 1003 // Watch out if RHS is a wide immediate, it can not be selected into 1004 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into 1005 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate 1006 // instructions like: 1007 // MOV X0, WideImmediate 1008 // ADD X1, BaseReg, X0 1009 // LDR X2, [X1, 0] 1010 // For such situation, using [BaseReg, XReg] addressing mode can save one 1011 // ADD/SUB: 1012 // MOV X0, WideImmediate 1013 // LDR X2, [BaseReg, X0] 1014 if (isa<ConstantSDNode>(RHS)) { 1015 int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); 1016 unsigned Scale = Log2_32(Size); 1017 // Skip the immediate can be selected by load/store addressing mode. 1018 // Also skip the immediate can be encoded by a single ADD (SUB is also 1019 // checked by using -ImmOff). 
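// For example, ImmOff == 0x123456 for an 8-byte access is neither of these,
// so it is materialized with MOVi64imm below and the [BaseReg, Xm]
// register-register form is used instead.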
1020 if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || 1021 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) 1022 return false; 1023 1024 SDValue Ops[] = { RHS }; 1025 SDNode *MOVI = 1026 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); 1027 SDValue MOVIV = SDValue(MOVI, 0); 1028 // This ADD of two X register will be selected into [Reg+Reg] mode. 1029 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); 1030 } 1031 1032 // Remember if it is worth folding N when it produces extended register. 1033 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 1034 1035 // Try to match a shifted extend on the RHS. 1036 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1037 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { 1038 Base = LHS; 1039 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1040 return true; 1041 } 1042 1043 // Try to match a shifted extend on the LHS. 1044 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1045 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { 1046 Base = RHS; 1047 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1048 return true; 1049 } 1050 1051 // Match any non-shifted, non-extend, non-immediate add expression. 1052 Base = LHS; 1053 Offset = RHS; 1054 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); 1055 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); 1056 // Reg1 + Reg2 is free: no check needed. 1057 return true; 1058 } 1059 1060 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { 1061 static const unsigned RegClassIDs[] = { 1062 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; 1063 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, 1064 AArch64::dsub2, AArch64::dsub3}; 1065 1066 return createTuple(Regs, RegClassIDs, SubRegs); 1067 } 1068 1069 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { 1070 static const unsigned RegClassIDs[] = { 1071 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; 1072 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, 1073 AArch64::qsub2, AArch64::qsub3}; 1074 1075 return createTuple(Regs, RegClassIDs, SubRegs); 1076 } 1077 1078 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, 1079 const unsigned RegClassIDs[], 1080 const unsigned SubRegs[]) { 1081 // There's no special register-class for a vector-list of 1 element: it's just 1082 // a vector. 1083 if (Regs.size() == 1) 1084 return Regs[0]; 1085 1086 assert(Regs.size() >= 2 && Regs.size() <= 4); 1087 1088 SDLoc DL(Regs[0]); 1089 1090 SmallVector<SDValue, 4> Ops; 1091 1092 // First operand of REG_SEQUENCE is the desired RegClass. 1093 Ops.push_back( 1094 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); 1095 1096 // Then we get pairs of source & subregister-position for the components. 1097 for (unsigned i = 0; i < Regs.size(); ++i) { 1098 Ops.push_back(Regs[i]); 1099 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); 1100 } 1101 1102 SDNode *N = 1103 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 1104 return SDValue(N, 0); 1105 } 1106 1107 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, 1108 bool isExt) { 1109 SDLoc dl(N); 1110 EVT VT = N->getValueType(0); 1111 1112 unsigned ExtOff = isExt; 1113 1114 // Form a REG_SEQUENCE to force register allocation. 
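// TBL/TBX take their lookup table as a list of consecutive Q registers, so
// the table operands are packed into a single QTuple; for the ext (TBX) form
// the extra leading operand is the vector whose elements are kept when an
// index is out of range.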
1115 unsigned Vec0Off = ExtOff + 1; 1116 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, 1117 N->op_begin() + Vec0Off + NumVecs); 1118 SDValue RegSeq = createQTuple(Regs); 1119 1120 SmallVector<SDValue, 6> Ops; 1121 if (isExt) 1122 Ops.push_back(N->getOperand(1)); 1123 Ops.push_back(RegSeq); 1124 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); 1125 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 1126 } 1127 1128 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { 1129 LoadSDNode *LD = cast<LoadSDNode>(N); 1130 if (LD->isUnindexed()) 1131 return false; 1132 EVT VT = LD->getMemoryVT(); 1133 EVT DstVT = N->getValueType(0); 1134 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1135 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; 1136 1137 // We're not doing validity checking here. That was done when checking 1138 // if we should mark the load as indexed or not. We're just selecting 1139 // the right instruction. 1140 unsigned Opcode = 0; 1141 1142 ISD::LoadExtType ExtType = LD->getExtensionType(); 1143 bool InsertTo64 = false; 1144 if (VT == MVT::i64) 1145 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; 1146 else if (VT == MVT::i32) { 1147 if (ExtType == ISD::NON_EXTLOAD) 1148 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1149 else if (ExtType == ISD::SEXTLOAD) 1150 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; 1151 else { 1152 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1153 InsertTo64 = true; 1154 // The result of the load is only i32. It's the subreg_to_reg that makes 1155 // it into an i64. 1156 DstVT = MVT::i32; 1157 } 1158 } else if (VT == MVT::i16) { 1159 if (ExtType == ISD::SEXTLOAD) { 1160 if (DstVT == MVT::i64) 1161 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; 1162 else 1163 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; 1164 } else { 1165 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; 1166 InsertTo64 = DstVT == MVT::i64; 1167 // The result of the load is only i32. It's the subreg_to_reg that makes 1168 // it into an i64. 1169 DstVT = MVT::i32; 1170 } 1171 } else if (VT == MVT::i8) { 1172 if (ExtType == ISD::SEXTLOAD) { 1173 if (DstVT == MVT::i64) 1174 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; 1175 else 1176 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; 1177 } else { 1178 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; 1179 InsertTo64 = DstVT == MVT::i64; 1180 // The result of the load is only i32. It's the subreg_to_reg that makes 1181 // it into an i64. 1182 DstVT = MVT::i32; 1183 } 1184 } else if (VT == MVT::f16) { 1185 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1186 } else if (VT == MVT::f32) { 1187 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; 1188 } else if (VT == MVT::f64 || VT.is64BitVector()) { 1189 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; 1190 } else if (VT.is128BitVector()) { 1191 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; 1192 } else 1193 return false; 1194 SDValue Chain = LD->getChain(); 1195 SDValue Base = LD->getBasePtr(); 1196 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); 1197 int OffsetVal = (int)OffsetOp->getZExtValue(); 1198 SDLoc dl(N); 1199 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); 1200 SDValue Ops[] = { Base, Offset, Chain }; 1201 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, 1202 MVT::Other, Ops); 1203 // Either way, we're replacing the node, so tell the caller that. 
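// Result 0 of the new load is the updated base register (write-back),
// result 1 is the loaded value and result 2 is the chain; they replace the
// original node's value, pointer and chain results below.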
1204 SDValue LoadedVal = SDValue(Res, 1); 1205 if (InsertTo64) { 1206 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 1207 LoadedVal = 1208 SDValue(CurDAG->getMachineNode( 1209 AArch64::SUBREG_TO_REG, dl, MVT::i64, 1210 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, 1211 SubReg), 1212 0); 1213 } 1214 1215 ReplaceUses(SDValue(N, 0), LoadedVal); 1216 ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); 1217 ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); 1218 CurDAG->RemoveDeadNode(N); 1219 return true; 1220 } 1221 1222 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 1223 unsigned SubRegIdx) { 1224 SDLoc dl(N); 1225 EVT VT = N->getValueType(0); 1226 SDValue Chain = N->getOperand(0); 1227 1228 SDValue Ops[] = {N->getOperand(2), // Mem operand; 1229 Chain}; 1230 1231 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1232 1233 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1234 SDValue SuperReg = SDValue(Ld, 0); 1235 for (unsigned i = 0; i < NumVecs; ++i) 1236 ReplaceUses(SDValue(N, i), 1237 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1238 1239 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1240 1241 // Transfer memoperands. 1242 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1243 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 1244 1245 CurDAG->RemoveDeadNode(N); 1246 } 1247 1248 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, 1249 unsigned Opc, unsigned SubRegIdx) { 1250 SDLoc dl(N); 1251 EVT VT = N->getValueType(0); 1252 SDValue Chain = N->getOperand(0); 1253 1254 SDValue Ops[] = {N->getOperand(1), // Mem operand 1255 N->getOperand(2), // Incremental 1256 Chain}; 1257 1258 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1259 MVT::Untyped, MVT::Other}; 1260 1261 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1262 1263 // Update uses of write back register 1264 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1265 1266 // Update uses of vector list 1267 SDValue SuperReg = SDValue(Ld, 1); 1268 if (NumVecs == 1) 1269 ReplaceUses(SDValue(N, 0), SuperReg); 1270 else 1271 for (unsigned i = 0; i < NumVecs; ++i) 1272 ReplaceUses(SDValue(N, i), 1273 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1274 1275 // Update the chain 1276 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1277 CurDAG->RemoveDeadNode(N); 1278 } 1279 1280 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, 1281 unsigned Opc) { 1282 SDLoc dl(N); 1283 EVT VT = N->getOperand(2)->getValueType(0); 1284 1285 // Form a REG_SEQUENCE to force register allocation. 1286 bool Is128Bit = VT.getSizeInBits() == 128; 1287 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1288 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1289 1290 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; 1291 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1292 1293 // Transfer memoperands. 
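// Attaching the original MachineMemOperand keeps the size, alignment and
// aliasing information of the intrinsic store visible to later passes.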
1294 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1295 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1296 1297 ReplaceNode(N, St); 1298 } 1299 1300 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 1301 unsigned Opc) { 1302 SDLoc dl(N); 1303 EVT VT = N->getOperand(2)->getValueType(0); 1304 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1305 MVT::Other}; // Type for the Chain 1306 1307 // Form a REG_SEQUENCE to force register allocation. 1308 bool Is128Bit = VT.getSizeInBits() == 128; 1309 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1310 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1311 1312 SDValue Ops[] = {RegSeq, 1313 N->getOperand(NumVecs + 1), // base register 1314 N->getOperand(NumVecs + 2), // Incremental 1315 N->getOperand(0)}; // Chain 1316 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1317 1318 ReplaceNode(N, St); 1319 } 1320 1321 namespace { 1322 /// WidenVector - Given a value in the V64 register class, produce the 1323 /// equivalent value in the V128 register class. 1324 class WidenVector { 1325 SelectionDAG &DAG; 1326 1327 public: 1328 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 1329 1330 SDValue operator()(SDValue V64Reg) { 1331 EVT VT = V64Reg.getValueType(); 1332 unsigned NarrowSize = VT.getVectorNumElements(); 1333 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1334 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 1335 SDLoc DL(V64Reg); 1336 1337 SDValue Undef = 1338 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 1339 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 1340 } 1341 }; 1342 } // namespace 1343 1344 /// NarrowVector - Given a value in the V128 register class, produce the 1345 /// equivalent value in the V64 register class. 1346 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 1347 EVT VT = V128Reg.getValueType(); 1348 unsigned WideSize = VT.getVectorNumElements(); 1349 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1350 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 1351 1352 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 1353 V128Reg); 1354 } 1355 1356 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 1357 unsigned Opc) { 1358 SDLoc dl(N); 1359 EVT VT = N->getValueType(0); 1360 bool Narrow = VT.getSizeInBits() == 64; 1361 1362 // Form a REG_SEQUENCE to force register allocation. 
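// Lane accesses always operate on 128-bit registers, so 64-bit input vectors
// are widened to Q registers here and the results are narrowed back below.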
1363 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1364 1365 if (Narrow) 1366 transform(Regs, Regs.begin(), 1367 WidenVector(*CurDAG)); 1368 1369 SDValue RegSeq = createQTuple(Regs); 1370 1371 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1372 1373 unsigned LaneNo = 1374 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1375 1376 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1377 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1378 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1379 SDValue SuperReg = SDValue(Ld, 0); 1380 1381 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1382 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1383 AArch64::qsub2, AArch64::qsub3 }; 1384 for (unsigned i = 0; i < NumVecs; ++i) { 1385 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 1386 if (Narrow) 1387 NV = NarrowVector(NV, *CurDAG); 1388 ReplaceUses(SDValue(N, i), NV); 1389 } 1390 1391 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1392 CurDAG->RemoveDeadNode(N); 1393 } 1394 1395 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 1396 unsigned Opc) { 1397 SDLoc dl(N); 1398 EVT VT = N->getValueType(0); 1399 bool Narrow = VT.getSizeInBits() == 64; 1400 1401 // Form a REG_SEQUENCE to force register allocation. 1402 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1403 1404 if (Narrow) 1405 transform(Regs, Regs.begin(), 1406 WidenVector(*CurDAG)); 1407 1408 SDValue RegSeq = createQTuple(Regs); 1409 1410 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1411 RegSeq->getValueType(0), MVT::Other}; 1412 1413 unsigned LaneNo = 1414 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1415 1416 SDValue Ops[] = {RegSeq, 1417 CurDAG->getTargetConstant(LaneNo, dl, 1418 MVT::i64), // Lane Number 1419 N->getOperand(NumVecs + 2), // Base register 1420 N->getOperand(NumVecs + 3), // Incremental 1421 N->getOperand(0)}; 1422 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1423 1424 // Update uses of the write back register 1425 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1426 1427 // Update uses of the vector list 1428 SDValue SuperReg = SDValue(Ld, 1); 1429 if (NumVecs == 1) { 1430 ReplaceUses(SDValue(N, 0), 1431 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 1432 } else { 1433 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1434 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1435 AArch64::qsub2, AArch64::qsub3 }; 1436 for (unsigned i = 0; i < NumVecs; ++i) { 1437 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 1438 SuperReg); 1439 if (Narrow) 1440 NV = NarrowVector(NV, *CurDAG); 1441 ReplaceUses(SDValue(N, i), NV); 1442 } 1443 } 1444 1445 // Update the Chain 1446 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1447 CurDAG->RemoveDeadNode(N); 1448 } 1449 1450 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 1451 unsigned Opc) { 1452 SDLoc dl(N); 1453 EVT VT = N->getOperand(2)->getValueType(0); 1454 bool Narrow = VT.getSizeInBits() == 64; 1455 1456 // Form a REG_SEQUENCE to force register allocation. 
1457 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1458 1459 if (Narrow) 1460 transform(Regs, Regs.begin(), 1461 WidenVector(*CurDAG)); 1462 1463 SDValue RegSeq = createQTuple(Regs); 1464 1465 unsigned LaneNo = 1466 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1467 1468 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1469 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1470 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 1471 1472 // Transfer memoperands. 1473 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1474 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1475 1476 ReplaceNode(N, St); 1477 } 1478 1479 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 1480 unsigned Opc) { 1481 SDLoc dl(N); 1482 EVT VT = N->getOperand(2)->getValueType(0); 1483 bool Narrow = VT.getSizeInBits() == 64; 1484 1485 // Form a REG_SEQUENCE to force register allocation. 1486 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1487 1488 if (Narrow) 1489 transform(Regs, Regs.begin(), 1490 WidenVector(*CurDAG)); 1491 1492 SDValue RegSeq = createQTuple(Regs); 1493 1494 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1495 MVT::Other}; 1496 1497 unsigned LaneNo = 1498 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1499 1500 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1501 N->getOperand(NumVecs + 2), // Base Register 1502 N->getOperand(NumVecs + 3), // Incremental 1503 N->getOperand(0)}; 1504 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1505 1506 // Transfer memoperands. 1507 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1508 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1509 1510 ReplaceNode(N, St); 1511 } 1512 1513 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 1514 unsigned &Opc, SDValue &Opd0, 1515 unsigned &LSB, unsigned &MSB, 1516 unsigned NumberOfIgnoredLowBits, 1517 bool BiggerPattern) { 1518 assert(N->getOpcode() == ISD::AND && 1519 "N must be a AND operation to call this function"); 1520 1521 EVT VT = N->getValueType(0); 1522 1523 // Here we can test the type of VT and return false when the type does not 1524 // match, but since it is done prior to that call in the current context 1525 // we turned that into an assert to avoid redundant code. 1526 assert((VT == MVT::i32 || VT == MVT::i64) && 1527 "Type checking must have been done before calling this function"); 1528 1529 // FIXME: simplify-demanded-bits in DAGCombine will probably have 1530 // changed the AND node to a 32-bit mask operation. We'll have to 1531 // undo that as part of the transform here if we want to catch all 1532 // the opportunities. 1533 // Currently the NumberOfIgnoredLowBits argument helps to recover 1534 // form these situations when matching bigger pattern (bitfield insert). 1535 1536 // For unsigned extracts, check for a shift right and mask 1537 uint64_t AndImm = 0; 1538 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) 1539 return false; 1540 1541 const SDNode *Op0 = N->getOperand(0).getNode(); 1542 1543 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 1544 // simplified. 
Try to undo that 1545 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 1546 1547 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 1548 if (AndImm & (AndImm + 1)) 1549 return false; 1550 1551 bool ClampMSB = false; 1552 uint64_t SrlImm = 0; 1553 // Handle the SRL + ANY_EXTEND case. 1554 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 1555 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 1556 // Extend the incoming operand of the SRL to 64-bit. 1557 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 1558 // Make sure to clamp the MSB so that we preserve the semantics of the 1559 // original operations. 1560 ClampMSB = true; 1561 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 1562 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 1563 SrlImm)) { 1564 // If the shift result was truncated, we can still combine them. 1565 Opd0 = Op0->getOperand(0).getOperand(0); 1566 1567 // Use the type of SRL node. 1568 VT = Opd0->getValueType(0); 1569 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 1570 Opd0 = Op0->getOperand(0); 1571 } else if (BiggerPattern) { 1572 // Let's pretend a 0 shift right has been performed. 1573 // The resulting code will be at least as good as the original one 1574 // plus it may expose more opportunities for bitfield insert pattern. 1575 // FIXME: Currently we limit this to the bigger pattern, because 1576 // some optimizations expect AND and not UBFM. 1577 Opd0 = N->getOperand(0); 1578 } else 1579 return false; 1580 1581 // Bail out on large immediates. This happens when no proper 1582 // combining/constant folding was performed. 1583 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { 1584 LLVM_DEBUG( 1585 (dbgs() << N 1586 << ": Found large shift immediate, this should not happen\n")); 1587 return false; 1588 } 1589 1590 LSB = SrlImm; 1591 MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm) 1592 : countTrailingOnes<uint64_t>(AndImm)) - 1593 1; 1594 if (ClampMSB) 1595 // Since we're moving the extend before the right shift operation, we need 1596 // to clamp the MSB to make sure we don't shift in undefined bits instead of 1597 // the zeros which would get shifted in with the original right shift 1598 // operation. 1599 MSB = MSB > 31 ? 31 : MSB; 1600 1601 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 1602 return true; 1603 } 1604 1605 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, 1606 SDValue &Opd0, unsigned &Immr, 1607 unsigned &Imms) { 1608 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); 1609 1610 EVT VT = N->getValueType(0); 1611 unsigned BitWidth = VT.getSizeInBits(); 1612 assert((VT == MVT::i32 || VT == MVT::i64) && 1613 "Type checking must have been done before calling this function"); 1614 1615 SDValue Op = N->getOperand(0); 1616 if (Op->getOpcode() == ISD::TRUNCATE) { 1617 Op = Op->getOperand(0); 1618 VT = Op->getValueType(0); 1619 BitWidth = VT.getSizeInBits(); 1620 } 1621 1622 uint64_t ShiftImm; 1623 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && 1624 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 1625 return false; 1626 1627 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1628 if (ShiftImm + Width > BitWidth) 1629 return false; 1630 1631 Opc = (VT == MVT::i32) ? 
AArch64::SBFMWri : AArch64::SBFMXri; 1632 Opd0 = Op.getOperand(0); 1633 Immr = ShiftImm; 1634 Imms = ShiftImm + Width - 1; 1635 return true; 1636 } 1637 1638 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 1639 SDValue &Opd0, unsigned &LSB, 1640 unsigned &MSB) { 1641 // We are looking for the following pattern which basically extracts several 1642 // continuous bits from the source value and places it from the LSB of the 1643 // destination value, all other bits of the destination value or set to zero: 1644 // 1645 // Value2 = AND Value, MaskImm 1646 // SRL Value2, ShiftImm 1647 // 1648 // with MaskImm >> ShiftImm to search for the bit width. 1649 // 1650 // This gets selected into a single UBFM: 1651 // 1652 // UBFM Value, ShiftImm, BitWide + SrlImm -1 1653 // 1654 1655 if (N->getOpcode() != ISD::SRL) 1656 return false; 1657 1658 uint64_t AndMask = 0; 1659 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) 1660 return false; 1661 1662 Opd0 = N->getOperand(0).getOperand(0); 1663 1664 uint64_t SrlImm = 0; 1665 if (!isIntImmediate(N->getOperand(1), SrlImm)) 1666 return false; 1667 1668 // Check whether we really have several bits extract here. 1669 unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm)); 1670 if (BitWide && isMask_64(AndMask >> SrlImm)) { 1671 if (N->getValueType(0) == MVT::i32) 1672 Opc = AArch64::UBFMWri; 1673 else 1674 Opc = AArch64::UBFMXri; 1675 1676 LSB = SrlImm; 1677 MSB = BitWide + SrlImm - 1; 1678 return true; 1679 } 1680 1681 return false; 1682 } 1683 1684 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 1685 unsigned &Immr, unsigned &Imms, 1686 bool BiggerPattern) { 1687 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 1688 "N must be a SHR/SRA operation to call this function"); 1689 1690 EVT VT = N->getValueType(0); 1691 1692 // Here we can test the type of VT and return false when the type does not 1693 // match, but since it is done prior to that call in the current context 1694 // we turned that into an assert to avoid redundant code. 1695 assert((VT == MVT::i32 || VT == MVT::i64) && 1696 "Type checking must have been done before calling this function"); 1697 1698 // Check for AND + SRL doing several bits extract. 1699 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 1700 return true; 1701 1702 // We're looking for a shift of a shift. 1703 uint64_t ShlImm = 0; 1704 uint64_t TruncBits = 0; 1705 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 1706 Opd0 = N->getOperand(0).getOperand(0); 1707 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 1708 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 1709 // We are looking for a shift of truncate. Truncate from i64 to i32 could 1710 // be considered as setting high 32 bits as zero. Our strategy here is to 1711 // always generate 64bit UBFM. This consistency will help the CSE pass 1712 // later find more redundancy. 1713 Opd0 = N->getOperand(0).getOperand(0); 1714 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 1715 VT = Opd0.getValueType(); 1716 assert(VT == MVT::i64 && "the promoted type should be i64"); 1717 } else if (BiggerPattern) { 1718 // Let's pretend a 0 shift left has been performed. 
1719 // FIXME: Currently we limit this to the bigger pattern case, 1720 // because some optimizations expect AND and not UBFM 1721 Opd0 = N->getOperand(0); 1722 } else 1723 return false; 1724 1725 // Missing combines/constant folding may have left us with strange 1726 // constants. 1727 if (ShlImm >= VT.getSizeInBits()) { 1728 LLVM_DEBUG( 1729 (dbgs() << N 1730 << ": Found large shift immediate, this should not happen\n")); 1731 return false; 1732 } 1733 1734 uint64_t SrlImm = 0; 1735 if (!isIntImmediate(N->getOperand(1), SrlImm)) 1736 return false; 1737 1738 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 1739 "bad amount in shift node!"); 1740 int immr = SrlImm - ShlImm; 1741 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 1742 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 1743 // SRA requires a signed extraction 1744 if (VT == MVT::i32) 1745 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 1746 else 1747 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; 1748 return true; 1749 } 1750 1751 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 1752 assert(N->getOpcode() == ISD::SIGN_EXTEND); 1753 1754 EVT VT = N->getValueType(0); 1755 EVT NarrowVT = N->getOperand(0)->getValueType(0); 1756 if (VT != MVT::i64 || NarrowVT != MVT::i32) 1757 return false; 1758 1759 uint64_t ShiftImm; 1760 SDValue Op = N->getOperand(0); 1761 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 1762 return false; 1763 1764 SDLoc dl(N); 1765 // Extend the incoming operand of the shift to 64-bits. 1766 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 1767 unsigned Immr = ShiftImm; 1768 unsigned Imms = NarrowVT.getSizeInBits() - 1; 1769 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 1770 CurDAG->getTargetConstant(Imms, dl, VT)}; 1771 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 1772 return true; 1773 } 1774 1775 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 1776 SDValue &Opd0, unsigned &Immr, unsigned &Imms, 1777 unsigned NumberOfIgnoredLowBits = 0, 1778 bool BiggerPattern = false) { 1779 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 1780 return false; 1781 1782 switch (N->getOpcode()) { 1783 default: 1784 if (!N->isMachineOpcode()) 1785 return false; 1786 break; 1787 case ISD::AND: 1788 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, 1789 NumberOfIgnoredLowBits, BiggerPattern); 1790 case ISD::SRL: 1791 case ISD::SRA: 1792 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); 1793 1794 case ISD::SIGN_EXTEND_INREG: 1795 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); 1796 } 1797 1798 unsigned NOpc = N->getMachineOpcode(); 1799 switch (NOpc) { 1800 default: 1801 return false; 1802 case AArch64::SBFMWri: 1803 case AArch64::UBFMWri: 1804 case AArch64::SBFMXri: 1805 case AArch64::UBFMXri: 1806 Opc = NOpc; 1807 Opd0 = N->getOperand(0); 1808 Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 1809 Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 1810 return true; 1811 } 1812 // Unreachable 1813 return false; 1814 } 1815 1816 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { 1817 unsigned Opc, Immr, Imms; 1818 SDValue Opd0; 1819 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) 1820 return false; 1821 1822 EVT VT = N->getValueType(0); 1823 SDLoc dl(N); 1824 1825 // If the bit extract operation is 64bit but the original type is 32bit, we 
1826 // need to add one EXTRACT_SUBREG.
1827 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1828 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1829 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1830
1831 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1832 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1833 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1834 MVT::i32, SDValue(BFM, 0), SubReg));
1835 return true;
1836 }
1837
1838 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1839 CurDAG->getTargetConstant(Imms, dl, VT)};
1840 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1841 return true;
1842 }
1843
1844 /// Does DstMask form a complementary pair with the mask provided by
1845 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
1846 /// this asks whether DstMask zeroes precisely those bits that will be set by
1847 /// the other half.
1848 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1849 unsigned NumberOfIgnoredHighBits, EVT VT) {
1850 assert((VT == MVT::i32 || VT == MVT::i64) &&
1851 "i32 or i64 mask type expected!");
1852 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1853
1854 APInt SignificantDstMask = APInt(BitWidth, DstMask);
1855 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1856
1857 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1858 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1859 }
1860
1861 // Look for bits that will be useful for later uses.
1862 // A bit is considered useless as soon as it is dropped and never used
1863 // before it has been dropped.
1864 // E.g., looking for the useful bits of x
1865 // 1. y = x & 0x7
1866 // 2. z = y >> 2
1867 // After #1, the useful bits of x are 0x7, and they live on through
1868 // y.
1869 // After #2, the useful bits of x are 0x4.
1870 // However, if x is used by an unpredictable instruction, then all its bits
1871 // are useful.
1872 // E.g.
1873 // 1. y = x & 0x7
1874 // 2. z = y >> 2
1875 // 3.
str x, [@x] 1876 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); 1877 1878 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, 1879 unsigned Depth) { 1880 uint64_t Imm = 1881 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 1882 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); 1883 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); 1884 getUsefulBits(Op, UsefulBits, Depth + 1); 1885 } 1886 1887 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, 1888 uint64_t Imm, uint64_t MSB, 1889 unsigned Depth) { 1890 // inherit the bitwidth value 1891 APInt OpUsefulBits(UsefulBits); 1892 OpUsefulBits = 1; 1893 1894 if (MSB >= Imm) { 1895 OpUsefulBits <<= MSB - Imm + 1; 1896 --OpUsefulBits; 1897 // The interesting part will be in the lower part of the result 1898 getUsefulBits(Op, OpUsefulBits, Depth + 1); 1899 // The interesting part was starting at Imm in the argument 1900 OpUsefulBits <<= Imm; 1901 } else { 1902 OpUsefulBits <<= MSB + 1; 1903 --OpUsefulBits; 1904 // The interesting part will be shifted in the result 1905 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; 1906 getUsefulBits(Op, OpUsefulBits, Depth + 1); 1907 // The interesting part was at zero in the argument 1908 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); 1909 } 1910 1911 UsefulBits &= OpUsefulBits; 1912 } 1913 1914 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, 1915 unsigned Depth) { 1916 uint64_t Imm = 1917 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 1918 uint64_t MSB = 1919 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 1920 1921 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 1922 } 1923 1924 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, 1925 unsigned Depth) { 1926 uint64_t ShiftTypeAndValue = 1927 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 1928 APInt Mask(UsefulBits); 1929 Mask.clearAllBits(); 1930 Mask.flipAllBits(); 1931 1932 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { 1933 // Shift Left 1934 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 1935 Mask <<= ShiftAmt; 1936 getUsefulBits(Op, Mask, Depth + 1); 1937 Mask.lshrInPlace(ShiftAmt); 1938 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 1939 // Shift Right 1940 // We do not handle AArch64_AM::ASR, because the sign will change the 1941 // number of useful bits 1942 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 1943 Mask.lshrInPlace(ShiftAmt); 1944 getUsefulBits(Op, Mask, Depth + 1); 1945 Mask <<= ShiftAmt; 1946 } else 1947 return; 1948 1949 UsefulBits &= Mask; 1950 } 1951 1952 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 1953 unsigned Depth) { 1954 uint64_t Imm = 1955 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 1956 uint64_t MSB = 1957 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 1958 1959 APInt OpUsefulBits(UsefulBits); 1960 OpUsefulBits = 1; 1961 1962 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 1963 ResultUsefulBits.flipAllBits(); 1964 APInt Mask(UsefulBits.getBitWidth(), 0); 1965 1966 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 1967 1968 if (MSB >= Imm) { 1969 // The instruction is a BFXIL. 
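// (For reference: BFM with ImmR <= ImmS is the BFXIL alias; it copies
// ImmS - ImmR + 1 bits starting at bit ImmR of the source operand into the
// low bits of the destination, leaving the destination's remaining bits
// unchanged. The Width/LSB computation below models exactly that.)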
1970 uint64_t Width = MSB - Imm + 1; 1971 uint64_t LSB = Imm; 1972 1973 OpUsefulBits <<= Width; 1974 --OpUsefulBits; 1975 1976 if (Op.getOperand(1) == Orig) { 1977 // Copy the low bits from the result to bits starting from LSB. 1978 Mask = ResultUsefulBits & OpUsefulBits; 1979 Mask <<= LSB; 1980 } 1981 1982 if (Op.getOperand(0) == Orig) 1983 // Bits starting from LSB in the input contribute to the result. 1984 Mask |= (ResultUsefulBits & ~OpUsefulBits); 1985 } else { 1986 // The instruction is a BFI. 1987 uint64_t Width = MSB + 1; 1988 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 1989 1990 OpUsefulBits <<= Width; 1991 --OpUsefulBits; 1992 OpUsefulBits <<= LSB; 1993 1994 if (Op.getOperand(1) == Orig) { 1995 // Copy the bits from the result to the zero bits. 1996 Mask = ResultUsefulBits & OpUsefulBits; 1997 Mask.lshrInPlace(LSB); 1998 } 1999 2000 if (Op.getOperand(0) == Orig) 2001 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2002 } 2003 2004 UsefulBits &= Mask; 2005 } 2006 2007 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2008 SDValue Orig, unsigned Depth) { 2009 2010 // Users of this node should have already been instruction selected 2011 // FIXME: Can we turn that into an assert? 2012 if (!UserNode->isMachineOpcode()) 2013 return; 2014 2015 switch (UserNode->getMachineOpcode()) { 2016 default: 2017 return; 2018 case AArch64::ANDSWri: 2019 case AArch64::ANDSXri: 2020 case AArch64::ANDWri: 2021 case AArch64::ANDXri: 2022 // We increment Depth only when we call the getUsefulBits 2023 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2024 Depth); 2025 case AArch64::UBFMWri: 2026 case AArch64::UBFMXri: 2027 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2028 2029 case AArch64::ORRWrs: 2030 case AArch64::ORRXrs: 2031 if (UserNode->getOperand(1) != Orig) 2032 return; 2033 return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2034 Depth); 2035 case AArch64::BFMWri: 2036 case AArch64::BFMXri: 2037 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2038 2039 case AArch64::STRBBui: 2040 case AArch64::STURBBi: 2041 if (UserNode->getOperand(0) != Orig) 2042 return; 2043 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); 2044 return; 2045 2046 case AArch64::STRHHui: 2047 case AArch64::STURHHi: 2048 if (UserNode->getOperand(0) != Orig) 2049 return; 2050 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2051 return; 2052 } 2053 } 2054 2055 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2056 if (Depth >= 6) 2057 return; 2058 // Initialize UsefulBits 2059 if (!Depth) { 2060 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2061 // At the beginning, assume every produced bits is useful 2062 UsefulBits = APInt(Bitwidth, 0); 2063 UsefulBits.flipAllBits(); 2064 } 2065 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2066 2067 for (SDNode *Node : Op.getNode()->uses()) { 2068 // A use cannot produce useful bits 2069 APInt UsefulBitsForUse = APInt(UsefulBits); 2070 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 2071 UsersUsefulBits |= UsefulBitsForUse; 2072 } 2073 // UsefulBits contains the produced bits that are meaningful for the 2074 // current definition, thus a user cannot make a bit meaningful at 2075 // this point 2076 UsefulBits &= UsersUsefulBits; 2077 } 2078 2079 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 2080 /// ShlAmount is negative, do a (logical) right-shift instead. 
If ShlAmount is 2081 /// 0, return Op unchanged. 2082 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 2083 if (ShlAmount == 0) 2084 return Op; 2085 2086 EVT VT = Op.getValueType(); 2087 SDLoc dl(Op); 2088 unsigned BitWidth = VT.getSizeInBits(); 2089 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2090 2091 SDNode *ShiftNode; 2092 if (ShlAmount > 0) { 2093 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 2094 ShiftNode = CurDAG->getMachineNode( 2095 UBFMOpc, dl, VT, Op, 2096 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 2097 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 2098 } else { 2099 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 2100 assert(ShlAmount < 0 && "expected right shift"); 2101 int ShrAmount = -ShlAmount; 2102 ShiftNode = CurDAG->getMachineNode( 2103 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 2104 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 2105 } 2106 2107 return SDValue(ShiftNode, 0); 2108 } 2109 2110 /// Does this tree qualify as an attempt to move a bitfield into position, 2111 /// essentially "(and (shl VAL, N), Mask)". 2112 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 2113 bool BiggerPattern, 2114 SDValue &Src, int &ShiftAmount, 2115 int &MaskWidth) { 2116 EVT VT = Op.getValueType(); 2117 unsigned BitWidth = VT.getSizeInBits(); 2118 (void)BitWidth; 2119 assert(BitWidth == 32 || BitWidth == 64); 2120 2121 KnownBits Known = CurDAG->computeKnownBits(Op); 2122 2123 // Non-zero in the sense that they're not provably zero, which is the key 2124 // point if we want to use this value 2125 uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 2126 2127 // Discard a constant AND mask if present. It's safe because the node will 2128 // already have been factored into the computeKnownBits calculation above. 2129 uint64_t AndImm; 2130 if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { 2131 assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0); 2132 Op = Op.getOperand(0); 2133 } 2134 2135 // Don't match if the SHL has more than one use, since then we'll end up 2136 // generating SHL+UBFIZ instead of just keeping SHL+AND. 2137 if (!BiggerPattern && !Op.hasOneUse()) 2138 return false; 2139 2140 uint64_t ShlImm; 2141 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 2142 return false; 2143 Op = Op.getOperand(0); 2144 2145 if (!isShiftedMask_64(NonZeroBits)) 2146 return false; 2147 2148 ShiftAmount = countTrailingZeros(NonZeroBits); 2149 MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); 2150 2151 // BFI encompasses sufficiently many nodes that it's worth inserting an extra 2152 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL 2153 // amount. BiggerPattern is true when this pattern is being matched for BFI, 2154 // BiggerPattern is false when this pattern is being matched for UBFIZ, in 2155 // which case it is not profitable to insert an extra shift. 2156 if (ShlImm - ShiftAmount != 0 && !BiggerPattern) 2157 return false; 2158 Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); 2159 2160 return true; 2161 } 2162 2163 static bool isShiftedMask(uint64_t Mask, EVT VT) { 2164 assert(VT == MVT::i32 || VT == MVT::i64); 2165 if (VT == MVT::i32) 2166 return isShiftedMask_32(Mask); 2167 return isShiftedMask_64(Mask); 2168 } 2169 2170 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 2171 // inserted only sets known zero bits. 
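// Illustrative example (i32; registers and constants chosen for exposition
// only):
//   (or (and X, 0xffffff00), 0x000000a5)
// 0xa5 is not encodable as a logical immediate, so a plain ORR would need the
// constant materialized separately; instead this can be selected as
//   mov   w9, #0xa5
//   bfxil wX, w9, #0, #8
// which reuses X directly and drops the AND.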
2172 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 2173 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2174 2175 EVT VT = N->getValueType(0); 2176 if (VT != MVT::i32 && VT != MVT::i64) 2177 return false; 2178 2179 unsigned BitWidth = VT.getSizeInBits(); 2180 2181 uint64_t OrImm; 2182 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 2183 return false; 2184 2185 // Skip this transformation if the ORR immediate can be encoded in the ORR. 2186 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 2187 // performance neutral. 2188 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 2189 return false; 2190 2191 uint64_t MaskImm; 2192 SDValue And = N->getOperand(0); 2193 // Must be a single use AND with an immediate operand. 2194 if (!And.hasOneUse() || 2195 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 2196 return false; 2197 2198 // Compute the Known Zero for the AND as this allows us to catch more general 2199 // cases than just looking for AND with imm. 2200 KnownBits Known = CurDAG->computeKnownBits(And); 2201 2202 // Non-zero in the sense that they're not provably zero, which is the key 2203 // point if we want to use this value. 2204 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 2205 2206 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 2207 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 2208 return false; 2209 2210 // The bits being inserted must only set those bits that are known to be zero. 2211 if ((OrImm & NotKnownZero) != 0) { 2212 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 2213 // currently handle this case. 2214 return false; 2215 } 2216 2217 // BFI/BFXIL dst, src, #lsb, #width. 2218 int LSB = countTrailingOnes(NotKnownZero); 2219 int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation(); 2220 2221 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 2222 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2223 unsigned ImmS = Width - 1; 2224 2225 // If we're creating a BFI instruction avoid cases where we need more 2226 // instructions to materialize the BFI constant as compared to the original 2227 // ORR. A BFXIL will use the same constant as the original ORR, so the code 2228 // should be no worse in this case. 2229 bool IsBFI = LSB != 0; 2230 uint64_t BFIImm = OrImm >> LSB; 2231 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 2232 // We have a BFI instruction and we know the constant can't be materialized 2233 // with a ORR-immediate with the zero register. 2234 unsigned OrChunks = 0, BFIChunks = 0; 2235 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 2236 if (((OrImm >> Shift) & 0xFFFF) != 0) 2237 ++OrChunks; 2238 if (((BFIImm >> Shift) & 0xFFFF) != 0) 2239 ++BFIChunks; 2240 } 2241 if (BFIChunks > OrChunks) 2242 return false; 2243 } 2244 2245 // Materialize the constant to be inserted. 2246 SDLoc DL(N); 2247 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; 2248 SDNode *MOVI = CurDAG->getMachineNode( 2249 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); 2250 2251 // Create the BFI/BFXIL instruction. 2252 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), 2253 CurDAG->getTargetConstant(ImmR, DL, VT), 2254 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2255 unsigned Opc = (VT == MVT::i32) ? 
AArch64::BFMWri : AArch64::BFMXri; 2256 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2257 return true; 2258 } 2259 2260 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, 2261 SelectionDAG *CurDAG) { 2262 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2263 2264 EVT VT = N->getValueType(0); 2265 if (VT != MVT::i32 && VT != MVT::i64) 2266 return false; 2267 2268 unsigned BitWidth = VT.getSizeInBits(); 2269 2270 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 2271 // have the expected shape. Try to undo that. 2272 2273 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); 2274 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); 2275 2276 // Given a OR operation, check if we have the following pattern 2277 // ubfm c, b, imm, imm2 (or something that does the same jobs, see 2278 // isBitfieldExtractOp) 2279 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and 2280 // countTrailingZeros(mask2) == imm2 - imm + 1 2281 // f = d | c 2282 // if yes, replace the OR instruction with: 2283 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2 2284 2285 // OR is commutative, check all combinations of operand order and values of 2286 // BiggerPattern, i.e. 2287 // Opd0, Opd1, BiggerPattern=false 2288 // Opd1, Opd0, BiggerPattern=false 2289 // Opd0, Opd1, BiggerPattern=true 2290 // Opd1, Opd0, BiggerPattern=true 2291 // Several of these combinations may match, so check with BiggerPattern=false 2292 // first since that will produce better results by matching more instructions 2293 // and/or inserting fewer extra instructions. 2294 for (int I = 0; I < 4; ++I) { 2295 2296 SDValue Dst, Src; 2297 unsigned ImmR, ImmS; 2298 bool BiggerPattern = I / 2; 2299 SDValue OrOpd0Val = N->getOperand(I % 2); 2300 SDNode *OrOpd0 = OrOpd0Val.getNode(); 2301 SDValue OrOpd1Val = N->getOperand((I + 1) % 2); 2302 SDNode *OrOpd1 = OrOpd1Val.getNode(); 2303 2304 unsigned BFXOpc; 2305 int DstLSB, Width; 2306 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, 2307 NumberOfIgnoredLowBits, BiggerPattern)) { 2308 // Check that the returned opcode is compatible with the pattern, 2309 // i.e., same type and zero extended (U and not S) 2310 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || 2311 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) 2312 continue; 2313 2314 // Compute the width of the bitfield insertion 2315 DstLSB = 0; 2316 Width = ImmS - ImmR + 1; 2317 // FIXME: This constraint is to catch bitfield insertion we may 2318 // want to widen the pattern if we want to grab general bitfied 2319 // move case 2320 if (Width <= 0) 2321 continue; 2322 2323 // If the mask on the insertee is correct, we have a BFXIL operation. We 2324 // can share the ImmR and ImmS values from the already-computed UBFM. 2325 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, 2326 BiggerPattern, 2327 Src, DstLSB, Width)) { 2328 ImmR = (BitWidth - DstLSB) % BitWidth; 2329 ImmS = Width - 1; 2330 } else 2331 continue; 2332 2333 // Check the second part of the pattern 2334 EVT VT = OrOpd1Val.getValueType(); 2335 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); 2336 2337 // Compute the Known Zero for the candidate of the first operand. 2338 // This allows to catch more general case than just looking for 2339 // AND with imm. Indeed, simplify-demanded-bits may have removed 2340 // the AND instruction because it proves it was useless. 
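// All that matters below is that every bit the insertion will write is
// already provably zero in this operand, whether or not an explicit masking
// AND is still present.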
2341 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 2342 2343 // Check if there is enough room for the second operand to appear 2344 // in the first one 2345 APInt BitsToBeInserted = 2346 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 2347 2348 if ((BitsToBeInserted & ~Known.Zero) != 0) 2349 continue; 2350 2351 // Set the first operand 2352 uint64_t Imm; 2353 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 2354 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 2355 // In that case, we can eliminate the AND 2356 Dst = OrOpd1->getOperand(0); 2357 else 2358 // Maybe the AND has been removed by simplify-demanded-bits 2359 // or is useful because it discards more bits 2360 Dst = OrOpd1Val; 2361 2362 // both parts match 2363 SDLoc DL(N); 2364 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 2365 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2366 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2367 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2368 return true; 2369 } 2370 2371 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 2372 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 2373 // mask (e.g., 0x000ffff0). 2374 uint64_t Mask0Imm, Mask1Imm; 2375 SDValue And0 = N->getOperand(0); 2376 SDValue And1 = N->getOperand(1); 2377 if (And0.hasOneUse() && And1.hasOneUse() && 2378 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 2379 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 2380 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 2381 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 2382 2383 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 2384 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 2385 // bits to be inserted. 2386 if (isShiftedMask(Mask0Imm, VT)) { 2387 std::swap(And0, And1); 2388 std::swap(Mask0Imm, Mask1Imm); 2389 } 2390 2391 SDValue Src = And1->getOperand(0); 2392 SDValue Dst = And0->getOperand(0); 2393 unsigned LSB = countTrailingZeros(Mask1Imm); 2394 int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation(); 2395 2396 // The BFXIL inserts the low-order bits from a source register, so right 2397 // shift the needed bits into place. 2398 SDLoc DL(N); 2399 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2400 SDNode *LSR = CurDAG->getMachineNode( 2401 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT), 2402 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 2403 2404 // BFXIL is an alias of BFM, so translate to BFM operands. 2405 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2406 unsigned ImmS = Width - 1; 2407 2408 // Create the BFXIL instruction. 2409 SDValue Ops[] = {Dst, SDValue(LSR, 0), 2410 CurDAG->getTargetConstant(ImmR, DL, VT), 2411 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2412 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2413 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2414 return true; 2415 } 2416 2417 return false; 2418 } 2419 2420 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 2421 if (N->getOpcode() != ISD::OR) 2422 return false; 2423 2424 APInt NUsefulBits; 2425 getUsefulBits(SDValue(N, 0), NUsefulBits); 2426 2427 // If all bits are not useful, just return UNDEF. 
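// (When no user reads any bit of this OR, nothing needs to be computed at
// all, so an IMPLICIT_DEF of the right type is sufficient.)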
2428 if (!NUsefulBits) { 2429 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 2430 return true; 2431 } 2432 2433 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 2434 return true; 2435 2436 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 2437 } 2438 2439 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 2440 /// equivalent of a left shift by a constant amount followed by an and masking 2441 /// out a contiguous set of bits. 2442 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 2443 if (N->getOpcode() != ISD::AND) 2444 return false; 2445 2446 EVT VT = N->getValueType(0); 2447 if (VT != MVT::i32 && VT != MVT::i64) 2448 return false; 2449 2450 SDValue Op0; 2451 int DstLSB, Width; 2452 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 2453 Op0, DstLSB, Width)) 2454 return false; 2455 2456 // ImmR is the rotate right amount. 2457 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 2458 // ImmS is the most significant bit of the source to be moved. 2459 unsigned ImmS = Width - 1; 2460 2461 SDLoc DL(N); 2462 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 2463 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2464 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2465 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2466 return true; 2467 } 2468 2469 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 2470 /// variable shift/rotate instructions. 2471 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 2472 EVT VT = N->getValueType(0); 2473 2474 unsigned Opc; 2475 switch (N->getOpcode()) { 2476 case ISD::ROTR: 2477 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 2478 break; 2479 case ISD::SHL: 2480 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 2481 break; 2482 case ISD::SRL: 2483 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 2484 break; 2485 case ISD::SRA: 2486 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 2487 break; 2488 default: 2489 return false; 2490 } 2491 2492 uint64_t Size; 2493 uint64_t Bits; 2494 if (VT == MVT::i32) { 2495 Bits = 5; 2496 Size = 32; 2497 } else if (VT == MVT::i64) { 2498 Bits = 6; 2499 Size = 64; 2500 } else 2501 return false; 2502 2503 SDValue ShiftAmt = N->getOperand(1); 2504 SDLoc DL(N); 2505 SDValue NewShiftAmt; 2506 2507 // Skip over an extend of the shift amount. 2508 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 2509 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 2510 ShiftAmt = ShiftAmt->getOperand(0); 2511 2512 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 2513 SDValue Add0 = ShiftAmt->getOperand(0); 2514 SDValue Add1 = ShiftAmt->getOperand(1); 2515 uint64_t Add0Imm; 2516 uint64_t Add1Imm; 2517 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 2518 // to avoid the ADD/SUB. 2519 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) 2520 NewShiftAmt = Add0; 2521 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2522 // generate a NEG instead of a SUB of a constant. 
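// For example (i64, illustrative): a shift amount of (sub 64, y) behaves
// exactly like (neg y), since LSLV/LSRV/ASRV/RORV only consume the shift
// amount modulo 64 and (64 - y) == -y (mod 64).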
2523 else if (ShiftAmt->getOpcode() == ISD::SUB && 2524 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 2525 (Add0Imm % Size == 0)) { 2526 unsigned NegOpc; 2527 unsigned ZeroReg; 2528 EVT SubVT = ShiftAmt->getValueType(0); 2529 if (SubVT == MVT::i32) { 2530 NegOpc = AArch64::SUBWrr; 2531 ZeroReg = AArch64::WZR; 2532 } else { 2533 assert(SubVT == MVT::i64); 2534 NegOpc = AArch64::SUBXrr; 2535 ZeroReg = AArch64::XZR; 2536 } 2537 SDValue Zero = 2538 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 2539 MachineSDNode *Neg = 2540 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 2541 NewShiftAmt = SDValue(Neg, 0); 2542 } else 2543 return false; 2544 } else { 2545 // If the shift amount is masked with an AND, check that the mask covers the 2546 // bits that are implicitly ANDed off by the above opcodes and if so, skip 2547 // the AND. 2548 uint64_t MaskImm; 2549 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm)) 2550 return false; 2551 2552 if (countTrailingOnes(MaskImm) < Bits) 2553 return false; 2554 2555 NewShiftAmt = ShiftAmt->getOperand(0); 2556 } 2557 2558 // Narrow/widen the shift amount to match the size of the shift operation. 2559 if (VT == MVT::i32) 2560 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 2561 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 2562 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 2563 MachineSDNode *Ext = CurDAG->getMachineNode( 2564 AArch64::SUBREG_TO_REG, DL, VT, 2565 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 2566 NewShiftAmt = SDValue(Ext, 0); 2567 } 2568 2569 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 2570 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2571 return true; 2572 } 2573 2574 bool 2575 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 2576 unsigned RegWidth) { 2577 APFloat FVal(0.0); 2578 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 2579 FVal = CN->getValueAPF(); 2580 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 2581 // Some otherwise illegal constants are allowed in this case. 2582 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || 2583 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) 2584 return false; 2585 2586 ConstantPoolSDNode *CN = 2587 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); 2588 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); 2589 } else 2590 return false; 2591 2592 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits 2593 // is between 1 and 32 for a destination w-register, or 1 and 64 for an 2594 // x-register. 2595 // 2596 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we 2597 // want THIS_NODE to be 2^fbits. This is much easier to deal with using 2598 // integers. 2599 bool IsExact; 2600 2601 // fbits is between 1 and 64 in the worst-case, which means the fmul 2602 // could have 2^64 as an actual operand. Need 65 bits of precision. 2603 APSInt IntVal(65, true); 2604 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); 2605 2606 // N.b. isPowerOf2 also checks for > 0. 2607 if (!IsExact || !IntVal.isPowerOf2()) return false; 2608 unsigned FBits = IntVal.logBase2(); 2609 2610 // Checks above should have guaranteed that we haven't lost information in 2611 // finding FBits, but it must still be in range. 
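// (FBits == 0 would correspond to multiplying by 1.0, which is not a real
// fixed-point conversion and is not encodable in the #fbits field anyway.)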
2612 if (FBits == 0 || FBits > RegWidth) return false;
2613
2614 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2615 return true;
2616 }
2617
2618 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
2619 // integer values of its fields, and combines them into a single value to be
2620 // used in the MRS/MSR instruction.
2621 static int getIntOperandFromRegisterString(StringRef RegString) {
2622 SmallVector<StringRef, 5> Fields;
2623 RegString.split(Fields, ':');
2624
2625 if (Fields.size() == 1)
2626 return -1;
2627
2628 assert(Fields.size() == 5
2629 && "Invalid number of fields in read register string");
2630
2631 SmallVector<int, 5> Ops;
2632 bool AllIntFields = true;
2633
2634 for (StringRef Field : Fields) {
2635 unsigned IntField;
2636 AllIntFields &= !Field.getAsInteger(10, IntField);
2637 Ops.push_back(IntField);
2638 }
2639
2640 assert(AllIntFields &&
2641 "Unexpected non-integer value in special register string.");
2642
2643 // Need to combine the integer fields of the string into a single value
2644 // based on the bit encoding of MRS/MSR instruction.
2645 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2646 (Ops[3] << 3) | (Ops[4]);
2647 }
2648
2649 // Lower the read_register intrinsic to an MRS instruction node if the special
2650 // register string argument is either of the form detailed in the ACLE (the
2651 // form described in getIntOperandFromRegisterString) or is a named register
2652 // known by the MRS SysReg mapper.
2653 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2654 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2655 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2656 SDLoc DL(N);
2657
2658 int Reg = getIntOperandFromRegisterString(RegString->getString());
2659 if (Reg != -1) {
2660 ReplaceNode(N, CurDAG->getMachineNode(
2661 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2662 CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2663 N->getOperand(0)));
2664 return true;
2665 }
2666
2667 // Use the sysreg mapper to map the remaining possible strings to the
2668 // value for the register to be used for the instruction operand.
2669 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2670 if (TheReg && TheReg->Readable &&
2671 TheReg->haveFeatures(Subtarget->getFeatureBits()))
2672 Reg = TheReg->Encoding;
2673 else
2674 Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2675
2676 if (Reg != -1) {
2677 ReplaceNode(N, CurDAG->getMachineNode(
2678 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2679 CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2680 N->getOperand(0)));
2681 return true;
2682 }
2683
2684 if (RegString->getString() == "pc") {
2685 ReplaceNode(N, CurDAG->getMachineNode(
2686 AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
2687 CurDAG->getTargetConstant(0, DL, MVT::i32),
2688 N->getOperand(0)));
2689 return true;
2690 }
2691
2692 return false;
2693 }
2694
2695 // Lower the write_register intrinsic to an MSR instruction node if the special
2696 // register string argument is either of the form detailed in the ACLE (the
2697 // form described in getIntOperandFromRegisterString) or is a named register
2698 // known by the MSR SysReg mapper.
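// For example, a generic register string such as "3:3:13:0:2" (op0=3, op1=3,
// CRn=13, CRm=0, op2=2, i.e. S3_3_C13_C0_2) packs to
// (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 = 0xde82.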
2699 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { 2700 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 2701 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 2702 SDLoc DL(N); 2703 2704 int Reg = getIntOperandFromRegisterString(RegString->getString()); 2705 if (Reg != -1) { 2706 ReplaceNode( 2707 N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, 2708 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2709 N->getOperand(2), N->getOperand(0))); 2710 return true; 2711 } 2712 2713 // Check if the register was one of those allowed as the pstatefield value in 2714 // the MSR (immediate) instruction. To accept the values allowed in the 2715 // pstatefield for the MSR (immediate) instruction, we also require that an 2716 // immediate value has been provided as an argument, we know that this is 2717 // the case as it has been ensured by semantic checking. 2718 auto PMapper = AArch64PState::lookupPStateByName(RegString->getString()); 2719 if (PMapper) { 2720 assert (isa<ConstantSDNode>(N->getOperand(2)) 2721 && "Expected a constant integer expression."); 2722 unsigned Reg = PMapper->Encoding; 2723 uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 2724 unsigned State; 2725 if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) { 2726 assert(Immed < 2 && "Bad imm"); 2727 State = AArch64::MSRpstateImm1; 2728 } else { 2729 assert(Immed < 16 && "Bad imm"); 2730 State = AArch64::MSRpstateImm4; 2731 } 2732 ReplaceNode(N, CurDAG->getMachineNode( 2733 State, DL, MVT::Other, 2734 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2735 CurDAG->getTargetConstant(Immed, DL, MVT::i16), 2736 N->getOperand(0))); 2737 return true; 2738 } 2739 2740 // Use the sysreg mapper to attempt to map the remaining possible strings 2741 // to the value for the register to be used for the MSR (register) 2742 // instruction operand. 2743 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 2744 if (TheReg && TheReg->Writeable && 2745 TheReg->haveFeatures(Subtarget->getFeatureBits())) 2746 Reg = TheReg->Encoding; 2747 else 2748 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 2749 if (Reg != -1) { 2750 ReplaceNode(N, CurDAG->getMachineNode( 2751 AArch64::MSR, DL, MVT::Other, 2752 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2753 N->getOperand(2), N->getOperand(0))); 2754 return true; 2755 } 2756 2757 return false; 2758 } 2759 2760 /// We've got special pseudo-instructions for these 2761 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 2762 unsigned Opcode; 2763 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 2764 2765 // Leave IR for LSE if subtarget supports it. 2766 if (Subtarget->hasLSE()) return false; 2767 2768 if (MemTy == MVT::i8) 2769 Opcode = AArch64::CMP_SWAP_8; 2770 else if (MemTy == MVT::i16) 2771 Opcode = AArch64::CMP_SWAP_16; 2772 else if (MemTy == MVT::i32) 2773 Opcode = AArch64::CMP_SWAP_32; 2774 else if (MemTy == MVT::i64) 2775 Opcode = AArch64::CMP_SWAP_64; 2776 else 2777 llvm_unreachable("Unknown AtomicCmpSwap type"); 2778 2779 MVT RegTy = MemTy == MVT::i64 ? 
MVT::i64 : MVT::i32; 2780 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 2781 N->getOperand(0)}; 2782 SDNode *CmpSwap = CurDAG->getMachineNode( 2783 Opcode, SDLoc(N), 2784 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 2785 2786 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 2787 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 2788 2789 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 2790 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 2791 CurDAG->RemoveDeadNode(N); 2792 2793 return true; 2794 } 2795 2796 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 2797 // tagp(FrameIndex, IRGstack, tag_offset): 2798 // since the offset between FrameIndex and IRGstack is a compile-time 2799 // constant, this can be lowered to a single ADDG instruction. 2800 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 2801 return false; 2802 } 2803 2804 SDValue IRG_SP = N->getOperand(2); 2805 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 2806 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != 2807 Intrinsic::aarch64_irg_sp) { 2808 return false; 2809 } 2810 2811 const TargetLowering *TLI = getTargetLowering(); 2812 SDLoc DL(N); 2813 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 2814 SDValue FiOp = CurDAG->getTargetFrameIndex( 2815 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 2816 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2817 2818 SDNode *Out = CurDAG->getMachineNode( 2819 AArch64::TAGPstack, DL, MVT::i64, 2820 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 2821 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 2822 ReplaceNode(N, Out); 2823 return true; 2824 } 2825 2826 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 2827 assert(isa<ConstantSDNode>(N->getOperand(3)) && 2828 "llvm.aarch64.tagp third argument must be an immediate"); 2829 if (trySelectStackSlotTagP(N)) 2830 return; 2831 // FIXME: above applies in any case when offset between Op1 and Op2 is a 2832 // compile-time constant, not just for stack allocations. 2833 2834 // General case for unrelated pointers in Op1 and Op2. 2835 SDLoc DL(N); 2836 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2837 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 2838 {N->getOperand(1), N->getOperand(2)}); 2839 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 2840 {SDValue(N1, 0), N->getOperand(2)}); 2841 SDNode *N3 = CurDAG->getMachineNode( 2842 AArch64::ADDG, DL, MVT::i64, 2843 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 2844 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 2845 ReplaceNode(N, N3); 2846 } 2847 2848 void AArch64DAGToDAGISel::Select(SDNode *Node) { 2849 // If we have a custom node, we already have selected! 2850 if (Node->isMachineOpcode()) { 2851 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 2852 Node->setNodeId(-1); 2853 return; 2854 } 2855 2856 // Few custom selection stuff. 
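// (Opcodes that are not custom-selected in the switch below simply break out
// and are left to the TableGen-generated matcher at the end of this function.)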
2857 EVT VT = Node->getValueType(0); 2858 2859 switch (Node->getOpcode()) { 2860 default: 2861 break; 2862 2863 case ISD::ATOMIC_CMP_SWAP: 2864 if (SelectCMP_SWAP(Node)) 2865 return; 2866 break; 2867 2868 case ISD::READ_REGISTER: 2869 if (tryReadRegister(Node)) 2870 return; 2871 break; 2872 2873 case ISD::WRITE_REGISTER: 2874 if (tryWriteRegister(Node)) 2875 return; 2876 break; 2877 2878 case ISD::ADD: 2879 if (tryMLAV64LaneV128(Node)) 2880 return; 2881 break; 2882 2883 case ISD::LOAD: { 2884 // Try to select as an indexed load. Fall through to normal processing 2885 // if we can't. 2886 if (tryIndexedLoad(Node)) 2887 return; 2888 break; 2889 } 2890 2891 case ISD::SRL: 2892 case ISD::AND: 2893 case ISD::SRA: 2894 case ISD::SIGN_EXTEND_INREG: 2895 if (tryBitfieldExtractOp(Node)) 2896 return; 2897 if (tryBitfieldInsertInZeroOp(Node)) 2898 return; 2899 LLVM_FALLTHROUGH; 2900 case ISD::ROTR: 2901 case ISD::SHL: 2902 if (tryShiftAmountMod(Node)) 2903 return; 2904 break; 2905 2906 case ISD::SIGN_EXTEND: 2907 if (tryBitfieldExtractOpFromSExt(Node)) 2908 return; 2909 break; 2910 2911 case ISD::OR: 2912 if (tryBitfieldInsertOp(Node)) 2913 return; 2914 break; 2915 2916 case ISD::EXTRACT_VECTOR_ELT: { 2917 // Extracting lane zero is a special case where we can just use a plain 2918 // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for 2919 // the rest of the compiler, especially the register allocator and copyi 2920 // propagation, to reason about, so is preferred when it's possible to 2921 // use it. 2922 ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1)); 2923 // Bail and use the default Select() for non-zero lanes. 2924 if (LaneNode->getZExtValue() != 0) 2925 break; 2926 // If the element type is not the same as the result type, likewise 2927 // bail and use the default Select(), as there's more to do than just 2928 // a cross-class COPY. This catches extracts of i8 and i16 elements 2929 // since they will need an explicit zext. 2930 if (VT != Node->getOperand(0).getValueType().getVectorElementType()) 2931 break; 2932 unsigned SubReg; 2933 switch (Node->getOperand(0) 2934 .getValueType() 2935 .getVectorElementType() 2936 .getSizeInBits()) { 2937 default: 2938 llvm_unreachable("Unexpected vector element type!"); 2939 case 64: 2940 SubReg = AArch64::dsub; 2941 break; 2942 case 32: 2943 SubReg = AArch64::ssub; 2944 break; 2945 case 16: 2946 SubReg = AArch64::hsub; 2947 break; 2948 case 8: 2949 llvm_unreachable("unexpected zext-requiring extract element!"); 2950 } 2951 SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, 2952 Node->getOperand(0)); 2953 LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> "); 2954 LLVM_DEBUG(Extract->dumpr(CurDAG)); 2955 LLVM_DEBUG(dbgs() << "\n"); 2956 ReplaceNode(Node, Extract.getNode()); 2957 return; 2958 } 2959 case ISD::Constant: { 2960 // Materialize zero constants as copies from WZR/XZR. This allows 2961 // the coalescer to propagate these into other instructions. 
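// (WZR/XZR always read as zero, so such a copy is effectively free and can
// later be folded into the zero-register operand of many instructions.)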
2962 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 2963 if (ConstNode->isNullValue()) { 2964 if (VT == MVT::i32) { 2965 SDValue New = CurDAG->getCopyFromReg( 2966 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 2967 ReplaceNode(Node, New.getNode()); 2968 return; 2969 } else if (VT == MVT::i64) { 2970 SDValue New = CurDAG->getCopyFromReg( 2971 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 2972 ReplaceNode(Node, New.getNode()); 2973 return; 2974 } 2975 } 2976 break; 2977 } 2978 2979 case ISD::FrameIndex: { 2980 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 2981 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 2982 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 2983 const TargetLowering *TLI = getTargetLowering(); 2984 SDValue TFI = CurDAG->getTargetFrameIndex( 2985 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 2986 SDLoc DL(Node); 2987 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 2988 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 2989 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 2990 return; 2991 } 2992 case ISD::INTRINSIC_W_CHAIN: { 2993 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 2994 switch (IntNo) { 2995 default: 2996 break; 2997 case Intrinsic::aarch64_ldaxp: 2998 case Intrinsic::aarch64_ldxp: { 2999 unsigned Op = 3000 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 3001 SDValue MemAddr = Node->getOperand(2); 3002 SDLoc DL(Node); 3003 SDValue Chain = Node->getOperand(0); 3004 3005 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 3006 MVT::Other, MemAddr, Chain); 3007 3008 // Transfer memoperands. 3009 MachineMemOperand *MemOp = 3010 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3011 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 3012 ReplaceNode(Node, Ld); 3013 return; 3014 } 3015 case Intrinsic::aarch64_stlxp: 3016 case Intrinsic::aarch64_stxp: { 3017 unsigned Op = 3018 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; 3019 SDLoc DL(Node); 3020 SDValue Chain = Node->getOperand(0); 3021 SDValue ValLo = Node->getOperand(2); 3022 SDValue ValHi = Node->getOperand(3); 3023 SDValue MemAddr = Node->getOperand(4); 3024 3025 // Place arguments in the right order. 3026 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 3027 3028 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 3029 // Transfer memoperands. 
3030 MachineMemOperand *MemOp = 3031 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3032 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 3033 3034 ReplaceNode(Node, St); 3035 return; 3036 } 3037 case Intrinsic::aarch64_neon_ld1x2: 3038 if (VT == MVT::v8i8) { 3039 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 3040 return; 3041 } else if (VT == MVT::v16i8) { 3042 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 3043 return; 3044 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3045 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 3046 return; 3047 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3048 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 3049 return; 3050 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3051 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 3052 return; 3053 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3054 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 3055 return; 3056 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3057 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3058 return; 3059 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3060 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 3061 return; 3062 } 3063 break; 3064 case Intrinsic::aarch64_neon_ld1x3: 3065 if (VT == MVT::v8i8) { 3066 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 3067 return; 3068 } else if (VT == MVT::v16i8) { 3069 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 3070 return; 3071 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3072 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 3073 return; 3074 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3075 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 3076 return; 3077 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3078 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 3079 return; 3080 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3081 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 3082 return; 3083 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3084 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3085 return; 3086 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3087 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 3088 return; 3089 } 3090 break; 3091 case Intrinsic::aarch64_neon_ld1x4: 3092 if (VT == MVT::v8i8) { 3093 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 3094 return; 3095 } else if (VT == MVT::v16i8) { 3096 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 3097 return; 3098 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3099 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 3100 return; 3101 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3102 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 3103 return; 3104 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3105 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 3106 return; 3107 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3108 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 3109 return; 3110 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3111 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3112 return; 3113 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3114 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 3115 return; 3116 } 3117 break; 3118 case Intrinsic::aarch64_neon_ld2: 3119 if (VT == MVT::v8i8) { 3120 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 3121 return; 
3122 } else if (VT == MVT::v16i8) { 3123 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 3124 return; 3125 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3126 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 3127 return; 3128 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3129 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 3130 return; 3131 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3132 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 3133 return; 3134 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3135 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 3136 return; 3137 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3138 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3139 return; 3140 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3141 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 3142 return; 3143 } 3144 break; 3145 case Intrinsic::aarch64_neon_ld3: 3146 if (VT == MVT::v8i8) { 3147 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 3148 return; 3149 } else if (VT == MVT::v16i8) { 3150 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 3151 return; 3152 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3153 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 3154 return; 3155 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3156 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 3157 return; 3158 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3159 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 3160 return; 3161 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3162 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 3163 return; 3164 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3165 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3166 return; 3167 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3168 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 3169 return; 3170 } 3171 break; 3172 case Intrinsic::aarch64_neon_ld4: 3173 if (VT == MVT::v8i8) { 3174 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 3175 return; 3176 } else if (VT == MVT::v16i8) { 3177 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 3178 return; 3179 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3180 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 3181 return; 3182 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3183 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 3184 return; 3185 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3186 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 3187 return; 3188 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3189 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 3190 return; 3191 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3192 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3193 return; 3194 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3195 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 3196 return; 3197 } 3198 break; 3199 case Intrinsic::aarch64_neon_ld2r: 3200 if (VT == MVT::v8i8) { 3201 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 3202 return; 3203 } else if (VT == MVT::v16i8) { 3204 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 3205 return; 3206 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3207 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 3208 return; 3209 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3210 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 
3211 return; 3212 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3213 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 3214 return; 3215 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3216 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 3217 return; 3218 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3219 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 3220 return; 3221 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3222 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 3223 return; 3224 } 3225 break; 3226 case Intrinsic::aarch64_neon_ld3r: 3227 if (VT == MVT::v8i8) { 3228 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 3229 return; 3230 } else if (VT == MVT::v16i8) { 3231 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 3232 return; 3233 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3234 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 3235 return; 3236 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3237 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 3238 return; 3239 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3240 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 3241 return; 3242 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3243 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 3244 return; 3245 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3246 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 3247 return; 3248 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3249 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 3250 return; 3251 } 3252 break; 3253 case Intrinsic::aarch64_neon_ld4r: 3254 if (VT == MVT::v8i8) { 3255 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 3256 return; 3257 } else if (VT == MVT::v16i8) { 3258 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 3259 return; 3260 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3261 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 3262 return; 3263 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3264 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 3265 return; 3266 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3267 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 3268 return; 3269 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3270 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 3271 return; 3272 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3273 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 3274 return; 3275 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3276 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 3277 return; 3278 } 3279 break; 3280 case Intrinsic::aarch64_neon_ld2lane: 3281 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3282 SelectLoadLane(Node, 2, AArch64::LD2i8); 3283 return; 3284 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3285 VT == MVT::v8f16) { 3286 SelectLoadLane(Node, 2, AArch64::LD2i16); 3287 return; 3288 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3289 VT == MVT::v2f32) { 3290 SelectLoadLane(Node, 2, AArch64::LD2i32); 3291 return; 3292 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3293 VT == MVT::v1f64) { 3294 SelectLoadLane(Node, 2, AArch64::LD2i64); 3295 return; 3296 } 3297 break; 3298 case Intrinsic::aarch64_neon_ld3lane: 3299 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3300 SelectLoadLane(Node, 3, AArch64::LD3i8); 3301 return; 3302 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3303 VT == MVT::v8f16) { 3304 
SelectLoadLane(Node, 3, AArch64::LD3i16); 3305 return; 3306 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3307 VT == MVT::v2f32) { 3308 SelectLoadLane(Node, 3, AArch64::LD3i32); 3309 return; 3310 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3311 VT == MVT::v1f64) { 3312 SelectLoadLane(Node, 3, AArch64::LD3i64); 3313 return; 3314 } 3315 break; 3316 case Intrinsic::aarch64_neon_ld4lane: 3317 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3318 SelectLoadLane(Node, 4, AArch64::LD4i8); 3319 return; 3320 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3321 VT == MVT::v8f16) { 3322 SelectLoadLane(Node, 4, AArch64::LD4i16); 3323 return; 3324 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3325 VT == MVT::v2f32) { 3326 SelectLoadLane(Node, 4, AArch64::LD4i32); 3327 return; 3328 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3329 VT == MVT::v1f64) { 3330 SelectLoadLane(Node, 4, AArch64::LD4i64); 3331 return; 3332 } 3333 break; 3334 } 3335 } break; 3336 case ISD::INTRINSIC_WO_CHAIN: { 3337 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 3338 switch (IntNo) { 3339 default: 3340 break; 3341 case Intrinsic::aarch64_tagp: 3342 SelectTagP(Node); 3343 return; 3344 case Intrinsic::aarch64_neon_tbl2: 3345 SelectTable(Node, 2, 3346 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, 3347 false); 3348 return; 3349 case Intrinsic::aarch64_neon_tbl3: 3350 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 3351 : AArch64::TBLv16i8Three, 3352 false); 3353 return; 3354 case Intrinsic::aarch64_neon_tbl4: 3355 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 3356 : AArch64::TBLv16i8Four, 3357 false); 3358 return; 3359 case Intrinsic::aarch64_neon_tbx2: 3360 SelectTable(Node, 2, 3361 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, 3362 true); 3363 return; 3364 case Intrinsic::aarch64_neon_tbx3: 3365 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 3366 : AArch64::TBXv16i8Three, 3367 true); 3368 return; 3369 case Intrinsic::aarch64_neon_tbx4: 3370 SelectTable(Node, 4, VT == MVT::v8i8 ? 
AArch64::TBXv8i8Four 3371 : AArch64::TBXv16i8Four, 3372 true); 3373 return; 3374 case Intrinsic::aarch64_neon_smull: 3375 case Intrinsic::aarch64_neon_umull: 3376 if (tryMULLV64LaneV128(IntNo, Node)) 3377 return; 3378 break; 3379 } 3380 break; 3381 } 3382 case ISD::INTRINSIC_VOID: { 3383 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 3384 if (Node->getNumOperands() >= 3) 3385 VT = Node->getOperand(2)->getValueType(0); 3386 switch (IntNo) { 3387 default: 3388 break; 3389 case Intrinsic::aarch64_neon_st1x2: { 3390 if (VT == MVT::v8i8) { 3391 SelectStore(Node, 2, AArch64::ST1Twov8b); 3392 return; 3393 } else if (VT == MVT::v16i8) { 3394 SelectStore(Node, 2, AArch64::ST1Twov16b); 3395 return; 3396 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3397 SelectStore(Node, 2, AArch64::ST1Twov4h); 3398 return; 3399 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3400 SelectStore(Node, 2, AArch64::ST1Twov8h); 3401 return; 3402 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3403 SelectStore(Node, 2, AArch64::ST1Twov2s); 3404 return; 3405 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3406 SelectStore(Node, 2, AArch64::ST1Twov4s); 3407 return; 3408 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3409 SelectStore(Node, 2, AArch64::ST1Twov2d); 3410 return; 3411 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3412 SelectStore(Node, 2, AArch64::ST1Twov1d); 3413 return; 3414 } 3415 break; 3416 } 3417 case Intrinsic::aarch64_neon_st1x3: { 3418 if (VT == MVT::v8i8) { 3419 SelectStore(Node, 3, AArch64::ST1Threev8b); 3420 return; 3421 } else if (VT == MVT::v16i8) { 3422 SelectStore(Node, 3, AArch64::ST1Threev16b); 3423 return; 3424 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3425 SelectStore(Node, 3, AArch64::ST1Threev4h); 3426 return; 3427 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3428 SelectStore(Node, 3, AArch64::ST1Threev8h); 3429 return; 3430 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3431 SelectStore(Node, 3, AArch64::ST1Threev2s); 3432 return; 3433 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3434 SelectStore(Node, 3, AArch64::ST1Threev4s); 3435 return; 3436 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3437 SelectStore(Node, 3, AArch64::ST1Threev2d); 3438 return; 3439 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3440 SelectStore(Node, 3, AArch64::ST1Threev1d); 3441 return; 3442 } 3443 break; 3444 } 3445 case Intrinsic::aarch64_neon_st1x4: { 3446 if (VT == MVT::v8i8) { 3447 SelectStore(Node, 4, AArch64::ST1Fourv8b); 3448 return; 3449 } else if (VT == MVT::v16i8) { 3450 SelectStore(Node, 4, AArch64::ST1Fourv16b); 3451 return; 3452 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3453 SelectStore(Node, 4, AArch64::ST1Fourv4h); 3454 return; 3455 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3456 SelectStore(Node, 4, AArch64::ST1Fourv8h); 3457 return; 3458 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3459 SelectStore(Node, 4, AArch64::ST1Fourv2s); 3460 return; 3461 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3462 SelectStore(Node, 4, AArch64::ST1Fourv4s); 3463 return; 3464 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3465 SelectStore(Node, 4, AArch64::ST1Fourv2d); 3466 return; 3467 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3468 SelectStore(Node, 4, AArch64::ST1Fourv1d); 3469 return; 3470 } 3471 break; 3472 } 3473 case Intrinsic::aarch64_neon_st2: { 3474 if (VT == MVT::v8i8) { 3475 SelectStore(Node, 2, AArch64::ST2Twov8b); 3476 return; 3477 } else if (VT == MVT::v16i8) { 3478 
SelectStore(Node, 2, AArch64::ST2Twov16b); 3479 return; 3480 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3481 SelectStore(Node, 2, AArch64::ST2Twov4h); 3482 return; 3483 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3484 SelectStore(Node, 2, AArch64::ST2Twov8h); 3485 return; 3486 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3487 SelectStore(Node, 2, AArch64::ST2Twov2s); 3488 return; 3489 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3490 SelectStore(Node, 2, AArch64::ST2Twov4s); 3491 return; 3492 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3493 SelectStore(Node, 2, AArch64::ST2Twov2d); 3494 return; 3495 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3496 SelectStore(Node, 2, AArch64::ST1Twov1d); 3497 return; 3498 } 3499 break; 3500 } 3501 case Intrinsic::aarch64_neon_st3: { 3502 if (VT == MVT::v8i8) { 3503 SelectStore(Node, 3, AArch64::ST3Threev8b); 3504 return; 3505 } else if (VT == MVT::v16i8) { 3506 SelectStore(Node, 3, AArch64::ST3Threev16b); 3507 return; 3508 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3509 SelectStore(Node, 3, AArch64::ST3Threev4h); 3510 return; 3511 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3512 SelectStore(Node, 3, AArch64::ST3Threev8h); 3513 return; 3514 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3515 SelectStore(Node, 3, AArch64::ST3Threev2s); 3516 return; 3517 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3518 SelectStore(Node, 3, AArch64::ST3Threev4s); 3519 return; 3520 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3521 SelectStore(Node, 3, AArch64::ST3Threev2d); 3522 return; 3523 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3524 SelectStore(Node, 3, AArch64::ST1Threev1d); 3525 return; 3526 } 3527 break; 3528 } 3529 case Intrinsic::aarch64_neon_st4: { 3530 if (VT == MVT::v8i8) { 3531 SelectStore(Node, 4, AArch64::ST4Fourv8b); 3532 return; 3533 } else if (VT == MVT::v16i8) { 3534 SelectStore(Node, 4, AArch64::ST4Fourv16b); 3535 return; 3536 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3537 SelectStore(Node, 4, AArch64::ST4Fourv4h); 3538 return; 3539 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3540 SelectStore(Node, 4, AArch64::ST4Fourv8h); 3541 return; 3542 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3543 SelectStore(Node, 4, AArch64::ST4Fourv2s); 3544 return; 3545 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3546 SelectStore(Node, 4, AArch64::ST4Fourv4s); 3547 return; 3548 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3549 SelectStore(Node, 4, AArch64::ST4Fourv2d); 3550 return; 3551 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3552 SelectStore(Node, 4, AArch64::ST1Fourv1d); 3553 return; 3554 } 3555 break; 3556 } 3557 case Intrinsic::aarch64_neon_st2lane: { 3558 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3559 SelectStoreLane(Node, 2, AArch64::ST2i8); 3560 return; 3561 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3562 VT == MVT::v8f16) { 3563 SelectStoreLane(Node, 2, AArch64::ST2i16); 3564 return; 3565 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3566 VT == MVT::v2f32) { 3567 SelectStoreLane(Node, 2, AArch64::ST2i32); 3568 return; 3569 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3570 VT == MVT::v1f64) { 3571 SelectStoreLane(Node, 2, AArch64::ST2i64); 3572 return; 3573 } 3574 break; 3575 } 3576 case Intrinsic::aarch64_neon_st3lane: { 3577 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3578 SelectStoreLane(Node, 3, AArch64::ST3i8); 3579 return; 3580 } else if (VT == MVT::v8i16 || VT 
== MVT::v4i16 || VT == MVT::v4f16 || 3581 VT == MVT::v8f16) { 3582 SelectStoreLane(Node, 3, AArch64::ST3i16); 3583 return; 3584 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3585 VT == MVT::v2f32) { 3586 SelectStoreLane(Node, 3, AArch64::ST3i32); 3587 return; 3588 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3589 VT == MVT::v1f64) { 3590 SelectStoreLane(Node, 3, AArch64::ST3i64); 3591 return; 3592 } 3593 break; 3594 } 3595 case Intrinsic::aarch64_neon_st4lane: { 3596 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3597 SelectStoreLane(Node, 4, AArch64::ST4i8); 3598 return; 3599 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3600 VT == MVT::v8f16) { 3601 SelectStoreLane(Node, 4, AArch64::ST4i16); 3602 return; 3603 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3604 VT == MVT::v2f32) { 3605 SelectStoreLane(Node, 4, AArch64::ST4i32); 3606 return; 3607 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3608 VT == MVT::v1f64) { 3609 SelectStoreLane(Node, 4, AArch64::ST4i64); 3610 return; 3611 } 3612 break; 3613 } 3614 } 3615 break; 3616 } 3617 case AArch64ISD::LD2post: { 3618 if (VT == MVT::v8i8) { 3619 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 3620 return; 3621 } else if (VT == MVT::v16i8) { 3622 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 3623 return; 3624 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3625 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 3626 return; 3627 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3628 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 3629 return; 3630 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3631 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 3632 return; 3633 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3634 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 3635 return; 3636 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3637 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 3638 return; 3639 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3640 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 3641 return; 3642 } 3643 break; 3644 } 3645 case AArch64ISD::LD3post: { 3646 if (VT == MVT::v8i8) { 3647 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 3648 return; 3649 } else if (VT == MVT::v16i8) { 3650 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 3651 return; 3652 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3653 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 3654 return; 3655 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3656 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 3657 return; 3658 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3659 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 3660 return; 3661 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3662 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 3663 return; 3664 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3665 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 3666 return; 3667 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3668 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 3669 return; 3670 } 3671 break; 3672 } 3673 case AArch64ISD::LD4post: { 3674 if (VT == MVT::v8i8) { 3675 SelectPostLoad(Node, 4, 
AArch64::LD4Fourv8b_POST, AArch64::dsub0); 3676 return; 3677 } else if (VT == MVT::v16i8) { 3678 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 3679 return; 3680 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3681 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 3682 return; 3683 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3684 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 3685 return; 3686 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3687 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 3688 return; 3689 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3690 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 3691 return; 3692 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3693 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 3694 return; 3695 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3696 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 3697 return; 3698 } 3699 break; 3700 } 3701 case AArch64ISD::LD1x2post: { 3702 if (VT == MVT::v8i8) { 3703 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 3704 return; 3705 } else if (VT == MVT::v16i8) { 3706 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 3707 return; 3708 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3709 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 3710 return; 3711 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3712 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 3713 return; 3714 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3715 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 3716 return; 3717 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3718 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 3719 return; 3720 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3721 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 3722 return; 3723 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3724 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 3725 return; 3726 } 3727 break; 3728 } 3729 case AArch64ISD::LD1x3post: { 3730 if (VT == MVT::v8i8) { 3731 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 3732 return; 3733 } else if (VT == MVT::v16i8) { 3734 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 3735 return; 3736 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3737 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 3738 return; 3739 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3740 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 3741 return; 3742 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3743 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 3744 return; 3745 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3746 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 3747 return; 3748 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3749 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 3750 return; 3751 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3752 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 3753 return; 3754 } 3755 break; 3756 } 3757 case AArch64ISD::LD1x4post: { 3758 if (VT == MVT::v8i8) { 3759 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 3760 return; 3761 } else if (VT == MVT::v16i8) { 3762 SelectPostLoad(Node, 4, 
AArch64::LD1Fourv16b_POST, AArch64::qsub0); 3763 return; 3764 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3765 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 3766 return; 3767 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3768 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 3769 return; 3770 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3771 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 3772 return; 3773 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3774 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 3775 return; 3776 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3777 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 3778 return; 3779 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3780 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 3781 return; 3782 } 3783 break; 3784 } 3785 case AArch64ISD::LD1DUPpost: { 3786 if (VT == MVT::v8i8) { 3787 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 3788 return; 3789 } else if (VT == MVT::v16i8) { 3790 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 3791 return; 3792 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3793 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 3794 return; 3795 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3796 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 3797 return; 3798 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3799 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 3800 return; 3801 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3802 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 3803 return; 3804 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3805 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 3806 return; 3807 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3808 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 3809 return; 3810 } 3811 break; 3812 } 3813 case AArch64ISD::LD2DUPpost: { 3814 if (VT == MVT::v8i8) { 3815 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 3816 return; 3817 } else if (VT == MVT::v16i8) { 3818 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 3819 return; 3820 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3821 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 3822 return; 3823 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3824 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 3825 return; 3826 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3827 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 3828 return; 3829 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3830 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 3831 return; 3832 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3833 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 3834 return; 3835 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3836 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 3837 return; 3838 } 3839 break; 3840 } 3841 case AArch64ISD::LD3DUPpost: { 3842 if (VT == MVT::v8i8) { 3843 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 3844 return; 3845 } else if (VT == MVT::v16i8) { 3846 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 3847 return; 3848 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3849 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, 
AArch64::dsub0); 3850 return; 3851 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3852 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 3853 return; 3854 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3855 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 3856 return; 3857 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3858 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 3859 return; 3860 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3861 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 3862 return; 3863 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3864 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 3865 return; 3866 } 3867 break; 3868 } 3869 case AArch64ISD::LD4DUPpost: { 3870 if (VT == MVT::v8i8) { 3871 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 3872 return; 3873 } else if (VT == MVT::v16i8) { 3874 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 3875 return; 3876 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3877 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 3878 return; 3879 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3880 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 3881 return; 3882 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3883 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 3884 return; 3885 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3886 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 3887 return; 3888 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3889 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 3890 return; 3891 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3892 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 3893 return; 3894 } 3895 break; 3896 } 3897 case AArch64ISD::LD1LANEpost: { 3898 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3899 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 3900 return; 3901 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3902 VT == MVT::v8f16) { 3903 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 3904 return; 3905 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3906 VT == MVT::v2f32) { 3907 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 3908 return; 3909 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3910 VT == MVT::v1f64) { 3911 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); 3912 return; 3913 } 3914 break; 3915 } 3916 case AArch64ISD::LD2LANEpost: { 3917 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3918 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); 3919 return; 3920 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3921 VT == MVT::v8f16) { 3922 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); 3923 return; 3924 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3925 VT == MVT::v2f32) { 3926 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); 3927 return; 3928 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3929 VT == MVT::v1f64) { 3930 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); 3931 return; 3932 } 3933 break; 3934 } 3935 case AArch64ISD::LD3LANEpost: { 3936 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3937 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); 3938 return; 3939 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3940 VT == MVT::v8f16) { 3941 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); 
3942 return; 3943 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3944 VT == MVT::v2f32) { 3945 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); 3946 return; 3947 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3948 VT == MVT::v1f64) { 3949 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); 3950 return; 3951 } 3952 break; 3953 } 3954 case AArch64ISD::LD4LANEpost: { 3955 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3956 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); 3957 return; 3958 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3959 VT == MVT::v8f16) { 3960 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); 3961 return; 3962 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3963 VT == MVT::v2f32) { 3964 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); 3965 return; 3966 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3967 VT == MVT::v1f64) { 3968 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); 3969 return; 3970 } 3971 break; 3972 } 3973 case AArch64ISD::ST2post: { 3974 VT = Node->getOperand(1).getValueType(); 3975 if (VT == MVT::v8i8) { 3976 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); 3977 return; 3978 } else if (VT == MVT::v16i8) { 3979 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); 3980 return; 3981 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3982 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); 3983 return; 3984 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3985 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); 3986 return; 3987 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3988 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); 3989 return; 3990 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3991 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); 3992 return; 3993 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3994 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); 3995 return; 3996 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3997 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 3998 return; 3999 } 4000 break; 4001 } 4002 case AArch64ISD::ST3post: { 4003 VT = Node->getOperand(1).getValueType(); 4004 if (VT == MVT::v8i8) { 4005 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); 4006 return; 4007 } else if (VT == MVT::v16i8) { 4008 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); 4009 return; 4010 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 4011 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); 4012 return; 4013 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 4014 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); 4015 return; 4016 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4017 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); 4018 return; 4019 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4020 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); 4021 return; 4022 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4023 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); 4024 return; 4025 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4026 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4027 return; 4028 } 4029 break; 4030 } 4031 case AArch64ISD::ST4post: { 4032 VT = Node->getOperand(1).getValueType(); 4033 if (VT == MVT::v8i8) { 4034 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); 4035 return; 4036 } else if (VT == MVT::v16i8) { 4037 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); 4038 return; 4039 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 4040 
SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); 4041 return; 4042 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 4043 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); 4044 return; 4045 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4046 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); 4047 return; 4048 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4049 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); 4050 return; 4051 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4052 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); 4053 return; 4054 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4055 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 4056 return; 4057 } 4058 break; 4059 } 4060 case AArch64ISD::ST1x2post: { 4061 VT = Node->getOperand(1).getValueType(); 4062 if (VT == MVT::v8i8) { 4063 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); 4064 return; 4065 } else if (VT == MVT::v16i8) { 4066 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); 4067 return; 4068 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 4069 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); 4070 return; 4071 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 4072 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); 4073 return; 4074 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4075 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); 4076 return; 4077 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4078 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); 4079 return; 4080 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4081 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4082 return; 4083 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4084 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); 4085 return; 4086 } 4087 break; 4088 } 4089 case AArch64ISD::ST1x3post: { 4090 VT = Node->getOperand(1).getValueType(); 4091 if (VT == MVT::v8i8) { 4092 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); 4093 return; 4094 } else if (VT == MVT::v16i8) { 4095 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); 4096 return; 4097 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 4098 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); 4099 return; 4100 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 4101 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); 4102 return; 4103 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4104 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); 4105 return; 4106 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4107 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); 4108 return; 4109 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4110 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4111 return; 4112 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4113 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); 4114 return; 4115 } 4116 break; 4117 } 4118 case AArch64ISD::ST1x4post: { 4119 VT = Node->getOperand(1).getValueType(); 4120 if (VT == MVT::v8i8) { 4121 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); 4122 return; 4123 } else if (VT == MVT::v16i8) { 4124 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); 4125 return; 4126 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 4127 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); 4128 return; 4129 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 4130 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); 4131 return; 4132 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4133 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); 4134 return; 4135 } else if (VT == MVT::v4i32 
|| VT == MVT::v4f32) { 4136 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); 4137 return; 4138 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4139 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 4140 return; 4141 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4142 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); 4143 return; 4144 } 4145 break; 4146 } 4147 case AArch64ISD::ST2LANEpost: { 4148 VT = Node->getOperand(1).getValueType(); 4149 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4150 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); 4151 return; 4152 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4153 VT == MVT::v8f16) { 4154 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); 4155 return; 4156 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4157 VT == MVT::v2f32) { 4158 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); 4159 return; 4160 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4161 VT == MVT::v1f64) { 4162 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); 4163 return; 4164 } 4165 break; 4166 } 4167 case AArch64ISD::ST3LANEpost: { 4168 VT = Node->getOperand(1).getValueType(); 4169 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4170 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); 4171 return; 4172 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4173 VT == MVT::v8f16) { 4174 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); 4175 return; 4176 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4177 VT == MVT::v2f32) { 4178 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); 4179 return; 4180 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4181 VT == MVT::v1f64) { 4182 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); 4183 return; 4184 } 4185 break; 4186 } 4187 case AArch64ISD::ST4LANEpost: { 4188 VT = Node->getOperand(1).getValueType(); 4189 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4190 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); 4191 return; 4192 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4193 VT == MVT::v8f16) { 4194 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); 4195 return; 4196 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4197 VT == MVT::v2f32) { 4198 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); 4199 return; 4200 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4201 VT == MVT::v1f64) { 4202 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); 4203 return; 4204 } 4205 break; 4206 } 4207 } 4208 4209 // Select the default instruction 4210 SelectCode(Node); 4211 } 4212 4213 /// createAArch64ISelDag - This pass converts a legalized DAG into an 4214 /// AArch64-specific DAG, ready for instruction scheduling. 4215 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM, 4216 CodeGenOpt::Level OptLevel) { 4217 return new AArch64DAGToDAGISel(TM, OptLevel); 4218 } 4219
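// Usage sketch (illustrative only, not part of this file): the factory above
// is normally wired into the backend pipeline from the target's pass
// configuration in AArch64TargetMachine.cpp, roughly as follows; the
// surrounding method is simplified here and shown only to indicate where the
// selector pass gets installed.
//
//   bool AArch64PassConfig::addInstSelector() {
//     // Install the SelectionDAG instruction selector defined in this file.
//     addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
//     return false;
//   }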