//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool ForCodeSize;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
        ForCodeSize(false) {}

  StringRef getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    ForCodeSize = MF.getFunction().hasOptSize();
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
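  /// For example, an address used with an "m" or "Q" constraint is copied
  /// into a pointer register class so it cannot end up allocated to XZR
  /// (see the implementation below).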
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  bool tryMLAV64LaneV128(SDNode *N);
  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }

  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset,
                             SignExtend, DoShift);
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that is
  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

};
} // end anonymous namespace

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so Imm will receive the 32-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if the node is a constant
// operand. If so Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}

/// Determine whether it is worth it to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (ForCodeSize || V.hasOneUse())
    return true;
  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the addressing mode and save a cycle.
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
      isWorthFoldingSHL(V))
    return true;
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to default of "LSL 0". The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
/// instructions do not. The AllowROR parameter specifies whether ROR is
/// supported.
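///
/// For example, given (add x0, (shl x1, 3)) the SHL operand is selected here
/// as Reg = x1 with a Shift encoding of "LSL #3", letting the add pattern emit
/// "add x0, x0, x1, lsl #3" instead of a separate shift. (Register numbers in
/// this illustration are arbitrary; the allocator picks the real ones.)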
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFolding(N);
  }

  return false;
}

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}

// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
      DL->getOpcode() != AArch64ISD::DUPLANE32)
    return false;

  SDValue SV = DL->getOperand(0);
  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
    return false;

  SDValue EV = SV.getOperand(1);
  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
  LaneOp = EV.getOperand(0);

  return true;
}

// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
// high lane extract.
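// The reported lane index is relative to the original 128-bit vector: it is
// the DUPLANE lane plus the subvector base index recovered by
// checkHighLaneIndex above.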
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
                             SDValue &LaneOp, int &LaneIdx) {

  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    std::swap(Op0, Op1);
    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
      return false;
  }
  StdOp = Op1;
  return true;
}

/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
/// is a lane in the upper half of a 128-bit vector. Recognize and select this
/// so that we don't emit unnecessary lane extracts.
bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
  int LaneIdx = -1; // Will hold the lane index.

  if (Op1.getOpcode() != ISD::MUL ||
      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                        LaneIdx)) {
    std::swap(Op0, Op1);
    if (Op1.getOpcode() != ISD::MUL ||
        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                          LaneIdx))
      return false;
  }

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };

  unsigned MLAOpc = ~0U;

  switch (N->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unrecognized MLA.");
  case MVT::v4i16:
    MLAOpc = AArch64::MLAv4i16_indexed;
    break;
  case MVT::v8i16:
    MLAOpc = AArch64::MLAv8i16_indexed;
    break;
  case MVT::v2i32:
    MLAOpc = AArch64::MLAv2i32_indexed;
    break;
  case MVT::v4i32:
    MLAOpc = AArch64::MLAv4i32_indexed;
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
  return true;
}

bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
  SDLoc dl(N);
  SDValue SMULLOp0;
  SDValue SMULLOp1;
  int LaneIdx;

  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
                        LaneIdx))
    return false;

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };

  unsigned SMULLOpc = ~0U;

  if (IntNo == Intrinsic::aarch64_neon_smull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::SMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::SMULLv2i32_indexed;
      break;
    }
  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized UMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::UMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::UMULLv2i32_indexed;
      break;
    }
  } else
    llvm_unreachable("Unrecognized intrinsic.");

  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
  return true;
}

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
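///
/// For example, if (and x, 0xffff) is matched as a UXTH extended-register
/// operand, the value being extended must be presented as the sub_32
/// sub-register of x; this helper inserts the EXTRACT_SUBREG that does so
/// (illustrative description of how the callers below use it).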
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                               dl, MVT::i32, N, SubReg);
  return SDValue(Node, 0);
}

/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if free 32-bit -> 64-bit zext can be used instead.
    if (Ext == AArch64_AM::UXTW &&
        Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
      return false;
  }

  return true;
}

/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
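///
/// For example, with BW = 7 (signed) and Size = 16 -- the Q-register LDP/STP
/// form -- the byte offset must be a multiple of 16 in [-1024, 1008]; anything
/// outside that falls through to the base-only case below. (Illustrative
/// numbers derived from the range check in the implementation.)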
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
  // signed addressing mode selected here doesn't support labels/immediates,
  // only base+offset.
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      if (IsSignedImm) {
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      } else {
        // unsigned Immediate
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Size);
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
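///
/// For example, with Size = 8 (a 64-bit LDR/STR) the byte offset must be a
/// multiple of 8 in [0, 32760], and it is emitted already scaled, i.e. as
/// offset/8 (illustrative numbers derived from the range check below).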
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base, SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    if (GAN->getOffset() % Size == 0) {
      const GlobalValue *GV = GAN->getGlobal();
      unsigned Alignment = GV->getAlignment();
      Type *Ty = GV->getValueType();
      if (Alignment == 0 && Ty->isSized())
        Alignment = DL.getABITypeAlignment(Ty);

      if (Alignment >= Size)
        return true;
    }
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address. This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode. The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    // If the offset is valid as a scaled immediate, don't match here.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
        RHSC < (0x1000 << Log2_32(Size)))
      return false;
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
      return true;
    }
  }
  return false;
}

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  MachineSDNode *Node = CurDAG->getMachineNode(
      TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
  return SDValue(Node, 0);
}

/// Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFolding(N);
}

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or can be encoded in an "ADD LSL #12" but cannot be
// encoded by a single MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
  // either. In that case the [BaseReg + 0] address mode would be used,
  // generating instructions like:
  //    MOV  X0, WideImmediate
  //    ADD  X1, BaseReg, X0
  //    LDR  X2, [X1, 0]
  // For such a situation, using the [BaseReg, XReg] addressing mode saves one
  // ADD/SUB:
  //    MOV  X0, WideImmediate
  //    LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
    unsigned Scale = Log2_32(Size);
    // Skip immediates that can be selected by the load/store addressing mode.
    // Also skip immediates that can be encoded by a single ADD (SUB is also
    // checked by using -ImmOff).
    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
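  // (The table registers of a multi-register TBL/TBX must be consecutive;
  // tying them into one super-register via createQTuple makes the register
  // allocator honour that constraint.)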
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}

bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDLoc dl(N);
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);
  // Either way, we're replacing the node, so tell the caller that.
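  // The pre/post-indexed load machine node has three results: result 0 is the
  // updated base register (i64), result 1 is the loaded value (DstVT), and
  // result 2 is the chain; they are remapped onto the original node below.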
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                                     unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(2), // Mem operand;
                   Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(1), // Mem operand
                   N->getOperand(2), // Incremental
                   Chain};

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1)
    ReplaceUses(SDValue(N, 0), SuperReg);
  else
    for (unsigned i = 0; i < NumVecs; ++i)
      ReplaceUses(SDValue(N, i),
                  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  // Update the chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                      unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  const EVT ResTys[] = {MVT::i64,    // Type of the write back register
                        MVT::Other}; // Type for the Chain

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq,
                   N->getOperand(NumVecs + 1), // base register
                   N->getOperand(NumVecs + 2), // Incremental
                   N->getOperand(0)};          // Chain
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  ReplaceNode(N, St);
}

namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
    SDLoc DL(V64Reg);

    SDValue Undef =
        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
  }
};
} // namespace

/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
  EVT VT = V128Reg.getValueType();
  unsigned WideSize = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType().getSimpleVT();
  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);

  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
                                    V128Reg);
}

void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                    AArch64::qsub2, AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, dl,
                                             MVT::i64), // Lane Number
                   N->getOperand(NumVecs + 2),          // Base register
                   N->getOperand(NumVecs + 3),          // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                              unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 2), // Base Register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
                                       unsigned &Opc, SDValue &Opd0,
                                       unsigned &LSB, unsigned &MSB,
                                       unsigned NumberOfIgnoredLowBits,
                                       bool BiggerPattern) {
  assert(N->getOpcode() == ISD::AND &&
         "N must be an AND operation to call this function");

  EVT VT = N->getValueType(0);

  // Here we can test the type of VT and return false when the type does not
  // match, but since it is done prior to that call in the current context
  // we turned that into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // FIXME: simplify-demanded-bits in DAGCombine will probably have
  // changed the AND node to a 32-bit mask operation. We'll have to
  // undo that as part of the transform here if we want to catch all
  // the opportunities.
  // Currently the NumberOfIgnoredLowBits argument helps to recover
  // from these situations when matching a bigger pattern (bitfield insert).

  // For unsigned extracts, check for a shift right and mask
  uint64_t AndImm = 0;
  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
    return false;

  const SDNode *Op0 = N->getOperand(0).getNode();

  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
  // simplified. Try to undo that.
Try to undo that 1545 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 1546 1547 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 1548 if (AndImm & (AndImm + 1)) 1549 return false; 1550 1551 bool ClampMSB = false; 1552 uint64_t SrlImm = 0; 1553 // Handle the SRL + ANY_EXTEND case. 1554 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 1555 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 1556 // Extend the incoming operand of the SRL to 64-bit. 1557 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 1558 // Make sure to clamp the MSB so that we preserve the semantics of the 1559 // original operations. 1560 ClampMSB = true; 1561 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 1562 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 1563 SrlImm)) { 1564 // If the shift result was truncated, we can still combine them. 1565 Opd0 = Op0->getOperand(0).getOperand(0); 1566 1567 // Use the type of SRL node. 1568 VT = Opd0->getValueType(0); 1569 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 1570 Opd0 = Op0->getOperand(0); 1571 } else if (BiggerPattern) { 1572 // Let's pretend a 0 shift right has been performed. 1573 // The resulting code will be at least as good as the original one 1574 // plus it may expose more opportunities for bitfield insert pattern. 1575 // FIXME: Currently we limit this to the bigger pattern, because 1576 // some optimizations expect AND and not UBFM. 1577 Opd0 = N->getOperand(0); 1578 } else 1579 return false; 1580 1581 // Bail out on large immediates. This happens when no proper 1582 // combining/constant folding was performed. 1583 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { 1584 LLVM_DEBUG( 1585 (dbgs() << N 1586 << ": Found large shift immediate, this should not happen\n")); 1587 return false; 1588 } 1589 1590 LSB = SrlImm; 1591 MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm) 1592 : countTrailingOnes<uint64_t>(AndImm)) - 1593 1; 1594 if (ClampMSB) 1595 // Since we're moving the extend before the right shift operation, we need 1596 // to clamp the MSB to make sure we don't shift in undefined bits instead of 1597 // the zeros which would get shifted in with the original right shift 1598 // operation. 1599 MSB = MSB > 31 ? 31 : MSB; 1600 1601 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 1602 return true; 1603 } 1604 1605 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, 1606 SDValue &Opd0, unsigned &Immr, 1607 unsigned &Imms) { 1608 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); 1609 1610 EVT VT = N->getValueType(0); 1611 unsigned BitWidth = VT.getSizeInBits(); 1612 assert((VT == MVT::i32 || VT == MVT::i64) && 1613 "Type checking must have been done before calling this function"); 1614 1615 SDValue Op = N->getOperand(0); 1616 if (Op->getOpcode() == ISD::TRUNCATE) { 1617 Op = Op->getOperand(0); 1618 VT = Op->getValueType(0); 1619 BitWidth = VT.getSizeInBits(); 1620 } 1621 1622 uint64_t ShiftImm; 1623 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && 1624 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 1625 return false; 1626 1627 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1628 if (ShiftImm + Width > BitWidth) 1629 return false; 1630 1631 Opc = (VT == MVT::i32) ? 
AArch64::SBFMWri : AArch64::SBFMXri; 1632 Opd0 = Op.getOperand(0); 1633 Immr = ShiftImm; 1634 Imms = ShiftImm + Width - 1; 1635 return true; 1636 } 1637 1638 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 1639 SDValue &Opd0, unsigned &LSB, 1640 unsigned &MSB) { 1641 // We are looking for the following pattern which basically extracts several 1642 // continuous bits from the source value and places it from the LSB of the 1643 // destination value, all other bits of the destination value or set to zero: 1644 // 1645 // Value2 = AND Value, MaskImm 1646 // SRL Value2, ShiftImm 1647 // 1648 // with MaskImm >> ShiftImm to search for the bit width. 1649 // 1650 // This gets selected into a single UBFM: 1651 // 1652 // UBFM Value, ShiftImm, BitWide + SrlImm -1 1653 // 1654 1655 if (N->getOpcode() != ISD::SRL) 1656 return false; 1657 1658 uint64_t AndMask = 0; 1659 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) 1660 return false; 1661 1662 Opd0 = N->getOperand(0).getOperand(0); 1663 1664 uint64_t SrlImm = 0; 1665 if (!isIntImmediate(N->getOperand(1), SrlImm)) 1666 return false; 1667 1668 // Check whether we really have several bits extract here. 1669 unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm)); 1670 if (BitWide && isMask_64(AndMask >> SrlImm)) { 1671 if (N->getValueType(0) == MVT::i32) 1672 Opc = AArch64::UBFMWri; 1673 else 1674 Opc = AArch64::UBFMXri; 1675 1676 LSB = SrlImm; 1677 MSB = BitWide + SrlImm - 1; 1678 return true; 1679 } 1680 1681 return false; 1682 } 1683 1684 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 1685 unsigned &Immr, unsigned &Imms, 1686 bool BiggerPattern) { 1687 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 1688 "N must be a SHR/SRA operation to call this function"); 1689 1690 EVT VT = N->getValueType(0); 1691 1692 // Here we can test the type of VT and return false when the type does not 1693 // match, but since it is done prior to that call in the current context 1694 // we turned that into an assert to avoid redundant code. 1695 assert((VT == MVT::i32 || VT == MVT::i64) && 1696 "Type checking must have been done before calling this function"); 1697 1698 // Check for AND + SRL doing several bits extract. 1699 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 1700 return true; 1701 1702 // We're looking for a shift of a shift. 1703 uint64_t ShlImm = 0; 1704 uint64_t TruncBits = 0; 1705 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 1706 Opd0 = N->getOperand(0).getOperand(0); 1707 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 1708 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 1709 // We are looking for a shift of truncate. Truncate from i64 to i32 could 1710 // be considered as setting high 32 bits as zero. Our strategy here is to 1711 // always generate 64bit UBFM. This consistency will help the CSE pass 1712 // later find more redundancy. 1713 Opd0 = N->getOperand(0).getOperand(0); 1714 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 1715 VT = Opd0.getValueType(); 1716 assert(VT == MVT::i64 && "the promoted type should be i64"); 1717 } else if (BiggerPattern) { 1718 // Let's pretend a 0 shift left has been performed. 
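    // (With ShlImm == 0 the extract below degenerates to Immr = SrlImm and
    // Imms = VT.getSizeInBits() - 1, i.e. a plain LSR/ASR expressed as
    // UBFM/SBFM, so the resulting code is never worse than the original.)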
1719 // FIXME: Currently we limit this to the bigger pattern case, 1720 // because some optimizations expect AND and not UBFM 1721 Opd0 = N->getOperand(0); 1722 } else 1723 return false; 1724 1725 // Missing combines/constant folding may have left us with strange 1726 // constants. 1727 if (ShlImm >= VT.getSizeInBits()) { 1728 LLVM_DEBUG( 1729 (dbgs() << N 1730 << ": Found large shift immediate, this should not happen\n")); 1731 return false; 1732 } 1733 1734 uint64_t SrlImm = 0; 1735 if (!isIntImmediate(N->getOperand(1), SrlImm)) 1736 return false; 1737 1738 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 1739 "bad amount in shift node!"); 1740 int immr = SrlImm - ShlImm; 1741 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 1742 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 1743 // SRA requires a signed extraction 1744 if (VT == MVT::i32) 1745 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 1746 else 1747 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; 1748 return true; 1749 } 1750 1751 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 1752 assert(N->getOpcode() == ISD::SIGN_EXTEND); 1753 1754 EVT VT = N->getValueType(0); 1755 EVT NarrowVT = N->getOperand(0)->getValueType(0); 1756 if (VT != MVT::i64 || NarrowVT != MVT::i32) 1757 return false; 1758 1759 uint64_t ShiftImm; 1760 SDValue Op = N->getOperand(0); 1761 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 1762 return false; 1763 1764 SDLoc dl(N); 1765 // Extend the incoming operand of the shift to 64-bits. 1766 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 1767 unsigned Immr = ShiftImm; 1768 unsigned Imms = NarrowVT.getSizeInBits() - 1; 1769 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 1770 CurDAG->getTargetConstant(Imms, dl, VT)}; 1771 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 1772 return true; 1773 } 1774 1775 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 1776 SDValue &Opd0, unsigned &Immr, unsigned &Imms, 1777 unsigned NumberOfIgnoredLowBits = 0, 1778 bool BiggerPattern = false) { 1779 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 1780 return false; 1781 1782 switch (N->getOpcode()) { 1783 default: 1784 if (!N->isMachineOpcode()) 1785 return false; 1786 break; 1787 case ISD::AND: 1788 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, 1789 NumberOfIgnoredLowBits, BiggerPattern); 1790 case ISD::SRL: 1791 case ISD::SRA: 1792 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); 1793 1794 case ISD::SIGN_EXTEND_INREG: 1795 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); 1796 } 1797 1798 unsigned NOpc = N->getMachineOpcode(); 1799 switch (NOpc) { 1800 default: 1801 return false; 1802 case AArch64::SBFMWri: 1803 case AArch64::UBFMWri: 1804 case AArch64::SBFMXri: 1805 case AArch64::UBFMXri: 1806 Opc = NOpc; 1807 Opd0 = N->getOperand(0); 1808 Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 1809 Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 1810 return true; 1811 } 1812 // Unreachable 1813 return false; 1814 } 1815 1816 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { 1817 unsigned Opc, Immr, Imms; 1818 SDValue Opd0; 1819 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) 1820 return false; 1821 1822 EVT VT = N->getValueType(0); 1823 SDLoc dl(N); 1824 1825 // If the bit extract operation is 64bit but the original type is 32bit, we 
1826 // need to add one EXTRACT_SUBREG.
1827 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1828 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1829 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1830
1831 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1832 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1833 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1834 MVT::i32, SDValue(BFM, 0), SubReg));
1835 return true;
1836 }
1837
1838 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1839 CurDAG->getTargetConstant(Imms, dl, VT)};
1840 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1841 return true;
1842 }
1843
1844 /// Does DstMask form a complementary pair with the mask provided by
1845 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
1846 /// this asks whether DstMask zeroes precisely those bits that will be set by
1847 /// the other half.
1848 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1849 unsigned NumberOfIgnoredHighBits, EVT VT) {
1850 assert((VT == MVT::i32 || VT == MVT::i64) &&
1851 "i32 or i64 mask type expected!");
1852 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1853
1854 APInt SignificantDstMask = APInt(BitWidth, DstMask);
1855 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1856
1857 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1858 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1859 }
1860
1861 // Look for bits that will be useful for later uses.
1862 // A bit is considered useless as soon as it is dropped and never used
1863 // before it has been dropped.
1864 // E.g., looking for the useful bits of x:
1865 // 1. y = x & 0x7
1866 // 2. z = y >> 2
1867 // After #1, the useful bits of x are 0x7, and they live through
1868 // y.
1869 // After #2, the useful bits of x are 0x4.
1870 // However, if x is used in an unpredictable instruction, then all its bits
1871 // are useful.
1872 // E.g.
1873 // 1. y = x & 0x7
1874 // 2. z = y >> 2
1875 // 3. 
str x, [@x] 1876 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); 1877 1878 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, 1879 unsigned Depth) { 1880 uint64_t Imm = 1881 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 1882 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); 1883 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); 1884 getUsefulBits(Op, UsefulBits, Depth + 1); 1885 } 1886 1887 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, 1888 uint64_t Imm, uint64_t MSB, 1889 unsigned Depth) { 1890 // inherit the bitwidth value 1891 APInt OpUsefulBits(UsefulBits); 1892 OpUsefulBits = 1; 1893 1894 if (MSB >= Imm) { 1895 OpUsefulBits <<= MSB - Imm + 1; 1896 --OpUsefulBits; 1897 // The interesting part will be in the lower part of the result 1898 getUsefulBits(Op, OpUsefulBits, Depth + 1); 1899 // The interesting part was starting at Imm in the argument 1900 OpUsefulBits <<= Imm; 1901 } else { 1902 OpUsefulBits <<= MSB + 1; 1903 --OpUsefulBits; 1904 // The interesting part will be shifted in the result 1905 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; 1906 getUsefulBits(Op, OpUsefulBits, Depth + 1); 1907 // The interesting part was at zero in the argument 1908 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); 1909 } 1910 1911 UsefulBits &= OpUsefulBits; 1912 } 1913 1914 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, 1915 unsigned Depth) { 1916 uint64_t Imm = 1917 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 1918 uint64_t MSB = 1919 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 1920 1921 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 1922 } 1923 1924 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, 1925 unsigned Depth) { 1926 uint64_t ShiftTypeAndValue = 1927 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 1928 APInt Mask(UsefulBits); 1929 Mask.clearAllBits(); 1930 Mask.flipAllBits(); 1931 1932 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { 1933 // Shift Left 1934 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 1935 Mask <<= ShiftAmt; 1936 getUsefulBits(Op, Mask, Depth + 1); 1937 Mask.lshrInPlace(ShiftAmt); 1938 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 1939 // Shift Right 1940 // We do not handle AArch64_AM::ASR, because the sign will change the 1941 // number of useful bits 1942 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 1943 Mask.lshrInPlace(ShiftAmt); 1944 getUsefulBits(Op, Mask, Depth + 1); 1945 Mask <<= ShiftAmt; 1946 } else 1947 return; 1948 1949 UsefulBits &= Mask; 1950 } 1951 1952 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 1953 unsigned Depth) { 1954 uint64_t Imm = 1955 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 1956 uint64_t MSB = 1957 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 1958 1959 APInt OpUsefulBits(UsefulBits); 1960 OpUsefulBits = 1; 1961 1962 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 1963 ResultUsefulBits.flipAllBits(); 1964 APInt Mask(UsefulBits.getBitWidth(), 0); 1965 1966 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 1967 1968 if (MSB >= Imm) { 1969 // The instruction is a BFXIL. 
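    // (BFXIL is the MSB >= Imm alias of BFM: it copies MSB - Imm + 1 bits,
    // taken starting at bit Imm of the source register, into the low bits of
    // the destination, leaving the destination's remaining bits untouched.)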
1970 uint64_t Width = MSB - Imm + 1; 1971 uint64_t LSB = Imm; 1972 1973 OpUsefulBits <<= Width; 1974 --OpUsefulBits; 1975 1976 if (Op.getOperand(1) == Orig) { 1977 // Copy the low bits from the result to bits starting from LSB. 1978 Mask = ResultUsefulBits & OpUsefulBits; 1979 Mask <<= LSB; 1980 } 1981 1982 if (Op.getOperand(0) == Orig) 1983 // Bits starting from LSB in the input contribute to the result. 1984 Mask |= (ResultUsefulBits & ~OpUsefulBits); 1985 } else { 1986 // The instruction is a BFI. 1987 uint64_t Width = MSB + 1; 1988 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 1989 1990 OpUsefulBits <<= Width; 1991 --OpUsefulBits; 1992 OpUsefulBits <<= LSB; 1993 1994 if (Op.getOperand(1) == Orig) { 1995 // Copy the bits from the result to the zero bits. 1996 Mask = ResultUsefulBits & OpUsefulBits; 1997 Mask.lshrInPlace(LSB); 1998 } 1999 2000 if (Op.getOperand(0) == Orig) 2001 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2002 } 2003 2004 UsefulBits &= Mask; 2005 } 2006 2007 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2008 SDValue Orig, unsigned Depth) { 2009 2010 // Users of this node should have already been instruction selected 2011 // FIXME: Can we turn that into an assert? 2012 if (!UserNode->isMachineOpcode()) 2013 return; 2014 2015 switch (UserNode->getMachineOpcode()) { 2016 default: 2017 return; 2018 case AArch64::ANDSWri: 2019 case AArch64::ANDSXri: 2020 case AArch64::ANDWri: 2021 case AArch64::ANDXri: 2022 // We increment Depth only when we call the getUsefulBits 2023 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2024 Depth); 2025 case AArch64::UBFMWri: 2026 case AArch64::UBFMXri: 2027 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2028 2029 case AArch64::ORRWrs: 2030 case AArch64::ORRXrs: 2031 if (UserNode->getOperand(1) != Orig) 2032 return; 2033 return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2034 Depth); 2035 case AArch64::BFMWri: 2036 case AArch64::BFMXri: 2037 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2038 2039 case AArch64::STRBBui: 2040 case AArch64::STURBBi: 2041 if (UserNode->getOperand(0) != Orig) 2042 return; 2043 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); 2044 return; 2045 2046 case AArch64::STRHHui: 2047 case AArch64::STURHHi: 2048 if (UserNode->getOperand(0) != Orig) 2049 return; 2050 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2051 return; 2052 } 2053 } 2054 2055 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2056 if (Depth >= SelectionDAG::MaxRecursionDepth) 2057 return; 2058 // Initialize UsefulBits 2059 if (!Depth) { 2060 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2061 // At the beginning, assume every produced bits is useful 2062 UsefulBits = APInt(Bitwidth, 0); 2063 UsefulBits.flipAllBits(); 2064 } 2065 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2066 2067 for (SDNode *Node : Op.getNode()->uses()) { 2068 // A use cannot produce useful bits 2069 APInt UsefulBitsForUse = APInt(UsefulBits); 2070 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 2071 UsersUsefulBits |= UsefulBitsForUse; 2072 } 2073 // UsefulBits contains the produced bits that are meaningful for the 2074 // current definition, thus a user cannot make a bit meaningful at 2075 // this point 2076 UsefulBits &= UsersUsefulBits; 2077 } 2078 2079 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 2080 /// ShlAmount is negative, do a (logical) right-shift instead. 
If ShlAmount is 2081 /// 0, return Op unchanged. 2082 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 2083 if (ShlAmount == 0) 2084 return Op; 2085 2086 EVT VT = Op.getValueType(); 2087 SDLoc dl(Op); 2088 unsigned BitWidth = VT.getSizeInBits(); 2089 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 2090 2091 SDNode *ShiftNode; 2092 if (ShlAmount > 0) { 2093 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 2094 ShiftNode = CurDAG->getMachineNode( 2095 UBFMOpc, dl, VT, Op, 2096 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 2097 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 2098 } else { 2099 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 2100 assert(ShlAmount < 0 && "expected right shift"); 2101 int ShrAmount = -ShlAmount; 2102 ShiftNode = CurDAG->getMachineNode( 2103 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 2104 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 2105 } 2106 2107 return SDValue(ShiftNode, 0); 2108 } 2109 2110 /// Does this tree qualify as an attempt to move a bitfield into position, 2111 /// essentially "(and (shl VAL, N), Mask)". 2112 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 2113 bool BiggerPattern, 2114 SDValue &Src, int &ShiftAmount, 2115 int &MaskWidth) { 2116 EVT VT = Op.getValueType(); 2117 unsigned BitWidth = VT.getSizeInBits(); 2118 (void)BitWidth; 2119 assert(BitWidth == 32 || BitWidth == 64); 2120 2121 KnownBits Known = CurDAG->computeKnownBits(Op); 2122 2123 // Non-zero in the sense that they're not provably zero, which is the key 2124 // point if we want to use this value 2125 uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 2126 2127 // Discard a constant AND mask if present. It's safe because the node will 2128 // already have been factored into the computeKnownBits calculation above. 2129 uint64_t AndImm; 2130 if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { 2131 assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0); 2132 Op = Op.getOperand(0); 2133 } 2134 2135 // Don't match if the SHL has more than one use, since then we'll end up 2136 // generating SHL+UBFIZ instead of just keeping SHL+AND. 2137 if (!BiggerPattern && !Op.hasOneUse()) 2138 return false; 2139 2140 uint64_t ShlImm; 2141 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 2142 return false; 2143 Op = Op.getOperand(0); 2144 2145 if (!isShiftedMask_64(NonZeroBits)) 2146 return false; 2147 2148 ShiftAmount = countTrailingZeros(NonZeroBits); 2149 MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); 2150 2151 // BFI encompasses sufficiently many nodes that it's worth inserting an extra 2152 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL 2153 // amount. BiggerPattern is true when this pattern is being matched for BFI, 2154 // BiggerPattern is false when this pattern is being matched for UBFIZ, in 2155 // which case it is not profitable to insert an extra shift. 2156 if (ShlImm - ShiftAmount != 0 && !BiggerPattern) 2157 return false; 2158 Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); 2159 2160 return true; 2161 } 2162 2163 static bool isShiftedMask(uint64_t Mask, EVT VT) { 2164 assert(VT == MVT::i32 || VT == MVT::i64); 2165 if (VT == MVT::i32) 2166 return isShiftedMask_32(Mask); 2167 return isShiftedMask_64(Mask); 2168 } 2169 2170 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 2171 // inserted only sets known zero bits. 
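// A rough example of the intent (illustrative values, i32):
//   (or (and X, 0xFFFFFF00), 0x5A)
// cannot use an ORR-immediate (0x5A is not a logical immediate), but the OR
// only writes bits that the AND is known to clear, so it can be selected as
//   MOVi32imm 0x5A ; BFMWri X, imm, #0, #7   (i.e. BFXIL Wd, Wimm, #0, #8)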
2172 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 2173 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2174 2175 EVT VT = N->getValueType(0); 2176 if (VT != MVT::i32 && VT != MVT::i64) 2177 return false; 2178 2179 unsigned BitWidth = VT.getSizeInBits(); 2180 2181 uint64_t OrImm; 2182 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 2183 return false; 2184 2185 // Skip this transformation if the ORR immediate can be encoded in the ORR. 2186 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 2187 // performance neutral. 2188 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 2189 return false; 2190 2191 uint64_t MaskImm; 2192 SDValue And = N->getOperand(0); 2193 // Must be a single use AND with an immediate operand. 2194 if (!And.hasOneUse() || 2195 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 2196 return false; 2197 2198 // Compute the Known Zero for the AND as this allows us to catch more general 2199 // cases than just looking for AND with imm. 2200 KnownBits Known = CurDAG->computeKnownBits(And); 2201 2202 // Non-zero in the sense that they're not provably zero, which is the key 2203 // point if we want to use this value. 2204 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 2205 2206 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 2207 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 2208 return false; 2209 2210 // The bits being inserted must only set those bits that are known to be zero. 2211 if ((OrImm & NotKnownZero) != 0) { 2212 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 2213 // currently handle this case. 2214 return false; 2215 } 2216 2217 // BFI/BFXIL dst, src, #lsb, #width. 2218 int LSB = countTrailingOnes(NotKnownZero); 2219 int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation(); 2220 2221 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 2222 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2223 unsigned ImmS = Width - 1; 2224 2225 // If we're creating a BFI instruction avoid cases where we need more 2226 // instructions to materialize the BFI constant as compared to the original 2227 // ORR. A BFXIL will use the same constant as the original ORR, so the code 2228 // should be no worse in this case. 2229 bool IsBFI = LSB != 0; 2230 uint64_t BFIImm = OrImm >> LSB; 2231 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 2232 // We have a BFI instruction and we know the constant can't be materialized 2233 // with a ORR-immediate with the zero register. 2234 unsigned OrChunks = 0, BFIChunks = 0; 2235 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 2236 if (((OrImm >> Shift) & 0xFFFF) != 0) 2237 ++OrChunks; 2238 if (((BFIImm >> Shift) & 0xFFFF) != 0) 2239 ++BFIChunks; 2240 } 2241 if (BFIChunks > OrChunks) 2242 return false; 2243 } 2244 2245 // Materialize the constant to be inserted. 2246 SDLoc DL(N); 2247 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; 2248 SDNode *MOVI = CurDAG->getMachineNode( 2249 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); 2250 2251 // Create the BFI/BFXIL instruction. 2252 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), 2253 CurDAG->getTargetConstant(ImmR, DL, VT), 2254 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2255 unsigned Opc = (VT == MVT::i32) ? 
AArch64::BFMWri : AArch64::BFMXri; 2256 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2257 return true; 2258 } 2259 2260 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, 2261 SelectionDAG *CurDAG) { 2262 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2263 2264 EVT VT = N->getValueType(0); 2265 if (VT != MVT::i32 && VT != MVT::i64) 2266 return false; 2267 2268 unsigned BitWidth = VT.getSizeInBits(); 2269 2270 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 2271 // have the expected shape. Try to undo that. 2272 2273 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); 2274 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); 2275 2276 // Given a OR operation, check if we have the following pattern 2277 // ubfm c, b, imm, imm2 (or something that does the same jobs, see 2278 // isBitfieldExtractOp) 2279 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and 2280 // countTrailingZeros(mask2) == imm2 - imm + 1 2281 // f = d | c 2282 // if yes, replace the OR instruction with: 2283 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2 2284 2285 // OR is commutative, check all combinations of operand order and values of 2286 // BiggerPattern, i.e. 2287 // Opd0, Opd1, BiggerPattern=false 2288 // Opd1, Opd0, BiggerPattern=false 2289 // Opd0, Opd1, BiggerPattern=true 2290 // Opd1, Opd0, BiggerPattern=true 2291 // Several of these combinations may match, so check with BiggerPattern=false 2292 // first since that will produce better results by matching more instructions 2293 // and/or inserting fewer extra instructions. 2294 for (int I = 0; I < 4; ++I) { 2295 2296 SDValue Dst, Src; 2297 unsigned ImmR, ImmS; 2298 bool BiggerPattern = I / 2; 2299 SDValue OrOpd0Val = N->getOperand(I % 2); 2300 SDNode *OrOpd0 = OrOpd0Val.getNode(); 2301 SDValue OrOpd1Val = N->getOperand((I + 1) % 2); 2302 SDNode *OrOpd1 = OrOpd1Val.getNode(); 2303 2304 unsigned BFXOpc; 2305 int DstLSB, Width; 2306 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, 2307 NumberOfIgnoredLowBits, BiggerPattern)) { 2308 // Check that the returned opcode is compatible with the pattern, 2309 // i.e., same type and zero extended (U and not S) 2310 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || 2311 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) 2312 continue; 2313 2314 // Compute the width of the bitfield insertion 2315 DstLSB = 0; 2316 Width = ImmS - ImmR + 1; 2317 // FIXME: This constraint is to catch bitfield insertion we may 2318 // want to widen the pattern if we want to grab general bitfied 2319 // move case 2320 if (Width <= 0) 2321 continue; 2322 2323 // If the mask on the insertee is correct, we have a BFXIL operation. We 2324 // can share the ImmR and ImmS values from the already-computed UBFM. 2325 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, 2326 BiggerPattern, 2327 Src, DstLSB, Width)) { 2328 ImmR = (BitWidth - DstLSB) % BitWidth; 2329 ImmS = Width - 1; 2330 } else 2331 continue; 2332 2333 // Check the second part of the pattern 2334 EVT VT = OrOpd1Val.getValueType(); 2335 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); 2336 2337 // Compute the Known Zero for the candidate of the first operand. 2338 // This allows to catch more general case than just looking for 2339 // AND with imm. Indeed, simplify-demanded-bits may have removed 2340 // the AND instruction because it proves it was useless. 
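    // A rough example of a match (illustrative values, i32):
    //   (or (and (shl Y, 8), 0xFF00), (and X, 0xFFFF00FF))
    // The shifted operand positions an 8-bit field at bit 8 (Src = Y,
    // DstLSB = 8, Width = 8), the other operand has those bits known zero,
    // and its AND satisfies isBitfieldDstMask, so the OR is selected as
    //   BFMWri X, Y, #24, #7   (i.e. BFI Wd, Wy, #8, #8)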
2341 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 2342 2343 // Check if there is enough room for the second operand to appear 2344 // in the first one 2345 APInt BitsToBeInserted = 2346 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 2347 2348 if ((BitsToBeInserted & ~Known.Zero) != 0) 2349 continue; 2350 2351 // Set the first operand 2352 uint64_t Imm; 2353 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 2354 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 2355 // In that case, we can eliminate the AND 2356 Dst = OrOpd1->getOperand(0); 2357 else 2358 // Maybe the AND has been removed by simplify-demanded-bits 2359 // or is useful because it discards more bits 2360 Dst = OrOpd1Val; 2361 2362 // both parts match 2363 SDLoc DL(N); 2364 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 2365 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2366 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2367 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2368 return true; 2369 } 2370 2371 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 2372 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 2373 // mask (e.g., 0x000ffff0). 2374 uint64_t Mask0Imm, Mask1Imm; 2375 SDValue And0 = N->getOperand(0); 2376 SDValue And1 = N->getOperand(1); 2377 if (And0.hasOneUse() && And1.hasOneUse() && 2378 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 2379 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 2380 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 2381 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 2382 2383 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 2384 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 2385 // bits to be inserted. 2386 if (isShiftedMask(Mask0Imm, VT)) { 2387 std::swap(And0, And1); 2388 std::swap(Mask0Imm, Mask1Imm); 2389 } 2390 2391 SDValue Src = And1->getOperand(0); 2392 SDValue Dst = And0->getOperand(0); 2393 unsigned LSB = countTrailingZeros(Mask1Imm); 2394 int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation(); 2395 2396 // The BFXIL inserts the low-order bits from a source register, so right 2397 // shift the needed bits into place. 2398 SDLoc DL(N); 2399 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2400 SDNode *LSR = CurDAG->getMachineNode( 2401 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT), 2402 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 2403 2404 // BFXIL is an alias of BFM, so translate to BFM operands. 2405 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2406 unsigned ImmS = Width - 1; 2407 2408 // Create the BFXIL instruction. 2409 SDValue Ops[] = {Dst, SDValue(LSR, 0), 2410 CurDAG->getTargetConstant(ImmR, DL, VT), 2411 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2412 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2413 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2414 return true; 2415 } 2416 2417 return false; 2418 } 2419 2420 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 2421 if (N->getOpcode() != ISD::OR) 2422 return false; 2423 2424 APInt NUsefulBits; 2425 getUsefulBits(SDValue(N, 0), NUsefulBits); 2426 2427 // If all bits are not useful, just return UNDEF. 
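  // (NUsefulBits is derived from N's already-selected users by getUsefulBits;
  // all-zero means no user reads any bit the OR produces.)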
2428 if (!NUsefulBits) { 2429 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 2430 return true; 2431 } 2432 2433 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 2434 return true; 2435 2436 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 2437 } 2438 2439 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 2440 /// equivalent of a left shift by a constant amount followed by an and masking 2441 /// out a contiguous set of bits. 2442 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 2443 if (N->getOpcode() != ISD::AND) 2444 return false; 2445 2446 EVT VT = N->getValueType(0); 2447 if (VT != MVT::i32 && VT != MVT::i64) 2448 return false; 2449 2450 SDValue Op0; 2451 int DstLSB, Width; 2452 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 2453 Op0, DstLSB, Width)) 2454 return false; 2455 2456 // ImmR is the rotate right amount. 2457 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 2458 // ImmS is the most significant bit of the source to be moved. 2459 unsigned ImmS = Width - 1; 2460 2461 SDLoc DL(N); 2462 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 2463 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2464 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2465 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2466 return true; 2467 } 2468 2469 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 2470 /// variable shift/rotate instructions. 2471 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 2472 EVT VT = N->getValueType(0); 2473 2474 unsigned Opc; 2475 switch (N->getOpcode()) { 2476 case ISD::ROTR: 2477 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 2478 break; 2479 case ISD::SHL: 2480 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 2481 break; 2482 case ISD::SRL: 2483 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 2484 break; 2485 case ISD::SRA: 2486 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 2487 break; 2488 default: 2489 return false; 2490 } 2491 2492 uint64_t Size; 2493 uint64_t Bits; 2494 if (VT == MVT::i32) { 2495 Bits = 5; 2496 Size = 32; 2497 } else if (VT == MVT::i64) { 2498 Bits = 6; 2499 Size = 64; 2500 } else 2501 return false; 2502 2503 SDValue ShiftAmt = N->getOperand(1); 2504 SDLoc DL(N); 2505 SDValue NewShiftAmt; 2506 2507 // Skip over an extend of the shift amount. 2508 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 2509 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 2510 ShiftAmt = ShiftAmt->getOperand(0); 2511 2512 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 2513 SDValue Add0 = ShiftAmt->getOperand(0); 2514 SDValue Add1 = ShiftAmt->getOperand(1); 2515 uint64_t Add0Imm; 2516 uint64_t Add1Imm; 2517 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 2518 // to avoid the ADD/SUB. 2519 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) 2520 NewShiftAmt = Add0; 2521 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2522 // generate a NEG instead of a SUB of a constant. 
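    // For example (illustrative): (srl X:i64, (sub 64, Y)) can be selected as
    // LSRVXr X, (SUBXrr XZR, Y), because LSRV only reads the low 6 bits of
    // the shift amount and (64 - Y) equals (0 - Y) modulo 64.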
2523 else if (ShiftAmt->getOpcode() == ISD::SUB && 2524 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 2525 (Add0Imm % Size == 0)) { 2526 unsigned NegOpc; 2527 unsigned ZeroReg; 2528 EVT SubVT = ShiftAmt->getValueType(0); 2529 if (SubVT == MVT::i32) { 2530 NegOpc = AArch64::SUBWrr; 2531 ZeroReg = AArch64::WZR; 2532 } else { 2533 assert(SubVT == MVT::i64); 2534 NegOpc = AArch64::SUBXrr; 2535 ZeroReg = AArch64::XZR; 2536 } 2537 SDValue Zero = 2538 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 2539 MachineSDNode *Neg = 2540 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 2541 NewShiftAmt = SDValue(Neg, 0); 2542 } else 2543 return false; 2544 } else { 2545 // If the shift amount is masked with an AND, check that the mask covers the 2546 // bits that are implicitly ANDed off by the above opcodes and if so, skip 2547 // the AND. 2548 uint64_t MaskImm; 2549 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm)) 2550 return false; 2551 2552 if (countTrailingOnes(MaskImm) < Bits) 2553 return false; 2554 2555 NewShiftAmt = ShiftAmt->getOperand(0); 2556 } 2557 2558 // Narrow/widen the shift amount to match the size of the shift operation. 2559 if (VT == MVT::i32) 2560 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 2561 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 2562 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 2563 MachineSDNode *Ext = CurDAG->getMachineNode( 2564 AArch64::SUBREG_TO_REG, DL, VT, 2565 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 2566 NewShiftAmt = SDValue(Ext, 0); 2567 } 2568 2569 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 2570 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2571 return true; 2572 } 2573 2574 bool 2575 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 2576 unsigned RegWidth) { 2577 APFloat FVal(0.0); 2578 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 2579 FVal = CN->getValueAPF(); 2580 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 2581 // Some otherwise illegal constants are allowed in this case. 2582 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || 2583 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) 2584 return false; 2585 2586 ConstantPoolSDNode *CN = 2587 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); 2588 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); 2589 } else 2590 return false; 2591 2592 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits 2593 // is between 1 and 32 for a destination w-register, or 1 and 64 for an 2594 // x-register. 2595 // 2596 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we 2597 // want THIS_NODE to be 2^fbits. This is much easier to deal with using 2598 // integers. 2599 bool IsExact; 2600 2601 // fbits is between 1 and 64 in the worst-case, which means the fmul 2602 // could have 2^64 as an actual operand. Need 65 bits of precision. 2603 APSInt IntVal(65, true); 2604 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); 2605 2606 // N.b. isPowerOf2 also checks for > 0. 2607 if (!IsExact || !IntVal.isPowerOf2()) return false; 2608 unsigned FBits = IntVal.logBase2(); 2609 2610 // Checks above should have guaranteed that we haven't lost information in 2611 // finding FBits, but it must still be in range. 
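  // For example (illustrative): for (fp_to_sint (fmul x, 65536.0)) with a
  // 32-bit destination, FVal is 2^16, so FBits == 16 and the fixed-point
  // FCVTZ[SU] patterns built on this helper can emit, e.g., fcvtzs w0, s0, #16.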
2612 if (FBits == 0 || FBits > RegWidth) return false; 2613 2614 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32); 2615 return true; 2616 } 2617 2618 // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields 2619 // of the string and obtains the integer values from them and combines these 2620 // into a single value to be used in the MRS/MSR instruction. 2621 static int getIntOperandFromRegisterString(StringRef RegString) { 2622 SmallVector<StringRef, 5> Fields; 2623 RegString.split(Fields, ':'); 2624 2625 if (Fields.size() == 1) 2626 return -1; 2627 2628 assert(Fields.size() == 5 2629 && "Invalid number of fields in read register string"); 2630 2631 SmallVector<int, 5> Ops; 2632 bool AllIntFields = true; 2633 2634 for (StringRef Field : Fields) { 2635 unsigned IntField; 2636 AllIntFields &= !Field.getAsInteger(10, IntField); 2637 Ops.push_back(IntField); 2638 } 2639 2640 assert(AllIntFields && 2641 "Unexpected non-integer value in special register string."); 2642 2643 // Need to combine the integer fields of the string into a single value 2644 // based on the bit encoding of MRS/MSR instruction. 2645 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | 2646 (Ops[3] << 3) | (Ops[4]); 2647 } 2648 2649 // Lower the read_register intrinsic to an MRS instruction node if the special 2650 // register string argument is either of the form detailed in the ALCE (the 2651 // form described in getIntOperandsFromRegsterString) or is a named register 2652 // known by the MRS SysReg mapper. 2653 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) { 2654 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 2655 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 2656 SDLoc DL(N); 2657 2658 int Reg = getIntOperandFromRegisterString(RegString->getString()); 2659 if (Reg != -1) { 2660 ReplaceNode(N, CurDAG->getMachineNode( 2661 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other, 2662 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2663 N->getOperand(0))); 2664 return true; 2665 } 2666 2667 // Use the sysreg mapper to map the remaining possible strings to the 2668 // value for the register to be used for the instruction operand. 2669 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 2670 if (TheReg && TheReg->Readable && 2671 TheReg->haveFeatures(Subtarget->getFeatureBits())) 2672 Reg = TheReg->Encoding; 2673 else 2674 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 2675 2676 if (Reg != -1) { 2677 ReplaceNode(N, CurDAG->getMachineNode( 2678 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other, 2679 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2680 N->getOperand(0))); 2681 return true; 2682 } 2683 2684 if (RegString->getString() == "pc") { 2685 ReplaceNode(N, CurDAG->getMachineNode( 2686 AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other, 2687 CurDAG->getTargetConstant(0, DL, MVT::i32), 2688 N->getOperand(0))); 2689 return true; 2690 } 2691 2692 return false; 2693 } 2694 2695 // Lower the write_register intrinsic to an MSR instruction node if the special 2696 // register string argument is either of the form detailed in the ALCE (the 2697 // form described in getIntOperandsFromRegsterString) or is a named register 2698 // known by the MSR SysReg mapper. 
2699 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { 2700 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 2701 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 2702 SDLoc DL(N); 2703 2704 int Reg = getIntOperandFromRegisterString(RegString->getString()); 2705 if (Reg != -1) { 2706 ReplaceNode( 2707 N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, 2708 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2709 N->getOperand(2), N->getOperand(0))); 2710 return true; 2711 } 2712 2713 // Check if the register was one of those allowed as the pstatefield value in 2714 // the MSR (immediate) instruction. To accept the values allowed in the 2715 // pstatefield for the MSR (immediate) instruction, we also require that an 2716 // immediate value has been provided as an argument, we know that this is 2717 // the case as it has been ensured by semantic checking. 2718 auto PMapper = AArch64PState::lookupPStateByName(RegString->getString()); 2719 if (PMapper) { 2720 assert (isa<ConstantSDNode>(N->getOperand(2)) 2721 && "Expected a constant integer expression."); 2722 unsigned Reg = PMapper->Encoding; 2723 uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 2724 unsigned State; 2725 if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) { 2726 assert(Immed < 2 && "Bad imm"); 2727 State = AArch64::MSRpstateImm1; 2728 } else { 2729 assert(Immed < 16 && "Bad imm"); 2730 State = AArch64::MSRpstateImm4; 2731 } 2732 ReplaceNode(N, CurDAG->getMachineNode( 2733 State, DL, MVT::Other, 2734 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2735 CurDAG->getTargetConstant(Immed, DL, MVT::i16), 2736 N->getOperand(0))); 2737 return true; 2738 } 2739 2740 // Use the sysreg mapper to attempt to map the remaining possible strings 2741 // to the value for the register to be used for the MSR (register) 2742 // instruction operand. 2743 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 2744 if (TheReg && TheReg->Writeable && 2745 TheReg->haveFeatures(Subtarget->getFeatureBits())) 2746 Reg = TheReg->Encoding; 2747 else 2748 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 2749 if (Reg != -1) { 2750 ReplaceNode(N, CurDAG->getMachineNode( 2751 AArch64::MSR, DL, MVT::Other, 2752 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2753 N->getOperand(2), N->getOperand(0))); 2754 return true; 2755 } 2756 2757 return false; 2758 } 2759 2760 /// We've got special pseudo-instructions for these 2761 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 2762 unsigned Opcode; 2763 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 2764 2765 // Leave IR for LSE if subtarget supports it. 2766 if (Subtarget->hasLSE()) return false; 2767 2768 if (MemTy == MVT::i8) 2769 Opcode = AArch64::CMP_SWAP_8; 2770 else if (MemTy == MVT::i16) 2771 Opcode = AArch64::CMP_SWAP_16; 2772 else if (MemTy == MVT::i32) 2773 Opcode = AArch64::CMP_SWAP_32; 2774 else if (MemTy == MVT::i64) 2775 Opcode = AArch64::CMP_SWAP_64; 2776 else 2777 llvm_unreachable("Unknown AtomicCmpSwap type"); 2778 2779 MVT RegTy = MemTy == MVT::i64 ? 
MVT::i64 : MVT::i32; 2780 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 2781 N->getOperand(0)}; 2782 SDNode *CmpSwap = CurDAG->getMachineNode( 2783 Opcode, SDLoc(N), 2784 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 2785 2786 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 2787 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 2788 2789 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 2790 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 2791 CurDAG->RemoveDeadNode(N); 2792 2793 return true; 2794 } 2795 2796 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 2797 // tagp(FrameIndex, IRGstack, tag_offset): 2798 // since the offset between FrameIndex and IRGstack is a compile-time 2799 // constant, this can be lowered to a single ADDG instruction. 2800 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 2801 return false; 2802 } 2803 2804 SDValue IRG_SP = N->getOperand(2); 2805 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 2806 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != 2807 Intrinsic::aarch64_irg_sp) { 2808 return false; 2809 } 2810 2811 const TargetLowering *TLI = getTargetLowering(); 2812 SDLoc DL(N); 2813 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 2814 SDValue FiOp = CurDAG->getTargetFrameIndex( 2815 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 2816 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2817 2818 SDNode *Out = CurDAG->getMachineNode( 2819 AArch64::TAGPstack, DL, MVT::i64, 2820 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 2821 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 2822 ReplaceNode(N, Out); 2823 return true; 2824 } 2825 2826 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 2827 assert(isa<ConstantSDNode>(N->getOperand(3)) && 2828 "llvm.aarch64.tagp third argument must be an immediate"); 2829 if (trySelectStackSlotTagP(N)) 2830 return; 2831 // FIXME: above applies in any case when offset between Op1 and Op2 is a 2832 // compile-time constant, not just for stack allocations. 2833 2834 // General case for unrelated pointers in Op1 and Op2. 2835 SDLoc DL(N); 2836 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2837 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 2838 {N->getOperand(1), N->getOperand(2)}); 2839 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 2840 {SDValue(N1, 0), N->getOperand(2)}); 2841 SDNode *N3 = CurDAG->getMachineNode( 2842 AArch64::ADDG, DL, MVT::i64, 2843 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 2844 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 2845 ReplaceNode(N, N3); 2846 } 2847 2848 void AArch64DAGToDAGISel::Select(SDNode *Node) { 2849 // If we have a custom node, we already have selected! 2850 if (Node->isMachineOpcode()) { 2851 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 2852 Node->setNodeId(-1); 2853 return; 2854 } 2855 2856 // Few custom selection stuff. 
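  // The cases below are handled by hand; anything not matched here falls
  // through to the TableGen-generated matcher at the end of this function.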
2857 EVT VT = Node->getValueType(0); 2858 2859 switch (Node->getOpcode()) { 2860 default: 2861 break; 2862 2863 case ISD::ATOMIC_CMP_SWAP: 2864 if (SelectCMP_SWAP(Node)) 2865 return; 2866 break; 2867 2868 case ISD::READ_REGISTER: 2869 if (tryReadRegister(Node)) 2870 return; 2871 break; 2872 2873 case ISD::WRITE_REGISTER: 2874 if (tryWriteRegister(Node)) 2875 return; 2876 break; 2877 2878 case ISD::ADD: 2879 if (tryMLAV64LaneV128(Node)) 2880 return; 2881 break; 2882 2883 case ISD::LOAD: { 2884 // Try to select as an indexed load. Fall through to normal processing 2885 // if we can't. 2886 if (tryIndexedLoad(Node)) 2887 return; 2888 break; 2889 } 2890 2891 case ISD::SRL: 2892 case ISD::AND: 2893 case ISD::SRA: 2894 case ISD::SIGN_EXTEND_INREG: 2895 if (tryBitfieldExtractOp(Node)) 2896 return; 2897 if (tryBitfieldInsertInZeroOp(Node)) 2898 return; 2899 LLVM_FALLTHROUGH; 2900 case ISD::ROTR: 2901 case ISD::SHL: 2902 if (tryShiftAmountMod(Node)) 2903 return; 2904 break; 2905 2906 case ISD::SIGN_EXTEND: 2907 if (tryBitfieldExtractOpFromSExt(Node)) 2908 return; 2909 break; 2910 2911 case ISD::OR: 2912 if (tryBitfieldInsertOp(Node)) 2913 return; 2914 break; 2915 2916 case ISD::Constant: { 2917 // Materialize zero constants as copies from WZR/XZR. This allows 2918 // the coalescer to propagate these into other instructions. 2919 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 2920 if (ConstNode->isNullValue()) { 2921 if (VT == MVT::i32) { 2922 SDValue New = CurDAG->getCopyFromReg( 2923 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 2924 ReplaceNode(Node, New.getNode()); 2925 return; 2926 } else if (VT == MVT::i64) { 2927 SDValue New = CurDAG->getCopyFromReg( 2928 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 2929 ReplaceNode(Node, New.getNode()); 2930 return; 2931 } 2932 } 2933 break; 2934 } 2935 2936 case ISD::FrameIndex: { 2937 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 2938 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 2939 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 2940 const TargetLowering *TLI = getTargetLowering(); 2941 SDValue TFI = CurDAG->getTargetFrameIndex( 2942 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 2943 SDLoc DL(Node); 2944 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 2945 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 2946 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 2947 return; 2948 } 2949 case ISD::INTRINSIC_W_CHAIN: { 2950 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 2951 switch (IntNo) { 2952 default: 2953 break; 2954 case Intrinsic::aarch64_ldaxp: 2955 case Intrinsic::aarch64_ldxp: { 2956 unsigned Op = 2957 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 2958 SDValue MemAddr = Node->getOperand(2); 2959 SDLoc DL(Node); 2960 SDValue Chain = Node->getOperand(0); 2961 2962 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 2963 MVT::Other, MemAddr, Chain); 2964 2965 // Transfer memoperands. 2966 MachineMemOperand *MemOp = 2967 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 2968 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 2969 ReplaceNode(Node, Ld); 2970 return; 2971 } 2972 case Intrinsic::aarch64_stlxp: 2973 case Intrinsic::aarch64_stxp: { 2974 unsigned Op = 2975 IntNo == Intrinsic::aarch64_stlxp ? 
AArch64::STLXPX : AArch64::STXPX; 2976 SDLoc DL(Node); 2977 SDValue Chain = Node->getOperand(0); 2978 SDValue ValLo = Node->getOperand(2); 2979 SDValue ValHi = Node->getOperand(3); 2980 SDValue MemAddr = Node->getOperand(4); 2981 2982 // Place arguments in the right order. 2983 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 2984 2985 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 2986 // Transfer memoperands. 2987 MachineMemOperand *MemOp = 2988 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 2989 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 2990 2991 ReplaceNode(Node, St); 2992 return; 2993 } 2994 case Intrinsic::aarch64_neon_ld1x2: 2995 if (VT == MVT::v8i8) { 2996 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 2997 return; 2998 } else if (VT == MVT::v16i8) { 2999 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 3000 return; 3001 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3002 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 3003 return; 3004 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3005 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 3006 return; 3007 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3008 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 3009 return; 3010 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3011 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 3012 return; 3013 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3014 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3015 return; 3016 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3017 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 3018 return; 3019 } 3020 break; 3021 case Intrinsic::aarch64_neon_ld1x3: 3022 if (VT == MVT::v8i8) { 3023 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 3024 return; 3025 } else if (VT == MVT::v16i8) { 3026 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 3027 return; 3028 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3029 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 3030 return; 3031 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3032 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 3033 return; 3034 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3035 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 3036 return; 3037 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3038 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 3039 return; 3040 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3041 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3042 return; 3043 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3044 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 3045 return; 3046 } 3047 break; 3048 case Intrinsic::aarch64_neon_ld1x4: 3049 if (VT == MVT::v8i8) { 3050 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 3051 return; 3052 } else if (VT == MVT::v16i8) { 3053 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 3054 return; 3055 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3056 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 3057 return; 3058 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3059 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 3060 return; 3061 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3062 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 3063 return; 3064 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3065 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 
3066 return; 3067 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3068 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3069 return; 3070 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3071 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 3072 return; 3073 } 3074 break; 3075 case Intrinsic::aarch64_neon_ld2: 3076 if (VT == MVT::v8i8) { 3077 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 3078 return; 3079 } else if (VT == MVT::v16i8) { 3080 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 3081 return; 3082 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3083 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 3084 return; 3085 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3086 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 3087 return; 3088 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3089 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 3090 return; 3091 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3092 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 3093 return; 3094 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3095 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3096 return; 3097 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3098 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 3099 return; 3100 } 3101 break; 3102 case Intrinsic::aarch64_neon_ld3: 3103 if (VT == MVT::v8i8) { 3104 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 3105 return; 3106 } else if (VT == MVT::v16i8) { 3107 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 3108 return; 3109 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3110 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 3111 return; 3112 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3113 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 3114 return; 3115 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3116 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 3117 return; 3118 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3119 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 3120 return; 3121 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3122 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3123 return; 3124 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3125 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 3126 return; 3127 } 3128 break; 3129 case Intrinsic::aarch64_neon_ld4: 3130 if (VT == MVT::v8i8) { 3131 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 3132 return; 3133 } else if (VT == MVT::v16i8) { 3134 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 3135 return; 3136 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3137 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 3138 return; 3139 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3140 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 3141 return; 3142 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3143 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 3144 return; 3145 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3146 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 3147 return; 3148 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3149 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3150 return; 3151 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3152 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 3153 return; 3154 } 3155 break; 3156 case Intrinsic::aarch64_neon_ld2r: 3157 if (VT == 
MVT::v8i8) { 3158 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 3159 return; 3160 } else if (VT == MVT::v16i8) { 3161 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 3162 return; 3163 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3164 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 3165 return; 3166 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3167 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 3168 return; 3169 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3170 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 3171 return; 3172 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3173 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 3174 return; 3175 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3176 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 3177 return; 3178 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3179 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 3180 return; 3181 } 3182 break; 3183 case Intrinsic::aarch64_neon_ld3r: 3184 if (VT == MVT::v8i8) { 3185 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 3186 return; 3187 } else if (VT == MVT::v16i8) { 3188 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 3189 return; 3190 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3191 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 3192 return; 3193 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3194 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 3195 return; 3196 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3197 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 3198 return; 3199 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3200 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 3201 return; 3202 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3203 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 3204 return; 3205 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3206 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 3207 return; 3208 } 3209 break; 3210 case Intrinsic::aarch64_neon_ld4r: 3211 if (VT == MVT::v8i8) { 3212 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 3213 return; 3214 } else if (VT == MVT::v16i8) { 3215 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 3216 return; 3217 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3218 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 3219 return; 3220 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3221 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 3222 return; 3223 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3224 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 3225 return; 3226 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3227 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 3228 return; 3229 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3230 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 3231 return; 3232 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3233 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 3234 return; 3235 } 3236 break; 3237 case Intrinsic::aarch64_neon_ld2lane: 3238 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3239 SelectLoadLane(Node, 2, AArch64::LD2i8); 3240 return; 3241 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3242 VT == MVT::v8f16) { 3243 SelectLoadLane(Node, 2, AArch64::LD2i16); 3244 return; 3245 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3246 VT == MVT::v2f32) { 3247 SelectLoadLane(Node, 2, AArch64::LD2i32); 3248 return; 3249 
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 2, AArch64::LD2i64);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 3, AArch64::LD3i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16) {
        SelectLoadLane(Node, 3, AArch64::LD3i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 3, AArch64::LD3i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 3, AArch64::LD3i64);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 4, AArch64::LD4i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16) {
        SelectLoadLane(Node, 4, AArch64::LD4i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 4, AArch64::LD4i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 4, AArch64::LD4i64);
        return;
      }
      break;
    }
  } break;
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_tagp:
      SelectTagP(Node);
      return;
    case Intrinsic::aarch64_neon_tbl2:
      SelectTable(Node, 2,
                  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbl3:
      SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
                                           : AArch64::TBLv16i8Three,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbl4:
      SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
                                           : AArch64::TBLv16i8Four,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbx2:
      SelectTable(Node, 2,
                  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
                  true);
      return;
    case Intrinsic::aarch64_neon_tbx3:
      SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
                                           : AArch64::TBXv16i8Three,
                  true);
      return;
    case Intrinsic::aarch64_neon_tbx4:
      SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
                                           : AArch64::TBXv16i8Four,
                  true);
      return;
    case Intrinsic::aarch64_neon_smull:
    case Intrinsic::aarch64_neon_umull:
      if (tryMULLV64LaneV128(IntNo, Node))
        return;
      break;
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    if (Node->getNumOperands() >= 3)
      VT = Node->getOperand(2)->getValueType(0);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_neon_st1x2: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 2, AArch64::ST1Twov8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 2, AArch64::ST1Twov16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
        SelectStore(Node, 2, AArch64::ST1Twov4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
        SelectStore(Node, 2, AArch64::ST1Twov8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 2, AArch64::ST1Twov2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 2, AArch64::ST1Twov4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 2, AArch64::ST1Twov2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 2, AArch64::ST1Twov1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st1x3: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 3, AArch64::ST1Threev8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 3, AArch64::ST1Threev16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
        SelectStore(Node, 3, AArch64::ST1Threev4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
        SelectStore(Node, 3, AArch64::ST1Threev8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 3, AArch64::ST1Threev2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 3, AArch64::ST1Threev4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 3, AArch64::ST1Threev2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 3, AArch64::ST1Threev1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st1x4: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 4, AArch64::ST1Fourv8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 4, AArch64::ST1Fourv16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
        SelectStore(Node, 4, AArch64::ST1Fourv4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
        SelectStore(Node, 4, AArch64::ST1Fourv8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 4, AArch64::ST1Fourv2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 4, AArch64::ST1Fourv4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st2: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 2, AArch64::ST2Twov8b);
        return;
      } else if (VT == MVT::v16i8) {
SelectStore(Node, 2, AArch64::ST2Twov16b); 3436 return; 3437 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3438 SelectStore(Node, 2, AArch64::ST2Twov4h); 3439 return; 3440 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3441 SelectStore(Node, 2, AArch64::ST2Twov8h); 3442 return; 3443 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3444 SelectStore(Node, 2, AArch64::ST2Twov2s); 3445 return; 3446 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3447 SelectStore(Node, 2, AArch64::ST2Twov4s); 3448 return; 3449 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3450 SelectStore(Node, 2, AArch64::ST2Twov2d); 3451 return; 3452 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3453 SelectStore(Node, 2, AArch64::ST1Twov1d); 3454 return; 3455 } 3456 break; 3457 } 3458 case Intrinsic::aarch64_neon_st3: { 3459 if (VT == MVT::v8i8) { 3460 SelectStore(Node, 3, AArch64::ST3Threev8b); 3461 return; 3462 } else if (VT == MVT::v16i8) { 3463 SelectStore(Node, 3, AArch64::ST3Threev16b); 3464 return; 3465 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3466 SelectStore(Node, 3, AArch64::ST3Threev4h); 3467 return; 3468 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3469 SelectStore(Node, 3, AArch64::ST3Threev8h); 3470 return; 3471 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3472 SelectStore(Node, 3, AArch64::ST3Threev2s); 3473 return; 3474 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3475 SelectStore(Node, 3, AArch64::ST3Threev4s); 3476 return; 3477 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3478 SelectStore(Node, 3, AArch64::ST3Threev2d); 3479 return; 3480 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3481 SelectStore(Node, 3, AArch64::ST1Threev1d); 3482 return; 3483 } 3484 break; 3485 } 3486 case Intrinsic::aarch64_neon_st4: { 3487 if (VT == MVT::v8i8) { 3488 SelectStore(Node, 4, AArch64::ST4Fourv8b); 3489 return; 3490 } else if (VT == MVT::v16i8) { 3491 SelectStore(Node, 4, AArch64::ST4Fourv16b); 3492 return; 3493 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3494 SelectStore(Node, 4, AArch64::ST4Fourv4h); 3495 return; 3496 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3497 SelectStore(Node, 4, AArch64::ST4Fourv8h); 3498 return; 3499 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3500 SelectStore(Node, 4, AArch64::ST4Fourv2s); 3501 return; 3502 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3503 SelectStore(Node, 4, AArch64::ST4Fourv4s); 3504 return; 3505 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3506 SelectStore(Node, 4, AArch64::ST4Fourv2d); 3507 return; 3508 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3509 SelectStore(Node, 4, AArch64::ST1Fourv1d); 3510 return; 3511 } 3512 break; 3513 } 3514 case Intrinsic::aarch64_neon_st2lane: { 3515 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3516 SelectStoreLane(Node, 2, AArch64::ST2i8); 3517 return; 3518 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3519 VT == MVT::v8f16) { 3520 SelectStoreLane(Node, 2, AArch64::ST2i16); 3521 return; 3522 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3523 VT == MVT::v2f32) { 3524 SelectStoreLane(Node, 2, AArch64::ST2i32); 3525 return; 3526 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3527 VT == MVT::v1f64) { 3528 SelectStoreLane(Node, 2, AArch64::ST2i64); 3529 return; 3530 } 3531 break; 3532 } 3533 case Intrinsic::aarch64_neon_st3lane: { 3534 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3535 SelectStoreLane(Node, 3, AArch64::ST3i8); 3536 return; 3537 } else if (VT == MVT::v8i16 || VT 
== MVT::v4i16 || VT == MVT::v4f16 || 3538 VT == MVT::v8f16) { 3539 SelectStoreLane(Node, 3, AArch64::ST3i16); 3540 return; 3541 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3542 VT == MVT::v2f32) { 3543 SelectStoreLane(Node, 3, AArch64::ST3i32); 3544 return; 3545 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3546 VT == MVT::v1f64) { 3547 SelectStoreLane(Node, 3, AArch64::ST3i64); 3548 return; 3549 } 3550 break; 3551 } 3552 case Intrinsic::aarch64_neon_st4lane: { 3553 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3554 SelectStoreLane(Node, 4, AArch64::ST4i8); 3555 return; 3556 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3557 VT == MVT::v8f16) { 3558 SelectStoreLane(Node, 4, AArch64::ST4i16); 3559 return; 3560 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3561 VT == MVT::v2f32) { 3562 SelectStoreLane(Node, 4, AArch64::ST4i32); 3563 return; 3564 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3565 VT == MVT::v1f64) { 3566 SelectStoreLane(Node, 4, AArch64::ST4i64); 3567 return; 3568 } 3569 break; 3570 } 3571 } 3572 break; 3573 } 3574 case AArch64ISD::LD2post: { 3575 if (VT == MVT::v8i8) { 3576 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 3577 return; 3578 } else if (VT == MVT::v16i8) { 3579 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 3580 return; 3581 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3582 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 3583 return; 3584 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3585 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 3586 return; 3587 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3588 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 3589 return; 3590 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3591 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 3592 return; 3593 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3594 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 3595 return; 3596 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3597 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 3598 return; 3599 } 3600 break; 3601 } 3602 case AArch64ISD::LD3post: { 3603 if (VT == MVT::v8i8) { 3604 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 3605 return; 3606 } else if (VT == MVT::v16i8) { 3607 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 3608 return; 3609 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3610 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 3611 return; 3612 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3613 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 3614 return; 3615 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3616 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 3617 return; 3618 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3619 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 3620 return; 3621 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3622 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 3623 return; 3624 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3625 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 3626 return; 3627 } 3628 break; 3629 } 3630 case AArch64ISD::LD4post: { 3631 if (VT == MVT::v8i8) { 3632 SelectPostLoad(Node, 4, 
AArch64::LD4Fourv8b_POST, AArch64::dsub0); 3633 return; 3634 } else if (VT == MVT::v16i8) { 3635 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 3636 return; 3637 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3638 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 3639 return; 3640 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3641 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 3642 return; 3643 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3644 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 3645 return; 3646 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3647 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 3648 return; 3649 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3650 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 3651 return; 3652 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3653 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 3654 return; 3655 } 3656 break; 3657 } 3658 case AArch64ISD::LD1x2post: { 3659 if (VT == MVT::v8i8) { 3660 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 3661 return; 3662 } else if (VT == MVT::v16i8) { 3663 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 3664 return; 3665 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3666 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 3667 return; 3668 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3669 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 3670 return; 3671 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3672 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 3673 return; 3674 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3675 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 3676 return; 3677 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3678 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 3679 return; 3680 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3681 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 3682 return; 3683 } 3684 break; 3685 } 3686 case AArch64ISD::LD1x3post: { 3687 if (VT == MVT::v8i8) { 3688 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 3689 return; 3690 } else if (VT == MVT::v16i8) { 3691 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 3692 return; 3693 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3694 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 3695 return; 3696 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3697 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 3698 return; 3699 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3700 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 3701 return; 3702 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3703 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 3704 return; 3705 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3706 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 3707 return; 3708 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3709 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 3710 return; 3711 } 3712 break; 3713 } 3714 case AArch64ISD::LD1x4post: { 3715 if (VT == MVT::v8i8) { 3716 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 3717 return; 3718 } else if (VT == MVT::v16i8) { 3719 SelectPostLoad(Node, 4, 
AArch64::LD1Fourv16b_POST, AArch64::qsub0); 3720 return; 3721 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3722 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 3723 return; 3724 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3725 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 3726 return; 3727 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3728 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 3729 return; 3730 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3731 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 3732 return; 3733 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3734 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 3735 return; 3736 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3737 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 3738 return; 3739 } 3740 break; 3741 } 3742 case AArch64ISD::LD1DUPpost: { 3743 if (VT == MVT::v8i8) { 3744 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 3745 return; 3746 } else if (VT == MVT::v16i8) { 3747 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 3748 return; 3749 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3750 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 3751 return; 3752 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3753 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 3754 return; 3755 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3756 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 3757 return; 3758 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3759 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 3760 return; 3761 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3762 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 3763 return; 3764 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3765 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 3766 return; 3767 } 3768 break; 3769 } 3770 case AArch64ISD::LD2DUPpost: { 3771 if (VT == MVT::v8i8) { 3772 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 3773 return; 3774 } else if (VT == MVT::v16i8) { 3775 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 3776 return; 3777 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3778 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 3779 return; 3780 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3781 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 3782 return; 3783 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3784 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 3785 return; 3786 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3787 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 3788 return; 3789 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3790 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 3791 return; 3792 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3793 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 3794 return; 3795 } 3796 break; 3797 } 3798 case AArch64ISD::LD3DUPpost: { 3799 if (VT == MVT::v8i8) { 3800 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 3801 return; 3802 } else if (VT == MVT::v16i8) { 3803 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 3804 return; 3805 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3806 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, 
AArch64::dsub0); 3807 return; 3808 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3809 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 3810 return; 3811 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3812 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 3813 return; 3814 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3815 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 3816 return; 3817 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3818 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 3819 return; 3820 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3821 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 3822 return; 3823 } 3824 break; 3825 } 3826 case AArch64ISD::LD4DUPpost: { 3827 if (VT == MVT::v8i8) { 3828 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 3829 return; 3830 } else if (VT == MVT::v16i8) { 3831 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 3832 return; 3833 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3834 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 3835 return; 3836 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3837 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 3838 return; 3839 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3840 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 3841 return; 3842 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3843 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 3844 return; 3845 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3846 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 3847 return; 3848 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3849 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 3850 return; 3851 } 3852 break; 3853 } 3854 case AArch64ISD::LD1LANEpost: { 3855 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3856 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 3857 return; 3858 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3859 VT == MVT::v8f16) { 3860 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 3861 return; 3862 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3863 VT == MVT::v2f32) { 3864 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 3865 return; 3866 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3867 VT == MVT::v1f64) { 3868 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); 3869 return; 3870 } 3871 break; 3872 } 3873 case AArch64ISD::LD2LANEpost: { 3874 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3875 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); 3876 return; 3877 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3878 VT == MVT::v8f16) { 3879 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); 3880 return; 3881 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3882 VT == MVT::v2f32) { 3883 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); 3884 return; 3885 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3886 VT == MVT::v1f64) { 3887 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); 3888 return; 3889 } 3890 break; 3891 } 3892 case AArch64ISD::LD3LANEpost: { 3893 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3894 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); 3895 return; 3896 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3897 VT == MVT::v8f16) { 3898 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); 
3899 return; 3900 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3901 VT == MVT::v2f32) { 3902 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); 3903 return; 3904 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3905 VT == MVT::v1f64) { 3906 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); 3907 return; 3908 } 3909 break; 3910 } 3911 case AArch64ISD::LD4LANEpost: { 3912 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3913 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); 3914 return; 3915 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3916 VT == MVT::v8f16) { 3917 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); 3918 return; 3919 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3920 VT == MVT::v2f32) { 3921 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); 3922 return; 3923 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3924 VT == MVT::v1f64) { 3925 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); 3926 return; 3927 } 3928 break; 3929 } 3930 case AArch64ISD::ST2post: { 3931 VT = Node->getOperand(1).getValueType(); 3932 if (VT == MVT::v8i8) { 3933 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); 3934 return; 3935 } else if (VT == MVT::v16i8) { 3936 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); 3937 return; 3938 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3939 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); 3940 return; 3941 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3942 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); 3943 return; 3944 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3945 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); 3946 return; 3947 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3948 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); 3949 return; 3950 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3951 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); 3952 return; 3953 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3954 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 3955 return; 3956 } 3957 break; 3958 } 3959 case AArch64ISD::ST3post: { 3960 VT = Node->getOperand(1).getValueType(); 3961 if (VT == MVT::v8i8) { 3962 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); 3963 return; 3964 } else if (VT == MVT::v16i8) { 3965 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); 3966 return; 3967 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3968 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); 3969 return; 3970 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 3971 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); 3972 return; 3973 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3974 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); 3975 return; 3976 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3977 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); 3978 return; 3979 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3980 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); 3981 return; 3982 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3983 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 3984 return; 3985 } 3986 break; 3987 } 3988 case AArch64ISD::ST4post: { 3989 VT = Node->getOperand(1).getValueType(); 3990 if (VT == MVT::v8i8) { 3991 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); 3992 return; 3993 } else if (VT == MVT::v16i8) { 3994 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); 3995 return; 3996 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 3997 
SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); 3998 return; 3999 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 4000 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); 4001 return; 4002 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4003 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); 4004 return; 4005 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4006 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); 4007 return; 4008 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4009 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); 4010 return; 4011 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4012 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 4013 return; 4014 } 4015 break; 4016 } 4017 case AArch64ISD::ST1x2post: { 4018 VT = Node->getOperand(1).getValueType(); 4019 if (VT == MVT::v8i8) { 4020 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); 4021 return; 4022 } else if (VT == MVT::v16i8) { 4023 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); 4024 return; 4025 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 4026 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); 4027 return; 4028 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 4029 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); 4030 return; 4031 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4032 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); 4033 return; 4034 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4035 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); 4036 return; 4037 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4038 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4039 return; 4040 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4041 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); 4042 return; 4043 } 4044 break; 4045 } 4046 case AArch64ISD::ST1x3post: { 4047 VT = Node->getOperand(1).getValueType(); 4048 if (VT == MVT::v8i8) { 4049 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); 4050 return; 4051 } else if (VT == MVT::v16i8) { 4052 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); 4053 return; 4054 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 4055 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); 4056 return; 4057 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 4058 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); 4059 return; 4060 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4061 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); 4062 return; 4063 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4064 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); 4065 return; 4066 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4067 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4068 return; 4069 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4070 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); 4071 return; 4072 } 4073 break; 4074 } 4075 case AArch64ISD::ST1x4post: { 4076 VT = Node->getOperand(1).getValueType(); 4077 if (VT == MVT::v8i8) { 4078 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); 4079 return; 4080 } else if (VT == MVT::v16i8) { 4081 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); 4082 return; 4083 } else if (VT == MVT::v4i16 || VT == MVT::v4f16) { 4084 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); 4085 return; 4086 } else if (VT == MVT::v8i16 || VT == MVT::v8f16) { 4087 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); 4088 return; 4089 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4090 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); 4091 return; 4092 } else if (VT == MVT::v4i32 
               || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into a
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}
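
// Usage sketch (illustrative, not part of this file): the target's pass
// configuration is expected to instantiate this selector through the factory
// above when it sets up instruction selection. Assuming the usual
// AArch64PassConfig::addInstSelector() hook in AArch64TargetMachine.cpp, that
// would look roughly like:
//
//   bool AArch64PassConfig::addInstSelector() {
//     addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
//     ...
//     return false;
//   }
//
// The factory takes the target machine and the optimization level so the
// selector can behave differently at -O0 than in optimized builds.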