//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

// Hidden command-line switch, off by default. When set, both
// SelectImmShifterOperand and SelectRegShifterOperand return false before
// attempting any match, so no shifter operands are selected through them.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  /// Not set by the constructor; assigned at the start of every
  /// runOnMachineFunction call.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  /// Refresh the cached subtarget from \p MF, then run the generic
  /// SelectionDAG instruction selector. The base-class result is ignored and
  /// true is always returned.
  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  /// Same as SelectRegShifterOperand, with CheckProfitability forced to false.
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  /// Same as SelectImmShifterOperand, with CheckProfitability forced to false.
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  /// Split a conditional-move predicate into its two operands. \p N is
  /// cast<> to ConstantSDNode, so it is expected to be a constant. \p Pred
  /// receives its zero-extended value as an i32 target constant and \p Reg
  /// receives the CPSR register. Always returns true.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                 SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  /// True if ARM_AM::getSOImmVal finds an encoding for \p Imm (result != -1).
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  /// True if ARM_AM::getSOImmVal finds an encoding for the bitwise
  /// complement of \p Imm.
  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  /// True if ARM_AM::getT2SOImmVal finds an encoding for \p Imm
  /// (result != -1).
  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  /// True if ARM_AM::getT2SOImmVal finds an encoding for the bitwise
  /// complement of \p Imm.
  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
  /// \arg \c NumExtraOps  number of extra operands besides the coprocessor,
  ///                      the accumulator and the immediate operand, i.e. 0
  ///                      for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum     whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value. Imm is left untouched
/// when the test fails.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand (operand 1).
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
/// It is also overwritten on some failure paths (any constant node), so
/// callers must only rely on it when true is returned.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  // The zero-extended value is narrowed to int before the divisibility test.
  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

/// Pre-selection DAG rewrite. Does nothing unless the subtarget has V6T2 ops.
/// Otherwise walks every node in CurDAG and rewrites qualifying ADD nodes in
/// place (via UpdateNodeOperands) as described in the loop body.
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Advance the iterator first to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1+tz), (c2>>tz)), tz)) where tz is the
    // number of trailing zeros of c2. The left shift would be folded as a
    // shifter operand of 'add' and the 'and' and 'srl' would become a bits
    // extraction node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // Accept the AND on either side of the ADD; after this N1 is the AND.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    // And_imm is still 0 if neither operand matched (or the mask itself is 0).
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operands of 1 / 2 are free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After dropping the trailing zeros the mask must be a contiguous run of
    // low set bits, i.e. of the form 2^k - 1.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // No hazard analysis when not optimizing.
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  // Only a single user is analysed; multiple users are rejected outright.
  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  // The remaining checks need an already-selected (machine opcode) user.
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

/// Return true if folding \p Shift into a shifter operand is considered
/// profitable. Always true unless the subtarget is A9-like or Swift. On those
/// subtargets a single-use shift is folded; a multi-use shift is folded only
/// for LSL #2, or LSL #1 on Swift.
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

/// See the declaration for the contract. \p N must be an ISD::MUL and
/// \p MaxShift must be non-zero (both asserted). \p PowerOfTwo is written
/// even when false is returned; \p NewMulConst is written only once a
/// non-zero power of two has been found.
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  // NOTE(review): the callers visible in this file pass MaxShift == 31, so the
  // first iteration evaluates `1 << 31` on a signed int. Consider `1u <<` here
  // and in the division below - confirm against the supported compilers.
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}

/// Reposition M's node relative to N's node in CurDAG's node list, then
/// redirect all uses of N to M.
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

/// Match \p N as a register shifted by an immediate amount.
///
/// On success \p BaseReg is the value being shifted and \p Opc is an i32
/// target constant built with ARM_AM::getSORegOpc from the shift kind and
/// amount. Returns false when -disable-shifter-op is set, when N is not a
/// shift node, or when the shift amount is not a constant. In the last case
/// BaseReg has already been overwritten. \p CheckProfitability is not
/// consulted by this function.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      // Hold N in a handle across the constant replacement and read the value
      // back from the handle afterwards (presumably so BaseReg follows N if
      // the replacement updates it - confirm).
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  // Only the low five bits of the shift amount are encoded.
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

/// Match \p N as a register shifted by another register.
///
/// On success \p BaseReg is the value being shifted, \p ShReg the shift-amount
/// register and \p Opc an i32 target constant built with ARM_AM::getSORegOpc
/// from the shift kind and an immediate of 0. Returns false when
/// -disable-shifter-op is set, when N is not a shift node, when the shift
/// amount is a constant, or when \p CheckProfitability is set and
/// isShifterOpProfitable rejects the shift. BaseReg (and possibly ShReg) may
/// already have been overwritten on those later failure paths.
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // Constant shift amounts are left to SelectImmShifterOperand.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
// Out is always set to N; the result is whether N and Parent's operand 1 have
// no set bits in common.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}


/// Match \p N as base register plus a constant offset strictly between
/// -0x1000 and 0x1000. Never fails: anything that does not fit is matched as
/// "N + 0". A FrameIndex base is converted to a target frame index.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Look through an ARMISD::Wrapper unless it wraps a target global
    // address, external symbol or global TLS address.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // A subtraction of C is encoded as an offset of -C.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}



/// Match \p N as base register +/- (optionally shifted) offset register.
///
/// On success \p Base and \p Offset are the two registers and \p Opc is an i32
/// target constant built with ARM_AM::getAM2Opc from the add/sub direction,
/// shift amount and shift kind. Returns false when N is not an ADD, SUB or
/// base-with-constant-offset node, or when it is an ADD/OR whose constant
/// operand lies strictly between -0x1000 and 0x1000 (left for the imm12 form).
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        // Drop the low bit; what remains must be a (possibly negated) power
        // of two for the X +/- (X << ShAmt) form to apply.
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        // Not profitable: keep the whole shift node as a plain offset register.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap roles: the shifted LHS becomes the offset, the RHS the base.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      // Same handle-then-replace sequence as in SelectImmShifterOperand.
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

/// Match the (optionally shifted) register offset \p N of the indexed
/// load/store \p Op. \p Op is treated as a StoreSDNode whenever its opcode is
/// not ISD::LOAD. Returns false when N is a constant in [0, 0x1000). Otherwise
/// \p Offset is the offset register and \p Opc the ARM_AM::getAM2Opc encoding.
/// The direction is add for PRE_INC/POST_INC and sub otherwise.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  // Small constants are handled by the immediate-offset selectors instead.
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

/// Match a constant offset \p N in [0, 0x1000) for the indexed load/store
/// \p Op. On success \p Offset is register 0 and \p Opc is the signed offset
/// itself as an i32 target constant (negated for the decrementing modes).
/// Returns false for anything else.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


/// Match a constant offset \p N in [0, 0x1000) for the indexed load/store
/// \p Op. On success \p Offset is register 0 and \p Opc is the
/// ARM_AM::getAM2Opc encoding of the direction and the offset, with no shift.
/// Returns false for anything else.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

/// Match any address as a bare base with no offset; always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

/// Match \p N as base +/- imm8 or base +/- register. Never fails.
/// \p Opc is an i32 target constant built with ARM_AM::getAM3Opc. \p Offset is
/// register 0 for the immediate and base-only forms, otherwise the offset
/// register. A FrameIndex base is converted to a target frame index.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C  is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Not base + constant: use N itself as the base with a zero offset.
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // The encoding carries a direction plus a magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant does not fit in 8 bits: fall back to base + register.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
952 Offset = CurDAG->getRegister(0, MVT::i32); 953 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op), 954 MVT::i32); 955 return true; 956 } 957 958 Offset = N; 959 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op), 960 MVT::i32); 961 return true; 962 } 963 964 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, 965 bool FP16) { 966 if (!CurDAG->isBaseWithConstantOffset(N)) { 967 Base = N; 968 if (N.getOpcode() == ISD::FrameIndex) { 969 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 970 Base = CurDAG->getTargetFrameIndex( 971 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 972 } else if (N.getOpcode() == ARMISD::Wrapper && 973 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 974 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 975 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 976 Base = N.getOperand(0); 977 } 978 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 979 SDLoc(N), MVT::i32); 980 return true; 981 } 982 983 // If the RHS is +/- imm8, fold into addr mode. 984 int RHSC; 985 const int Scale = FP16 ? 
2 : 4; 986 987 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) { 988 Base = N.getOperand(0); 989 if (Base.getOpcode() == ISD::FrameIndex) { 990 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 991 Base = CurDAG->getTargetFrameIndex( 992 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 993 } 994 995 ARM_AM::AddrOpc AddSub = ARM_AM::add; 996 if (RHSC < 0) { 997 AddSub = ARM_AM::sub; 998 RHSC = -RHSC; 999 } 1000 1001 if (FP16) 1002 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC), 1003 SDLoc(N), MVT::i32); 1004 else 1005 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), 1006 SDLoc(N), MVT::i32); 1007 1008 return true; 1009 } 1010 1011 Base = N; 1012 1013 if (FP16) 1014 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0), 1015 SDLoc(N), MVT::i32); 1016 else 1017 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 1018 SDLoc(N), MVT::i32); 1019 1020 return true; 1021 } 1022 1023 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, 1024 SDValue &Base, SDValue &Offset) { 1025 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false); 1026 } 1027 1028 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N, 1029 SDValue &Base, SDValue &Offset) { 1030 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true); 1031 } 1032 1033 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, 1034 SDValue &Align) { 1035 Addr = N; 1036 1037 unsigned Alignment = 0; 1038 1039 MemSDNode *MemN = cast<MemSDNode>(Parent); 1040 1041 if (isa<LSBaseSDNode>(MemN) || 1042 ((MemN->getOpcode() == ARMISD::VST1_UPD || 1043 MemN->getOpcode() == ARMISD::VLD1_UPD) && 1044 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { 1045 // This case occurs only for VLD1-lane/dup and VST1-lane instructions. 1046 // The maximum alignment is equal to the memory size being referenced. 
1047 unsigned MMOAlign = MemN->getAlignment(); 1048 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; 1049 if (MMOAlign >= MemSize && MemSize > 1) 1050 Alignment = MemSize; 1051 } else { 1052 // All other uses of addrmode6 are for intrinsics. For now just record 1053 // the raw alignment value; it will be refined later based on the legal 1054 // alignment operands for the intrinsic. 1055 Alignment = MemN->getAlignment(); 1056 } 1057 1058 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); 1059 return true; 1060 } 1061 1062 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, 1063 SDValue &Offset) { 1064 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op); 1065 ISD::MemIndexedMode AM = LdSt->getAddressingMode(); 1066 if (AM != ISD::POST_INC) 1067 return false; 1068 Offset = N; 1069 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) { 1070 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) 1071 Offset = CurDAG->getRegister(0, MVT::i32); 1072 } 1073 return true; 1074 } 1075 1076 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, 1077 SDValue &Offset, SDValue &Label) { 1078 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { 1079 Offset = N.getOperand(0); 1080 SDValue N1 = N.getOperand(1); 1081 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), 1082 SDLoc(N), MVT::i32); 1083 return true; 1084 } 1085 1086 return false; 1087 } 1088 1089 1090 //===----------------------------------------------------------------------===// 1091 // Thumb Addressing Modes 1092 //===----------------------------------------------------------------------===// 1093 1094 static bool shouldUseZeroOffsetLdSt(SDValue N) { 1095 // Negative numbers are difficult to materialise in thumb1. If we are 1096 // selecting the add of a negative, instead try to select ri with a zero 1097 // offset, so create the add node directly which will become a sub. 
1098 if (N.getOpcode() != ISD::ADD) 1099 return false; 1100 1101 // Look for an imm which is not legal for ld/st, but is legal for sub. 1102 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) 1103 return C->getSExtValue() < 0 && C->getSExtValue() >= -255; 1104 1105 return false; 1106 } 1107 1108 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, 1109 SDValue &Offset) { 1110 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { 1111 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); 1112 if (!NC || !NC->isNullValue()) 1113 return false; 1114 1115 Base = Offset = N; 1116 return true; 1117 } 1118 1119 Base = N.getOperand(0); 1120 Offset = N.getOperand(1); 1121 return true; 1122 } 1123 1124 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base, 1125 SDValue &Offset) { 1126 if (shouldUseZeroOffsetLdSt(N)) 1127 return false; // Select ri instead 1128 return SelectThumbAddrModeRRSext(N, Base, Offset); 1129 } 1130 1131 bool 1132 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1133 SDValue &Base, SDValue &OffImm) { 1134 if (shouldUseZeroOffsetLdSt(N)) { 1135 Base = N; 1136 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1137 return true; 1138 } 1139 1140 if (!CurDAG->isBaseWithConstantOffset(N)) { 1141 if (N.getOpcode() == ISD::ADD) { 1142 return false; // We want to select register offset instead 1143 } else if (N.getOpcode() == ARMISD::Wrapper && 1144 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1145 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1146 N.getOperand(0).getOpcode() != ISD::TargetConstantPool && 1147 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1148 Base = N.getOperand(0); 1149 } else { 1150 Base = N; 1151 } 1152 1153 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1154 return true; 1155 } 1156 1157 // If the RHS is + imm5 * scale, fold into addr mode. 
1158 int RHSC; 1159 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1160 Base = N.getOperand(0); 1161 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1162 return true; 1163 } 1164 1165 // Offset is too large, so use register offset instead. 1166 return false; 1167 } 1168 1169 bool 1170 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1171 SDValue &OffImm) { 1172 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1173 } 1174 1175 bool 1176 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1177 SDValue &OffImm) { 1178 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1179 } 1180 1181 bool 1182 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1183 SDValue &OffImm) { 1184 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1185 } 1186 1187 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1188 SDValue &Base, SDValue &OffImm) { 1189 if (N.getOpcode() == ISD::FrameIndex) { 1190 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1191 // Only multiples of 4 are allowed for the offset, so the frame object 1192 // alignment must be at least 4. 1193 MachineFrameInfo &MFI = MF->getFrameInfo(); 1194 if (MFI.getObjectAlign(FI) < Align(4)) 1195 MFI.setObjectAlignment(FI, Align(4)); 1196 Base = CurDAG->getTargetFrameIndex( 1197 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1198 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1199 return true; 1200 } 1201 1202 if (!CurDAG->isBaseWithConstantOffset(N)) 1203 return false; 1204 1205 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { 1206 // If the RHS is + imm8 * scale, fold into addr mode. 1207 int RHSC; 1208 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1209 Base = N.getOperand(0); 1210 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1211 // Make sure the offset is inside the object, or we might fail to 1212 // allocate an emergency spill slot. 
(An out-of-range access is UB, but 1213 // it could show up anyway.) 1214 MachineFrameInfo &MFI = MF->getFrameInfo(); 1215 if (RHSC * 4 < MFI.getObjectSize(FI)) { 1216 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1217 // indexed by the LHS must be 4-byte aligned. 1218 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4)) 1219 MFI.setObjectAlignment(FI, Align(4)); 1220 if (MFI.getObjectAlign(FI) >= Align(4)) { 1221 Base = CurDAG->getTargetFrameIndex( 1222 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1223 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1224 return true; 1225 } 1226 } 1227 } 1228 } 1229 1230 return false; 1231 } 1232 1233 template <unsigned Shift> 1234 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, 1235 SDValue &OffImm) { 1236 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1237 int RHSC; 1238 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1239 RHSC)) { 1240 Base = N.getOperand(0); 1241 if (N.getOpcode() == ISD::SUB) 1242 RHSC = -RHSC; 1243 OffImm = 1244 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1245 return true; 1246 } 1247 } 1248 1249 // Base only. 1250 Base = N; 1251 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1252 return true; 1253 } 1254 1255 1256 //===----------------------------------------------------------------------===// 1257 // Thumb 2 Addressing Modes 1258 //===----------------------------------------------------------------------===// 1259 1260 1261 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1262 SDValue &Base, SDValue &OffImm) { 1263 // Match simple R + imm12 operands. 1264 1265 // Base only. 1266 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1267 !CurDAG->isBaseWithConstantOffset(N)) { 1268 if (N.getOpcode() == ISD::FrameIndex) { 1269 // Match frame index. 
1270 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1271 Base = CurDAG->getTargetFrameIndex( 1272 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1273 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1274 return true; 1275 } 1276 1277 if (N.getOpcode() == ARMISD::Wrapper && 1278 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1279 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1280 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1281 Base = N.getOperand(0); 1282 if (Base.getOpcode() == ISD::TargetConstantPool) 1283 return false; // We want to select t2LDRpci instead. 1284 } else 1285 Base = N; 1286 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1287 return true; 1288 } 1289 1290 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1291 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1292 // Let t2LDRi8 handle (R - imm8). 1293 return false; 1294 1295 int RHSC = (int)RHS->getZExtValue(); 1296 if (N.getOpcode() == ISD::SUB) 1297 RHSC = -RHSC; 1298 1299 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1300 Base = N.getOperand(0); 1301 if (Base.getOpcode() == ISD::FrameIndex) { 1302 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1303 Base = CurDAG->getTargetFrameIndex( 1304 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1305 } 1306 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1307 return true; 1308 } 1309 } 1310 1311 // Base only. 
1312 Base = N; 1313 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1314 return true; 1315 } 1316 1317 template <unsigned Shift> 1318 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, 1319 SDValue &OffImm) { 1320 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1321 int RHSC; 1322 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) { 1323 Base = N.getOperand(0); 1324 if (Base.getOpcode() == ISD::FrameIndex) { 1325 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1326 Base = CurDAG->getTargetFrameIndex( 1327 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1328 } 1329 1330 if (N.getOpcode() == ISD::SUB) 1331 RHSC = -RHSC; 1332 OffImm = 1333 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1334 return true; 1335 } 1336 } 1337 1338 // Base only. 1339 Base = N; 1340 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1341 return true; 1342 } 1343 1344 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1345 SDValue &Base, SDValue &OffImm) { 1346 // Match simple R - imm8 operands. 
1347 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1348 !CurDAG->isBaseWithConstantOffset(N)) 1349 return false; 1350 1351 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1352 int RHSC = (int)RHS->getSExtValue(); 1353 if (N.getOpcode() == ISD::SUB) 1354 RHSC = -RHSC; 1355 1356 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1357 Base = N.getOperand(0); 1358 if (Base.getOpcode() == ISD::FrameIndex) { 1359 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1360 Base = CurDAG->getTargetFrameIndex( 1361 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1362 } 1363 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1364 return true; 1365 } 1366 } 1367 1368 return false; 1369 } 1370 1371 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1372 SDValue &OffImm){ 1373 unsigned Opcode = Op->getOpcode(); 1374 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1375 ? cast<LoadSDNode>(Op)->getAddressingMode() 1376 : cast<StoreSDNode>(Op)->getAddressingMode(); 1377 int RHSC; 1378 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1379 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1380 ? 
CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) 1381 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1382 return true; 1383 } 1384 1385 return false; 1386 } 1387 1388 template <unsigned Shift> 1389 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base, 1390 SDValue &OffImm) { 1391 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1392 int RHSC; 1393 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1394 RHSC)) { 1395 Base = N.getOperand(0); 1396 if (Base.getOpcode() == ISD::FrameIndex) { 1397 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1398 Base = CurDAG->getTargetFrameIndex( 1399 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1400 } 1401 1402 if (N.getOpcode() == ISD::SUB) 1403 RHSC = -RHSC; 1404 OffImm = 1405 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1406 return true; 1407 } 1408 } 1409 1410 // Base only. 1411 Base = N; 1412 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1413 return true; 1414 } 1415 1416 template <unsigned Shift> 1417 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1418 SDValue &OffImm) { 1419 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); 1420 } 1421 1422 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1423 SDValue &OffImm, 1424 unsigned Shift) { 1425 unsigned Opcode = Op->getOpcode(); 1426 ISD::MemIndexedMode AM; 1427 switch (Opcode) { 1428 case ISD::LOAD: 1429 AM = cast<LoadSDNode>(Op)->getAddressingMode(); 1430 break; 1431 case ISD::STORE: 1432 AM = cast<StoreSDNode>(Op)->getAddressingMode(); 1433 break; 1434 case ISD::MLOAD: 1435 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode(); 1436 break; 1437 case ISD::MSTORE: 1438 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode(); 1439 break; 1440 default: 1441 llvm_unreachable("Unexpected Opcode for Imm7Offset"); 1442 } 1443 1444 int RHSC; 1445 // 7 bit constant, shifted by Shift. 
1446 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { 1447 OffImm = 1448 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1449 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) 1450 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), 1451 MVT::i32); 1452 return true; 1453 } 1454 return false; 1455 } 1456 1457 template <int Min, int Max> 1458 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) { 1459 int Val; 1460 if (isScaledConstantInRange(N, 1, Min, Max, Val)) { 1461 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32); 1462 return true; 1463 } 1464 return false; 1465 } 1466 1467 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1468 SDValue &Base, 1469 SDValue &OffReg, SDValue &ShImm) { 1470 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1471 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1472 return false; 1473 1474 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 1475 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1476 int RHSC = (int)RHS->getZExtValue(); 1477 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1478 return false; 1479 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1480 return false; 1481 } 1482 1483 // Look for (R + R) or (R + (R << [1,2,3])). 1484 unsigned ShAmt = 0; 1485 Base = N.getOperand(0); 1486 OffReg = N.getOperand(1); 1487 1488 // Swap if it is ((R << c) + R). 1489 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1490 if (ShOpcVal != ARM_AM::lsl) { 1491 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1492 if (ShOpcVal == ARM_AM::lsl) 1493 std::swap(Base, OffReg); 1494 } 1495 1496 if (ShOpcVal == ARM_AM::lsl) { 1497 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1498 // it. 
1499 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1500 ShAmt = Sh->getZExtValue(); 1501 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1502 OffReg = OffReg.getOperand(0); 1503 else { 1504 ShAmt = 0; 1505 } 1506 } 1507 } 1508 1509 // If OffReg is a multiply-by-constant and it's profitable to extract a shift 1510 // and use it in a shifted operand do so. 1511 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { 1512 unsigned PowerOfTwo = 0; 1513 SDValue NewMulConst; 1514 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { 1515 HandleSDNode Handle(OffReg); 1516 replaceDAGValue(OffReg.getOperand(1), NewMulConst); 1517 OffReg = Handle.getValue(); 1518 ShAmt = PowerOfTwo; 1519 } 1520 } 1521 1522 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1523 1524 return true; 1525 } 1526 1527 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1528 SDValue &OffImm) { 1529 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1530 // instructions. 1531 Base = N; 1532 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1533 1534 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1535 return true; 1536 1537 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1538 if (!RHS) 1539 return true; 1540 1541 uint32_t RHSC = (int)RHS->getZExtValue(); 1542 if (RHSC > 1020 || RHSC % 4 != 0) 1543 return true; 1544 1545 Base = N.getOperand(0); 1546 if (Base.getOpcode() == ISD::FrameIndex) { 1547 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1548 Base = CurDAG->getTargetFrameIndex( 1549 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1550 } 1551 1552 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1553 return true; 1554 } 1555 1556 //===--------------------------------------------------------------------===// 1557 1558 /// getAL - Returns a ARMCC::AL immediate node. 
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

/// Attach the memory operand of the memory node \p N to the machine node
/// \p Result. \p N must be a MemSDNode and \p Result a MachineSDNode (both
/// enforced by cast<>).
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

/// Try to select the indexed load \p N to an ARM pre/post-indexed load.
///
/// Returns false for unindexed loads and when no offset matcher accepts the
/// load's offset for its memory type. On success \p N is replaced by the new
/// machine node and true is returned.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // The immediate forms are tried before the register form.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // Sign-extending byte loads go through the addrmode3 matcher; the other
    // byte loads use the addrmode2 matchers like i32.
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The pre-indexed immediate forms take no separate offset register
      // operand; AMOpc carries the signed offset.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

/// Try to select the indexed load \p N to the Thumb1 post-increment pseudo.
///
/// Only a non-extending i32 POST_INC load with a constant offset of 4 is
/// accepted.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

/// Try to select the indexed load \p N to a Thumb2 pre/post-indexed load.
///
/// Requires an offset accepted by SelectT2AddrModeImm8Offset and a memory
/// type of i32, i16, i8 or i1. The opcode is chosen from the memory type,
/// sign-extension and pre/post mode.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

/// Try to select an indexed vector load or masked load \p N to an MVE
/// pre/post-indexed VLDR.
///
/// \p N must be a LoadSDNode or MaskedLoadSDNode. Returns false for unindexed
/// or non-vector loads and when no memory type / alignment / offset
/// combination below matches. Masked loads are emitted predicated
/// (ARMVCC::Then) on the load's mask; plain loads are unpredicated.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // The extending (narrow memory type) forms are tried first, then the
  // full-width forms from the widest element size down to bytes.
  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base, NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  // The machine node produces the i32 result first and the loaded value
  // second, so results 0 and 1 of N are swapped when rewiring uses.
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  // REG_SEQUENCE operands: register class, then (value, subreg index) pairs.
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  // REG_SEQUENCE operands: register class, then (value, subreg index) pairs.
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
1845 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1846 SDLoc dl(V0.getNode()); 1847 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1848 MVT::i32); 1849 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1850 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1851 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1852 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1853 } 1854 1855 /// Form 4 consecutive D registers from a pair of Q registers. 1856 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1857 SDLoc dl(V0.getNode()); 1858 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1859 MVT::i32); 1860 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1861 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1862 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1863 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1864 } 1865 1866 /// Form 4 consecutive S registers. 1867 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1868 SDValue V2, SDValue V3) { 1869 SDLoc dl(V0.getNode()); 1870 SDValue RegClass = 1871 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1872 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1873 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1874 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1875 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1876 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1877 V2, SubReg2, V3, SubReg3 }; 1878 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1879 } 1880 1881 /// Form 4 consecutive D registers. 
1882 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1883 SDValue V2, SDValue V3) { 1884 SDLoc dl(V0.getNode()); 1885 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1886 MVT::i32); 1887 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1888 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1889 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1890 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1891 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1892 V2, SubReg2, V3, SubReg3 }; 1893 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1894 } 1895 1896 /// Form 4 consecutive Q registers. 1897 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1898 SDValue V2, SDValue V3) { 1899 SDLoc dl(V0.getNode()); 1900 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1901 MVT::i32); 1902 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1903 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1904 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1905 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1906 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1907 V2, SubReg2, V3, SubReg3 }; 1908 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1909 } 1910 1911 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1912 /// of a NEON VLD or VST instruction. The supported values depend on the 1913 /// number of registers being loaded. 
1914 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1915 unsigned NumVecs, bool is64BitVector) { 1916 unsigned NumRegs = NumVecs; 1917 if (!is64BitVector && NumVecs < 3) 1918 NumRegs *= 2; 1919 1920 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1921 if (Alignment >= 32 && NumRegs == 4) 1922 Alignment = 32; 1923 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1924 Alignment = 16; 1925 else if (Alignment >= 8) 1926 Alignment = 8; 1927 else 1928 Alignment = 0; 1929 1930 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1931 } 1932 1933 static bool isVLDfixed(unsigned Opc) 1934 { 1935 switch (Opc) { 1936 default: return false; 1937 case ARM::VLD1d8wb_fixed : return true; 1938 case ARM::VLD1d16wb_fixed : return true; 1939 case ARM::VLD1d64Qwb_fixed : return true; 1940 case ARM::VLD1d32wb_fixed : return true; 1941 case ARM::VLD1d64wb_fixed : return true; 1942 case ARM::VLD1d64TPseudoWB_fixed : return true; 1943 case ARM::VLD1d64QPseudoWB_fixed : return true; 1944 case ARM::VLD1q8wb_fixed : return true; 1945 case ARM::VLD1q16wb_fixed : return true; 1946 case ARM::VLD1q32wb_fixed : return true; 1947 case ARM::VLD1q64wb_fixed : return true; 1948 case ARM::VLD1DUPd8wb_fixed : return true; 1949 case ARM::VLD1DUPd16wb_fixed : return true; 1950 case ARM::VLD1DUPd32wb_fixed : return true; 1951 case ARM::VLD1DUPq8wb_fixed : return true; 1952 case ARM::VLD1DUPq16wb_fixed : return true; 1953 case ARM::VLD1DUPq32wb_fixed : return true; 1954 case ARM::VLD2d8wb_fixed : return true; 1955 case ARM::VLD2d16wb_fixed : return true; 1956 case ARM::VLD2d32wb_fixed : return true; 1957 case ARM::VLD2q8PseudoWB_fixed : return true; 1958 case ARM::VLD2q16PseudoWB_fixed : return true; 1959 case ARM::VLD2q32PseudoWB_fixed : return true; 1960 case ARM::VLD2DUPd8wb_fixed : return true; 1961 case ARM::VLD2DUPd16wb_fixed : return true; 1962 case ARM::VLD2DUPd32wb_fixed : return true; 1963 } 1964 } 1965 1966 static bool 
isVSTfixed(unsigned Opc) 1967 { 1968 switch (Opc) { 1969 default: return false; 1970 case ARM::VST1d8wb_fixed : return true; 1971 case ARM::VST1d16wb_fixed : return true; 1972 case ARM::VST1d32wb_fixed : return true; 1973 case ARM::VST1d64wb_fixed : return true; 1974 case ARM::VST1q8wb_fixed : return true; 1975 case ARM::VST1q16wb_fixed : return true; 1976 case ARM::VST1q32wb_fixed : return true; 1977 case ARM::VST1q64wb_fixed : return true; 1978 case ARM::VST1d64TPseudoWB_fixed : return true; 1979 case ARM::VST1d64QPseudoWB_fixed : return true; 1980 case ARM::VST2d8wb_fixed : return true; 1981 case ARM::VST2d16wb_fixed : return true; 1982 case ARM::VST2d32wb_fixed : return true; 1983 case ARM::VST2q8PseudoWB_fixed : return true; 1984 case ARM::VST2q16PseudoWB_fixed : return true; 1985 case ARM::VST2q32PseudoWB_fixed : return true; 1986 } 1987 } 1988 1989 // Get the register stride update opcode of a VLD/VST instruction that 1990 // is otherwise equivalent to the given fixed stride updating instruction. 
1991 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 1992 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 1993 && "Incorrect fixed stride updating instruction."); 1994 switch (Opc) { 1995 default: break; 1996 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 1997 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 1998 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 1999 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 2000 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 2001 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 2002 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 2003 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 2004 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 2005 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 2006 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 2007 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 2008 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; 2009 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; 2010 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; 2011 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; 2012 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; 2013 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; 2014 2015 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 2016 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 2017 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 2018 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 2019 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 2020 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 2021 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 2022 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 2023 case ARM::VST1d64TPseudoWB_fixed: return 
ARM::VST1d64TPseudoWB_register; 2024 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 2025 2026 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 2027 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 2028 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 2029 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 2030 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 2031 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 2032 2033 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 2034 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 2035 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 2036 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 2037 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 2038 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 2039 2040 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 2041 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 2042 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 2043 } 2044 return Opc; // If not one we handle, return it unchanged. 2045 } 2046 2047 /// Returns true if the given increment is a Constant known to be equal to the 2048 /// access size performed by a NEON load/store. This means the "[rN]!" form can 2049 /// be used. 
2050 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { 2051 auto C = dyn_cast<ConstantSDNode>(Inc); 2052 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; 2053 } 2054 2055 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 2056 const uint16_t *DOpcodes, 2057 const uint16_t *QOpcodes0, 2058 const uint16_t *QOpcodes1) { 2059 assert(Subtarget->hasNEON()); 2060 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 2061 SDLoc dl(N); 2062 2063 SDValue MemAddr, Align; 2064 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2065 // nodes are not intrinsics. 2066 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2067 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2068 return; 2069 2070 SDValue Chain = N->getOperand(0); 2071 EVT VT = N->getValueType(0); 2072 bool is64BitVector = VT.is64BitVector(); 2073 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2074 2075 unsigned OpcodeIndex; 2076 switch (VT.getSimpleVT().SimpleTy) { 2077 default: llvm_unreachable("unhandled vld type"); 2078 // Double-register operations: 2079 case MVT::v8i8: OpcodeIndex = 0; break; 2080 case MVT::v4f16: 2081 case MVT::v4bf16: 2082 case MVT::v4i16: OpcodeIndex = 1; break; 2083 case MVT::v2f32: 2084 case MVT::v2i32: OpcodeIndex = 2; break; 2085 case MVT::v1i64: OpcodeIndex = 3; break; 2086 // Quad-register operations: 2087 case MVT::v16i8: OpcodeIndex = 0; break; 2088 case MVT::v8f16: 2089 case MVT::v8bf16: 2090 case MVT::v8i16: OpcodeIndex = 1; break; 2091 case MVT::v4f32: 2092 case MVT::v4i32: OpcodeIndex = 2; break; 2093 case MVT::v2f64: 2094 case MVT::v2i64: OpcodeIndex = 3; break; 2095 } 2096 2097 EVT ResTy; 2098 if (NumVecs == 1) 2099 ResTy = VT; 2100 else { 2101 unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; 2102 if (!is64BitVector) 2103 ResTyElts *= 2; 2104 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2105 } 2106 std::vector<EVT> ResTys; 2107 ResTys.push_back(ResTy); 2108 if (isUpdating) 2109 ResTys.push_back(MVT::i32); 2110 ResTys.push_back(MVT::Other); 2111 2112 SDValue Pred = getAL(CurDAG, dl); 2113 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2114 SDNode *VLd; 2115 SmallVector<SDValue, 7> Ops; 2116 2117 // Double registers and VLD1/VLD2 quad registers are directly supported. 2118 if (is64BitVector || NumVecs <= 2) { 2119 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2120 QOpcodes0[OpcodeIndex]); 2121 Ops.push_back(MemAddr); 2122 Ops.push_back(Align); 2123 if (isUpdating) { 2124 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2125 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2126 if (!IsImmUpdate) { 2127 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 2128 // check for the opcode rather than the number of vector elements. 2129 if (isVLDfixed(Opc)) 2130 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2131 Ops.push_back(Inc); 2132 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in 2133 // the operands if not such an opcode. 2134 } else if (!isVLDfixed(Opc)) 2135 Ops.push_back(Reg0); 2136 } 2137 Ops.push_back(Pred); 2138 Ops.push_back(Reg0); 2139 Ops.push_back(Chain); 2140 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2141 2142 } else { 2143 // Otherwise, quad registers are loaded with two separate instructions, 2144 // where one loads the even registers and the other loads the odd registers. 2145 EVT AddrTy = MemAddr.getValueType(); 2146 2147 // Load the even subregs. This is always an updating load, so that it 2148 // provides the address to the second load for the odd subregs. 
2149 SDValue ImplDef = 2150 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2151 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 2152 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2153 ResTy, AddrTy, MVT::Other, OpsA); 2154 Chain = SDValue(VLdA, 2); 2155 2156 // Load the odd subregs. 2157 Ops.push_back(SDValue(VLdA, 1)); 2158 Ops.push_back(Align); 2159 if (isUpdating) { 2160 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2161 assert(isa<ConstantSDNode>(Inc.getNode()) && 2162 "only constant post-increment update allowed for VLD3/4"); 2163 (void)Inc; 2164 Ops.push_back(Reg0); 2165 } 2166 Ops.push_back(SDValue(VLdA, 0)); 2167 Ops.push_back(Pred); 2168 Ops.push_back(Reg0); 2169 Ops.push_back(Chain); 2170 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 2171 } 2172 2173 // Transfer memoperands. 2174 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2175 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp}); 2176 2177 if (NumVecs == 1) { 2178 ReplaceNode(N, VLd); 2179 return; 2180 } 2181 2182 // Extract out the subregisters. 2183 SDValue SuperReg = SDValue(VLd, 0); 2184 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2185 ARM::qsub_3 == ARM::qsub_0 + 3, 2186 "Unexpected subreg numbering"); 2187 unsigned Sub0 = (is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0); 2188 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2189 ReplaceUses(SDValue(N, Vec), 2190 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2191 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 2192 if (isUpdating) 2193 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 2194 CurDAG->RemoveDeadNode(N); 2195 } 2196 2197 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 2198 const uint16_t *DOpcodes, 2199 const uint16_t *QOpcodes0, 2200 const uint16_t *QOpcodes1) { 2201 assert(Subtarget->hasNEON()); 2202 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 2203 SDLoc dl(N); 2204 2205 SDValue MemAddr, Align; 2206 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2207 // nodes are not intrinsics. 2208 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2209 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2210 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2211 return; 2212 2213 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2214 2215 SDValue Chain = N->getOperand(0); 2216 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2217 bool is64BitVector = VT.is64BitVector(); 2218 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2219 2220 unsigned OpcodeIndex; 2221 switch (VT.getSimpleVT().SimpleTy) { 2222 default: llvm_unreachable("unhandled vst type"); 2223 // Double-register operations: 2224 case MVT::v8i8: OpcodeIndex = 0; break; 2225 case MVT::v4f16: 2226 case MVT::v4bf16: 2227 case MVT::v4i16: OpcodeIndex = 1; break; 2228 case MVT::v2f32: 2229 case MVT::v2i32: OpcodeIndex = 2; break; 2230 case MVT::v1i64: OpcodeIndex = 3; break; 2231 // Quad-register operations: 2232 case MVT::v16i8: OpcodeIndex = 0; break; 2233 case MVT::v8f16: 2234 case MVT::v8bf16: 2235 case MVT::v8i16: OpcodeIndex = 1; break; 2236 case MVT::v4f32: 2237 case MVT::v4i32: OpcodeIndex = 2; break; 2238 case MVT::v2f64: 2239 case MVT::v2i64: 
OpcodeIndex = 3; break; 2240 } 2241 2242 std::vector<EVT> ResTys; 2243 if (isUpdating) 2244 ResTys.push_back(MVT::i32); 2245 ResTys.push_back(MVT::Other); 2246 2247 SDValue Pred = getAL(CurDAG, dl); 2248 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2249 SmallVector<SDValue, 7> Ops; 2250 2251 // Double registers and VST1/VST2 quad registers are directly supported. 2252 if (is64BitVector || NumVecs <= 2) { 2253 SDValue SrcReg; 2254 if (NumVecs == 1) { 2255 SrcReg = N->getOperand(Vec0Idx); 2256 } else if (is64BitVector) { 2257 // Form a REG_SEQUENCE to force register allocation. 2258 SDValue V0 = N->getOperand(Vec0Idx + 0); 2259 SDValue V1 = N->getOperand(Vec0Idx + 1); 2260 if (NumVecs == 2) 2261 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2262 else { 2263 SDValue V2 = N->getOperand(Vec0Idx + 2); 2264 // If it's a vst3, form a quad D-register and leave the last part as 2265 // an undef. 2266 SDValue V3 = (NumVecs == 3) 2267 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2268 : N->getOperand(Vec0Idx + 3); 2269 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2270 } 2271 } else { 2272 // Form a QQ register. 2273 SDValue Q0 = N->getOperand(Vec0Idx); 2274 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2275 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2276 } 2277 2278 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2279 QOpcodes0[OpcodeIndex]); 2280 Ops.push_back(MemAddr); 2281 Ops.push_back(Align); 2282 if (isUpdating) { 2283 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2284 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2285 if (!IsImmUpdate) { 2286 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so 2287 // check for the opcode rather than the number of vector elements. 
2288 if (isVSTfixed(Opc)) 2289 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2290 Ops.push_back(Inc); 2291 } 2292 // VST1/VST2 fixed increment does not need Reg0 so only include it in 2293 // the operands if not such an opcode. 2294 else if (!isVSTfixed(Opc)) 2295 Ops.push_back(Reg0); 2296 } 2297 Ops.push_back(SrcReg); 2298 Ops.push_back(Pred); 2299 Ops.push_back(Reg0); 2300 Ops.push_back(Chain); 2301 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2302 2303 // Transfer memoperands. 2304 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp}); 2305 2306 ReplaceNode(N, VSt); 2307 return; 2308 } 2309 2310 // Otherwise, quad registers are stored with two separate instructions, 2311 // where one stores the even registers and the other stores the odd registers. 2312 2313 // Form the QQQQ REG_SEQUENCE. 2314 SDValue V0 = N->getOperand(Vec0Idx + 0); 2315 SDValue V1 = N->getOperand(Vec0Idx + 1); 2316 SDValue V2 = N->getOperand(Vec0Idx + 2); 2317 SDValue V3 = (NumVecs == 3) 2318 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2319 : N->getOperand(Vec0Idx + 3); 2320 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2321 2322 // Store the even D registers. This is always an updating store, so that it 2323 // provides the address to the second store for the odd subregs. 2324 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2325 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2326 MemAddr.getValueType(), 2327 MVT::Other, OpsA); 2328 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp}); 2329 Chain = SDValue(VStA, 1); 2330 2331 // Store the odd D registers. 
2332 Ops.push_back(SDValue(VStA, 0)); 2333 Ops.push_back(Align); 2334 if (isUpdating) { 2335 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2336 assert(isa<ConstantSDNode>(Inc.getNode()) && 2337 "only constant post-increment update allowed for VST3/4"); 2338 (void)Inc; 2339 Ops.push_back(Reg0); 2340 } 2341 Ops.push_back(RegSeq); 2342 Ops.push_back(Pred); 2343 Ops.push_back(Reg0); 2344 Ops.push_back(Chain); 2345 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2346 Ops); 2347 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp}); 2348 ReplaceNode(N, VStB); 2349 } 2350 2351 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2352 unsigned NumVecs, 2353 const uint16_t *DOpcodes, 2354 const uint16_t *QOpcodes) { 2355 assert(Subtarget->hasNEON()); 2356 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2357 SDLoc dl(N); 2358 2359 SDValue MemAddr, Align; 2360 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2361 // nodes are not intrinsics. 2362 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2363 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2364 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2365 return; 2366 2367 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2368 2369 SDValue Chain = N->getOperand(0); 2370 unsigned Lane = 2371 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); 2372 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2373 bool is64BitVector = VT.is64BitVector(); 2374 2375 unsigned Alignment = 0; 2376 if (NumVecs != 3) { 2377 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2378 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2379 if (Alignment > NumBytes) 2380 Alignment = NumBytes; 2381 if (Alignment < 8 && Alignment < NumBytes) 2382 Alignment = 0; 2383 // Alignment must be a power of two; make sure of that. 
2384 Alignment = (Alignment & -Alignment); 2385 if (Alignment == 1) 2386 Alignment = 0; 2387 } 2388 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2389 2390 unsigned OpcodeIndex; 2391 switch (VT.getSimpleVT().SimpleTy) { 2392 default: llvm_unreachable("unhandled vld/vst lane type"); 2393 // Double-register operations: 2394 case MVT::v8i8: OpcodeIndex = 0; break; 2395 case MVT::v4f16: 2396 case MVT::v4bf16: 2397 case MVT::v4i16: OpcodeIndex = 1; break; 2398 case MVT::v2f32: 2399 case MVT::v2i32: OpcodeIndex = 2; break; 2400 // Quad-register operations: 2401 case MVT::v8f16: 2402 case MVT::v8bf16: 2403 case MVT::v8i16: OpcodeIndex = 0; break; 2404 case MVT::v4f32: 2405 case MVT::v4i32: OpcodeIndex = 1; break; 2406 } 2407 2408 std::vector<EVT> ResTys; 2409 if (IsLoad) { 2410 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2411 if (!is64BitVector) 2412 ResTyElts *= 2; 2413 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2414 MVT::i64, ResTyElts)); 2415 } 2416 if (isUpdating) 2417 ResTys.push_back(MVT::i32); 2418 ResTys.push_back(MVT::Other); 2419 2420 SDValue Pred = getAL(CurDAG, dl); 2421 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2422 2423 SmallVector<SDValue, 8> Ops; 2424 Ops.push_back(MemAddr); 2425 Ops.push_back(Align); 2426 if (isUpdating) { 2427 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2428 bool IsImmUpdate = 2429 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2430 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 2431 } 2432 2433 SDValue SuperReg; 2434 SDValue V0 = N->getOperand(Vec0Idx + 0); 2435 SDValue V1 = N->getOperand(Vec0Idx + 1); 2436 if (NumVecs == 2) { 2437 if (is64BitVector) 2438 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2439 else 2440 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2441 } else { 2442 SDValue V2 = N->getOperand(Vec0Idx + 2); 2443 SDValue V3 = (NumVecs == 3) 2444 ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2445 : N->getOperand(Vec0Idx + 3); 2446 if (is64BitVector) 2447 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2448 else 2449 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2450 } 2451 Ops.push_back(SuperReg); 2452 Ops.push_back(getI32Imm(Lane, dl)); 2453 Ops.push_back(Pred); 2454 Ops.push_back(Reg0); 2455 Ops.push_back(Chain); 2456 2457 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2458 QOpcodes[OpcodeIndex]); 2459 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2460 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp}); 2461 if (!IsLoad) { 2462 ReplaceNode(N, VLdLn); 2463 return; 2464 } 2465 2466 // Extract the subregisters. 2467 SuperReg = SDValue(VLdLn, 0); 2468 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2469 ARM::qsub_3 == ARM::qsub_0 + 3, 2470 "Unexpected subreg numbering"); 2471 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2472 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2473 ReplaceUses(SDValue(N, Vec), 2474 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2475 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2476 if (isUpdating) 2477 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2478 CurDAG->RemoveDeadNode(N); 2479 } 2480 2481 template <typename SDValueVector> 2482 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2483 SDValue PredicateMask) { 2484 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2485 Ops.push_back(PredicateMask); 2486 } 2487 2488 template <typename SDValueVector> 2489 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2490 SDValue PredicateMask, 2491 SDValue Inactive) { 2492 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2493 Ops.push_back(PredicateMask); 2494 Ops.push_back(Inactive); 2495 } 2496 2497 template <typename SDValueVector> 2498 void 
ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) { 2499 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2500 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2501 } 2502 2503 template <typename SDValueVector> 2504 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2505 EVT InactiveTy) { 2506 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2507 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2508 Ops.push_back(SDValue( 2509 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0)); 2510 } 2511 2512 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, 2513 bool Predicated) { 2514 SDLoc Loc(N); 2515 SmallVector<SDValue, 8> Ops; 2516 2517 uint16_t Opcode; 2518 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) { 2519 case 32: 2520 Opcode = Opcodes[0]; 2521 break; 2522 case 64: 2523 Opcode = Opcodes[1]; 2524 break; 2525 default: 2526 llvm_unreachable("bad vector element size in SelectMVE_WB"); 2527 } 2528 2529 Ops.push_back(N->getOperand(2)); // vector of base addresses 2530 2531 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2532 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset 2533 2534 if (Predicated) 2535 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2536 else 2537 AddEmptyMVEPredicateToOps(Ops, Loc); 2538 2539 Ops.push_back(N->getOperand(0)); // chain 2540 2541 SmallVector<EVT, 8> VTs; 2542 VTs.push_back(N->getValueType(1)); 2543 VTs.push_back(N->getValueType(0)); 2544 VTs.push_back(N->getValueType(2)); 2545 2546 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops); 2547 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 2548 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 2549 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 2550 CurDAG->RemoveDeadNode(N); 2551 } 2552 2553 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, 2554 bool Immediate, 2555 bool 
HasSaturationOperand) { 2556 SDLoc Loc(N); 2557 SmallVector<SDValue, 8> Ops; 2558 2559 // Two 32-bit halves of the value to be shifted 2560 Ops.push_back(N->getOperand(1)); 2561 Ops.push_back(N->getOperand(2)); 2562 2563 // The shift count 2564 if (Immediate) { 2565 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2566 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2567 } else { 2568 Ops.push_back(N->getOperand(3)); 2569 } 2570 2571 // The immediate saturation operand, if any 2572 if (HasSaturationOperand) { 2573 int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(); 2574 int SatBit = (SatOp == 64 ? 0 : 1); 2575 Ops.push_back(getI32Imm(SatBit, Loc)); 2576 } 2577 2578 // MVE scalar shifts are IT-predicable, so include the standard 2579 // predicate arguments. 2580 Ops.push_back(getAL(CurDAG, Loc)); 2581 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2582 2583 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2584 } 2585 2586 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 2587 uint16_t OpcodeWithNoCarry, 2588 bool Add, bool Predicated) { 2589 SDLoc Loc(N); 2590 SmallVector<SDValue, 8> Ops; 2591 uint16_t Opcode; 2592 2593 unsigned FirstInputOp = Predicated ? 2 : 1; 2594 2595 // Two input vectors and the input carry flag 2596 Ops.push_back(N->getOperand(FirstInputOp)); 2597 Ops.push_back(N->getOperand(FirstInputOp + 1)); 2598 SDValue CarryIn = N->getOperand(FirstInputOp + 2); 2599 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn); 2600 uint32_t CarryMask = 1 << 29; 2601 uint32_t CarryExpected = Add ? 
0 : CarryMask; 2602 if (CarryInConstant && 2603 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) { 2604 Opcode = OpcodeWithNoCarry; 2605 } else { 2606 Ops.push_back(CarryIn); 2607 Opcode = OpcodeWithCarry; 2608 } 2609 2610 if (Predicated) 2611 AddMVEPredicateToOps(Ops, Loc, 2612 N->getOperand(FirstInputOp + 3), // predicate 2613 N->getOperand(FirstInputOp - 1)); // inactive 2614 else 2615 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2616 2617 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2618 } 2619 2620 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) { 2621 SDLoc Loc(N); 2622 SmallVector<SDValue, 8> Ops; 2623 2624 // One vector input, followed by a 32-bit word of bits to shift in 2625 // and then an immediate shift count 2626 Ops.push_back(N->getOperand(1)); 2627 Ops.push_back(N->getOperand(2)); 2628 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2629 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2630 2631 if (Predicated) 2632 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2633 else 2634 AddEmptyMVEPredicateToOps(Ops, Loc); 2635 2636 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops)); 2637 } 2638 2639 static bool SDValueToConstBool(SDValue SDVal) { 2640 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant"); 2641 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal); 2642 uint64_t Value = SDValConstant->getZExtValue(); 2643 assert((Value == 0 || Value == 1) && "expected value 0 or 1"); 2644 return Value; 2645 } 2646 2647 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 2648 const uint16_t *OpcodesS, 2649 const uint16_t *OpcodesU, 2650 size_t Stride, size_t TySize) { 2651 assert(TySize < Stride && "Invalid TySize"); 2652 bool IsUnsigned = SDValueToConstBool(N->getOperand(1)); 2653 bool IsSub = SDValueToConstBool(N->getOperand(2)); 2654 bool IsExchange = 
SDValueToConstBool(N->getOperand(3)); 2655 if (IsUnsigned) { 2656 assert(!IsSub && 2657 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist"); 2658 assert(!IsExchange && 2659 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist"); 2660 } 2661 2662 auto OpIsZero = [N](size_t OpNo) { 2663 if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo))) 2664 if (OpConst->getZExtValue() == 0) 2665 return true; 2666 return false; 2667 }; 2668 2669 // If the input accumulator value is not zero, select an instruction with 2670 // accumulator, otherwise select an instruction without accumulator 2671 bool IsAccum = !(OpIsZero(4) && OpIsZero(5)); 2672 2673 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS; 2674 if (IsSub) 2675 Opcodes += 4 * Stride; 2676 if (IsExchange) 2677 Opcodes += 2 * Stride; 2678 if (IsAccum) 2679 Opcodes += Stride; 2680 uint16_t Opcode = Opcodes[TySize]; 2681 2682 SDLoc Loc(N); 2683 SmallVector<SDValue, 8> Ops; 2684 // Push the accumulator operands, if they are used 2685 if (IsAccum) { 2686 Ops.push_back(N->getOperand(4)); 2687 Ops.push_back(N->getOperand(5)); 2688 } 2689 // Push the two vector operands 2690 Ops.push_back(N->getOperand(6)); 2691 Ops.push_back(N->getOperand(7)); 2692 2693 if (Predicated) 2694 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8)); 2695 else 2696 AddEmptyMVEPredicateToOps(Ops, Loc); 2697 2698 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2699 } 2700 2701 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated, 2702 const uint16_t *OpcodesS, 2703 const uint16_t *OpcodesU) { 2704 EVT VecTy = N->getOperand(6).getValueType(); 2705 size_t SizeIndex; 2706 switch (VecTy.getVectorElementType().getSizeInBits()) { 2707 case 16: 2708 SizeIndex = 0; 2709 break; 2710 case 32: 2711 SizeIndex = 1; 2712 break; 2713 default: 2714 llvm_unreachable("bad vector element size"); 2715 } 2716 2717 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex); 2718 } 
2719 2720 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, 2721 const uint16_t *OpcodesS, 2722 const uint16_t *OpcodesU) { 2723 assert( 2724 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() == 2725 32 && 2726 "bad vector element size"); 2727 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0); 2728 } 2729 2730 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs, 2731 const uint16_t *const *Opcodes, 2732 bool HasWriteback) { 2733 EVT VT = N->getValueType(0); 2734 SDLoc Loc(N); 2735 2736 const uint16_t *OurOpcodes; 2737 switch (VT.getVectorElementType().getSizeInBits()) { 2738 case 8: 2739 OurOpcodes = Opcodes[0]; 2740 break; 2741 case 16: 2742 OurOpcodes = Opcodes[1]; 2743 break; 2744 case 32: 2745 OurOpcodes = Opcodes[2]; 2746 break; 2747 default: 2748 llvm_unreachable("bad vector element size in SelectMVE_VLD"); 2749 } 2750 2751 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2); 2752 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other}; 2753 unsigned PtrOperand = HasWriteback ? 
1 : 2; 2754 2755 auto Data = SDValue( 2756 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0); 2757 SDValue Chain = N->getOperand(0); 2758 // Add a MVE_VLDn instruction for each Vec, except the last 2759 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) { 2760 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2761 auto LoadInst = 2762 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops); 2763 Data = SDValue(LoadInst, 0); 2764 Chain = SDValue(LoadInst, 1); 2765 } 2766 // The last may need a writeback on it 2767 if (HasWriteback) 2768 ResultTys = {DataTy, MVT::i32, MVT::Other}; 2769 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2770 auto LoadInst = 2771 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops); 2772 2773 unsigned i; 2774 for (i = 0; i < NumVecs; i++) 2775 ReplaceUses(SDValue(N, i), 2776 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, 2777 SDValue(LoadInst, 0))); 2778 if (HasWriteback) 2779 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1)); 2780 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 
2 : 1)); 2781 CurDAG->RemoveDeadNode(N); 2782 } 2783 2784 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, 2785 bool Wrapping, bool Predicated) { 2786 EVT VT = N->getValueType(0); 2787 SDLoc Loc(N); 2788 2789 uint16_t Opcode; 2790 switch (VT.getScalarSizeInBits()) { 2791 case 8: 2792 Opcode = Opcodes[0]; 2793 break; 2794 case 16: 2795 Opcode = Opcodes[1]; 2796 break; 2797 case 32: 2798 Opcode = Opcodes[2]; 2799 break; 2800 default: 2801 llvm_unreachable("bad vector element size in SelectMVE_VxDUP"); 2802 } 2803 2804 SmallVector<SDValue, 8> Ops; 2805 unsigned OpIdx = 1; 2806 2807 SDValue Inactive; 2808 if (Predicated) 2809 Inactive = N->getOperand(OpIdx++); 2810 2811 Ops.push_back(N->getOperand(OpIdx++)); // base 2812 if (Wrapping) 2813 Ops.push_back(N->getOperand(OpIdx++)); // limit 2814 2815 SDValue ImmOp = N->getOperand(OpIdx++); // step 2816 int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue(); 2817 Ops.push_back(getI32Imm(ImmValue, Loc)); 2818 2819 if (Predicated) 2820 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive); 2821 else 2822 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2823 2824 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2825 } 2826 2827 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode, 2828 size_t NumExtraOps, bool HasAccum) { 2829 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian(); 2830 SDLoc Loc(N); 2831 SmallVector<SDValue, 8> Ops; 2832 2833 unsigned OpIdx = 1; 2834 2835 // Convert and append the immediate operand designating the coprocessor. 2836 SDValue ImmCorpoc = N->getOperand(OpIdx++); 2837 uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue(); 2838 Ops.push_back(getI32Imm(ImmCoprocVal, Loc)); 2839 2840 // For accumulating variants copy the low and high order parts of the 2841 // accumulator into a register pair and add it to the operand vector. 
2842 if (HasAccum) { 2843 SDValue AccLo = N->getOperand(OpIdx++); 2844 SDValue AccHi = N->getOperand(OpIdx++); 2845 if (IsBigEndian) 2846 std::swap(AccLo, AccHi); 2847 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0)); 2848 } 2849 2850 // Copy extra operands as-is. 2851 for (size_t I = 0; I < NumExtraOps; I++) 2852 Ops.push_back(N->getOperand(OpIdx++)); 2853 2854 // Convert and append the immediate operand 2855 SDValue Imm = N->getOperand(OpIdx); 2856 uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue(); 2857 Ops.push_back(getI32Imm(ImmVal, Loc)); 2858 2859 // Accumulating variants are IT-predicable, add predicate operands. 2860 if (HasAccum) { 2861 SDValue Pred = getAL(CurDAG, Loc); 2862 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2863 Ops.push_back(Pred); 2864 Ops.push_back(PredReg); 2865 } 2866 2867 // Create the CDE intruction 2868 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops); 2869 SDValue ResultPair = SDValue(InstrNode, 0); 2870 2871 // The original intrinsic had two outputs, and the output of the dual-register 2872 // CDE instruction is a register pair. We need to extract the two subregisters 2873 // and replace all uses of the original outputs with the extracted 2874 // subregisters. 
2875 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1}; 2876 if (IsBigEndian) 2877 std::swap(SubRegs[0], SubRegs[1]); 2878 2879 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) { 2880 if (SDValue(N, ResIdx).use_empty()) 2881 continue; 2882 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc, 2883 MVT::i32, ResultPair); 2884 ReplaceUses(SDValue(N, ResIdx), SubReg); 2885 } 2886 2887 CurDAG->RemoveDeadNode(N); 2888 } 2889 2890 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, 2891 bool isUpdating, unsigned NumVecs, 2892 const uint16_t *DOpcodes, 2893 const uint16_t *QOpcodes0, 2894 const uint16_t *QOpcodes1) { 2895 assert(Subtarget->hasNEON()); 2896 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2897 SDLoc dl(N); 2898 2899 SDValue MemAddr, Align; 2900 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2901 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2902 return; 2903 2904 SDValue Chain = N->getOperand(0); 2905 EVT VT = N->getValueType(0); 2906 bool is64BitVector = VT.is64BitVector(); 2907 2908 unsigned Alignment = 0; 2909 if (NumVecs != 3) { 2910 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2911 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2912 if (Alignment > NumBytes) 2913 Alignment = NumBytes; 2914 if (Alignment < 8 && Alignment < NumBytes) 2915 Alignment = 0; 2916 // Alignment must be a power of two; make sure of that. 
2917 Alignment = (Alignment & -Alignment); 2918 if (Alignment == 1) 2919 Alignment = 0; 2920 } 2921 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2922 2923 unsigned OpcodeIndex; 2924 switch (VT.getSimpleVT().SimpleTy) { 2925 default: llvm_unreachable("unhandled vld-dup type"); 2926 case MVT::v8i8: 2927 case MVT::v16i8: OpcodeIndex = 0; break; 2928 case MVT::v4i16: 2929 case MVT::v8i16: 2930 case MVT::v4f16: 2931 case MVT::v8f16: 2932 case MVT::v4bf16: 2933 case MVT::v8bf16: 2934 OpcodeIndex = 1; break; 2935 case MVT::v2f32: 2936 case MVT::v2i32: 2937 case MVT::v4f32: 2938 case MVT::v4i32: OpcodeIndex = 2; break; 2939 case MVT::v1f64: 2940 case MVT::v1i64: OpcodeIndex = 3; break; 2941 } 2942 2943 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2944 if (!is64BitVector) 2945 ResTyElts *= 2; 2946 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2947 2948 std::vector<EVT> ResTys; 2949 ResTys.push_back(ResTy); 2950 if (isUpdating) 2951 ResTys.push_back(MVT::i32); 2952 ResTys.push_back(MVT::Other); 2953 2954 SDValue Pred = getAL(CurDAG, dl); 2955 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2956 2957 SDNode *VLdDup; 2958 if (is64BitVector || NumVecs == 1) { 2959 SmallVector<SDValue, 6> Ops; 2960 Ops.push_back(MemAddr); 2961 Ops.push_back(Align); 2962 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] : 2963 QOpcodes0[OpcodeIndex]; 2964 if (isUpdating) { 2965 // fixed-stride update instructions don't have an explicit writeback 2966 // operand. It's implicit in the opcode itself. 2967 SDValue Inc = N->getOperand(2); 2968 bool IsImmUpdate = 2969 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2970 if (NumVecs <= 2 && !IsImmUpdate) 2971 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2972 if (!IsImmUpdate) 2973 Ops.push_back(Inc); 2974 // FIXME: VLD3 and VLD4 haven't been updated to that form yet. 
2975 else if (NumVecs > 2) 2976 Ops.push_back(Reg0); 2977 } 2978 Ops.push_back(Pred); 2979 Ops.push_back(Reg0); 2980 Ops.push_back(Chain); 2981 VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2982 } else if (NumVecs == 2) { 2983 const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain }; 2984 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], 2985 dl, ResTys, OpsA); 2986 2987 Chain = SDValue(VLdA, 1); 2988 const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain }; 2989 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB); 2990 } else { 2991 SDValue ImplDef = 2992 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2993 const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain }; 2994 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], 2995 dl, ResTys, OpsA); 2996 2997 SDValue SuperReg = SDValue(VLdA, 0); 2998 Chain = SDValue(VLdA, 1); 2999 const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain }; 3000 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB); 3001 } 3002 3003 // Transfer memoperands. 3004 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3005 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp}); 3006 3007 // Extract the subregisters. 3008 if (NumVecs == 1) { 3009 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0)); 3010 } else { 3011 SDValue SuperReg = SDValue(VLdDup, 0); 3012 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); 3013 unsigned SubIdx = is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0; 3014 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) { 3015 ReplaceUses(SDValue(N, Vec), 3016 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 3017 } 3018 } 3019 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 3020 if (isUpdating) 3021 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 3022 CurDAG->RemoveDeadNode(N); 3023 } 3024 3025 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 3026 if (!Subtarget->hasV6T2Ops()) 3027 return false; 3028 3029 unsigned Opc = isSigned 3030 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 3031 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 3032 SDLoc dl(N); 3033 3034 // For unsigned extracts, check for a shift right and mask 3035 unsigned And_imm = 0; 3036 if (N->getOpcode() == ISD::AND) { 3037 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 3038 3039 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 3040 if (And_imm & (And_imm + 1)) 3041 return false; 3042 3043 unsigned Srl_imm = 0; 3044 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 3045 Srl_imm)) { 3046 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3047 3048 // Mask off the unnecessary bits of the AND immediate; normally 3049 // DAGCombine will do this, but that might not happen if 3050 // targetShrinkDemandedConstant chooses a different immediate. 3051 And_imm &= -1U >> Srl_imm; 3052 3053 // Note: The width operand is encoded as width-1. 3054 unsigned Width = countTrailingOnes(And_imm) - 1; 3055 unsigned LSB = Srl_imm; 3056 3057 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3058 3059 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 3060 // It's cheaper to use a right shift to extract the top bits. 3061 if (Subtarget->isThumb()) { 3062 Opc = isSigned ? 
ARM::t2ASRri : ARM::t2LSRri; 3063 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3064 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3065 getAL(CurDAG, dl), Reg0, Reg0 }; 3066 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3067 return true; 3068 } 3069 3070 // ARM models shift instructions as MOVsi with shifter operand. 3071 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 3072 SDValue ShOpc = 3073 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 3074 MVT::i32); 3075 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 3076 getAL(CurDAG, dl), Reg0, Reg0 }; 3077 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 3078 return true; 3079 } 3080 3081 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3082 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3083 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3084 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3085 getAL(CurDAG, dl), Reg0 }; 3086 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3087 return true; 3088 } 3089 } 3090 return false; 3091 } 3092 3093 // Otherwise, we're looking for a shift of a shift 3094 unsigned Shl_imm = 0; 3095 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 3096 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 3097 unsigned Srl_imm = 0; 3098 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 3099 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3100 // Note: The width operand is encoded as width-1. 
3101 unsigned Width = 32 - Srl_imm - 1; 3102 int LSB = Srl_imm - Shl_imm; 3103 if (LSB < 0) 3104 return false; 3105 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3106 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3107 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3108 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3109 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3110 getAL(CurDAG, dl), Reg0 }; 3111 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3112 return true; 3113 } 3114 } 3115 3116 // Or we are looking for a shift of an and, with a mask operand 3117 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 3118 isShiftedMask_32(And_imm)) { 3119 unsigned Srl_imm = 0; 3120 unsigned LSB = countTrailingZeros(And_imm); 3121 // Shift must be the same as the ands lsb 3122 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 3123 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3124 unsigned MSB = 31 - countLeadingZeros(And_imm); 3125 // Note: The width operand is encoded as width-1. 
3126 unsigned Width = MSB - LSB; 3127 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3128 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3129 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3130 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), 3131 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3132 getAL(CurDAG, dl), Reg0 }; 3133 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3134 return true; 3135 } 3136 } 3137 3138 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 3139 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 3140 unsigned LSB = 0; 3141 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 3142 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 3143 return false; 3144 3145 if (LSB + Width > 32) 3146 return false; 3147 3148 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3149 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx"); 3150 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3151 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3152 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 3153 getAL(CurDAG, dl), Reg0 }; 3154 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3155 return true; 3156 } 3157 3158 return false; 3159 } 3160 3161 /// Target-specific DAG combining for ISD::XOR. 3162 /// Target-independent combining lowers SELECT_CC nodes of the form 3163 /// select_cc setg[ge] X, 0, X, -X 3164 /// select_cc setgt X, -1, X, -X 3165 /// select_cc setl[te] X, 0, -X, X 3166 /// select_cc setlt X, 1, -X, X 3167 /// which represent Integer ABS into: 3168 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y) 3169 /// ARM instruction selection detects the latter and matches it to 3170 /// ARM::ABS or ARM::t2ABS machine node. 
3171 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ 3172 SDValue XORSrc0 = N->getOperand(0); 3173 SDValue XORSrc1 = N->getOperand(1); 3174 EVT VT = N->getValueType(0); 3175 3176 if (Subtarget->isThumb1Only()) 3177 return false; 3178 3179 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) 3180 return false; 3181 3182 SDValue ADDSrc0 = XORSrc0.getOperand(0); 3183 SDValue ADDSrc1 = XORSrc0.getOperand(1); 3184 SDValue SRASrc0 = XORSrc1.getOperand(0); 3185 SDValue SRASrc1 = XORSrc1.getOperand(1); 3186 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 3187 EVT XType = SRASrc0.getValueType(); 3188 unsigned Size = XType.getSizeInBits() - 1; 3189 3190 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && 3191 XType.isInteger() && SRAConstant != nullptr && 3192 Size == SRAConstant->getZExtValue()) { 3193 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 3194 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); 3195 return true; 3196 } 3197 3198 return false; 3199 } 3200 3201 /// We've got special pseudo-instructions for these 3202 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3203 unsigned Opcode; 3204 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3205 if (MemTy == MVT::i8) 3206 Opcode = ARM::CMP_SWAP_8; 3207 else if (MemTy == MVT::i16) 3208 Opcode = ARM::CMP_SWAP_16; 3209 else if (MemTy == MVT::i32) 3210 Opcode = ARM::CMP_SWAP_32; 3211 else 3212 llvm_unreachable("Unknown AtomicCmpSwap type"); 3213 3214 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3215 N->getOperand(0)}; 3216 SDNode *CmpSwap = CurDAG->getMachineNode( 3217 Opcode, SDLoc(N), 3218 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 3219 3220 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3221 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3222 3223 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3224 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3225 CurDAG->RemoveDeadNode(N); 3226 } 3227 3228 static 
Optional<std::pair<unsigned, unsigned>> 3229 getContiguousRangeOfSetBits(const APInt &A) { 3230 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1; 3231 unsigned LastOne = A.countTrailingZeros(); 3232 if (A.countPopulation() != (FirstOne - LastOne + 1)) 3233 return Optional<std::pair<unsigned,unsigned>>(); 3234 return std::make_pair(FirstOne, LastOne); 3235 } 3236 3237 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { 3238 assert(N->getOpcode() == ARMISD::CMPZ); 3239 SwitchEQNEToPLMI = false; 3240 3241 if (!Subtarget->isThumb()) 3242 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and 3243 // LSR don't exist as standalone instructions - they need the barrel shifter. 3244 return; 3245 3246 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 3247 SDValue And = N->getOperand(0); 3248 if (!And->hasOneUse()) 3249 return; 3250 3251 SDValue Zero = N->getOperand(1); 3252 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() || 3253 And->getOpcode() != ISD::AND) 3254 return; 3255 SDValue X = And.getOperand(0); 3256 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 3257 3258 if (!C) 3259 return; 3260 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 3261 if (!Range) 3262 return; 3263 3264 // There are several ways to lower this: 3265 SDNode *NewN; 3266 SDLoc dl(N); 3267 3268 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 3269 if (Subtarget->isThumb2()) { 3270 Opc = (Opc == ARM::tLSLri) ? 
ARM::t2LSLri : ARM::t2LSRri; 3271 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3272 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3273 CurDAG->getRegister(0, MVT::i32) }; 3274 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3275 } else { 3276 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 3277 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3278 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3279 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3280 } 3281 }; 3282 3283 if (Range->second == 0) { 3284 // 1. Mask includes the LSB -> Simply shift the top N bits off 3285 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3286 ReplaceNode(And.getNode(), NewN); 3287 } else if (Range->first == 31) { 3288 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 3289 NewN = EmitShift(ARM::tLSRri, X, Range->second); 3290 ReplaceNode(And.getNode(), NewN); 3291 } else if (Range->first == Range->second) { 3292 // 3. Only one bit is set. We can shift this into the sign bit and use a 3293 // PL/MI comparison. 3294 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3295 ReplaceNode(And.getNode(), NewN); 3296 3297 SwitchEQNEToPLMI = true; 3298 } else if (!Subtarget->hasV6T2Ops()) { 3299 // 4. Do a double shift to clear bottom and top bits, but only in 3300 // thumb-1 mode as in thumb-2 we can use UBFX. 3301 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3302 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 3303 Range->second + (31 - Range->first)); 3304 ReplaceNode(And.getNode(), NewN); 3305 } 3306 3307 } 3308 3309 void ARMDAGToDAGISel::Select(SDNode *N) { 3310 SDLoc dl(N); 3311 3312 if (N->isMachineOpcode()) { 3313 N->setNodeId(-1); 3314 return; // Already selected. 3315 } 3316 3317 switch (N->getOpcode()) { 3318 default: break; 3319 case ISD::STORE: { 3320 // For Thumb1, match an sp-relative store in C++. 
This is a little 3321 // unfortunate, but I don't think I can make the chain check work 3322 // otherwise. (The chain of the store has to be the same as the chain 3323 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3324 // a direct reference to "SP".) 3325 // 3326 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3327 // a different addressing mode from other four-byte stores. 3328 // 3329 // This pattern usually comes up with call arguments. 3330 StoreSDNode *ST = cast<StoreSDNode>(N); 3331 SDValue Ptr = ST->getBasePtr(); 3332 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3333 int RHSC = 0; 3334 if (Ptr.getOpcode() == ISD::ADD && 3335 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3336 Ptr = Ptr.getOperand(0); 3337 3338 if (Ptr.getOpcode() == ISD::CopyFromReg && 3339 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3340 Ptr.getOperand(0) == ST->getChain()) { 3341 SDValue Ops[] = {ST->getValue(), 3342 CurDAG->getRegister(ARM::SP, MVT::i32), 3343 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3344 getAL(CurDAG, dl), 3345 CurDAG->getRegister(0, MVT::i32), 3346 ST->getChain()}; 3347 MachineSDNode *ResNode = 3348 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3349 MachineMemOperand *MemOp = ST->getMemOperand(); 3350 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3351 ReplaceNode(N, ResNode); 3352 return; 3353 } 3354 } 3355 break; 3356 } 3357 case ISD::WRITE_REGISTER: 3358 if (tryWriteRegister(N)) 3359 return; 3360 break; 3361 case ISD::READ_REGISTER: 3362 if (tryReadRegister(N)) 3363 return; 3364 break; 3365 case ISD::INLINEASM: 3366 case ISD::INLINEASM_BR: 3367 if (tryInlineAsm(N)) 3368 return; 3369 break; 3370 case ISD::XOR: 3371 // Select special operations if XOR node forms integer ABS pattern 3372 if (tryABSOp(N)) 3373 return; 3374 // Other cases are autogenerated. 
3375 break; 3376 case ISD::Constant: { 3377 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 3378 // If we can't materialize the constant we need to use a literal pool 3379 if (ConstantMaterializationCost(Val, Subtarget) > 2) { 3380 SDValue CPIdx = CurDAG->getTargetConstantPool( 3381 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3382 TLI->getPointerTy(CurDAG->getDataLayout())); 3383 3384 SDNode *ResNode; 3385 if (Subtarget->isThumb()) { 3386 SDValue Ops[] = { 3387 CPIdx, 3388 getAL(CurDAG, dl), 3389 CurDAG->getRegister(0, MVT::i32), 3390 CurDAG->getEntryNode() 3391 }; 3392 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3393 Ops); 3394 } else { 3395 SDValue Ops[] = { 3396 CPIdx, 3397 CurDAG->getTargetConstant(0, dl, MVT::i32), 3398 getAL(CurDAG, dl), 3399 CurDAG->getRegister(0, MVT::i32), 3400 CurDAG->getEntryNode() 3401 }; 3402 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3403 Ops); 3404 } 3405 // Annotate the Node with memory operand information so that MachineInstr 3406 // queries work properly. This e.g. gives the register allocation the 3407 // required information for rematerialization. 3408 MachineFunction& MF = CurDAG->getMachineFunction(); 3409 MachineMemOperand *MemOp = 3410 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3411 MachineMemOperand::MOLoad, 4, Align(4)); 3412 3413 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3414 3415 ReplaceNode(N, ResNode); 3416 return; 3417 } 3418 3419 // Other cases are autogenerated. 3420 break; 3421 } 3422 case ISD::FrameIndex: { 3423 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
3424 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3425 SDValue TFI = CurDAG->getTargetFrameIndex( 3426 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3427 if (Subtarget->isThumb1Only()) { 3428 // Set the alignment of the frame object to 4, to avoid having to generate 3429 // more than one ADD 3430 MachineFrameInfo &MFI = MF->getFrameInfo(); 3431 if (MFI.getObjectAlign(FI) < Align(4)) 3432 MFI.setObjectAlignment(FI, Align(4)); 3433 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3434 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3435 return; 3436 } else { 3437 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3438 ARM::t2ADDri : ARM::ADDri); 3439 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3440 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3441 CurDAG->getRegister(0, MVT::i32) }; 3442 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3443 return; 3444 } 3445 } 3446 case ISD::SRL: 3447 if (tryV6T2BitfieldExtractOp(N, false)) 3448 return; 3449 break; 3450 case ISD::SIGN_EXTEND_INREG: 3451 case ISD::SRA: 3452 if (tryV6T2BitfieldExtractOp(N, true)) 3453 return; 3454 break; 3455 case ISD::MUL: 3456 if (Subtarget->isThumb1Only()) 3457 break; 3458 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3459 unsigned RHSV = C->getZExtValue(); 3460 if (!RHSV) break; 3461 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
3462 unsigned ShImm = Log2_32(RHSV-1); 3463 if (ShImm >= 32) 3464 break; 3465 SDValue V = N->getOperand(0); 3466 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3467 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3468 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3469 if (Subtarget->isThumb()) { 3470 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3471 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3472 return; 3473 } else { 3474 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3475 Reg0 }; 3476 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3477 return; 3478 } 3479 } 3480 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3481 unsigned ShImm = Log2_32(RHSV+1); 3482 if (ShImm >= 32) 3483 break; 3484 SDValue V = N->getOperand(0); 3485 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3486 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3487 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3488 if (Subtarget->isThumb()) { 3489 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3490 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3491 return; 3492 } else { 3493 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3494 Reg0 }; 3495 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3496 return; 3497 } 3498 } 3499 } 3500 break; 3501 case ISD::AND: { 3502 // Check for unsigned bitfield extract 3503 if (tryV6T2BitfieldExtractOp(N, false)) 3504 return; 3505 3506 // If an immediate is used in an AND node, it is possible that the immediate 3507 // can be more optimally materialized when negated. If this is the case we 3508 // can negate the immediate and use a BIC instead. 3509 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3510 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3511 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3512 3513 // In Thumb2 mode, an AND can take a 12-bit immediate. 
If this 3514 // immediate can be negated and fit in the immediate operand of 3515 // a t2BIC, don't do any manual transform here as this can be 3516 // handled by the generic ISel machinery. 3517 bool PreferImmediateEncoding = 3518 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3519 if (!PreferImmediateEncoding && 3520 ConstantMaterializationCost(Imm, Subtarget) > 3521 ConstantMaterializationCost(~Imm, Subtarget)) { 3522 // The current immediate costs more to materialize than a negated 3523 // immediate, so negate the immediate and use a BIC. 3524 SDValue NewImm = 3525 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 3526 // If the new constant didn't exist before, reposition it in the topological 3527 // ordering so it is just before N. Otherwise, don't touch its location. 3528 if (NewImm->getNodeId() == -1) 3529 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3530 3531 if (!Subtarget->hasThumb2()) { 3532 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3533 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3534 CurDAG->getRegister(0, MVT::i32)}; 3535 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3536 return; 3537 } else { 3538 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3539 CurDAG->getRegister(0, MVT::i32), 3540 CurDAG->getRegister(0, MVT::i32)}; 3541 ReplaceNode(N, 3542 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3543 return; 3544 } 3545 } 3546 } 3547 3548 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3549 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 3550 // are entirely contributed by c2 and lower 16-bits are entirely contributed 3551 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 3552 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 3553 EVT VT = N->getValueType(0); 3554 if (VT != MVT::i32) 3555 break; 3556 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3557 ? 
ARM::t2MOVTi16 3558 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 3559 if (!Opc) 3560 break; 3561 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3562 N1C = dyn_cast<ConstantSDNode>(N1); 3563 if (!N1C) 3564 break; 3565 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3566 SDValue N2 = N0.getOperand(1); 3567 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3568 if (!N2C) 3569 break; 3570 unsigned N1CVal = N1C->getZExtValue(); 3571 unsigned N2CVal = N2C->getZExtValue(); 3572 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3573 (N1CVal & 0xffffU) == 0xffffU && 3574 (N2CVal & 0xffffU) == 0x0U) { 3575 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3576 dl, MVT::i32); 3577 SDValue Ops[] = { N0.getOperand(0), Imm16, 3578 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3579 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3580 return; 3581 } 3582 } 3583 3584 break; 3585 } 3586 case ARMISD::UMAAL: { 3587 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3588 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3589 N->getOperand(2), N->getOperand(3), 3590 getAL(CurDAG, dl), 3591 CurDAG->getRegister(0, MVT::i32) }; 3592 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3593 return; 3594 } 3595 case ARMISD::UMLAL:{ 3596 if (Subtarget->isThumb()) { 3597 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3598 N->getOperand(3), getAL(CurDAG, dl), 3599 CurDAG->getRegister(0, MVT::i32)}; 3600 ReplaceNode( 3601 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3602 return; 3603 }else{ 3604 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3605 N->getOperand(3), getAL(CurDAG, dl), 3606 CurDAG->getRegister(0, MVT::i32), 3607 CurDAG->getRegister(0, MVT::i32) }; 3608 ReplaceNode(N, CurDAG->getMachineNode( 3609 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 3610 MVT::i32, MVT::i32, Ops)); 3611 return; 3612 } 3613 } 3614 case ARMISD::SMLAL:{ 3615 if (Subtarget->isThumb()) { 3616 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3617 N->getOperand(3), getAL(CurDAG, dl), 3618 CurDAG->getRegister(0, MVT::i32)}; 3619 ReplaceNode( 3620 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3621 return; 3622 }else{ 3623 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3624 N->getOperand(3), getAL(CurDAG, dl), 3625 CurDAG->getRegister(0, MVT::i32), 3626 CurDAG->getRegister(0, MVT::i32) }; 3627 ReplaceNode(N, CurDAG->getMachineNode( 3628 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3629 MVT::i32, MVT::i32, Ops)); 3630 return; 3631 } 3632 } 3633 case ARMISD::SUBE: { 3634 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3635 break; 3636 // Look for a pattern to match SMMLS 3637 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3638 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3639 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3640 !SDValue(N, 1).use_empty()) 3641 break; 3642 3643 if (Subtarget->isThumb()) 3644 assert(Subtarget->hasThumb2() && 3645 "This pattern should not be generated for Thumb"); 3646 3647 SDValue SmulLoHi = N->getOperand(1); 3648 SDValue Subc = N->getOperand(2); 3649 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 3650 3651 if (!Zero || Zero->getZExtValue() != 0 || 3652 Subc.getOperand(1) != SmulLoHi.getValue(0) || 3653 N->getOperand(1) != SmulLoHi.getValue(1) || 3654 N->getOperand(2) != Subc.getValue(1)) 3655 break; 3656 3657 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 3658 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3659 N->getOperand(0), getAL(CurDAG, dl), 3660 CurDAG->getRegister(0, MVT::i32) }; 3661 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3662 return; 3663 } 3664 case ISD::LOAD: { 3665 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3666 return; 3667 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3668 if (tryT2IndexedLoad(N)) 3669 return; 3670 } else if (Subtarget->isThumb()) { 3671 if (tryT1IndexedLoad(N)) 3672 return; 3673 } else if (tryARMIndexedLoad(N)) 3674 return; 3675 // Other cases are autogenerated. 3676 break; 3677 } 3678 case ISD::MLOAD: 3679 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3680 return; 3681 // Other cases are autogenerated. 3682 break; 3683 case ARMISD::WLS: 3684 case ARMISD::LE: { 3685 SDValue Ops[] = { N->getOperand(1), 3686 N->getOperand(2), 3687 N->getOperand(0) }; 3688 unsigned Opc = N->getOpcode() == ARMISD::WLS ? 3689 ARM::t2WhileLoopStart : ARM::t2LoopEnd; 3690 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 3691 ReplaceUses(N, New); 3692 CurDAG->RemoveDeadNode(N); 3693 return; 3694 } 3695 case ARMISD::LDRD: { 3696 if (Subtarget->isThumb2()) 3697 break; // TableGen handles isel in this case. 3698 SDValue Base, RegOffset, ImmOffset; 3699 const SDValue &Chain = N->getOperand(0); 3700 const SDValue &Addr = N->getOperand(1); 3701 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 3702 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 3703 // The register-offset variant of LDRD mandates that the register 3704 // allocated to RegOffset is not reused in any of the remaining operands. 3705 // This restriction is currently not enforced. Therefore emitting this 3706 // variant is explicitly avoided. 
3707 Base = Addr; 3708 RegOffset = CurDAG->getRegister(0, MVT::i32); 3709 } 3710 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 3711 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 3712 {MVT::Untyped, MVT::Other}, Ops); 3713 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 3714 SDValue(New, 0)); 3715 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 3716 SDValue(New, 0)); 3717 transferMemOperands(N, New); 3718 ReplaceUses(SDValue(N, 0), Lo); 3719 ReplaceUses(SDValue(N, 1), Hi); 3720 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 3721 CurDAG->RemoveDeadNode(N); 3722 return; 3723 } 3724 case ARMISD::STRD: { 3725 if (Subtarget->isThumb2()) 3726 break; // TableGen handles isel in this case. 3727 SDValue Base, RegOffset, ImmOffset; 3728 const SDValue &Chain = N->getOperand(0); 3729 const SDValue &Addr = N->getOperand(3); 3730 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 3731 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 3732 // The register-offset variant of STRD mandates that the register 3733 // allocated to RegOffset is not reused in any of the remaining operands. 3734 // This restriction is currently not enforced. Therefore emitting this 3735 // variant is explicitly avoided. 
3736 Base = Addr; 3737 RegOffset = CurDAG->getRegister(0, MVT::i32); 3738 } 3739 SDNode *RegPair = 3740 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 3741 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 3742 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 3743 transferMemOperands(N, New); 3744 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 3745 CurDAG->RemoveDeadNode(N); 3746 return; 3747 } 3748 case ARMISD::LOOP_DEC: { 3749 SDValue Ops[] = { N->getOperand(1), 3750 N->getOperand(2), 3751 N->getOperand(0) }; 3752 SDNode *Dec = 3753 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3754 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 3755 ReplaceUses(N, Dec); 3756 CurDAG->RemoveDeadNode(N); 3757 return; 3758 } 3759 case ARMISD::BRCOND: { 3760 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3761 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3762 // Pattern complexity = 6 cost = 1 size = 0 3763 3764 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3765 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 3766 // Pattern complexity = 6 cost = 1 size = 0 3767 3768 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3769 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3770 // Pattern complexity = 6 cost = 1 size = 0 3771 3772 unsigned Opc = Subtarget->isThumb() ? 3773 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 3774 SDValue Chain = N->getOperand(0); 3775 SDValue N1 = N->getOperand(1); 3776 SDValue N2 = N->getOperand(2); 3777 SDValue N3 = N->getOperand(3); 3778 SDValue InFlag = N->getOperand(4); 3779 assert(N1.getOpcode() == ISD::BasicBlock); 3780 assert(N2.getOpcode() == ISD::Constant); 3781 assert(N3.getOpcode() == ISD::Register); 3782 3783 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 3784 3785 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3786 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 3787 SDValue Int = InFlag.getOperand(0); 3788 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 3789 3790 // Handle low-overhead loops. 3791 if (ID == Intrinsic::loop_decrement_reg) { 3792 SDValue Elements = Int.getOperand(2); 3793 SDValue Size = CurDAG->getTargetConstant( 3794 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 3795 MVT::i32); 3796 3797 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 3798 SDNode *LoopDec = 3799 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3800 CurDAG->getVTList(MVT::i32, MVT::Other), 3801 Args); 3802 ReplaceUses(Int.getNode(), LoopDec); 3803 3804 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 3805 SDNode *LoopEnd = 3806 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 3807 3808 ReplaceUses(N, LoopEnd); 3809 CurDAG->RemoveDeadNode(N); 3810 CurDAG->RemoveDeadNode(InFlag.getNode()); 3811 CurDAG->RemoveDeadNode(Int.getNode()); 3812 return; 3813 } 3814 } 3815 3816 bool SwitchEQNEToPLMI; 3817 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3818 InFlag = N->getOperand(4); 3819 3820 if (SwitchEQNEToPLMI) { 3821 switch ((ARMCC::CondCodes)CC) { 3822 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3823 case ARMCC::NE: 3824 CC = (unsigned)ARMCC::MI; 3825 break; 3826 case ARMCC::EQ: 3827 CC = (unsigned)ARMCC::PL; 3828 break; 3829 } 3830 } 3831 } 3832 3833 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 3834 
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 3835 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 3836 MVT::Glue, Ops); 3837 Chain = SDValue(ResNode, 0); 3838 if (N->getNumValues() == 2) { 3839 InFlag = SDValue(ResNode, 1); 3840 ReplaceUses(SDValue(N, 1), InFlag); 3841 } 3842 ReplaceUses(SDValue(N, 0), 3843 SDValue(Chain.getNode(), Chain.getResNo())); 3844 CurDAG->RemoveDeadNode(N); 3845 return; 3846 } 3847 3848 case ARMISD::CMPZ: { 3849 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 3850 // This allows us to avoid materializing the expensive negative constant. 3851 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 3852 // for its glue output. 3853 SDValue X = N->getOperand(0); 3854 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 3855 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 3856 int64_t Addend = -C->getSExtValue(); 3857 3858 SDNode *Add = nullptr; 3859 // ADDS can be better than CMN if the immediate fits in a 3860 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 3861 // Outside that range we can just use a CMN which is 32-bit but has a 3862 // 12-bit immediate range. 3863 if (Addend < 1<<8) { 3864 if (Subtarget->isThumb2()) { 3865 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3866 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3867 CurDAG->getRegister(0, MVT::i32) }; 3868 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 3869 } else { 3870 unsigned Opc = (Addend < 1<<3) ? 
ARM::tADDi3 : ARM::tADDi8; 3871 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 3872 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3873 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3874 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3875 } 3876 } 3877 if (Add) { 3878 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 3879 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 3880 } 3881 } 3882 // Other cases are autogenerated. 3883 break; 3884 } 3885 3886 case ARMISD::CMOV: { 3887 SDValue InFlag = N->getOperand(4); 3888 3889 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3890 bool SwitchEQNEToPLMI; 3891 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3892 3893 if (SwitchEQNEToPLMI) { 3894 SDValue ARMcc = N->getOperand(2); 3895 ARMCC::CondCodes CC = 3896 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 3897 3898 switch (CC) { 3899 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3900 case ARMCC::NE: 3901 CC = ARMCC::MI; 3902 break; 3903 case ARMCC::EQ: 3904 CC = ARMCC::PL; 3905 break; 3906 } 3907 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 3908 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 3909 N->getOperand(3), N->getOperand(4)}; 3910 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 3911 } 3912 3913 } 3914 // Other cases are autogenerated. 3915 break; 3916 } 3917 3918 case ARMISD::VZIP: { 3919 unsigned Opc = 0; 3920 EVT VT = N->getValueType(0); 3921 switch (VT.getSimpleVT().SimpleTy) { 3922 default: return; 3923 case MVT::v8i8: Opc = ARM::VZIPd8; break; 3924 case MVT::v4f16: 3925 case MVT::v4i16: Opc = ARM::VZIPd16; break; 3926 case MVT::v2f32: 3927 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
3928 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3929 case MVT::v16i8: Opc = ARM::VZIPq8; break; 3930 case MVT::v8f16: 3931 case MVT::v8i16: Opc = ARM::VZIPq16; break; 3932 case MVT::v4f32: 3933 case MVT::v4i32: Opc = ARM::VZIPq32; break; 3934 } 3935 SDValue Pred = getAL(CurDAG, dl); 3936 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3937 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3938 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3939 return; 3940 } 3941 case ARMISD::VUZP: { 3942 unsigned Opc = 0; 3943 EVT VT = N->getValueType(0); 3944 switch (VT.getSimpleVT().SimpleTy) { 3945 default: return; 3946 case MVT::v8i8: Opc = ARM::VUZPd8; break; 3947 case MVT::v4f16: 3948 case MVT::v4i16: Opc = ARM::VUZPd16; break; 3949 case MVT::v2f32: 3950 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 3951 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3952 case MVT::v16i8: Opc = ARM::VUZPq8; break; 3953 case MVT::v8f16: 3954 case MVT::v8i16: Opc = ARM::VUZPq16; break; 3955 case MVT::v4f32: 3956 case MVT::v4i32: Opc = ARM::VUZPq32; break; 3957 } 3958 SDValue Pred = getAL(CurDAG, dl); 3959 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3960 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3961 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3962 return; 3963 } 3964 case ARMISD::VTRN: { 3965 unsigned Opc = 0; 3966 EVT VT = N->getValueType(0); 3967 switch (VT.getSimpleVT().SimpleTy) { 3968 default: return; 3969 case MVT::v8i8: Opc = ARM::VTRNd8; break; 3970 case MVT::v4f16: 3971 case MVT::v4i16: Opc = ARM::VTRNd16; break; 3972 case MVT::v2f32: 3973 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3974 case MVT::v16i8: Opc = ARM::VTRNq8; break; 3975 case MVT::v8f16: 3976 case MVT::v8i16: Opc = ARM::VTRNq16; break; 3977 case MVT::v4f32: 3978 case MVT::v4i32: Opc = ARM::VTRNq32; break; 3979 } 3980 SDValue Pred = getAL(CurDAG, dl); 3981 SDValue PredReg = CurDAG->getRegister(0, 
MVT::i32); 3982 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3983 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3984 return; 3985 } 3986 case ARMISD::BUILD_VECTOR: { 3987 EVT VecVT = N->getValueType(0); 3988 EVT EltVT = VecVT.getVectorElementType(); 3989 unsigned NumElts = VecVT.getVectorNumElements(); 3990 if (EltVT == MVT::f64) { 3991 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 3992 ReplaceNode( 3993 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3994 return; 3995 } 3996 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 3997 if (NumElts == 2) { 3998 ReplaceNode( 3999 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4000 return; 4001 } 4002 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 4003 ReplaceNode(N, 4004 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 4005 N->getOperand(2), N->getOperand(3))); 4006 return; 4007 } 4008 4009 case ARMISD::VLD1DUP: { 4010 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4011 ARM::VLD1DUPd32 }; 4012 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4013 ARM::VLD1DUPq32 }; 4014 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4015 return; 4016 } 4017 4018 case ARMISD::VLD2DUP: { 4019 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4020 ARM::VLD2DUPd32 }; 4021 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4022 return; 4023 } 4024 4025 case ARMISD::VLD3DUP: { 4026 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4027 ARM::VLD3DUPd16Pseudo, 4028 ARM::VLD3DUPd32Pseudo }; 4029 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4030 return; 4031 } 4032 4033 case ARMISD::VLD4DUP: { 4034 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4035 ARM::VLD4DUPd16Pseudo, 4036 ARM::VLD4DUPd32Pseudo }; 4037 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4038 return; 
4039 } 4040 4041 case ARMISD::VLD1DUP_UPD: { 4042 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4043 ARM::VLD1DUPd16wb_fixed, 4044 ARM::VLD1DUPd32wb_fixed }; 4045 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4046 ARM::VLD1DUPq16wb_fixed, 4047 ARM::VLD1DUPq32wb_fixed }; 4048 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4049 return; 4050 } 4051 4052 case ARMISD::VLD2DUP_UPD: { 4053 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 4054 ARM::VLD2DUPd16wb_fixed, 4055 ARM::VLD2DUPd32wb_fixed }; 4056 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes); 4057 return; 4058 } 4059 4060 case ARMISD::VLD3DUP_UPD: { 4061 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4062 ARM::VLD3DUPd16Pseudo_UPD, 4063 ARM::VLD3DUPd32Pseudo_UPD }; 4064 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes); 4065 return; 4066 } 4067 4068 case ARMISD::VLD4DUP_UPD: { 4069 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4070 ARM::VLD4DUPd16Pseudo_UPD, 4071 ARM::VLD4DUPd32Pseudo_UPD }; 4072 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes); 4073 return; 4074 } 4075 4076 case ARMISD::VLD1_UPD: { 4077 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4078 ARM::VLD1d16wb_fixed, 4079 ARM::VLD1d32wb_fixed, 4080 ARM::VLD1d64wb_fixed }; 4081 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4082 ARM::VLD1q16wb_fixed, 4083 ARM::VLD1q32wb_fixed, 4084 ARM::VLD1q64wb_fixed }; 4085 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4086 return; 4087 } 4088 4089 case ARMISD::VLD2_UPD: { 4090 if (Subtarget->hasNEON()) { 4091 static const uint16_t DOpcodes[] = { 4092 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4093 ARM::VLD1q64wb_fixed}; 4094 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4095 ARM::VLD2q16PseudoWB_fixed, 4096 ARM::VLD2q32PseudoWB_fixed}; 4097 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4098 } else { 4099 static const 
uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4100 ARM::MVE_VLD21_8_wb}; 4101 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4102 ARM::MVE_VLD21_16_wb}; 4103 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4104 ARM::MVE_VLD21_32_wb}; 4105 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4106 SelectMVE_VLD(N, 2, Opcodes, true); 4107 } 4108 return; 4109 } 4110 4111 case ARMISD::VLD3_UPD: { 4112 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4113 ARM::VLD3d16Pseudo_UPD, 4114 ARM::VLD3d32Pseudo_UPD, 4115 ARM::VLD1d64TPseudoWB_fixed}; 4116 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4117 ARM::VLD3q16Pseudo_UPD, 4118 ARM::VLD3q32Pseudo_UPD }; 4119 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4120 ARM::VLD3q16oddPseudo_UPD, 4121 ARM::VLD3q32oddPseudo_UPD }; 4122 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4123 return; 4124 } 4125 4126 case ARMISD::VLD4_UPD: { 4127 if (Subtarget->hasNEON()) { 4128 static const uint16_t DOpcodes[] = { 4129 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4130 ARM::VLD1d64QPseudoWB_fixed}; 4131 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4132 ARM::VLD4q16Pseudo_UPD, 4133 ARM::VLD4q32Pseudo_UPD}; 4134 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4135 ARM::VLD4q16oddPseudo_UPD, 4136 ARM::VLD4q32oddPseudo_UPD}; 4137 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4138 } else { 4139 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4140 ARM::MVE_VLD42_8, 4141 ARM::MVE_VLD43_8_wb}; 4142 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4143 ARM::MVE_VLD42_16, 4144 ARM::MVE_VLD43_16_wb}; 4145 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4146 ARM::MVE_VLD42_32, 4147 ARM::MVE_VLD43_32_wb}; 4148 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4149 SelectMVE_VLD(N, 4, Opcodes, true); 4150 } 
4151 return; 4152 } 4153 4154 case ARMISD::VLD2LN_UPD: { 4155 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4156 ARM::VLD2LNd16Pseudo_UPD, 4157 ARM::VLD2LNd32Pseudo_UPD }; 4158 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4159 ARM::VLD2LNq32Pseudo_UPD }; 4160 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4161 return; 4162 } 4163 4164 case ARMISD::VLD3LN_UPD: { 4165 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4166 ARM::VLD3LNd16Pseudo_UPD, 4167 ARM::VLD3LNd32Pseudo_UPD }; 4168 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4169 ARM::VLD3LNq32Pseudo_UPD }; 4170 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4171 return; 4172 } 4173 4174 case ARMISD::VLD4LN_UPD: { 4175 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4176 ARM::VLD4LNd16Pseudo_UPD, 4177 ARM::VLD4LNd32Pseudo_UPD }; 4178 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4179 ARM::VLD4LNq32Pseudo_UPD }; 4180 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4181 return; 4182 } 4183 4184 case ARMISD::VST1_UPD: { 4185 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 4186 ARM::VST1d16wb_fixed, 4187 ARM::VST1d32wb_fixed, 4188 ARM::VST1d64wb_fixed }; 4189 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4190 ARM::VST1q16wb_fixed, 4191 ARM::VST1q32wb_fixed, 4192 ARM::VST1q64wb_fixed }; 4193 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4194 return; 4195 } 4196 4197 case ARMISD::VST2_UPD: { 4198 if (Subtarget->hasNEON()) { 4199 static const uint16_t DOpcodes[] = { 4200 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4201 ARM::VST1q64wb_fixed}; 4202 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4203 ARM::VST2q16PseudoWB_fixed, 4204 ARM::VST2q32PseudoWB_fixed}; 4205 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4206 return; 4207 } 4208 break; 4209 } 4210 4211 case ARMISD::VST3_UPD: { 4212 static const uint16_t DOpcodes[] = { 
ARM::VST3d8Pseudo_UPD, 4213 ARM::VST3d16Pseudo_UPD, 4214 ARM::VST3d32Pseudo_UPD, 4215 ARM::VST1d64TPseudoWB_fixed}; 4216 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4217 ARM::VST3q16Pseudo_UPD, 4218 ARM::VST3q32Pseudo_UPD }; 4219 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4220 ARM::VST3q16oddPseudo_UPD, 4221 ARM::VST3q32oddPseudo_UPD }; 4222 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4223 return; 4224 } 4225 4226 case ARMISD::VST4_UPD: { 4227 if (Subtarget->hasNEON()) { 4228 static const uint16_t DOpcodes[] = { 4229 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4230 ARM::VST1d64QPseudoWB_fixed}; 4231 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4232 ARM::VST4q16Pseudo_UPD, 4233 ARM::VST4q32Pseudo_UPD}; 4234 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4235 ARM::VST4q16oddPseudo_UPD, 4236 ARM::VST4q32oddPseudo_UPD}; 4237 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4238 return; 4239 } 4240 break; 4241 } 4242 4243 case ARMISD::VST2LN_UPD: { 4244 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 4245 ARM::VST2LNd16Pseudo_UPD, 4246 ARM::VST2LNd32Pseudo_UPD }; 4247 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 4248 ARM::VST2LNq32Pseudo_UPD }; 4249 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 4250 return; 4251 } 4252 4253 case ARMISD::VST3LN_UPD: { 4254 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 4255 ARM::VST3LNd16Pseudo_UPD, 4256 ARM::VST3LNd32Pseudo_UPD }; 4257 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 4258 ARM::VST3LNq32Pseudo_UPD }; 4259 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 4260 return; 4261 } 4262 4263 case ARMISD::VST4LN_UPD: { 4264 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 4265 ARM::VST4LNd16Pseudo_UPD, 4266 ARM::VST4LNd32Pseudo_UPD }; 4267 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 4268 ARM::VST4LNq32Pseudo_UPD 
}; 4269 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 4270 return; 4271 } 4272 4273 case ISD::INTRINSIC_VOID: 4274 case ISD::INTRINSIC_W_CHAIN: { 4275 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 4276 switch (IntNo) { 4277 default: 4278 break; 4279 4280 case Intrinsic::arm_mrrc: 4281 case Intrinsic::arm_mrrc2: { 4282 SDLoc dl(N); 4283 SDValue Chain = N->getOperand(0); 4284 unsigned Opc; 4285 4286 if (Subtarget->isThumb()) 4287 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 4288 else 4289 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 4290 4291 SmallVector<SDValue, 5> Ops; 4292 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 4293 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 4294 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 4295 4296 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 4297 // instruction will always be '1111' but it is possible in assembly language to specify 4298 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 4299 if (Opc != ARM::MRRC2) { 4300 Ops.push_back(getAL(CurDAG, dl)); 4301 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4302 } 4303 4304 Ops.push_back(Chain); 4305 4306 // Writes to two registers. 4307 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 4308 4309 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 4310 return; 4311 } 4312 case Intrinsic::arm_ldaexd: 4313 case Intrinsic::arm_ldrexd: { 4314 SDLoc dl(N); 4315 SDValue Chain = N->getOperand(0); 4316 SDValue MemAddr = N->getOperand(2); 4317 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 4318 4319 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 4320 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 4321 : (IsAcquire ? 
ARM::LDAEXD : ARM::LDREXD); 4322 4323 // arm_ldrexd returns a i64 value in {i32, i32} 4324 std::vector<EVT> ResTys; 4325 if (isThumb) { 4326 ResTys.push_back(MVT::i32); 4327 ResTys.push_back(MVT::i32); 4328 } else 4329 ResTys.push_back(MVT::Untyped); 4330 ResTys.push_back(MVT::Other); 4331 4332 // Place arguments in the right order. 4333 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 4334 CurDAG->getRegister(0, MVT::i32), Chain}; 4335 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4336 // Transfer memoperands. 4337 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4338 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4339 4340 // Remap uses. 4341 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 4342 if (!SDValue(N, 0).use_empty()) { 4343 SDValue Result; 4344 if (isThumb) 4345 Result = SDValue(Ld, 0); 4346 else { 4347 SDValue SubRegIdx = 4348 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 4349 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4350 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4351 Result = SDValue(ResNode,0); 4352 } 4353 ReplaceUses(SDValue(N, 0), Result); 4354 } 4355 if (!SDValue(N, 1).use_empty()) { 4356 SDValue Result; 4357 if (isThumb) 4358 Result = SDValue(Ld, 1); 4359 else { 4360 SDValue SubRegIdx = 4361 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 4362 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4363 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4364 Result = SDValue(ResNode,0); 4365 } 4366 ReplaceUses(SDValue(N, 1), Result); 4367 } 4368 ReplaceUses(SDValue(N, 2), OutChain); 4369 CurDAG->RemoveDeadNode(N); 4370 return; 4371 } 4372 case Intrinsic::arm_stlexd: 4373 case Intrinsic::arm_strexd: { 4374 SDLoc dl(N); 4375 SDValue Chain = N->getOperand(0); 4376 SDValue Val0 = N->getOperand(2); 4377 SDValue Val1 = N->getOperand(3); 4378 SDValue MemAddr = N->getOperand(4); 4379 4380 // Store exclusive double return a i32 value which is the 
return status 4381 // of the issued store. 4382 const EVT ResTys[] = {MVT::i32, MVT::Other}; 4383 4384 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 4385 // Place arguments in the right order. 4386 SmallVector<SDValue, 7> Ops; 4387 if (isThumb) { 4388 Ops.push_back(Val0); 4389 Ops.push_back(Val1); 4390 } else 4391 // arm_strexd uses GPRPair. 4392 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 4393 Ops.push_back(MemAddr); 4394 Ops.push_back(getAL(CurDAG, dl)); 4395 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4396 Ops.push_back(Chain); 4397 4398 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 4399 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 4400 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 4401 4402 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4403 // Transfer memoperands. 4404 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4405 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4406 4407 ReplaceNode(N, St); 4408 return; 4409 } 4410 4411 case Intrinsic::arm_neon_vld1: { 4412 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 4413 ARM::VLD1d32, ARM::VLD1d64 }; 4414 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4415 ARM::VLD1q32, ARM::VLD1q64}; 4416 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 4417 return; 4418 } 4419 4420 case Intrinsic::arm_neon_vld1x2: { 4421 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4422 ARM::VLD1q32, ARM::VLD1q64 }; 4423 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 4424 ARM::VLD1d16QPseudo, 4425 ARM::VLD1d32QPseudo, 4426 ARM::VLD1d64QPseudo }; 4427 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4428 return; 4429 } 4430 4431 case Intrinsic::arm_neon_vld1x3: { 4432 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 4433 ARM::VLD1d16TPseudo, 4434 ARM::VLD1d32TPseudo, 4435 ARM::VLD1d64TPseudo }; 4436 static const uint16_t QOpcodes0[] = { 
ARM::VLD1q8LowTPseudo_UPD, 4437 ARM::VLD1q16LowTPseudo_UPD, 4438 ARM::VLD1q32LowTPseudo_UPD, 4439 ARM::VLD1q64LowTPseudo_UPD }; 4440 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 4441 ARM::VLD1q16HighTPseudo, 4442 ARM::VLD1q32HighTPseudo, 4443 ARM::VLD1q64HighTPseudo }; 4444 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4445 return; 4446 } 4447 4448 case Intrinsic::arm_neon_vld1x4: { 4449 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 4450 ARM::VLD1d16QPseudo, 4451 ARM::VLD1d32QPseudo, 4452 ARM::VLD1d64QPseudo }; 4453 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 4454 ARM::VLD1q16LowQPseudo_UPD, 4455 ARM::VLD1q32LowQPseudo_UPD, 4456 ARM::VLD1q64LowQPseudo_UPD }; 4457 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 4458 ARM::VLD1q16HighQPseudo, 4459 ARM::VLD1q32HighQPseudo, 4460 ARM::VLD1q64HighQPseudo }; 4461 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4462 return; 4463 } 4464 4465 case Intrinsic::arm_neon_vld2: { 4466 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 4467 ARM::VLD2d32, ARM::VLD1q64 }; 4468 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 4469 ARM::VLD2q32Pseudo }; 4470 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4471 return; 4472 } 4473 4474 case Intrinsic::arm_neon_vld3: { 4475 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 4476 ARM::VLD3d16Pseudo, 4477 ARM::VLD3d32Pseudo, 4478 ARM::VLD1d64TPseudo }; 4479 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4480 ARM::VLD3q16Pseudo_UPD, 4481 ARM::VLD3q32Pseudo_UPD }; 4482 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 4483 ARM::VLD3q16oddPseudo, 4484 ARM::VLD3q32oddPseudo }; 4485 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4486 return; 4487 } 4488 4489 case Intrinsic::arm_neon_vld4: { 4490 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 4491 ARM::VLD4d16Pseudo, 4492 ARM::VLD4d32Pseudo, 4493 ARM::VLD1d64QPseudo }; 4494 
static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 4495 ARM::VLD4q16Pseudo_UPD, 4496 ARM::VLD4q32Pseudo_UPD }; 4497 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 4498 ARM::VLD4q16oddPseudo, 4499 ARM::VLD4q32oddPseudo }; 4500 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4501 return; 4502 } 4503 4504 case Intrinsic::arm_neon_vld2dup: { 4505 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4506 ARM::VLD2DUPd32, ARM::VLD1q64 }; 4507 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4508 ARM::VLD2DUPq16EvenPseudo, 4509 ARM::VLD2DUPq32EvenPseudo }; 4510 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 4511 ARM::VLD2DUPq16OddPseudo, 4512 ARM::VLD2DUPq32OddPseudo }; 4513 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 4514 DOpcodes, QOpcodes0, QOpcodes1); 4515 return; 4516 } 4517 4518 case Intrinsic::arm_neon_vld3dup: { 4519 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 4520 ARM::VLD3DUPd16Pseudo, 4521 ARM::VLD3DUPd32Pseudo, 4522 ARM::VLD1d64TPseudo }; 4523 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4524 ARM::VLD3DUPq16EvenPseudo, 4525 ARM::VLD3DUPq32EvenPseudo }; 4526 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 4527 ARM::VLD3DUPq16OddPseudo, 4528 ARM::VLD3DUPq32OddPseudo }; 4529 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 4530 DOpcodes, QOpcodes0, QOpcodes1); 4531 return; 4532 } 4533 4534 case Intrinsic::arm_neon_vld4dup: { 4535 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 4536 ARM::VLD4DUPd16Pseudo, 4537 ARM::VLD4DUPd32Pseudo, 4538 ARM::VLD1d64QPseudo }; 4539 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4540 ARM::VLD4DUPq16EvenPseudo, 4541 ARM::VLD4DUPq32EvenPseudo }; 4542 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 4543 ARM::VLD4DUPq16OddPseudo, 4544 ARM::VLD4DUPq32OddPseudo }; 4545 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 4546 DOpcodes, QOpcodes0, QOpcodes1); 4547 
return; 4548 } 4549 4550 case Intrinsic::arm_neon_vld2lane: { 4551 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 4552 ARM::VLD2LNd16Pseudo, 4553 ARM::VLD2LNd32Pseudo }; 4554 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 4555 ARM::VLD2LNq32Pseudo }; 4556 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 4557 return; 4558 } 4559 4560 case Intrinsic::arm_neon_vld3lane: { 4561 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 4562 ARM::VLD3LNd16Pseudo, 4563 ARM::VLD3LNd32Pseudo }; 4564 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 4565 ARM::VLD3LNq32Pseudo }; 4566 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 4567 return; 4568 } 4569 4570 case Intrinsic::arm_neon_vld4lane: { 4571 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 4572 ARM::VLD4LNd16Pseudo, 4573 ARM::VLD4LNd32Pseudo }; 4574 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 4575 ARM::VLD4LNq32Pseudo }; 4576 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 4577 return; 4578 } 4579 4580 case Intrinsic::arm_neon_vst1: { 4581 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 4582 ARM::VST1d32, ARM::VST1d64 }; 4583 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 4584 ARM::VST1q32, ARM::VST1q64 }; 4585 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 4586 return; 4587 } 4588 4589 case Intrinsic::arm_neon_vst1x2: { 4590 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 4591 ARM::VST1q32, ARM::VST1q64 }; 4592 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 4593 ARM::VST1d16QPseudo, 4594 ARM::VST1d32QPseudo, 4595 ARM::VST1d64QPseudo }; 4596 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 4597 return; 4598 } 4599 4600 case Intrinsic::arm_neon_vst1x3: { 4601 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 4602 ARM::VST1d16TPseudo, 4603 ARM::VST1d32TPseudo, 4604 ARM::VST1d64TPseudo }; 4605 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4606 
ARM::VST1q16LowTPseudo_UPD, 4607 ARM::VST1q32LowTPseudo_UPD, 4608 ARM::VST1q64LowTPseudo_UPD }; 4609 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 4610 ARM::VST1q16HighTPseudo, 4611 ARM::VST1q32HighTPseudo, 4612 ARM::VST1q64HighTPseudo }; 4613 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4614 return; 4615 } 4616 4617 case Intrinsic::arm_neon_vst1x4: { 4618 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 4619 ARM::VST1d16QPseudo, 4620 ARM::VST1d32QPseudo, 4621 ARM::VST1d64QPseudo }; 4622 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4623 ARM::VST1q16LowQPseudo_UPD, 4624 ARM::VST1q32LowQPseudo_UPD, 4625 ARM::VST1q64LowQPseudo_UPD }; 4626 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 4627 ARM::VST1q16HighQPseudo, 4628 ARM::VST1q32HighQPseudo, 4629 ARM::VST1q64HighQPseudo }; 4630 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4631 return; 4632 } 4633 4634 case Intrinsic::arm_neon_vst2: { 4635 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 4636 ARM::VST2d32, ARM::VST1q64 }; 4637 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 4638 ARM::VST2q32Pseudo }; 4639 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 4640 return; 4641 } 4642 4643 case Intrinsic::arm_neon_vst3: { 4644 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 4645 ARM::VST3d16Pseudo, 4646 ARM::VST3d32Pseudo, 4647 ARM::VST1d64TPseudo }; 4648 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4649 ARM::VST3q16Pseudo_UPD, 4650 ARM::VST3q32Pseudo_UPD }; 4651 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 4652 ARM::VST3q16oddPseudo, 4653 ARM::VST3q32oddPseudo }; 4654 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4655 return; 4656 } 4657 4658 case Intrinsic::arm_neon_vst4: { 4659 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 4660 ARM::VST4d16Pseudo, 4661 ARM::VST4d32Pseudo, 4662 ARM::VST1d64QPseudo }; 4663 static const uint16_t QOpcodes0[] 
= { ARM::VST4q8Pseudo_UPD, 4664 ARM::VST4q16Pseudo_UPD, 4665 ARM::VST4q32Pseudo_UPD }; 4666 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 4667 ARM::VST4q16oddPseudo, 4668 ARM::VST4q32oddPseudo }; 4669 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4670 return; 4671 } 4672 4673 case Intrinsic::arm_neon_vst2lane: { 4674 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 4675 ARM::VST2LNd16Pseudo, 4676 ARM::VST2LNd32Pseudo }; 4677 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 4678 ARM::VST2LNq32Pseudo }; 4679 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 4680 return; 4681 } 4682 4683 case Intrinsic::arm_neon_vst3lane: { 4684 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 4685 ARM::VST3LNd16Pseudo, 4686 ARM::VST3LNd32Pseudo }; 4687 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 4688 ARM::VST3LNq32Pseudo }; 4689 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 4690 return; 4691 } 4692 4693 case Intrinsic::arm_neon_vst4lane: { 4694 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 4695 ARM::VST4LNd16Pseudo, 4696 ARM::VST4LNd32Pseudo }; 4697 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 4698 ARM::VST4LNq32Pseudo }; 4699 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 4700 return; 4701 } 4702 4703 case Intrinsic::arm_mve_vldr_gather_base_wb: 4704 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: { 4705 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre, 4706 ARM::MVE_VLDRDU64_qi_pre}; 4707 SelectMVE_WB(N, Opcodes, 4708 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated); 4709 return; 4710 } 4711 4712 case Intrinsic::arm_mve_vld2q: { 4713 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8}; 4714 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4715 ARM::MVE_VLD21_16}; 4716 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4717 ARM::MVE_VLD21_32}; 4718 static const uint16_t *const Opcodes[] = {Opcodes8, 
Opcodes16, Opcodes32}; 4719 SelectMVE_VLD(N, 2, Opcodes, false); 4720 return; 4721 } 4722 4723 case Intrinsic::arm_mve_vld4q: { 4724 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4725 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8}; 4726 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4727 ARM::MVE_VLD42_16, 4728 ARM::MVE_VLD43_16}; 4729 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4730 ARM::MVE_VLD42_32, 4731 ARM::MVE_VLD43_32}; 4732 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4733 SelectMVE_VLD(N, 4, Opcodes, false); 4734 return; 4735 } 4736 } 4737 break; 4738 } 4739 4740 case ISD::INTRINSIC_WO_CHAIN: { 4741 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 4742 switch (IntNo) { 4743 default: 4744 break; 4745 4746 // Scalar f32 -> bf16 4747 case Intrinsic::arm_neon_vcvtbfp2bf: { 4748 SDLoc dl(N); 4749 const SDValue &Src = N->getOperand(1); 4750 llvm::EVT DestTy = N->getValueType(0); 4751 SDValue Pred = getAL(CurDAG, dl); 4752 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 4753 SDValue Ops[] = { Src, Src, Pred, Reg0 }; 4754 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops); 4755 return; 4756 } 4757 4758 // Vector v4f32 -> v4bf16 4759 case Intrinsic::arm_neon_vcvtfp2bf: { 4760 SDLoc dl(N); 4761 const SDValue &Src = N->getOperand(1); 4762 SDValue Pred = getAL(CurDAG, dl); 4763 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 4764 SDValue Ops[] = { Src, Pred, Reg0 }; 4765 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops); 4766 return; 4767 } 4768 4769 case Intrinsic::arm_mve_urshrl: 4770 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false); 4771 return; 4772 case Intrinsic::arm_mve_uqshll: 4773 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false); 4774 return; 4775 case Intrinsic::arm_mve_srshrl: 4776 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false); 4777 return; 4778 case Intrinsic::arm_mve_sqshll: 4779 SelectMVE_LongShift(N, 
ARM::MVE_SQSHLL, true, false); 4780 return; 4781 case Intrinsic::arm_mve_uqrshll: 4782 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true); 4783 return; 4784 case Intrinsic::arm_mve_sqrshrl: 4785 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true); 4786 return; 4787 4788 case Intrinsic::arm_mve_vadc: 4789 case Intrinsic::arm_mve_vadc_predicated: 4790 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true, 4791 IntNo == Intrinsic::arm_mve_vadc_predicated); 4792 return; 4793 case Intrinsic::arm_mve_vsbc: 4794 case Intrinsic::arm_mve_vsbc_predicated: 4795 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true, 4796 IntNo == Intrinsic::arm_mve_vsbc_predicated); 4797 return; 4798 case Intrinsic::arm_mve_vshlc: 4799 case Intrinsic::arm_mve_vshlc_predicated: 4800 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated); 4801 return; 4802 4803 case Intrinsic::arm_mve_vmlldava: 4804 case Intrinsic::arm_mve_vmlldava_predicated: { 4805 static const uint16_t OpcodesU[] = { 4806 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32, 4807 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32, 4808 }; 4809 static const uint16_t OpcodesS[] = { 4810 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32, 4811 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32, 4812 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32, 4813 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32, 4814 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32, 4815 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32, 4816 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32, 4817 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32, 4818 }; 4819 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated, 4820 OpcodesS, OpcodesU); 4821 return; 4822 } 4823 4824 case Intrinsic::arm_mve_vrmlldavha: 4825 case Intrinsic::arm_mve_vrmlldavha_predicated: { 4826 static const uint16_t OpcodesU[] = { 4827 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32, 4828 }; 4829 static const uint16_t OpcodesS[] = { 4830 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32, 4831 
ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32, 4832 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32, 4833 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32, 4834 }; 4835 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated, 4836 OpcodesS, OpcodesU); 4837 return; 4838 } 4839 4840 case Intrinsic::arm_mve_vidup: 4841 case Intrinsic::arm_mve_vidup_predicated: { 4842 static const uint16_t Opcodes[] = { 4843 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32, 4844 }; 4845 SelectMVE_VxDUP(N, Opcodes, false, 4846 IntNo == Intrinsic::arm_mve_vidup_predicated); 4847 return; 4848 } 4849 4850 case Intrinsic::arm_mve_vddup: 4851 case Intrinsic::arm_mve_vddup_predicated: { 4852 static const uint16_t Opcodes[] = { 4853 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32, 4854 }; 4855 SelectMVE_VxDUP(N, Opcodes, false, 4856 IntNo == Intrinsic::arm_mve_vddup_predicated); 4857 return; 4858 } 4859 4860 case Intrinsic::arm_mve_viwdup: 4861 case Intrinsic::arm_mve_viwdup_predicated: { 4862 static const uint16_t Opcodes[] = { 4863 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32, 4864 }; 4865 SelectMVE_VxDUP(N, Opcodes, true, 4866 IntNo == Intrinsic::arm_mve_viwdup_predicated); 4867 return; 4868 } 4869 4870 case Intrinsic::arm_mve_vdwdup: 4871 case Intrinsic::arm_mve_vdwdup_predicated: { 4872 static const uint16_t Opcodes[] = { 4873 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32, 4874 }; 4875 SelectMVE_VxDUP(N, Opcodes, true, 4876 IntNo == Intrinsic::arm_mve_vdwdup_predicated); 4877 return; 4878 } 4879 4880 case Intrinsic::arm_cde_cx1d: 4881 case Intrinsic::arm_cde_cx1da: 4882 case Intrinsic::arm_cde_cx2d: 4883 case Intrinsic::arm_cde_cx2da: 4884 case Intrinsic::arm_cde_cx3d: 4885 case Intrinsic::arm_cde_cx3da: { 4886 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da || 4887 IntNo == Intrinsic::arm_cde_cx2da || 4888 IntNo == Intrinsic::arm_cde_cx3da; 4889 size_t NumExtraOps; 4890 uint16_t Opcode; 4891 switch (IntNo) { 4892 case 
Intrinsic::arm_cde_cx1d: 4893 case Intrinsic::arm_cde_cx1da: 4894 NumExtraOps = 0; 4895 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D; 4896 break; 4897 case Intrinsic::arm_cde_cx2d: 4898 case Intrinsic::arm_cde_cx2da: 4899 NumExtraOps = 1; 4900 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D; 4901 break; 4902 case Intrinsic::arm_cde_cx3d: 4903 case Intrinsic::arm_cde_cx3da: 4904 NumExtraOps = 2; 4905 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D; 4906 break; 4907 default: 4908 llvm_unreachable("Unexpected opcode"); 4909 } 4910 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum); 4911 return; 4912 } 4913 } 4914 break; 4915 } 4916 4917 case ISD::ATOMIC_CMP_SWAP: 4918 SelectCMP_SWAP(N); 4919 return; 4920 } 4921 4922 SelectCode(N); 4923 } 4924 4925 // Inspect a register string of the form 4926 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 4927 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 4928 // and obtain the integer operands from them, adding these operands to the 4929 // provided vector. 4930 static void getIntOperandsFromRegisterString(StringRef RegString, 4931 SelectionDAG *CurDAG, 4932 const SDLoc &DL, 4933 std::vector<SDValue> &Ops) { 4934 SmallVector<StringRef, 5> Fields; 4935 RegString.split(Fields, ':'); 4936 4937 if (Fields.size() > 1) { 4938 bool AllIntFields = true; 4939 4940 for (StringRef Field : Fields) { 4941 // Need to trim out leading 'cp' characters and get the integer field. 4942 unsigned IntField; 4943 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 4944 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 4945 } 4946 4947 assert(AllIntFields && 4948 "Unexpected non-integer value in special register string."); 4949 } 4950 } 4951 4952 // Maps a Banked Register string to its mask value. The mask value returned is 4953 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 4954 // mask operand, which expresses which register is to be used, e.g. 
r8, and in 4955 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 4956 // was invalid. 4957 static inline int getBankedRegisterMask(StringRef RegString) { 4958 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower()); 4959 if (!TheReg) 4960 return -1; 4961 return TheReg->Encoding; 4962 } 4963 4964 // The flags here are common to those allowed for apsr in the A class cores and 4965 // those allowed for the special registers in the M class cores. Returns a 4966 // value representing which flags were present, -1 if invalid. 4967 static inline int getMClassFlagsMask(StringRef Flags) { 4968 return StringSwitch<int>(Flags) 4969 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is 4970 // correct when flags are not permitted 4971 .Case("g", 0x1) 4972 .Case("nzcvq", 0x2) 4973 .Case("nzcvqg", 0x3) 4974 .Default(-1); 4975 } 4976 4977 // Maps MClass special registers string to its value for use in the 4978 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand. 4979 // Returns -1 to signify that the string was invalid. 4980 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) { 4981 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg); 4982 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits(); 4983 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits)) 4984 return -1; 4985 return (int)(TheReg->Encoding & 0xFFF); // SYSm value 4986 } 4987 4988 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { 4989 // The mask operand contains the special register (R Bit) in bit 4, whether 4990 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and 4991 // bits 3-0 contains the fields to be accessed in the special register, set by 4992 // the flags provided with the register. 4993 int Mask = 0; 4994 if (Reg == "apsr") { 4995 // The flags permitted for apsr are the same flags that are allowed in 4996 // M class registers. 
We get the flag value and then shift the flags into 4997 // the correct place to combine with the mask. 4998 Mask = getMClassFlagsMask(Flags); 4999 if (Mask == -1) 5000 return -1; 5001 return Mask << 2; 5002 } 5003 5004 if (Reg != "cpsr" && Reg != "spsr") { 5005 return -1; 5006 } 5007 5008 // This is the same as if the flags were "fc" 5009 if (Flags.empty() || Flags == "all") 5010 return Mask | 0x9; 5011 5012 // Inspect the supplied flags string and set the bits in the mask for 5013 // the relevant and valid flags allowed for cpsr and spsr. 5014 for (char Flag : Flags) { 5015 int FlagVal; 5016 switch (Flag) { 5017 case 'c': 5018 FlagVal = 0x1; 5019 break; 5020 case 'x': 5021 FlagVal = 0x2; 5022 break; 5023 case 's': 5024 FlagVal = 0x4; 5025 break; 5026 case 'f': 5027 FlagVal = 0x8; 5028 break; 5029 default: 5030 FlagVal = 0; 5031 } 5032 5033 // This avoids allowing strings where the same flag bit appears twice. 5034 if (!FlagVal || (Mask & FlagVal)) 5035 return -1; 5036 Mask |= FlagVal; 5037 } 5038 5039 // If the register is spsr then we need to set the R bit. 5040 if (Reg == "spsr") 5041 Mask |= 0x10; 5042 5043 return Mask; 5044 } 5045 5046 // Lower the read_register intrinsic to ARM specific DAG nodes 5047 // using the supplied metadata string to select the instruction node to use 5048 // and the registers/masks to construct as operands for the node. 
5049 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ 5050 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 5051 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 5052 bool IsThumb2 = Subtarget->isThumb2(); 5053 SDLoc DL(N); 5054 5055 std::vector<SDValue> Ops; 5056 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5057 5058 if (!Ops.empty()) { 5059 // If the special register string was constructed of fields (as defined 5060 // in the ACLE) then need to lower to MRC node (32 bit) or 5061 // MRRC node(64 bit), we can make the distinction based on the number of 5062 // operands we have. 5063 unsigned Opcode; 5064 SmallVector<EVT, 3> ResTypes; 5065 if (Ops.size() == 5){ 5066 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; 5067 ResTypes.append({ MVT::i32, MVT::Other }); 5068 } else { 5069 assert(Ops.size() == 3 && 5070 "Invalid number of fields in special register string."); 5071 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; 5072 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); 5073 } 5074 5075 Ops.push_back(getAL(CurDAG, DL)); 5076 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5077 Ops.push_back(N->getOperand(0)); 5078 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); 5079 return true; 5080 } 5081 5082 std::string SpecialReg = RegString->getString().lower(); 5083 5084 int BankedReg = getBankedRegisterMask(SpecialReg); 5085 if (BankedReg != -1) { 5086 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), 5087 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5088 N->getOperand(0) }; 5089 ReplaceNode( 5090 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, 5091 DL, MVT::i32, MVT::Other, Ops)); 5092 return true; 5093 } 5094 5095 // The VFP registers are read by creating SelectionDAG nodes with opcodes 5096 // corresponding to the register that is being read from. So we switch on the 5097 // string to find which opcode we need to use. 
5098 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5099 .Case("fpscr", ARM::VMRS) 5100 .Case("fpexc", ARM::VMRS_FPEXC) 5101 .Case("fpsid", ARM::VMRS_FPSID) 5102 .Case("mvfr0", ARM::VMRS_MVFR0) 5103 .Case("mvfr1", ARM::VMRS_MVFR1) 5104 .Case("mvfr2", ARM::VMRS_MVFR2) 5105 .Case("fpinst", ARM::VMRS_FPINST) 5106 .Case("fpinst2", ARM::VMRS_FPINST2) 5107 .Default(0); 5108 5109 // If an opcode was found then we can lower the read to a VFP instruction. 5110 if (Opcode) { 5111 if (!Subtarget->hasVFP2Base()) 5112 return false; 5113 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base()) 5114 return false; 5115 5116 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5117 N->getOperand(0) }; 5118 ReplaceNode(N, 5119 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); 5120 return true; 5121 } 5122 5123 // If the target is M Class then need to validate that the register string 5124 // is an acceptable value, so check that a mask can be constructed from the 5125 // string. 5126 if (Subtarget->isMClass()) { 5127 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5128 if (SYSmValue == -1) 5129 return false; 5130 5131 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5132 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5133 N->getOperand(0) }; 5134 ReplaceNode( 5135 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); 5136 return true; 5137 } 5138 5139 // Here we know the target is not M Class so we need to check if it is one 5140 // of the remaining possible values which are apsr, cpsr or spsr. 5141 if (SpecialReg == "apsr" || SpecialReg == "cpsr") { 5142 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5143 N->getOperand(0) }; 5144 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? 
ARM::t2MRS_AR : ARM::MRS, 5145 DL, MVT::i32, MVT::Other, Ops)); 5146 return true; 5147 } 5148 5149 if (SpecialReg == "spsr") { 5150 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5151 N->getOperand(0) }; 5152 ReplaceNode( 5153 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL, 5154 MVT::i32, MVT::Other, Ops)); 5155 return true; 5156 } 5157 5158 return false; 5159 } 5160 5161 // Lower the write_register intrinsic to ARM specific DAG nodes 5162 // using the supplied metadata string to select the instruction node to use 5163 // and the registers/masks to use in the nodes 5164 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ 5165 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 5166 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 5167 bool IsThumb2 = Subtarget->isThumb2(); 5168 SDLoc DL(N); 5169 5170 std::vector<SDValue> Ops; 5171 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5172 5173 if (!Ops.empty()) { 5174 // If the special register string was constructed of fields (as defined 5175 // in the ACLE) then need to lower to MCR node (32 bit) or 5176 // MCRR node(64 bit), we can make the distinction based on the number of 5177 // operands we have. 5178 unsigned Opcode; 5179 if (Ops.size() == 5) { 5180 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; 5181 Ops.insert(Ops.begin()+2, N->getOperand(2)); 5182 } else { 5183 assert(Ops.size() == 3 && 5184 "Invalid number of fields in special register string."); 5185 Opcode = IsThumb2 ? 
ARM::t2MCRR : ARM::MCRR; 5186 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; 5187 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); 5188 } 5189 5190 Ops.push_back(getAL(CurDAG, DL)); 5191 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5192 Ops.push_back(N->getOperand(0)); 5193 5194 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5195 return true; 5196 } 5197 5198 std::string SpecialReg = RegString->getString().lower(); 5199 int BankedReg = getBankedRegisterMask(SpecialReg); 5200 if (BankedReg != -1) { 5201 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), 5202 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5203 N->getOperand(0) }; 5204 ReplaceNode( 5205 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, 5206 DL, MVT::Other, Ops)); 5207 return true; 5208 } 5209 5210 // The VFP registers are written to by creating SelectionDAG nodes with 5211 // opcodes corresponding to the register that is being written. So we switch 5212 // on the string to find which opcode we need to use. 5213 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5214 .Case("fpscr", ARM::VMSR) 5215 .Case("fpexc", ARM::VMSR_FPEXC) 5216 .Case("fpsid", ARM::VMSR_FPSID) 5217 .Case("fpinst", ARM::VMSR_FPINST) 5218 .Case("fpinst2", ARM::VMSR_FPINST2) 5219 .Default(0); 5220 5221 if (Opcode) { 5222 if (!Subtarget->hasVFP2Base()) 5223 return false; 5224 Ops = { N->getOperand(2), getAL(CurDAG, DL), 5225 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5226 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5227 return true; 5228 } 5229 5230 std::pair<StringRef, StringRef> Fields; 5231 Fields = StringRef(SpecialReg).rsplit('_'); 5232 std::string Reg = Fields.first.str(); 5233 StringRef Flags = Fields.second; 5234 5235 // If the target was M Class then need to validate the special register value 5236 // and retrieve the mask for use in the instruction node. 
5237 if (Subtarget->isMClass()) { 5238 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5239 if (SYSmValue == -1) 5240 return false; 5241 5242 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5243 N->getOperand(2), getAL(CurDAG, DL), 5244 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5245 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); 5246 return true; 5247 } 5248 5249 // We then check to see if a valid mask can be constructed for one of the 5250 // register string values permitted for the A and R class cores. These values 5251 // are apsr, spsr and cpsr; these are also valid on older cores. 5252 int Mask = getARClassRegisterMask(Reg, Flags); 5253 if (Mask != -1) { 5254 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), 5255 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5256 N->getOperand(0) }; 5257 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, 5258 DL, MVT::Other, Ops)); 5259 return true; 5260 } 5261 5262 return false; 5263 } 5264 5265 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ 5266 std::vector<SDValue> AsmNodeOperands; 5267 unsigned Flag, Kind; 5268 bool Changed = false; 5269 unsigned NumOps = N->getNumOperands(); 5270 5271 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 5272 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 5273 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 5274 // respectively. Since there is no constraint to explicitly specify a 5275 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 5276 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 5277 // them into a GPRPair. 5278 5279 SDLoc dl(N); 5280 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) 5281 : SDValue(nullptr,0); 5282 5283 SmallVector<bool, 8> OpChanged; 5284 // Glue node will be appended late. 
5285 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 5286 SDValue op = N->getOperand(i); 5287 AsmNodeOperands.push_back(op); 5288 5289 if (i < InlineAsm::Op_FirstOperand) 5290 continue; 5291 5292 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 5293 Flag = C->getZExtValue(); 5294 Kind = InlineAsm::getKind(Flag); 5295 } 5296 else 5297 continue; 5298 5299 // Immediate operands to inline asm in the SelectionDAG are modeled with 5300 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and 5301 // the second is a constant with the value of the immediate. If we get here 5302 // and we have a Kind_Imm, skip the next operand, and continue. 5303 if (Kind == InlineAsm::Kind_Imm) { 5304 SDValue op = N->getOperand(++i); 5305 AsmNodeOperands.push_back(op); 5306 continue; 5307 } 5308 5309 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); 5310 if (NumRegs) 5311 OpChanged.push_back(false); 5312 5313 unsigned DefIdx = 0; 5314 bool IsTiedToChangedOp = false; 5315 // If it's a use that is tied with a previous def, it has no 5316 // reg class constraint. 5317 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) 5318 IsTiedToChangedOp = OpChanged[DefIdx]; 5319 5320 // Memory operands to inline asm in the SelectionDAG are modeled with two 5321 // operands: a constant of value InlineAsm::Kind_Mem followed by the input 5322 // operand. If we get here and we have a Kind_Mem, skip the next operand (so 5323 // it doesn't get misinterpreted), and continue. We do this here because 5324 // it's important to update the OpChanged array correctly before moving on. 
5325 if (Kind == InlineAsm::Kind_Mem) { 5326 SDValue op = N->getOperand(++i); 5327 AsmNodeOperands.push_back(op); 5328 continue; 5329 } 5330 5331 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 5332 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 5333 continue; 5334 5335 unsigned RC; 5336 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 5337 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 5338 || NumRegs != 2) 5339 continue; 5340 5341 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 5342 SDValue V0 = N->getOperand(i+1); 5343 SDValue V1 = N->getOperand(i+2); 5344 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); 5345 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); 5346 SDValue PairedReg; 5347 MachineRegisterInfo &MRI = MF->getRegInfo(); 5348 5349 if (Kind == InlineAsm::Kind_RegDef || 5350 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 5351 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 5352 // the original GPRs. 5353 5354 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5355 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5356 SDValue Chain = SDValue(N,0); 5357 5358 SDNode *GU = N->getGluedUser(); 5359 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 5360 Chain.getValue(1)); 5361 5362 // Extract values from a GPRPair reg and copy to the original GPR reg. 5363 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 5364 RegCopy); 5365 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 5366 RegCopy); 5367 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 5368 RegCopy.getValue(1)); 5369 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 5370 5371 // Update the original glue user. 
5372 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 5373 Ops.push_back(T1.getValue(1)); 5374 CurDAG->UpdateNodeOperands(GU, Ops); 5375 } 5376 else { 5377 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 5378 // GPRPair and then pass the GPRPair to the inline asm. 5379 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 5380 5381 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 5382 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 5383 Chain.getValue(1)); 5384 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 5385 T0.getValue(1)); 5386 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 5387 5388 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 5389 // i32 VRs of inline asm with it. 5390 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5391 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5392 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 5393 5394 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 5395 Glue = Chain.getValue(1); 5396 } 5397 5398 Changed = true; 5399 5400 if(PairedReg.getNode()) { 5401 OpChanged[OpChanged.size() -1 ] = true; 5402 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 5403 if (IsTiedToChangedOp) 5404 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 5405 else 5406 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 5407 // Replace the current flag. 5408 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 5409 Flag, dl, MVT::i32); 5410 // Add the new register node and skip the original two GPRs. 5411 AsmNodeOperands.push_back(PairedReg); 5412 // Skip the next two GPRs. 
5413 i += 2; 5414 } 5415 } 5416 5417 if (Glue.getNode()) 5418 AsmNodeOperands.push_back(Glue); 5419 if (!Changed) 5420 return false; 5421 5422 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N), 5423 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 5424 New->setNodeId(-1); 5425 ReplaceNode(N, New.getNode()); 5426 return true; 5427 } 5428 5429 5430 bool ARMDAGToDAGISel:: 5431 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 5432 std::vector<SDValue> &OutOps) { 5433 switch(ConstraintID) { 5434 default: 5435 llvm_unreachable("Unexpected asm memory constraint"); 5436 case InlineAsm::Constraint_m: 5437 case InlineAsm::Constraint_o: 5438 case InlineAsm::Constraint_Q: 5439 case InlineAsm::Constraint_Um: 5440 case InlineAsm::Constraint_Un: 5441 case InlineAsm::Constraint_Uq: 5442 case InlineAsm::Constraint_Us: 5443 case InlineAsm::Constraint_Ut: 5444 case InlineAsm::Constraint_Uv: 5445 case InlineAsm::Constraint_Uy: 5446 // Require the address to be in a register. That is safe for all ARM 5447 // variants and it is hard to do anything much smarter without knowing 5448 // how the operand is used. 5449 OutOps.push_back(Op); 5450 return false; 5451 } 5452 return true; 5453 } 5454 5455 /// createARMISelDag - This pass converts a legalized DAG into a 5456 /// ARM-specific DAG, ready for instruction scheduling. 5457 /// 5458 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 5459 CodeGenOpt::Level OptLevel) { 5460 return new ARMDAGToDAGISel(TM, OptLevel); 5461 } 5462