//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// Select long MVE vector reductions with two vector operands
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  ///   2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  ///   1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit constant
// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in the range [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).
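    // Worked example (values chosen to match the ubfx + shifted-ldr sequence
    // shown further below): with c1 = 14 and c2 = 1020 (tz = 2),
    //   (add X1, (and (srl X2, 14), 1020))
    // becomes
    //   (add X1, (shl (and (srl X2, 16), 255), 2))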

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. on
      // Swift, left shifts by 1 or 2 are free but others are not. e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
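    // So if the single use is itself an FP MLx instruction, still return true
    // here and leave it to MLxExpansion to unfold the chain when that turns
    // out to be cheaper.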
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
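  // (A shifter operand folds the shift into the consuming instruction, e.g.
  // "add r0, r1, r2, lsl #3"; a plain register without a shift is matched by
  // the simpler pattern instead.)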
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}


bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}


bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant; if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
             dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
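  // For example (constants chosen for illustration): on a target where 510 is
  // expensive to materialize but 255 is a single mov, (mul X, 510) can be
  // selected as (mul X, 255) with "lsl #1" folded into the addressing mode.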
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
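    // (Consequently a SUB that reaches this point has a register RHS, which
    // is selected below as a register offset with the subtract flag set.)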
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
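  // E.g. with Scale == 4 this accepts byte offsets 0, 4, ..., 124 (an imm5
  // scaled by 4), per the range check below.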
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        if (MFI.getObjectAlignment(FI) >= 4) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}


//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//


bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
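  // The positive (R + imm12) form is left to SelectT2AddrModeImm12; only
  // offsets in [-255, -1] are accepted here, e.g. "ldr r0, [r1, #-8]".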
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
        ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
        : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}

template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
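  // The offset must be a multiple of 4 in [0, 1020] and is encoded divided by
  // 4 (e.g. a byte offset of 16 becomes an OffImm of 4); anything else falls
  // back to the zero-offset form set up here.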
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC / 4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
                 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
                 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                           MVT::i32, MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                           MVT::i32, MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load
  // to look, however, so we use a pseudo here and switch it for a tLDMIA_UPD
  // after ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[] = { Base, getAL(CurDAG, SDLoc(N)),
                    CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[] = { Base, Offset, getAL(CurDAG, SDLoc(N)),
                      CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  unsigned Align;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Align = LD->getAlignment();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Align = LD->getAlignment();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example, use a
  // vldrb.8 as opposed to a vldrw.32). This can allow extra addressing modes
  // or alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  SDValue NewOffset;
  if (Align >= 2 && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Align >= 4 &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Align >= 2 &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base, NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), N->getValueType(0),
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
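
// REG_SEQUENCE is a target-independent pseudo that only constrains register
// allocation: it forces its inputs into the named subregisters of a single
// wide tuple register (for example, two Q registers allocated as q0/q1 form
// a QQPR covering d0-d3). It is rewritten to subregister copies after
// selection, and those copies usually coalesce away.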
/// Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
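
// These quad tuples back the VLD3/VLD4 and VST3/VST4 selection below: since
// there is no three-register tuple class, three-register operations are
// padded out to a four-register tuple with an IMPLICIT_DEF in the last slot.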
/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
/// of a NEON VLD or VST instruction. The supported values depend on the
/// number of registers being loaded.
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
                                       unsigned NumVecs, bool is64BitVector) {
  unsigned NumRegs = NumVecs;
  if (!is64BitVector && NumVecs < 3)
    NumRegs *= 2;

  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
  if (Alignment >= 32 && NumRegs == 4)
    Alignment = 32;
  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
    Alignment = 16;
  else if (Alignment >= 8)
    Alignment = 8;
  else
    Alignment = 0;

  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
}

static bool isVLDfixed(unsigned Opc) {
  switch (Opc) {
  default: return false;
  case ARM::VLD1d8wb_fixed: return true;
  case ARM::VLD1d16wb_fixed: return true;
  case ARM::VLD1d64Qwb_fixed: return true;
  case ARM::VLD1d32wb_fixed: return true;
  case ARM::VLD1d64wb_fixed: return true;
  case ARM::VLD1d64TPseudoWB_fixed: return true;
  case ARM::VLD1d64QPseudoWB_fixed: return true;
  case ARM::VLD1q8wb_fixed: return true;
  case ARM::VLD1q16wb_fixed: return true;
  case ARM::VLD1q32wb_fixed: return true;
  case ARM::VLD1q64wb_fixed: return true;
  case ARM::VLD1DUPd8wb_fixed: return true;
  case ARM::VLD1DUPd16wb_fixed: return true;
  case ARM::VLD1DUPd32wb_fixed: return true;
  case ARM::VLD1DUPq8wb_fixed: return true;
  case ARM::VLD1DUPq16wb_fixed: return true;
  case ARM::VLD1DUPq32wb_fixed: return true;
  case ARM::VLD2d8wb_fixed: return true;
  case ARM::VLD2d16wb_fixed: return true;
  case ARM::VLD2d32wb_fixed: return true;
  case ARM::VLD2q8PseudoWB_fixed: return true;
  case ARM::VLD2q16PseudoWB_fixed: return true;
  case ARM::VLD2q32PseudoWB_fixed: return true;
  case ARM::VLD2DUPd8wb_fixed: return true;
  case ARM::VLD2DUPd16wb_fixed: return true;
  case ARM::VLD2DUPd32wb_fixed: return true;
  }
}

static bool isVSTfixed(unsigned Opc) {
  switch (Opc) {
  default: return false;
  case ARM::VST1d8wb_fixed: return true;
  case ARM::VST1d16wb_fixed: return true;
  case ARM::VST1d32wb_fixed: return true;
  case ARM::VST1d64wb_fixed: return true;
  case ARM::VST1q8wb_fixed: return true;
  case ARM::VST1q16wb_fixed: return true;
  case ARM::VST1q32wb_fixed: return true;
  case ARM::VST1q64wb_fixed: return true;
  case ARM::VST1d64TPseudoWB_fixed: return true;
  case ARM::VST1d64QPseudoWB_fixed: return true;
  case ARM::VST2d8wb_fixed: return true;
  case ARM::VST2d16wb_fixed: return true;
  case ARM::VST2d32wb_fixed: return true;
  case ARM::VST2q8PseudoWB_fixed: return true;
  case ARM::VST2q16PseudoWB_fixed: return true;
  case ARM::VST2q32PseudoWB_fixed: return true;
  }
}
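
// A "_fixed" writeback opcode implicitly post-increments the base pointer by
// the size of the memory access; the matching "_register" form (see the
// mapping below) takes the increment as an explicit register operand instead.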
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc)) &&
         "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed: return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed: return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed: return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed: return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed: return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed: return ARM::VLD1DUPq32wb_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
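
// For example (illustrative): a post-increment vld1.32 whose increment equals
// the access size keeps the _fixed form and prints as
//   vld1.32 {d16}, [r0]!
// whereas an increment held in a register, say r2, selects the _register form
//   vld1.32 {d16}, [r0], r2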
/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  auto C = dyn_cast<ConstantSDNode>(Inc);
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}

void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex]
                                  : QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
        // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
        // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd
    // registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
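    // The IMPLICIT_DEF seeds the wide tuple register that the first load only
    // partially defines; the odd-subreg load then fills in the rest.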
    SDValue ImplDef =
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy),
                0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
            ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl,
                                             VT), 0)
            : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex]
                                  : QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd
  // registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}

void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane =
      cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
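    // (x & -x) in two's complement keeps only the lowest set bit, i.e. it
    // rounds a value down to a power of two (e.g. 12 becomes 4).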
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
        ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT),
                  0)
        : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex]
                                : QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask,
                                           SDValue Inactive) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(Inactive);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                                EVT InactiveTy) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}

void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  uint16_t Opcode;
  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(N->getOperand(2)); // vector of base addresses

  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(N->getOperand(0)); // chain

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
                                          bool Immediate,
                                          bool HasSaturationOperand) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // Two 32-bit halves of the value to be shifted
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));

  // The shift count
  if (Immediate) {
    int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
  } else {
    Ops.push_back(N->getOperand(3));
  }

  // The immediate saturation operand, if any
  if (HasSaturationOperand) {
    int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
    int SatBit = (SatOp == 64 ? 0 : 1);
    Ops.push_back(getI32Imm(SatBit, Loc));
  }

  // MVE scalar shifts are IT-predicable, so include the standard
  // predicate arguments.
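  // (An AL condition plus a null predicate register means "unpredicated" as
  // far as the IT-block machinery is concerned.)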
  Ops.push_back(getAL(CurDAG, Loc));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  uint16_t Opcode;

  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(N->getOperand(FirstInputOp));
  Ops.push_back(N->getOperand(FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
  uint32_t CarryMask = 1 << 29;
  uint32_t CarryExpected = Add ? 0 : CarryMask;
  if (CarryInConstant &&
      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
    Opcode = OpcodeWithNoCarry;
  } else {
    Ops.push_back(CarryIn);
    Opcode = OpcodeWithCarry;
  }

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc,
                         N->getOperand(FirstInputOp + 3),  // predicate
                         N->getOperand(FirstInputOp - 1)); // inactive
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

static bool SDValueToConstBool(SDValue SDVal) {
  assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
  ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
  uint64_t Value = SDValConstant->getZExtValue();
  assert((Value == 0 || Value == 1) && "expected value 0 or 1");
  return Value;
}

void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
  bool IsSub = SDValueToConstBool(N->getOperand(2));
  bool IsExchange = SDValueToConstBool(N->getOperand(3));
  if (IsUnsigned) {
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  auto OpIsZero = [N](size_t OpNo) {
    if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
      if (OpConst->getZExtValue() == 0)
        return true;
    return false;
  };

  // If the input accumulator value is not zero, select an instruction with an
  // accumulator; otherwise select an instruction without one.
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
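  // Each opcode table is laid out as eight groups of Stride entries, indexed
  // by the (IsSub, IsExchange, IsAccum) bits in that order; TySize then picks
  // the element-size variant within the group.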
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(N->getOperand(4));
    Ops.push_back(N->getOperand(5));
  }
  // Push the two vector operands
  Ops.push_back(N->getOperand(6));
  Ops.push_back(N->getOperand(7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
                                        const uint16_t *OpcodesS,
                                        const uint16_t *OpcodesU) {
  EVT VecTy = N->getOperand(6).getValueType();
  size_t SizeIndex;
  switch (VecTy.getVectorElementType().getSizeInBits()) {
  case 16:
    SizeIndex = 0;
    break;
  case 32:
    SizeIndex = 1;
    break;
  default:
    llvm_unreachable("bad vector element size");
  }

  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
}

void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                                          const uint16_t *OpcodesS,
                                          const uint16_t *OpcodesU) {
  assert(
      N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
          32 &&
      "bad vector element size");
  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
}

void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  EVT ResultTys[] = {DataTy, MVT::Other};

  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  for (unsigned Stage = 0; Stage < NumVecs; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(2), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
  }

  for (unsigned i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, Data));
  ReplaceUses(SDValue(N, NumVecs), Chain);
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SDNode *VLdDup;
  if (is64BitVector || NumVecs == 1) {
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
                                 : QOpcodes0[OpcodeIndex];
    if (isUpdating) {
      // Fixed-stride update instructions don't have an explicit writeback
      // operand; it's implicit in the opcode itself.
      SDValue Inc = N->getOperand(2);
      bool IsImmUpdate =
          isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
      if (NumVecs <= 2 && !IsImmUpdate)
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      if (!IsImmUpdate)
        Ops.push_back(Inc);
      // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
      else if (NumVecs > 2)
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else if (NumVecs == 2) {
    const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  } else {
    SDValue ImplDef =
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy),
                0);
    const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    SDValue SuperReg = SDValue(VLdA, 0);
    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7,
                  "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx + Vec, dl, VT,
                                                 SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}

bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
      ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
      : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
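          // (A right shift by LSB is emitted as a MOVsi whose shifter operand
          // encodes "lsr #LSB", e.g. mov r0, r1, lsr #5.)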
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
              CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                        MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the AND's LSB
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
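      // (The mask covers bits LSB..MSB inclusive, so MSB - LSB is already the
      // encoded width-1 value.)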
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}

/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
///   select_cc setg[ge] X,  0, X, -X
///   select_cc setgt    X, -1, X, -X
///   select_cc setl[te] X,  0, -X, X
///   select_cc setlt    X,  1, -X, X
/// which represent integer ABS into:
///   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to an
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N) {
  SDValue XORSrc0 = N->getOperand(0);
  SDValue XORSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  if (Subtarget->isThumb1Only())
    return false;

  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue ADDSrc0 = XORSrc0.getOperand(0);
  SDValue ADDSrc1 = XORSrc0.getOperand(1);
  SDValue SRASrc0 = XORSrc1.getOperand(0);
  SDValue SRASrc1 = XORSrc1.getOperand(1);
  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  unsigned Size = XType.getSizeInBits() - 1;

  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
      XType.isInteger() && SRAConstant != nullptr &&
      Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
    return true;
  }

  return false;
}

/// We've got special pseudo-instructions for these.
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return Optional<std::pair<unsigned, unsigned>>();
  return std::make_pair(FirstOne, LastOne);
}

void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL
    // and LSR don't exist as standalone instructions - they need the barrel
    // shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) ||
      !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode * {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    // 1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    // 2. Mask includes the MSB -> Simply shift the bottom N bits off
/// CMP_SWAP nodes are selected to special pseudo-instructions, which are
/// expanded into the real compare-and-swap sequences after register
/// allocation.
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return Optional<std::pair<unsigned,unsigned>>();
  return std::make_pair(FirstOne, LastOne);
}
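
// Example (assumed values, for illustration): A = 0x00ff0000 gives
// FirstOne = 23 and LastOne = 16; countPopulation() == 8 == 23 - 16 + 1,
// so {23, 16} is returned. A = 0x00ff00ff is not a contiguous mask and
// yields None.
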
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL
    // and LSR don't exist as standalone instructions - they need the barrel
    // shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) ||
      !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    // 1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    // 2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    // 3. Only one bit is set. We can shift this into the sign bit and use a
    //    PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    // 4. Do a double shift to clear bottom and top bits, but only in
    //    thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}
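
// For instance (a sketch, assuming Thumb1 with C = 0x7f8, i.e. bits 10:3):
// case 4 above emits "lsls rX, rX, #21" followed by "lsrs rX, rX, #24",
// which leaves only the masked bits, so the following compare-with-zero
// tests exactly the bits covered by C.
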
void ARMDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (N->getOpcode()) {
  default: break;
  case ISD::STORE: {
    // For Thumb1, match an sp-relative store in C++. This is a little
    // unfortunate, but I don't think I can make the chain check work
    // otherwise. (The chain of the store has to be the same as the chain
    // of the CopyFromReg, or else we can't replace the CopyFromReg with
    // a direct reference to "SP".)
    //
    // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
    // a different addressing mode from other four-byte stores.
    //
    // This pattern usually comes up with call arguments.
    StoreSDNode *ST = cast<StoreSDNode>(N);
    SDValue Ptr = ST->getBasePtr();
    if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
      int RHSC = 0;
      if (Ptr.getOpcode() == ISD::ADD &&
          isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
        Ptr = Ptr.getOperand(0);

      if (Ptr.getOpcode() == ISD::CopyFromReg &&
          cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
          Ptr.getOperand(0) == ST->getChain()) {
        SDValue Ops[] = {ST->getValue(),
                         CurDAG->getRegister(ARM::SP, MVT::i32),
                         CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
                         getAL(CurDAG, dl),
                         CurDAG->getRegister(0, MVT::i32),
                         ST->getChain()};
        MachineSDNode *ResNode =
            CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
        MachineMemOperand *MemOp = ST->getMemOperand();
        CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
        ReplaceNode(N, ResNode);
        return;
      }
    }
    break;
  }
  case ISD::WRITE_REGISTER:
    if (tryWriteRegister(N))
      return;
    break;
  case ISD::READ_REGISTER:
    if (tryReadRegister(N))
      return;
    break;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    if (tryInlineAsm(N))
      return;
    break;
  case ISD::XOR:
    // Select special operations if XOR node forms integer ABS pattern
    if (tryABSOp(N))
      return;
    // Other cases are autogenerated.
    break;
  case ISD::Constant: {
    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
    // If we can't materialize the constant we need to use a literal pool
    if (ConstantMaterializationCost(Val, Subtarget) > 2) {
      SDValue CPIdx = CurDAG->getTargetConstantPool(
          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
          TLI->getPointerTy(CurDAG->getDataLayout()));

      SDNode *ResNode;
      if (Subtarget->isThumb()) {
        SDValue Ops[] = {
          CPIdx,
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
                                         Ops);
      } else {
        SDValue Ops[] = {
          CPIdx,
          CurDAG->getTargetConstant(0, dl, MVT::i32),
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
                                         Ops);
      }
      // Annotate the Node with memory operand information so that MachineInstr
      // queries work properly. This e.g. gives the register allocation the
      // required information for rematerialization.
      MachineFunction& MF = CurDAG->getMachineFunction();
      MachineMemOperand *MemOp =
          MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                                  MachineMemOperand::MOLoad, 4, 4);

      CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});

      ReplaceNode(N, ResNode);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
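
  // Example of the literal-pool fallback above (illustrative, assuming a
  // Thumb1 target): a constant like 0x12345678 cannot be built in two or
  // fewer mov/add/shift steps, so ConstantMaterializationCost exceeds 2 and
  // the value is loaded with tLDRpci from the constant pool instead.
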
  case ISD::FrameIndex: {
    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    if (Subtarget->isThumb1Only()) {
      // Set the alignment of the frame object to 4, to avoid having to
      // generate more than one ADD.
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (MFI.getObjectAlignment(FI) < 4)
        MFI.setObjectAlignment(FI, 4);
      CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
                           CurDAG->getTargetConstant(0, dl, MVT::i32));
      return;
    } else {
      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
                      ARM::t2ADDri : ARM::ADDri);
      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return;
    }
  }
  case ISD::SRL:
    if (tryV6T2BitfieldExtractOp(N, false))
      return;
    break;
  case ISD::SIGN_EXTEND_INREG:
  case ISD::SRA:
    if (tryV6T2BitfieldExtractOp(N, true))
      return;
    break;
  case ISD::MUL:
    if (Subtarget->isThumb1Only())
      break;
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      unsigned RHSV = C->getZExtValue();
      if (!RHSV) break;
      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
        unsigned ShImm = Log2_32(RHSV-1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
          return;
        }
      }
      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
        unsigned ShImm = Log2_32(RHSV+1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
          return;
        }
      }
    }
    break;
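
  // Concretely (a sketch): multiplying by 9 = 2^3+1 becomes
  // "add r0, r0, r0, lsl #3", while multiplying by 7 = 2^3-1 becomes
  // "rsb r0, r0, r0, lsl #3"; both avoid materializing the multiplier.
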
  case ISD::AND: {
    // Check for unsigned bitfield extract
    if (tryV6T2BitfieldExtractOp(N, false))
      return;

    // If an immediate is used in an AND node, it is possible that the immediate
    // can be more optimally materialized when negated. If this is the case we
    // can negate the immediate and use a BIC instead.
    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
      uint32_t Imm = (uint32_t) N1C->getZExtValue();

      // In Thumb2 mode, an AND can take a 12-bit immediate. If this
      // immediate can be negated and fit in the immediate operand of
      // a t2BIC, don't do any manual transform here as this can be
      // handled by the generic ISel machinery.
      bool PreferImmediateEncoding =
          Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
      if (!PreferImmediateEncoding &&
          ConstantMaterializationCost(Imm, Subtarget) >
              ConstantMaterializationCost(~Imm, Subtarget)) {
        // The current immediate costs more to materialize than a negated
        // immediate, so negate the immediate and use a BIC.
        SDValue NewImm =
            CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
        // If the new constant didn't exist before, reposition it in the
        // topological ordering so it is just before N. Otherwise, don't touch
        // its location.
        if (NewImm->getNodeId() == -1)
          CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());

        if (!Subtarget->hasThumb2()) {
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
                           N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
          return;
        } else {
          SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N,
                      CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
          return;
        }
      }
    }

    // (and (or x, c2), c1), where the top 16 bits of c1 and c2 match, the
    // lower 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0.
    // That is, the top 16 bits are entirely contributed by c2 and the lower
    // 16 bits are entirely contributed by x. That's equal to
    // (or (and x, 0xffff), (and c1, 0xffff0000)).
    // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
    EVT VT = N->getValueType(0);
    if (VT != MVT::i32)
      break;
    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
                       ? ARM::t2MOVTi16
                       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
    if (!Opc)
      break;
    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
    N1C = dyn_cast<ConstantSDNode>(N1);
    if (!N1C)
      break;
    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
      SDValue N2 = N0.getOperand(1);
      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
      if (!N2C)
        break;
      unsigned N1CVal = N1C->getZExtValue();
      unsigned N2CVal = N2C->getZExtValue();
      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
          (N1CVal & 0xffffU) == 0xffffU &&
          (N2CVal & 0xffffU) == 0x0U) {
        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
                                                  dl, MVT::i32);
        SDValue Ops[] = { N0.getOperand(0), Imm16,
                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
        ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
        return;
      }
    }

    break;
  }
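
  // Example of the BIC rewrite above (a sketch, Thumb1): "and rX, #0xffffff00"
  // would need a literal-pool load for the mask, but its complement 0xff is a
  // cheap immediate, so something like "movs rY, #0xff; bics rX, rY" is
  // emitted instead.
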
  case ARMISD::UMAAL: {
    unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      N->getOperand(2), N->getOperand(3),
                      getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
    return;
  }
  case ARMISD::UMLAL: {
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SMLAL: {
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SUBE: {
    if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
      break;
    // Look for a pattern to match SMMLS:
    // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
    if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
        N->getOperand(2).getOpcode() != ARMISD::SUBC ||
        !SDValue(N, 1).use_empty())
      break;

    if (Subtarget->isThumb())
      assert(Subtarget->hasThumb2() &&
             "This pattern should not be generated for Thumb");

    SDValue SmulLoHi = N->getOperand(1);
    SDValue Subc = N->getOperand(2);
    auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));

    if (!Zero || Zero->getZExtValue() != 0 ||
        Subc.getOperand(1) != SmulLoHi.getValue(0) ||
        N->getOperand(1) != SmulLoHi.getValue(1) ||
        N->getOperand(2) != Subc.getValue(1))
      break;

    unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
    SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
                      N->getOperand(0), getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
    return;
  }
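
  // Semantics check for the SMMLS match above (informal): smmls computes
  // Rd = bits[63:32] of ((Ra << 32) - Rn*Rm), and
  // sube(a, hi(x*y), borrow(subc(0, lo(x*y)))) computes exactly that high
  // word, so the whole three-node cluster collapses into one instruction.
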
  case ISD::LOAD: {
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
      if (tryT2IndexedLoad(N))
        return;
    } else if (Subtarget->isThumb()) {
      if (tryT1IndexedLoad(N))
        return;
    } else if (tryARMIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  }
  case ISD::MLOAD:
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  case ARMISD::WLS:
  case ARMISD::LE: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    unsigned Opc = N->getOpcode() == ARMISD::WLS ?
        ARM::t2WhileLoopStart : ARM::t2LoopEnd;
    SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LDRD: {
    if (Subtarget->isThumb2())
      break; // TableGen handles isel in this case.
    SDValue Base, RegOffset, ImmOffset;
    const SDValue &Chain = N->getOperand(0);
    const SDValue &Addr = N->getOperand(1);
    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
    SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
    SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
                                         {MVT::Untyped, MVT::Other}, Ops);
    SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                SDValue(New, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                SDValue(New, 0));
    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), SDValue(New, 1));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LOOP_DEC: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    SDNode *Dec =
        CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                               CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
    ReplaceUses(N, Dec);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::BRCOND: {
    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    unsigned Opc = Subtarget->isThumb() ?
        ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
    SDValue Chain = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue N2 = N->getOperand(2);
    SDValue N3 = N->getOperand(3);
    SDValue InFlag = N->getOperand(4);
    assert(N1.getOpcode() == ISD::BasicBlock);
    assert(N2.getOpcode() == ISD::Constant);
    assert(N3.getOpcode() == ISD::Register);

    unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();

    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
        SDValue Int = InFlag.getOperand(0);
        uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();

        // Handle low-overhead loops.
        if (ID == Intrinsic::loop_decrement_reg) {
          SDValue Elements = Int.getOperand(2);
          SDValue Size = CurDAG->getTargetConstant(
              cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
              MVT::i32);

          SDValue Args[] = { Elements, Size, Int.getOperand(0) };
          SDNode *LoopDec =
              CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                                     CurDAG->getVTList(MVT::i32, MVT::Other),
                                     Args);
          ReplaceUses(Int.getNode(), LoopDec);

          SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
          SDNode *LoopEnd =
              CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);

          ReplaceUses(N, LoopEnd);
          CurDAG->RemoveDeadNode(N);
          CurDAG->RemoveDeadNode(InFlag.getNode());
          CurDAG->RemoveDeadNode(Int.getNode());
          return;
        }
      }

      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
      InFlag = N->getOperand(4);

      if (SwitchEQNEToPLMI) {
        switch ((ARMCC::CondCodes)CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = (unsigned)ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = (unsigned)ARMCC::PL;
          break;
        }
      }
    }

    SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
                                             MVT::Glue, Ops);
    Chain = SDValue(ResNode, 0);
    if (N->getNumValues() == 2) {
      InFlag = SDValue(ResNode, 1);
      ReplaceUses(SDValue(N, 1), InFlag);
    }
    ReplaceUses(SDValue(N, 0),
                SDValue(Chain.getNode(), Chain.getResNo()));
    CurDAG->RemoveDeadNode(N);
    return;
  }

  case ARMISD::CMPZ: {
    // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
    // This allows us to avoid materializing the expensive negative constant.
    // The CMPZ #0 is useless and will be peepholed away but we need to keep it
    // for its glue output.
    SDValue X = N->getOperand(0);
    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
    if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
      int64_t Addend = -C->getSExtValue();

      SDNode *Add = nullptr;
      // ADDS can be better than CMN if the immediate fits in a
      // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
      // Outside that range we can just use a CMN which is 32-bit but has a
      // 12-bit immediate range.
      if (Addend < 1<<8) {
        if (Subtarget->isThumb2()) {
          SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                            CurDAG->getRegister(0, MVT::i32) };
          Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
        } else {
          unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
                           CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
          Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
        }
      }
      if (Add) {
        SDValue Ops2[] = {SDValue(Add, 0),
                          CurDAG->getConstant(0, dl, MVT::i32)};
        CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue),
                            Ops2);
      }
    }
    // Other cases are autogenerated.
    break;
  }
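
  // For example (a sketch, Thumb1): comparing X against -5 would need -5 in
  // a register, but rewriting CMPZ(X, #-5) as CMPZ(ADDS(X, #5), #0) keeps
  // the EQ/NE result while using only a cheap positive immediate.
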
  case ARMISD::CMOV: {
    SDValue InFlag = N->getOperand(4);

    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);

      if (SwitchEQNEToPLMI) {
        SDValue ARMcc = N->getOperand(2);
        ARMCC::CondCodes CC =
            (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();

        switch (CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = ARMCC::PL;
          break;
        }
        SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
        SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
                         N->getOperand(3), N->getOperand(4)};
        CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
      }
    }
    // Other cases are autogenerated.
    break;
  }

  case ARMISD::VZIP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VZIPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VZIPd16; break;
    case MVT::v2f32:
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VZIPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VZIPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VZIPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VUZP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VUZPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VUZPd16; break;
    case MVT::v2f32:
    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VUZPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VUZPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VUZPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VTRN: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VTRNd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VTRNd16; break;
    case MVT::v2f32:
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VTRNq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VTRNq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VTRNq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::BUILD_VECTOR: {
    EVT VecVT = N->getValueType(0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      ReplaceNode(
          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(N,
                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
                                    N->getOperand(2), N->getOperand(3)));
    return;
  }
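
  // Note on the BUILD_VECTOR case above: the createDRegPairNode /
  // createSRegPairNode / createQuadSRegsNode helpers build REG_SEQUENCE-style
  // nodes, since a vector of f32/f64 scalars is just the concatenation of the
  // scalar registers into the right D/Q register tuple; no data-processing
  // instruction is needed.
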
  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
    return;
  }

  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                        ARM::VLD2DUPd16wb_fixed,
                                        ARM::VLD2DUPd32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                        ARM::VLD3DUPd16Pseudo_UPD,
                                        ARM::VLD3DUPd32Pseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                        ARM::VLD4DUPd16Pseudo_UPD,
                                        ARM::VLD4DUPd32Pseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
    return;
  }

  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD2_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
                                         ARM::VLD2d16wb_fixed,
                                         ARM::VLD2d32wb_fixed,
                                         ARM::VLD1q64wb_fixed};
    static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
                                         ARM::VLD2q16PseudoWB_fixed,
                                         ARM::VLD2q32PseudoWB_fixed };
    SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
                                         ARM::VLD4d16Pseudo_UPD,
                                         ARM::VLD4d32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                          ARM::VLD4q16Pseudo_UPD,
                                          ARM::VLD4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
                                          ARM::VLD4q16oddPseudo_UPD,
                                          ARM::VLD4q32oddPseudo_UPD };
    SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST2_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
                                         ARM::VST2d16wb_fixed,
                                         ARM::VST2d32wb_fixed,
                                         ARM::VST1q64wb_fixed};
    static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
                                         ARM::VST2q16PseudoWB_fixed,
                                         ARM::VST2q32PseudoWB_fixed };
    SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
                                         ARM::VST4d16Pseudo_UPD,
                                         ARM::VST4d32Pseudo_UPD,
                                         ARM::VST1d64QPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                          ARM::VST4q16Pseudo_UPD,
                                          ARM::VST4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
                                          ARM::VST4q16oddPseudo_UPD,
                                          ARM::VST4q32oddPseudo_UPD };
    SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */

      // The ARM mrrc2 instruction does not allow predicates: the top 4 bits
      // of the encoded instruction are always '1111'. Assembly language does
      // allow AL to be written as a predicate on mrrc2, but it makes no
      // difference to the encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns an i64 value in {i32, i32}.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32, SDValue(Ld, 0),
                                                   SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32, SDValue(Ld, 0),
                                                   SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // Store-exclusive double returns an i32 value that is the status of
      // the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }
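
    // Note on the ARM-mode paths above: LDREXD/STREXD operate on an even/odd
    // register pair, so the i64 value is modelled as a single Untyped GPRPair
    // node, with the i32 halves accessed through the gsub_0/gsub_1 subregister
    // indices (see createGPRPairNode and the EXTRACT_SUBREG uses).
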
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo,
                                           ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo,
                                           ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_mve_vldr_gather_base_wb:
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
      static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
                                         ARM::MVE_VLDRDU64_qi_pre};
      SelectMVE_WB(N, Opcodes,
                   IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
      return;
    }

    case Intrinsic::arm_mve_vld2q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes);
      return;
    }

    case Intrinsic::arm_mve_vld4q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16,
                                           ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32,
                                           ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes);
      return;
    }
    }
    break;
  }

  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;
    case Intrinsic::arm_mve_lsll:
      SelectMVE_LongShift(N, ARM::MVE_LSLLr, false, false);
      return;
    case Intrinsic::arm_mve_asrl:
      SelectMVE_LongShift(N, ARM::MVE_ASRLr, false, false);
      return;

    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;

    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32-bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64-bit), extract the integer operands from
// its fields, and append them to the provided vector.
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
// mask operand, which expresses which register is to be used, e.g. r8, and in
// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
// was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}
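
// For example (illustrative): the register string "cp15:0:c13:c0:3" splits
// into five fields, trimming and getAsInteger() yield {15, 0, 13, 0, 3}, and
// five target constants are appended, which tryReadRegister below then
// lowers to an MRC/t2MRC node.
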
// Maps MClass special registers string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contain the fields to be accessed in the special register, set
  // by the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc"
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}
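
// Worked example (illustrative): for "spsr_fc" the loop above sets Mask to
// 0x8 | 0x1 = 0x9 and the trailing check adds the R bit, giving 0x19; a bare
// "cpsr" takes the "fc" default path and returns 0x9.
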
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then we need to validate that the register
  // string is an acceptable value, so check that a mask can be constructed
  // from the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}
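// As an illustrative sketch (the value names and metadata ids below are
// hypothetical), IR of the form
//   %v = call i32 @llvm.read_register.i32(metadata !0)
//   !0 = !{!"cp15:0:c13:c0:3"}
// reaches the function above and is lowered to an MRC (or t2MRC) machine
// node, while a string naming a single special register, e.g. !{!"fpscr"},
// selects the corresponding VMRS/MRS-style node instead.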
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin() + 2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin() + 2, WriteValue, WriteValue + 2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split the register string into the register name and any flags that
  // follow the last underscore.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;
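  // Worked example (illustrative, not from the original source): for the
  // metadata string "spsr_fc", rsplit('_') yields Reg == "spsr" and
  // Flags == "fc", which getARClassRegisterMask below turns into the mask
  // 0x19; for a string with no underscore, e.g. "apsr", Flags is empty.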
  // If the target was M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For
  // Thumb, the 64-bit data may be referred to by H, Q, R modifiers, so we
  // still pack them into a GPRPair.
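  // For example (an illustrative sketch of the motivating case; the value
  // names are hypothetical), IR such as
  //   %v = call i64 asm sideeffect "ldrexd $0, ${0:H}, [$1]",
  //                                "=&r,r"(i64* %ptr)
  // names the low half of the i64 result with $0 and the high half with
  // ${0:H}; rewriting the two i32 registers into a single GPRPair operand
  // below guarantees they end up as an (even, even+1) register pair.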
  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1)
                                   : SDValue(nullptr, 0);

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    } else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here
    // because it's important to update the OpChanged array correctly before
    // moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef &&
        Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) ||
        NumRegs != 2)
      continue;

    assert((i + 2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i + 1);
    SDValue V1 = N->getOperand(i + 2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 =
          CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, RegCopy);
      SDValue Sub1 =
          CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, RegCopy);
      SDValue T0 =
          CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end() - 1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 =
          CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, Chain.getValue(1));
      SDValue T1 =
          CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;
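    // If the two GPRs were rewritten into a GPRPair above, patch the operand
    // list to match: the flag word now describes a single register of the
    // GPRPair class (or a matching-constraint operand), and the pair register
    // replaces the original two GPR register operands.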
    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum */);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] =
          CurDAG->getTargetConstant(Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
                                CurDAG->getVTList(MVT::Other, MVT::Glue),
                                AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}

bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}
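// Typical use (an illustrative sketch; the exact call site lives in the ARM
// target machine's pass configuration and may differ between versions):
//   bool ARMPassConfig::addInstSelector() {
//     addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
//     return false;
//   }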