//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

// Debugging aid: disables matching of shifter operands during isel so that
// shifter-op related selection problems can be isolated.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  /// Turn a constant condition-code operand (plus the implicit CPSR flags
  /// register) into the (Pred, Reg) operand pair used by conditional-move
  /// patterns. N must be a ConstantSDNode.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  // Predicates testing whether an immediate (or its complement) is encodable
  // as an ARM / Thumb-2 modified-immediate ("so_imm") operand.
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// Select long MVE vector reductions with two vector operands
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant.  Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // Find which addend is the AND-with-immediate; put it in N1.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After dropping the trailing zeros the mask must be of the form 0..01..1.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
458 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { 459 if (OptLevel == CodeGenOpt::None) 460 return true; 461 462 if (!Subtarget->hasVMLxHazards()) 463 return true; 464 465 if (!N->hasOneUse()) 466 return false; 467 468 SDNode *Use = *N->use_begin(); 469 if (Use->getOpcode() == ISD::CopyToReg) 470 return true; 471 if (Use->isMachineOpcode()) { 472 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 473 CurDAG->getSubtarget().getInstrInfo()); 474 475 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); 476 if (MCID.mayStore()) 477 return true; 478 unsigned Opcode = MCID.getOpcode(); 479 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 480 return true; 481 // vmlx feeding into another vmlx. We actually want to unfold 482 // the use later in the MLxExpansion pass. e.g. 483 // vmla 484 // vmla (stall 8 cycles) 485 // 486 // vmul (5 cycles) 487 // vadd (5 cycles) 488 // vmla 489 // This adds up to about 18 - 19 cycles. 490 // 491 // vmla 492 // vmul (stall 4 cycles) 493 // vadd adds up to about 14 cycles. 494 return TII->isFpMLxInstruction(Opcode); 495 } 496 497 return false; 498 } 499 500 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, 501 ARM_AM::ShiftOpc ShOpcVal, 502 unsigned ShAmt) { 503 if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) 504 return true; 505 if (Shift.hasOneUse()) 506 return true; 507 // R << 2 is free. 508 return ShOpcVal == ARM_AM::lsl && 509 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); 510 } 511 512 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, 513 unsigned MaxShift, 514 unsigned &PowerOfTwo, 515 SDValue &NewMulConst) const { 516 assert(N.getOpcode() == ISD::MUL); 517 assert(MaxShift > 0); 518 519 // If the multiply is used in more than one place then changing the constant 520 // will make other uses incorrect, so don't. 
521 if (!N.hasOneUse()) return false; 522 // Check if the multiply is by a constant 523 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); 524 if (!MulConst) return false; 525 // If the constant is used in more than one place then modifying it will mean 526 // we need to materialize two constants instead of one, which is a bad idea. 527 if (!MulConst->hasOneUse()) return false; 528 unsigned MulConstVal = MulConst->getZExtValue(); 529 if (MulConstVal == 0) return false; 530 531 // Find the largest power of 2 that MulConstVal is a multiple of 532 PowerOfTwo = MaxShift; 533 while ((MulConstVal % (1 << PowerOfTwo)) != 0) { 534 --PowerOfTwo; 535 if (PowerOfTwo == 0) return false; 536 } 537 538 // Only optimise if the new cost is better 539 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); 540 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); 541 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); 542 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); 543 return NewCost < OldCost; 544 } 545 546 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { 547 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); 548 ReplaceUses(N, M); 549 } 550 551 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, 552 SDValue &BaseReg, 553 SDValue &Opc, 554 bool CheckProfitability) { 555 if (DisableShifterOp) 556 return false; 557 558 // If N is a multiply-by-constant and it's profitable to extract a shift and 559 // use it in a shifted operand do so. 
560 if (N.getOpcode() == ISD::MUL) { 561 unsigned PowerOfTwo = 0; 562 SDValue NewMulConst; 563 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { 564 HandleSDNode Handle(N); 565 SDLoc Loc(N); 566 replaceDAGValue(N.getOperand(1), NewMulConst); 567 BaseReg = Handle.getValue(); 568 Opc = CurDAG->getTargetConstant( 569 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); 570 return true; 571 } 572 } 573 574 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 575 576 // Don't match base register only case. That is matched to a separate 577 // lower complexity pattern with explicit register operand. 578 if (ShOpcVal == ARM_AM::no_shift) return false; 579 580 BaseReg = N.getOperand(0); 581 unsigned ShImmVal = 0; 582 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 583 if (!RHS) return false; 584 ShImmVal = RHS->getZExtValue() & 31; 585 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 586 SDLoc(N), MVT::i32); 587 return true; 588 } 589 590 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, 591 SDValue &BaseReg, 592 SDValue &ShReg, 593 SDValue &Opc, 594 bool CheckProfitability) { 595 if (DisableShifterOp) 596 return false; 597 598 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 599 600 // Don't match base register only case. That is matched to a separate 601 // lower complexity pattern with explicit register operand. 
602 if (ShOpcVal == ARM_AM::no_shift) return false; 603 604 BaseReg = N.getOperand(0); 605 unsigned ShImmVal = 0; 606 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 607 if (RHS) return false; 608 609 ShReg = N.getOperand(1); 610 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) 611 return false; 612 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 613 SDLoc(N), MVT::i32); 614 return true; 615 } 616 617 // Determine whether an ISD::OR's operands are suitable to turn the operation 618 // into an addition, which often has more compact encodings. 619 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { 620 assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); 621 Out = N; 622 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); 623 } 624 625 626 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, 627 SDValue &Base, 628 SDValue &OffImm) { 629 // Match simple R + imm12 operands. 630 631 // Base only. 632 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 633 !CurDAG->isBaseWithConstantOffset(N)) { 634 if (N.getOpcode() == ISD::FrameIndex) { 635 // Match frame index. 
636 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 637 Base = CurDAG->getTargetFrameIndex( 638 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 639 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 640 return true; 641 } 642 643 if (N.getOpcode() == ARMISD::Wrapper && 644 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 645 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 646 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 647 Base = N.getOperand(0); 648 } else 649 Base = N; 650 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 651 return true; 652 } 653 654 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 655 int RHSC = (int)RHS->getSExtValue(); 656 if (N.getOpcode() == ISD::SUB) 657 RHSC = -RHSC; 658 659 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits 660 Base = N.getOperand(0); 661 if (Base.getOpcode() == ISD::FrameIndex) { 662 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 663 Base = CurDAG->getTargetFrameIndex( 664 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 665 } 666 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 667 return true; 668 } 669 } 670 671 // Base only. 672 Base = N; 673 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 674 return true; 675 } 676 677 678 679 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, 680 SDValue &Opc) { 681 if (N.getOpcode() == ISD::MUL && 682 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { 683 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 684 // X * [3,5,9] -> X + X * [2,4,8] etc. 
685 int RHSC = (int)RHS->getZExtValue(); 686 if (RHSC & 1) { 687 RHSC = RHSC & ~1; 688 ARM_AM::AddrOpc AddSub = ARM_AM::add; 689 if (RHSC < 0) { 690 AddSub = ARM_AM::sub; 691 RHSC = - RHSC; 692 } 693 if (isPowerOf2_32(RHSC)) { 694 unsigned ShAmt = Log2_32(RHSC); 695 Base = Offset = N.getOperand(0); 696 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 697 ARM_AM::lsl), 698 SDLoc(N), MVT::i32); 699 return true; 700 } 701 } 702 } 703 } 704 705 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 706 // ISD::OR that is equivalent to an ISD::ADD. 707 !CurDAG->isBaseWithConstantOffset(N)) 708 return false; 709 710 // Leave simple R +/- imm12 operands for LDRi12 711 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { 712 int RHSC; 713 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 714 -0x1000+1, 0x1000, RHSC)) // 12 bits. 715 return false; 716 } 717 718 // Otherwise this is R +/- [possibly shifted] R. 719 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; 720 ARM_AM::ShiftOpc ShOpcVal = 721 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 722 unsigned ShAmt = 0; 723 724 Base = N.getOperand(0); 725 Offset = N.getOperand(1); 726 727 if (ShOpcVal != ARM_AM::no_shift) { 728 // Check to see if the RHS of the shift is a constant, if not, we can't fold 729 // it. 730 if (ConstantSDNode *Sh = 731 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 732 ShAmt = Sh->getZExtValue(); 733 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 734 Offset = N.getOperand(1).getOperand(0); 735 else { 736 ShAmt = 0; 737 ShOpcVal = ARM_AM::no_shift; 738 } 739 } else { 740 ShOpcVal = ARM_AM::no_shift; 741 } 742 } 743 744 // Try matching (R shl C) + (R). 
745 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && 746 !(Subtarget->isLikeA9() || Subtarget->isSwift() || 747 N.getOperand(0).hasOneUse())) { 748 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); 749 if (ShOpcVal != ARM_AM::no_shift) { 750 // Check to see if the RHS of the shift is a constant, if not, we can't 751 // fold it. 752 if (ConstantSDNode *Sh = 753 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { 754 ShAmt = Sh->getZExtValue(); 755 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { 756 Offset = N.getOperand(0).getOperand(0); 757 Base = N.getOperand(1); 758 } else { 759 ShAmt = 0; 760 ShOpcVal = ARM_AM::no_shift; 761 } 762 } else { 763 ShOpcVal = ARM_AM::no_shift; 764 } 765 } 766 } 767 768 // If Offset is a multiply-by-constant and it's profitable to extract a shift 769 // and use it in a shifted operand do so. 770 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { 771 unsigned PowerOfTwo = 0; 772 SDValue NewMulConst; 773 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { 774 HandleSDNode Handle(Offset); 775 replaceDAGValue(Offset.getOperand(1), NewMulConst); 776 Offset = Handle.getValue(); 777 ShAmt = PowerOfTwo; 778 ShOpcVal = ARM_AM::lsl; 779 } 780 } 781 782 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 783 SDLoc(N), MVT::i32); 784 return true; 785 } 786 787 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 788 SDValue &Offset, SDValue &Opc) { 789 unsigned Opcode = Op->getOpcode(); 790 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 791 ? cast<LoadSDNode>(Op)->getAddressingMode() 792 : cast<StoreSDNode>(Op)->getAddressingMode(); 793 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 794 ? 
ARM_AM::add : ARM_AM::sub; 795 int Val; 796 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) 797 return false; 798 799 Offset = N; 800 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 801 unsigned ShAmt = 0; 802 if (ShOpcVal != ARM_AM::no_shift) { 803 // Check to see if the RHS of the shift is a constant, if not, we can't fold 804 // it. 805 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 806 ShAmt = Sh->getZExtValue(); 807 if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) 808 Offset = N.getOperand(0); 809 else { 810 ShAmt = 0; 811 ShOpcVal = ARM_AM::no_shift; 812 } 813 } else { 814 ShOpcVal = ARM_AM::no_shift; 815 } 816 } 817 818 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 819 SDLoc(N), MVT::i32); 820 return true; 821 } 822 823 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 824 SDValue &Offset, SDValue &Opc) { 825 unsigned Opcode = Op->getOpcode(); 826 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 827 ? cast<LoadSDNode>(Op)->getAddressingMode() 828 : cast<StoreSDNode>(Op)->getAddressingMode(); 829 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 830 ? ARM_AM::add : ARM_AM::sub; 831 int Val; 832 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 833 if (AddSub == ARM_AM::sub) Val *= -1; 834 Offset = CurDAG->getRegister(0, MVT::i32); 835 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32); 836 return true; 837 } 838 839 return false; 840 } 841 842 843 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 844 SDValue &Offset, SDValue &Opc) { 845 unsigned Opcode = Op->getOpcode(); 846 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 847 ? cast<LoadSDNode>(Op)->getAddressingMode() 848 : cast<StoreSDNode>(Op)->getAddressingMode(); 849 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 850 ? 
ARM_AM::add : ARM_AM::sub; 851 int Val; 852 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 853 Offset = CurDAG->getRegister(0, MVT::i32); 854 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, 855 ARM_AM::no_shift), 856 SDLoc(Op), MVT::i32); 857 return true; 858 } 859 860 return false; 861 } 862 863 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { 864 Base = N; 865 return true; 866 } 867 868 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, 869 SDValue &Base, SDValue &Offset, 870 SDValue &Opc) { 871 if (N.getOpcode() == ISD::SUB) { 872 // X - C is canonicalize to X + -C, no need to handle it here. 873 Base = N.getOperand(0); 874 Offset = N.getOperand(1); 875 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N), 876 MVT::i32); 877 return true; 878 } 879 880 if (!CurDAG->isBaseWithConstantOffset(N)) { 881 Base = N; 882 if (N.getOpcode() == ISD::FrameIndex) { 883 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 884 Base = CurDAG->getTargetFrameIndex( 885 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 886 } 887 Offset = CurDAG->getRegister(0, MVT::i32); 888 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 889 MVT::i32); 890 return true; 891 } 892 893 // If the RHS is +/- imm8, fold into addr mode. 894 int RHSC; 895 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 896 -256 + 1, 256, RHSC)) { // 8 bits. 
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // Fold the sign of the constant into the AM3 opcode word.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant is out of the imm8 range: fall back to register + register.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

/// SelectAddrMode3Offset - Match the offset operand of a pre/post-indexed
/// AM3 load/store. The indexed-mode of \p Op decides the add/sub sense.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

/// IsAddressingMode5 - Shared worker for addressing mode 5 (VFP load/store):
/// base register plus a scaled +/- imm8 offset. \p FP16 selects the FP16
/// variant, which halves the scale (2 instead of 4) and uses the AM5FP16
/// opcode encoding.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ?
                           2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // Fold the sign of the constant into the add/sub sense of the opcode.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Constant did not fit: base only, zero offset.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

/// SelectAddrMode5 - AM5 (VFP single/double load-store) matcher.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

/// SelectAddrMode5FP16 - AM5 matcher for the FP16 load/store variants.
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

/// SelectAddrMode6 - NEON addressing mode 6: address register plus an
/// alignment operand derived from the memory operand of \p Parent.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

/// SelectAddrMode6Offset - Post-increment offset for AM6. If the increment
/// equals the access size, the hardware's implicit update is used (offset
/// becomes the zero register); otherwise the register increment is kept.
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

/// SelectAddrModePC - Match a single-use ARMISD::PIC_ADD, splitting it into
/// its offset operand and its constant label id.
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

/// Returns true when \p N is an ADD of a small negative constant that is
/// better selected as a zero-offset load/store plus an explicit sub.
static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

/// SelectThumbAddrModeRRSext - Match a register+register Thumb address.
/// A bare constant zero also matches (base and offset both the zero node).
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

/// SelectThumbAddrModeRR - As RRSext, but defers small negative constants
/// to the ri form (see shouldUseZeroOffsetLdSt).
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

/// SelectThumbAddrModeImm5S - Match base + (imm5 * Scale) for Thumb1
/// loads/stores; Scale is the access size in bytes (1, 2 or 4).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    // Keep the negative add intact and use a zero offset here.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

/// Word-sized (scale 4) imm5 matcher.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

/// Halfword-sized (scale 2) imm5 matcher.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

/// Byte-sized (scale 1) imm5 matcher.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

/// SelectThumbAddrModeSP - Match SP-relative (frame-index based) addresses
/// with an optional imm8*4 offset. May raise a frame object's alignment to 4
/// so the scaled offset encoding stays legal.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot.
      // (An out-of-range access is UB, but it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        if (MFI.getObjectAlignment(FI) >= 4) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

/// SelectTAddrModeImm7 - Match base +/- (imm7 << Shift) for Thumb1; always
/// succeeds, falling back to base-only with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}


//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//


/// SelectT2AddrModeImm12 - Match base + imm12 (unsigned) for t2LDRi12-style
/// instructions. (R - imm8) is deliberately rejected so t2LDRi8 handles it.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

/// SelectT2AddrModeImm8 - Match base + negative imm8 for t2LDRi8-style
/// instructions; only strictly negative offsets in [-255, -1] are accepted.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// SelectT2AddrModeImm8Offset - Match the imm8 offset of a pre/post-indexed
/// T2 load/store; the immediate is negated for decrementing modes.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
        ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
        : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

/// SelectT2AddrModeImm7 - Match base +/- (imm7 << Shift); always succeeds,
/// falling back to base-only with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

/// Template front-end for the runtime-Shift overload below.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

/// SelectT2AddrModeImm7Offset - Match the (imm7 << Shift) offset of a
/// pre/post-indexed (possibly masked) load/store; negated for decrementing
/// modes.
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}

/// SelectImmediateInRange - Match a plain constant in [Min, Max).
template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

/// SelectT2AddrModeSoReg - Match (R + R) or (R + (R << [1,2,3])) for the
/// T2 shifted-register addressing mode. Constant offsets are rejected so
/// the imm12/imm8 matchers get first pick.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

/// SelectT2AddrModeExclusive - Address matcher for ldrex/strex: base plus a
/// word-scaled offset in [0, 1020]. Always succeeds (falls back to base +
/// zero offset) because these instructions have no other addressing form.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  // The instruction encodes the offset in words.
  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns a ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

/// Copy the MachineMemOperand of \p N onto the freshly created machine node
/// \p Result so alias/volatility info survives selection.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

/// tryARMIndexedLoad - Select a pre/post-indexed ARM-mode load for \p N,
/// choosing the AM2 (word/byte) or AM3 (halfword/signed-byte) offset form.
/// Returns true and replaces N on success.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
        ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
        : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // Sign-extending byte loads use AM3; zero-extending ones use AM2.
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // Pre-indexed immediate forms take no separate offset operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

/// tryT1IndexedLoad - Select a Thumb1 post-incremented i32 load. Only a
/// non-extending load whose post-increment equals 4 qualifies.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

/// tryT2IndexedLoad - Select a pre/post-indexed Thumb2 load (t2LDR*_PRE /
/// t2LDR*_POST) for i32/i16/i8/i1 memory types with an imm8 offset.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

/// tryMVEIndexedLoad - Select a pre/post-indexed MVE vector load for either
/// a plain LoadSDNode or a MaskedLoadSDNode; masked loads become predicated
/// (VPT 'Then') instructions with the mask as the predicate register.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  unsigned Align;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Align = LD->getAlignment();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Align = LD->getAlignment();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We
  // allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // Pick the narrowest MVE load whose element size, extension and alignment
  // requirements match; the offset must fit the imm7 << shift encoding.
  SDValue NewOffset;
  if (Align >= 2 && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Align >= 4 &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Align >= 2 &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base, NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), N->getValueType(0),
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  // Note the swap: the machine node's result 0 is the writeback and result 1
  // is the loaded value, the reverse of the ISD load's result numbering.
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
1798 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1799 SDLoc dl(V0.getNode()); 1800 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1801 MVT::i32); 1802 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1803 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1804 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1805 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1806 } 1807 1808 /// Form 4 consecutive D registers from a pair of Q registers. 1809 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1810 SDLoc dl(V0.getNode()); 1811 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1812 MVT::i32); 1813 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1814 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1815 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1816 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1817 } 1818 1819 /// Form 4 consecutive S registers. 1820 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1821 SDValue V2, SDValue V3) { 1822 SDLoc dl(V0.getNode()); 1823 SDValue RegClass = 1824 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1825 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1826 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1827 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1828 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1829 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1830 V2, SubReg2, V3, SubReg3 }; 1831 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1832 } 1833 1834 /// Form 4 consecutive D registers. 
1835 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1836 SDValue V2, SDValue V3) { 1837 SDLoc dl(V0.getNode()); 1838 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1839 MVT::i32); 1840 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1841 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1842 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1843 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1844 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1845 V2, SubReg2, V3, SubReg3 }; 1846 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1847 } 1848 1849 /// Form 4 consecutive Q registers. 1850 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1851 SDValue V2, SDValue V3) { 1852 SDLoc dl(V0.getNode()); 1853 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1854 MVT::i32); 1855 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1856 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1857 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1858 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1859 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1860 V2, SubReg2, V3, SubReg3 }; 1861 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1862 } 1863 1864 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1865 /// of a NEON VLD or VST instruction. The supported values depend on the 1866 /// number of registers being loaded. 
1867 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1868 unsigned NumVecs, bool is64BitVector) { 1869 unsigned NumRegs = NumVecs; 1870 if (!is64BitVector && NumVecs < 3) 1871 NumRegs *= 2; 1872 1873 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1874 if (Alignment >= 32 && NumRegs == 4) 1875 Alignment = 32; 1876 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1877 Alignment = 16; 1878 else if (Alignment >= 8) 1879 Alignment = 8; 1880 else 1881 Alignment = 0; 1882 1883 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1884 } 1885 1886 static bool isVLDfixed(unsigned Opc) 1887 { 1888 switch (Opc) { 1889 default: return false; 1890 case ARM::VLD1d8wb_fixed : return true; 1891 case ARM::VLD1d16wb_fixed : return true; 1892 case ARM::VLD1d64Qwb_fixed : return true; 1893 case ARM::VLD1d32wb_fixed : return true; 1894 case ARM::VLD1d64wb_fixed : return true; 1895 case ARM::VLD1d64TPseudoWB_fixed : return true; 1896 case ARM::VLD1d64QPseudoWB_fixed : return true; 1897 case ARM::VLD1q8wb_fixed : return true; 1898 case ARM::VLD1q16wb_fixed : return true; 1899 case ARM::VLD1q32wb_fixed : return true; 1900 case ARM::VLD1q64wb_fixed : return true; 1901 case ARM::VLD1DUPd8wb_fixed : return true; 1902 case ARM::VLD1DUPd16wb_fixed : return true; 1903 case ARM::VLD1DUPd32wb_fixed : return true; 1904 case ARM::VLD1DUPq8wb_fixed : return true; 1905 case ARM::VLD1DUPq16wb_fixed : return true; 1906 case ARM::VLD1DUPq32wb_fixed : return true; 1907 case ARM::VLD2d8wb_fixed : return true; 1908 case ARM::VLD2d16wb_fixed : return true; 1909 case ARM::VLD2d32wb_fixed : return true; 1910 case ARM::VLD2q8PseudoWB_fixed : return true; 1911 case ARM::VLD2q16PseudoWB_fixed : return true; 1912 case ARM::VLD2q32PseudoWB_fixed : return true; 1913 case ARM::VLD2DUPd8wb_fixed : return true; 1914 case ARM::VLD2DUPd16wb_fixed : return true; 1915 case ARM::VLD2DUPd32wb_fixed : return true; 1916 } 1917 } 1918 1919 static bool 
isVSTfixed(unsigned Opc) 1920 { 1921 switch (Opc) { 1922 default: return false; 1923 case ARM::VST1d8wb_fixed : return true; 1924 case ARM::VST1d16wb_fixed : return true; 1925 case ARM::VST1d32wb_fixed : return true; 1926 case ARM::VST1d64wb_fixed : return true; 1927 case ARM::VST1q8wb_fixed : return true; 1928 case ARM::VST1q16wb_fixed : return true; 1929 case ARM::VST1q32wb_fixed : return true; 1930 case ARM::VST1q64wb_fixed : return true; 1931 case ARM::VST1d64TPseudoWB_fixed : return true; 1932 case ARM::VST1d64QPseudoWB_fixed : return true; 1933 case ARM::VST2d8wb_fixed : return true; 1934 case ARM::VST2d16wb_fixed : return true; 1935 case ARM::VST2d32wb_fixed : return true; 1936 case ARM::VST2q8PseudoWB_fixed : return true; 1937 case ARM::VST2q16PseudoWB_fixed : return true; 1938 case ARM::VST2q32PseudoWB_fixed : return true; 1939 } 1940 } 1941 1942 // Get the register stride update opcode of a VLD/VST instruction that 1943 // is otherwise equivalent to the given fixed stride updating instruction. 
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  // Only meaningful for fixed-stride writeback forms; everything else is
  // returned unchanged below.
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  // VLD1 writeback forms.
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  // NOTE(review): ARM::VLD1d64Twb_fixed is mapped here but is not listed in
  // isVLDfixed(), so the assert above would reject it on entry -- confirm
  // whether it should be added to isVLDfixed().
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  // VLD1DUP writeback forms.
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;

  // VST1 writeback forms.
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  // VLD2 writeback forms.
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  // VST2 writeback forms.
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  // VLD2DUP writeback forms.
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
2003 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { 2004 auto C = dyn_cast<ConstantSDNode>(Inc); 2005 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; 2006 } 2007 2008 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 2009 const uint16_t *DOpcodes, 2010 const uint16_t *QOpcodes0, 2011 const uint16_t *QOpcodes1) { 2012 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 2013 SDLoc dl(N); 2014 2015 SDValue MemAddr, Align; 2016 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2017 // nodes are not intrinsics. 2018 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2019 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2020 return; 2021 2022 SDValue Chain = N->getOperand(0); 2023 EVT VT = N->getValueType(0); 2024 bool is64BitVector = VT.is64BitVector(); 2025 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2026 2027 unsigned OpcodeIndex; 2028 switch (VT.getSimpleVT().SimpleTy) { 2029 default: llvm_unreachable("unhandled vld type"); 2030 // Double-register operations: 2031 case MVT::v8i8: OpcodeIndex = 0; break; 2032 case MVT::v4f16: 2033 case MVT::v4i16: OpcodeIndex = 1; break; 2034 case MVT::v2f32: 2035 case MVT::v2i32: OpcodeIndex = 2; break; 2036 case MVT::v1i64: OpcodeIndex = 3; break; 2037 // Quad-register operations: 2038 case MVT::v16i8: OpcodeIndex = 0; break; 2039 case MVT::v8f16: 2040 case MVT::v8i16: OpcodeIndex = 1; break; 2041 case MVT::v4f32: 2042 case MVT::v4i32: OpcodeIndex = 2; break; 2043 case MVT::v2f64: 2044 case MVT::v2i64: OpcodeIndex = 3; break; 2045 } 2046 2047 EVT ResTy; 2048 if (NumVecs == 1) 2049 ResTy = VT; 2050 else { 2051 unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; 2052 if (!is64BitVector) 2053 ResTyElts *= 2; 2054 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2055 } 2056 std::vector<EVT> ResTys; 2057 ResTys.push_back(ResTy); 2058 if (isUpdating) 2059 ResTys.push_back(MVT::i32); 2060 ResTys.push_back(MVT::Other); 2061 2062 SDValue Pred = getAL(CurDAG, dl); 2063 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2064 SDNode *VLd; 2065 SmallVector<SDValue, 7> Ops; 2066 2067 // Double registers and VLD1/VLD2 quad registers are directly supported. 2068 if (is64BitVector || NumVecs <= 2) { 2069 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2070 QOpcodes0[OpcodeIndex]); 2071 Ops.push_back(MemAddr); 2072 Ops.push_back(Align); 2073 if (isUpdating) { 2074 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2075 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2076 if (!IsImmUpdate) { 2077 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 2078 // check for the opcode rather than the number of vector elements. 2079 if (isVLDfixed(Opc)) 2080 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2081 Ops.push_back(Inc); 2082 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in 2083 // the operands if not such an opcode. 2084 } else if (!isVLDfixed(Opc)) 2085 Ops.push_back(Reg0); 2086 } 2087 Ops.push_back(Pred); 2088 Ops.push_back(Reg0); 2089 Ops.push_back(Chain); 2090 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2091 2092 } else { 2093 // Otherwise, quad registers are loaded with two separate instructions, 2094 // where one loads the even registers and the other loads the odd registers. 2095 EVT AddrTy = MemAddr.getValueType(); 2096 2097 // Load the even subregs. This is always an updating load, so that it 2098 // provides the address to the second load for the odd subregs. 
2099 SDValue ImplDef = 2100 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2101 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 2102 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2103 ResTy, AddrTy, MVT::Other, OpsA); 2104 Chain = SDValue(VLdA, 2); 2105 2106 // Load the odd subregs. 2107 Ops.push_back(SDValue(VLdA, 1)); 2108 Ops.push_back(Align); 2109 if (isUpdating) { 2110 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2111 assert(isa<ConstantSDNode>(Inc.getNode()) && 2112 "only constant post-increment update allowed for VLD3/4"); 2113 (void)Inc; 2114 Ops.push_back(Reg0); 2115 } 2116 Ops.push_back(SDValue(VLdA, 0)); 2117 Ops.push_back(Pred); 2118 Ops.push_back(Reg0); 2119 Ops.push_back(Chain); 2120 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 2121 } 2122 2123 // Transfer memoperands. 2124 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2125 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp}); 2126 2127 if (NumVecs == 1) { 2128 ReplaceNode(N, VLd); 2129 return; 2130 } 2131 2132 // Extract out the subregisters. 2133 SDValue SuperReg = SDValue(VLd, 0); 2134 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2135 ARM::qsub_3 == ARM::qsub_0 + 3, 2136 "Unexpected subreg numbering"); 2137 unsigned Sub0 = (is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0); 2138 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2139 ReplaceUses(SDValue(N, Vec), 2140 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2141 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 2142 if (isUpdating) 2143 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 2144 CurDAG->RemoveDeadNode(N); 2145 } 2146 2147 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 2148 const uint16_t *DOpcodes, 2149 const uint16_t *QOpcodes0, 2150 const uint16_t *QOpcodes1) { 2151 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 2152 SDLoc dl(N); 2153 2154 SDValue MemAddr, Align; 2155 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2156 // nodes are not intrinsics. 2157 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2158 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2159 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2160 return; 2161 2162 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2163 2164 SDValue Chain = N->getOperand(0); 2165 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2166 bool is64BitVector = VT.is64BitVector(); 2167 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2168 2169 unsigned OpcodeIndex; 2170 switch (VT.getSimpleVT().SimpleTy) { 2171 default: llvm_unreachable("unhandled vst type"); 2172 // Double-register operations: 2173 case MVT::v8i8: OpcodeIndex = 0; break; 2174 case MVT::v4f16: 2175 case MVT::v4i16: OpcodeIndex = 1; break; 2176 case MVT::v2f32: 2177 case MVT::v2i32: OpcodeIndex = 2; break; 2178 case MVT::v1i64: OpcodeIndex = 3; break; 2179 // Quad-register operations: 2180 case MVT::v16i8: OpcodeIndex = 0; break; 2181 case MVT::v8f16: 2182 case MVT::v8i16: OpcodeIndex = 1; break; 2183 case MVT::v4f32: 2184 case MVT::v4i32: OpcodeIndex = 2; break; 2185 case MVT::v2f64: 2186 case MVT::v2i64: OpcodeIndex = 3; break; 2187 } 2188 2189 std::vector<EVT> ResTys; 2190 if 
(isUpdating) 2191 ResTys.push_back(MVT::i32); 2192 ResTys.push_back(MVT::Other); 2193 2194 SDValue Pred = getAL(CurDAG, dl); 2195 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2196 SmallVector<SDValue, 7> Ops; 2197 2198 // Double registers and VST1/VST2 quad registers are directly supported. 2199 if (is64BitVector || NumVecs <= 2) { 2200 SDValue SrcReg; 2201 if (NumVecs == 1) { 2202 SrcReg = N->getOperand(Vec0Idx); 2203 } else if (is64BitVector) { 2204 // Form a REG_SEQUENCE to force register allocation. 2205 SDValue V0 = N->getOperand(Vec0Idx + 0); 2206 SDValue V1 = N->getOperand(Vec0Idx + 1); 2207 if (NumVecs == 2) 2208 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2209 else { 2210 SDValue V2 = N->getOperand(Vec0Idx + 2); 2211 // If it's a vst3, form a quad D-register and leave the last part as 2212 // an undef. 2213 SDValue V3 = (NumVecs == 3) 2214 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2215 : N->getOperand(Vec0Idx + 3); 2216 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2217 } 2218 } else { 2219 // Form a QQ register. 2220 SDValue Q0 = N->getOperand(Vec0Idx); 2221 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2222 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2223 } 2224 2225 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2226 QOpcodes0[OpcodeIndex]); 2227 Ops.push_back(MemAddr); 2228 Ops.push_back(Align); 2229 if (isUpdating) { 2230 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2231 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2232 if (!IsImmUpdate) { 2233 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so 2234 // check for the opcode rather than the number of vector elements. 2235 if (isVSTfixed(Opc)) 2236 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2237 Ops.push_back(Inc); 2238 } 2239 // VST1/VST2 fixed increment does not need Reg0 so only include it in 2240 // the operands if not such an opcode. 
2241 else if (!isVSTfixed(Opc)) 2242 Ops.push_back(Reg0); 2243 } 2244 Ops.push_back(SrcReg); 2245 Ops.push_back(Pred); 2246 Ops.push_back(Reg0); 2247 Ops.push_back(Chain); 2248 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2249 2250 // Transfer memoperands. 2251 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp}); 2252 2253 ReplaceNode(N, VSt); 2254 return; 2255 } 2256 2257 // Otherwise, quad registers are stored with two separate instructions, 2258 // where one stores the even registers and the other stores the odd registers. 2259 2260 // Form the QQQQ REG_SEQUENCE. 2261 SDValue V0 = N->getOperand(Vec0Idx + 0); 2262 SDValue V1 = N->getOperand(Vec0Idx + 1); 2263 SDValue V2 = N->getOperand(Vec0Idx + 2); 2264 SDValue V3 = (NumVecs == 3) 2265 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2266 : N->getOperand(Vec0Idx + 3); 2267 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2268 2269 // Store the even D registers. This is always an updating store, so that it 2270 // provides the address to the second store for the odd subregs. 2271 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2272 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2273 MemAddr.getValueType(), 2274 MVT::Other, OpsA); 2275 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp}); 2276 Chain = SDValue(VStA, 1); 2277 2278 // Store the odd D registers. 
2279 Ops.push_back(SDValue(VStA, 0)); 2280 Ops.push_back(Align); 2281 if (isUpdating) { 2282 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2283 assert(isa<ConstantSDNode>(Inc.getNode()) && 2284 "only constant post-increment update allowed for VST3/4"); 2285 (void)Inc; 2286 Ops.push_back(Reg0); 2287 } 2288 Ops.push_back(RegSeq); 2289 Ops.push_back(Pred); 2290 Ops.push_back(Reg0); 2291 Ops.push_back(Chain); 2292 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2293 Ops); 2294 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp}); 2295 ReplaceNode(N, VStB); 2296 } 2297 2298 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2299 unsigned NumVecs, 2300 const uint16_t *DOpcodes, 2301 const uint16_t *QOpcodes) { 2302 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2303 SDLoc dl(N); 2304 2305 SDValue MemAddr, Align; 2306 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2307 // nodes are not intrinsics. 2308 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2309 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2310 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2311 return; 2312 2313 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2314 2315 SDValue Chain = N->getOperand(0); 2316 unsigned Lane = 2317 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); 2318 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2319 bool is64BitVector = VT.is64BitVector(); 2320 2321 unsigned Alignment = 0; 2322 if (NumVecs != 3) { 2323 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2324 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2325 if (Alignment > NumBytes) 2326 Alignment = NumBytes; 2327 if (Alignment < 8 && Alignment < NumBytes) 2328 Alignment = 0; 2329 // Alignment must be a power of two; make sure of that. 
2330 Alignment = (Alignment & -Alignment); 2331 if (Alignment == 1) 2332 Alignment = 0; 2333 } 2334 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2335 2336 unsigned OpcodeIndex; 2337 switch (VT.getSimpleVT().SimpleTy) { 2338 default: llvm_unreachable("unhandled vld/vst lane type"); 2339 // Double-register operations: 2340 case MVT::v8i8: OpcodeIndex = 0; break; 2341 case MVT::v4f16: 2342 case MVT::v4i16: OpcodeIndex = 1; break; 2343 case MVT::v2f32: 2344 case MVT::v2i32: OpcodeIndex = 2; break; 2345 // Quad-register operations: 2346 case MVT::v8f16: 2347 case MVT::v8i16: OpcodeIndex = 0; break; 2348 case MVT::v4f32: 2349 case MVT::v4i32: OpcodeIndex = 1; break; 2350 } 2351 2352 std::vector<EVT> ResTys; 2353 if (IsLoad) { 2354 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2355 if (!is64BitVector) 2356 ResTyElts *= 2; 2357 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2358 MVT::i64, ResTyElts)); 2359 } 2360 if (isUpdating) 2361 ResTys.push_back(MVT::i32); 2362 ResTys.push_back(MVT::Other); 2363 2364 SDValue Pred = getAL(CurDAG, dl); 2365 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2366 2367 SmallVector<SDValue, 8> Ops; 2368 Ops.push_back(MemAddr); 2369 Ops.push_back(Align); 2370 if (isUpdating) { 2371 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2372 bool IsImmUpdate = 2373 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2374 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 2375 } 2376 2377 SDValue SuperReg; 2378 SDValue V0 = N->getOperand(Vec0Idx + 0); 2379 SDValue V1 = N->getOperand(Vec0Idx + 1); 2380 if (NumVecs == 2) { 2381 if (is64BitVector) 2382 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2383 else 2384 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2385 } else { 2386 SDValue V2 = N->getOperand(Vec0Idx + 2); 2387 SDValue V3 = (NumVecs == 3) 2388 ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2389 : N->getOperand(Vec0Idx + 3); 2390 if (is64BitVector) 2391 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2392 else 2393 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2394 } 2395 Ops.push_back(SuperReg); 2396 Ops.push_back(getI32Imm(Lane, dl)); 2397 Ops.push_back(Pred); 2398 Ops.push_back(Reg0); 2399 Ops.push_back(Chain); 2400 2401 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2402 QOpcodes[OpcodeIndex]); 2403 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2404 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp}); 2405 if (!IsLoad) { 2406 ReplaceNode(N, VLdLn); 2407 return; 2408 } 2409 2410 // Extract the subregisters. 2411 SuperReg = SDValue(VLdLn, 0); 2412 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2413 ARM::qsub_3 == ARM::qsub_0 + 3, 2414 "Unexpected subreg numbering"); 2415 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2416 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2417 ReplaceUses(SDValue(N, Vec), 2418 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2419 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2420 if (isUpdating) 2421 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2422 CurDAG->RemoveDeadNode(N); 2423 } 2424 2425 template <typename SDValueVector> 2426 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2427 SDValue PredicateMask) { 2428 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2429 Ops.push_back(PredicateMask); 2430 } 2431 2432 template <typename SDValueVector> 2433 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2434 SDValue PredicateMask, 2435 SDValue Inactive) { 2436 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2437 Ops.push_back(PredicateMask); 2438 Ops.push_back(Inactive); 2439 } 2440 2441 template <typename SDValueVector> 2442 void 
ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) { 2443 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2444 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2445 } 2446 2447 template <typename SDValueVector> 2448 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2449 EVT InactiveTy) { 2450 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2451 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2452 Ops.push_back(SDValue( 2453 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0)); 2454 } 2455 2456 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, 2457 bool Predicated) { 2458 SDLoc Loc(N); 2459 SmallVector<SDValue, 8> Ops; 2460 2461 uint16_t Opcode; 2462 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) { 2463 case 32: 2464 Opcode = Opcodes[0]; 2465 break; 2466 case 64: 2467 Opcode = Opcodes[1]; 2468 break; 2469 default: 2470 llvm_unreachable("bad vector element size in SelectMVE_WB"); 2471 } 2472 2473 Ops.push_back(N->getOperand(2)); // vector of base addresses 2474 2475 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2476 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset 2477 2478 if (Predicated) 2479 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2480 else 2481 AddEmptyMVEPredicateToOps(Ops, Loc); 2482 2483 Ops.push_back(N->getOperand(0)); // chain 2484 2485 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2486 } 2487 2488 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, 2489 bool Immediate, 2490 bool HasSaturationOperand) { 2491 SDLoc Loc(N); 2492 SmallVector<SDValue, 8> Ops; 2493 2494 // Two 32-bit halves of the value to be shifted 2495 Ops.push_back(N->getOperand(1)); 2496 Ops.push_back(N->getOperand(2)); 2497 2498 // The shift count 2499 if (Immediate) { 2500 int32_t ImmValue = 
cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2501 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2502 } else { 2503 Ops.push_back(N->getOperand(3)); 2504 } 2505 2506 // The immediate saturation operand, if any 2507 if (HasSaturationOperand) { 2508 int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(); 2509 int SatBit = (SatOp == 64 ? 0 : 1); 2510 Ops.push_back(getI32Imm(SatBit, Loc)); 2511 } 2512 2513 // MVE scalar shifts are IT-predicable, so include the standard 2514 // predicate arguments. 2515 Ops.push_back(getAL(CurDAG, Loc)); 2516 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2517 2518 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2519 } 2520 2521 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 2522 uint16_t OpcodeWithNoCarry, 2523 bool Add, bool Predicated) { 2524 SDLoc Loc(N); 2525 SmallVector<SDValue, 8> Ops; 2526 uint16_t Opcode; 2527 2528 unsigned FirstInputOp = Predicated ? 2 : 1; 2529 2530 // Two input vectors and the input carry flag 2531 Ops.push_back(N->getOperand(FirstInputOp)); 2532 Ops.push_back(N->getOperand(FirstInputOp + 1)); 2533 SDValue CarryIn = N->getOperand(FirstInputOp + 2); 2534 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn); 2535 uint32_t CarryMask = 1 << 29; 2536 uint32_t CarryExpected = Add ? 
0 : CarryMask; 2537 if (CarryInConstant && 2538 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) { 2539 Opcode = OpcodeWithNoCarry; 2540 } else { 2541 Ops.push_back(CarryIn); 2542 Opcode = OpcodeWithCarry; 2543 } 2544 2545 if (Predicated) 2546 AddMVEPredicateToOps(Ops, Loc, 2547 N->getOperand(FirstInputOp + 3), // predicate 2548 N->getOperand(FirstInputOp - 1)); // inactive 2549 else 2550 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2551 2552 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2553 } 2554 2555 static bool SDValueToConstBool(SDValue SDVal) { 2556 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant"); 2557 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal); 2558 uint64_t Value = SDValConstant->getZExtValue(); 2559 assert((Value == 0 || Value == 1) && "expected value 0 or 1"); 2560 return Value; 2561 } 2562 2563 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 2564 const uint16_t *OpcodesS, 2565 const uint16_t *OpcodesU, 2566 size_t Stride, size_t TySize) { 2567 assert(TySize < Stride && "Invalid TySize"); 2568 bool IsUnsigned = SDValueToConstBool(N->getOperand(1)); 2569 bool IsSub = SDValueToConstBool(N->getOperand(2)); 2570 bool IsExchange = SDValueToConstBool(N->getOperand(3)); 2571 if (IsUnsigned) { 2572 assert(!IsSub && 2573 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist"); 2574 assert(!IsExchange && 2575 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist"); 2576 } 2577 2578 auto OpIsZero = [N](size_t OpNo) { 2579 if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo))) 2580 if (OpConst->getZExtValue() == 0) 2581 return true; 2582 return false; 2583 }; 2584 2585 // If the input accumulator value is not zero, select an instruction with 2586 // accumulator, otherwise select an instruction without accumulator 2587 bool IsAccum = !(OpIsZero(4) && OpIsZero(5)); 2588 2589 const uint16_t *Opcodes = IsUnsigned ? 
OpcodesU : OpcodesS; 2590 if (IsSub) 2591 Opcodes += 4 * Stride; 2592 if (IsExchange) 2593 Opcodes += 2 * Stride; 2594 if (IsAccum) 2595 Opcodes += Stride; 2596 uint16_t Opcode = Opcodes[TySize]; 2597 2598 SDLoc Loc(N); 2599 SmallVector<SDValue, 8> Ops; 2600 // Push the accumulator operands, if they are used 2601 if (IsAccum) { 2602 Ops.push_back(N->getOperand(4)); 2603 Ops.push_back(N->getOperand(5)); 2604 } 2605 // Push the two vector operands 2606 Ops.push_back(N->getOperand(6)); 2607 Ops.push_back(N->getOperand(7)); 2608 2609 if (Predicated) 2610 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8)); 2611 else 2612 AddEmptyMVEPredicateToOps(Ops, Loc); 2613 2614 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2615 } 2616 2617 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated, 2618 const uint16_t *OpcodesS, 2619 const uint16_t *OpcodesU) { 2620 EVT VecTy = N->getOperand(6).getValueType(); 2621 size_t SizeIndex; 2622 switch (VecTy.getVectorElementType().getSizeInBits()) { 2623 case 16: 2624 SizeIndex = 0; 2625 break; 2626 case 32: 2627 SizeIndex = 1; 2628 break; 2629 default: 2630 llvm_unreachable("bad vector element size"); 2631 } 2632 2633 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex); 2634 } 2635 2636 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, 2637 const uint16_t *OpcodesS, 2638 const uint16_t *OpcodesU) { 2639 assert( 2640 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() == 2641 32 && 2642 "bad vector element size"); 2643 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0); 2644 } 2645 2646 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs, 2647 const uint16_t *const *Opcodes) { 2648 EVT VT = N->getValueType(0); 2649 SDLoc Loc(N); 2650 2651 const uint16_t *OurOpcodes; 2652 switch (VT.getVectorElementType().getSizeInBits()) { 2653 case 8: 2654 OurOpcodes = Opcodes[0]; 2655 break; 2656 case 16: 2657 OurOpcodes = Opcodes[1]; 2658 
break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  // The NumVecs q-register results are modelled collectively as one wide
  // vector of i64s (two i64s per q-register).
  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  EVT ResultTys[] = {DataTy, MVT::Other};

  // Start from an undefined tuple and let each load stage fill in its part,
  // threading the chain through so the stages stay ordered.
  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  for (unsigned Stage = 0; Stage < NumVecs; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(2), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
  }

  // Wire each q-subregister of the final tuple to the corresponding result
  // of N, plus the chain, then delete the original node.
  for (unsigned i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, Data));
  ReplaceUses(SDValue(N, NumVecs), Chain);
  CurDAG->RemoveDeadNode(N);
}

/// Select a NEON load-and-duplicate (vld<n>-dup). IsIntrinsic selects where
/// the address operand lives (operand 2 for intrinsic nodes, operand 1
/// otherwise); isUpdating selects the post-increment forms. DOpcodes is the
/// table for 64-bit destination vectors; QOpcodes0/QOpcodes1 cover the two
/// halves of a 128-bit destination.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment operand to something the instruction can encode:
  // no larger than the total access size, at least 8 bytes (otherwise
  // treated as unaligned), and a power of two. NumVecs == 3 keeps the
  // alignment operand at 0.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Index the opcode tables by element size (8/16/32/64 bits).
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
    OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The instruction defines a d-register tuple, modelled as one wide i64
  // vector; NumVecs == 3 is rounded up to a 4-register tuple.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);   // writeback result
  ResTys.push_back(MVT::Other);   // chain

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SDNode *VLdDup;
  if (is64BitVector || NumVecs == 1) {
    // One instruction covers the whole access.
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes0[OpcodeIndex];
    if (isUpdating) {
      // fixed-stride update instructions don't have an explicit writeback
      // operand. It's implicit in the opcode itself.
      SDValue Inc = N->getOperand(2);
      bool IsImmUpdate =
          isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
      if (NumVecs <= 2 && !IsImmUpdate)
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      if (!IsImmUpdate)
        Ops.push_back(Inc);
      // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
      else if (NumVecs > 2)
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else if (NumVecs == 2) {
    // 128-bit destination, two registers: emit the two halves back to
    // back, linked only through the chain.
    const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  } else {
    // 128-bit destination, 3 or 4 registers: the second half reads the
    // first half's partially-defined tuple register and completes it.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    SDValue SuperReg = SDValue(VLdA, 0);
    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ?
ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  // Chain result, and the writeback result when updating.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Try to select a v6t2 bitfield-extract instruction (SBFX/UBFX or their
/// Thumb-2 counterparts, per isSigned) for N. Recognizes AND-of-SRL masks,
/// SRL-of-SHL pairs, SRL of an AND with a shifted mask, and
/// SIGN_EXTEND_INREG of a right shift. Returns true iff N was selected.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    // Extract width from the VT operand; LSB from the inner right shift.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}

/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
///   select_cc setg[ge] X,  0, X, -X
///   select_cc setgt    X, -1, X, -X
///   select_cc setl[te] X,  0, -X, X
///   select_cc setlt    X,  1, -X, X
/// which represent Integer ABS into:
///   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
  SDValue XORSrc0 = N->getOperand(0);
  SDValue XORSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // Not matched for Thumb1-only targets.
  if (Subtarget->isThumb1Only())
    return false;

  // We need exactly xor(add(...), sra(...)).
  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue ADDSrc0 = XORSrc0.getOperand(0);
  SDValue ADDSrc1 = XORSrc0.getOperand(1);
  SDValue SRASrc0 = XORSrc1.getOperand(0);
  SDValue SRASrc1 = XORSrc1.getOperand(1);
  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  unsigned Size = XType.getSizeInBits() - 1;

  // The operands must line up exactly as xor (add (X, Y), Y) with
  // Y == sra (X, size(X)-1); then the whole expression is |X|.
  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
      XType.isInteger() && SRAConstant != nullptr &&
      Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
    return true;
  }

  return false;
}

/// We've got special pseudo-instructions for these: select an ATOMIC_CMP_SWAP
/// node to the CMP_SWAP_8/16/32 pseudo matching the memory access width.
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  // Operands: address, expected value, new value, incoming chain.
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  // Preserve the memory operand so later passes know this touches memory.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  // Forward the loaded value (result 0) and the chain (result 2); the
  // pseudo's second i32 result is left unused here.
  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

/// If A's set bits form a single contiguous run, return the pair
/// (most significant set bit, least significant set bit); otherwise None.
static
Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
  // Contiguous iff the popcount exactly fills [LastOne, FirstOne].
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return Optional<std::pair<unsigned,unsigned>>();
  return std::make_pair(FirstOne, LastOne);
}

/// For Thumb targets, rewrite the operand of (cmpz (and X, C), #0), where
/// C is a contiguous bit mask, into one or two immediate shifts so the
/// expensive mask constant is never materialized. When the mask is a single
/// bit, the bit is shifted into the sign bit and SwitchEQNEToPLMI is set so
/// the caller flips EQ/NE into PL/MI on the consuming node.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  // Only handle a compare of an AND against zero.
  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Helper: emit the immediate shift Opc (given as the Thumb1 opcode and
  // remapped to the Thumb2 form when appropriate). The two modes take
  // different operand lists.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }

}

void ARMDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (N->getOpcode()) {
  default: break;
  case ISD::STORE: {
    // For Thumb1, match an sp-relative store in C++.
This is a little 3115 // unfortunate, but I don't think I can make the chain check work 3116 // otherwise. (The chain of the store has to be the same as the chain 3117 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3118 // a direct reference to "SP".) 3119 // 3120 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3121 // a different addressing mode from other four-byte stores. 3122 // 3123 // This pattern usually comes up with call arguments. 3124 StoreSDNode *ST = cast<StoreSDNode>(N); 3125 SDValue Ptr = ST->getBasePtr(); 3126 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3127 int RHSC = 0; 3128 if (Ptr.getOpcode() == ISD::ADD && 3129 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3130 Ptr = Ptr.getOperand(0); 3131 3132 if (Ptr.getOpcode() == ISD::CopyFromReg && 3133 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3134 Ptr.getOperand(0) == ST->getChain()) { 3135 SDValue Ops[] = {ST->getValue(), 3136 CurDAG->getRegister(ARM::SP, MVT::i32), 3137 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3138 getAL(CurDAG, dl), 3139 CurDAG->getRegister(0, MVT::i32), 3140 ST->getChain()}; 3141 MachineSDNode *ResNode = 3142 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3143 MachineMemOperand *MemOp = ST->getMemOperand(); 3144 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3145 ReplaceNode(N, ResNode); 3146 return; 3147 } 3148 } 3149 break; 3150 } 3151 case ISD::WRITE_REGISTER: 3152 if (tryWriteRegister(N)) 3153 return; 3154 break; 3155 case ISD::READ_REGISTER: 3156 if (tryReadRegister(N)) 3157 return; 3158 break; 3159 case ISD::INLINEASM: 3160 case ISD::INLINEASM_BR: 3161 if (tryInlineAsm(N)) 3162 return; 3163 break; 3164 case ISD::XOR: 3165 // Select special operations if XOR node forms integer ABS pattern 3166 if (tryABSOp(N)) 3167 return; 3168 // Other cases are autogenerated. 
3169 break; 3170 case ISD::Constant: { 3171 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 3172 // If we can't materialize the constant we need to use a literal pool 3173 if (ConstantMaterializationCost(Val, Subtarget) > 2) { 3174 SDValue CPIdx = CurDAG->getTargetConstantPool( 3175 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3176 TLI->getPointerTy(CurDAG->getDataLayout())); 3177 3178 SDNode *ResNode; 3179 if (Subtarget->isThumb()) { 3180 SDValue Ops[] = { 3181 CPIdx, 3182 getAL(CurDAG, dl), 3183 CurDAG->getRegister(0, MVT::i32), 3184 CurDAG->getEntryNode() 3185 }; 3186 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3187 Ops); 3188 } else { 3189 SDValue Ops[] = { 3190 CPIdx, 3191 CurDAG->getTargetConstant(0, dl, MVT::i32), 3192 getAL(CurDAG, dl), 3193 CurDAG->getRegister(0, MVT::i32), 3194 CurDAG->getEntryNode() 3195 }; 3196 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3197 Ops); 3198 } 3199 // Annotate the Node with memory operand information so that MachineInstr 3200 // queries work properly. This e.g. gives the register allocation the 3201 // required information for rematerialization. 3202 MachineFunction& MF = CurDAG->getMachineFunction(); 3203 MachineMemOperand *MemOp = 3204 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3205 MachineMemOperand::MOLoad, 4, 4); 3206 3207 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3208 3209 ReplaceNode(N, ResNode); 3210 return; 3211 } 3212 3213 // Other cases are autogenerated. 3214 break; 3215 } 3216 case ISD::FrameIndex: { 3217 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
3218 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3219 SDValue TFI = CurDAG->getTargetFrameIndex( 3220 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3221 if (Subtarget->isThumb1Only()) { 3222 // Set the alignment of the frame object to 4, to avoid having to generate 3223 // more than one ADD 3224 MachineFrameInfo &MFI = MF->getFrameInfo(); 3225 if (MFI.getObjectAlignment(FI) < 4) 3226 MFI.setObjectAlignment(FI, 4); 3227 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3228 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3229 return; 3230 } else { 3231 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3232 ARM::t2ADDri : ARM::ADDri); 3233 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3234 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3235 CurDAG->getRegister(0, MVT::i32) }; 3236 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3237 return; 3238 } 3239 } 3240 case ISD::SRL: 3241 if (tryV6T2BitfieldExtractOp(N, false)) 3242 return; 3243 break; 3244 case ISD::SIGN_EXTEND_INREG: 3245 case ISD::SRA: 3246 if (tryV6T2BitfieldExtractOp(N, true)) 3247 return; 3248 break; 3249 case ISD::MUL: 3250 if (Subtarget->isThumb1Only()) 3251 break; 3252 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3253 unsigned RHSV = C->getZExtValue(); 3254 if (!RHSV) break; 3255 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
3256 unsigned ShImm = Log2_32(RHSV-1); 3257 if (ShImm >= 32) 3258 break; 3259 SDValue V = N->getOperand(0); 3260 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3261 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3262 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3263 if (Subtarget->isThumb()) { 3264 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3265 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3266 return; 3267 } else { 3268 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3269 Reg0 }; 3270 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3271 return; 3272 } 3273 } 3274 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3275 unsigned ShImm = Log2_32(RHSV+1); 3276 if (ShImm >= 32) 3277 break; 3278 SDValue V = N->getOperand(0); 3279 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3280 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3281 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3282 if (Subtarget->isThumb()) { 3283 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3284 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3285 return; 3286 } else { 3287 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3288 Reg0 }; 3289 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3290 return; 3291 } 3292 } 3293 } 3294 break; 3295 case ISD::AND: { 3296 // Check for unsigned bitfield extract 3297 if (tryV6T2BitfieldExtractOp(N, false)) 3298 return; 3299 3300 // If an immediate is used in an AND node, it is possible that the immediate 3301 // can be more optimally materialized when negated. If this is the case we 3302 // can negate the immediate and use a BIC instead. 3303 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3304 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3305 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3306 3307 // In Thumb2 mode, an AND can take a 12-bit immediate. 
If this 3308 // immediate can be negated and fit in the immediate operand of 3309 // a t2BIC, don't do any manual transform here as this can be 3310 // handled by the generic ISel machinery. 3311 bool PreferImmediateEncoding = 3312 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3313 if (!PreferImmediateEncoding && 3314 ConstantMaterializationCost(Imm, Subtarget) > 3315 ConstantMaterializationCost(~Imm, Subtarget)) { 3316 // The current immediate costs more to materialize than a negated 3317 // immediate, so negate the immediate and use a BIC. 3318 SDValue NewImm = 3319 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 3320 // If the new constant didn't exist before, reposition it in the topological 3321 // ordering so it is just before N. Otherwise, don't touch its location. 3322 if (NewImm->getNodeId() == -1) 3323 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3324 3325 if (!Subtarget->hasThumb2()) { 3326 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3327 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3328 CurDAG->getRegister(0, MVT::i32)}; 3329 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3330 return; 3331 } else { 3332 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3333 CurDAG->getRegister(0, MVT::i32), 3334 CurDAG->getRegister(0, MVT::i32)}; 3335 ReplaceNode(N, 3336 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3337 return; 3338 } 3339 } 3340 } 3341 3342 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3343 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 3344 // are entirely contributed by c2 and lower 16-bits are entirely contributed 3345 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 3346 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 3347 EVT VT = N->getValueType(0); 3348 if (VT != MVT::i32) 3349 break; 3350 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3351 ? 
ARM::t2MOVTi16 3352 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 3353 if (!Opc) 3354 break; 3355 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3356 N1C = dyn_cast<ConstantSDNode>(N1); 3357 if (!N1C) 3358 break; 3359 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3360 SDValue N2 = N0.getOperand(1); 3361 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3362 if (!N2C) 3363 break; 3364 unsigned N1CVal = N1C->getZExtValue(); 3365 unsigned N2CVal = N2C->getZExtValue(); 3366 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3367 (N1CVal & 0xffffU) == 0xffffU && 3368 (N2CVal & 0xffffU) == 0x0U) { 3369 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3370 dl, MVT::i32); 3371 SDValue Ops[] = { N0.getOperand(0), Imm16, 3372 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3373 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3374 return; 3375 } 3376 } 3377 3378 break; 3379 } 3380 case ARMISD::UMAAL: { 3381 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3382 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3383 N->getOperand(2), N->getOperand(3), 3384 getAL(CurDAG, dl), 3385 CurDAG->getRegister(0, MVT::i32) }; 3386 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3387 return; 3388 } 3389 case ARMISD::UMLAL:{ 3390 if (Subtarget->isThumb()) { 3391 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3392 N->getOperand(3), getAL(CurDAG, dl), 3393 CurDAG->getRegister(0, MVT::i32)}; 3394 ReplaceNode( 3395 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3396 return; 3397 }else{ 3398 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3399 N->getOperand(3), getAL(CurDAG, dl), 3400 CurDAG->getRegister(0, MVT::i32), 3401 CurDAG->getRegister(0, MVT::i32) }; 3402 ReplaceNode(N, CurDAG->getMachineNode( 3403 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 3404 MVT::i32, MVT::i32, Ops)); 3405 return; 3406 } 3407 } 3408 case ARMISD::SMLAL:{ 3409 if (Subtarget->isThumb()) { 3410 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3411 N->getOperand(3), getAL(CurDAG, dl), 3412 CurDAG->getRegister(0, MVT::i32)}; 3413 ReplaceNode( 3414 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3415 return; 3416 }else{ 3417 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3418 N->getOperand(3), getAL(CurDAG, dl), 3419 CurDAG->getRegister(0, MVT::i32), 3420 CurDAG->getRegister(0, MVT::i32) }; 3421 ReplaceNode(N, CurDAG->getMachineNode( 3422 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3423 MVT::i32, MVT::i32, Ops)); 3424 return; 3425 } 3426 } 3427 case ARMISD::SUBE: { 3428 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3429 break; 3430 // Look for a pattern to match SMMLS 3431 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3432 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3433 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3434 !SDValue(N, 1).use_empty()) 3435 break; 3436 3437 if (Subtarget->isThumb()) 3438 assert(Subtarget->hasThumb2() && 3439 "This pattern should not be generated for Thumb"); 3440 3441 SDValue SmulLoHi = N->getOperand(1); 3442 SDValue Subc = N->getOperand(2); 3443 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 3444 3445 if (!Zero || Zero->getZExtValue() != 0 || 3446 Subc.getOperand(1) != SmulLoHi.getValue(0) || 3447 N->getOperand(1) != SmulLoHi.getValue(1) || 3448 N->getOperand(2) != Subc.getValue(1)) 3449 break; 3450 3451 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 3452 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3453 N->getOperand(0), getAL(CurDAG, dl), 3454 CurDAG->getRegister(0, MVT::i32) }; 3455 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3456 return; 3457 } 3458 case ISD::LOAD: { 3459 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3460 return; 3461 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3462 if (tryT2IndexedLoad(N)) 3463 return; 3464 } else if (Subtarget->isThumb()) { 3465 if (tryT1IndexedLoad(N)) 3466 return; 3467 } else if (tryARMIndexedLoad(N)) 3468 return; 3469 // Other cases are autogenerated. 3470 break; 3471 } 3472 case ISD::MLOAD: 3473 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3474 return; 3475 // Other cases are autogenerated. 3476 break; 3477 case ARMISD::WLS: 3478 case ARMISD::LE: { 3479 SDValue Ops[] = { N->getOperand(1), 3480 N->getOperand(2), 3481 N->getOperand(0) }; 3482 unsigned Opc = N->getOpcode() == ARMISD::WLS ? 
3483 ARM::t2WhileLoopStart : ARM::t2LoopEnd; 3484 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 3485 ReplaceUses(N, New); 3486 CurDAG->RemoveDeadNode(N); 3487 return; 3488 } 3489 case ARMISD::LOOP_DEC: { 3490 SDValue Ops[] = { N->getOperand(1), 3491 N->getOperand(2), 3492 N->getOperand(0) }; 3493 SDNode *Dec = 3494 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3495 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 3496 ReplaceUses(N, Dec); 3497 CurDAG->RemoveDeadNode(N); 3498 return; 3499 } 3500 case ARMISD::BRCOND: { 3501 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3502 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3503 // Pattern complexity = 6 cost = 1 size = 0 3504 3505 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3506 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 3507 // Pattern complexity = 6 cost = 1 size = 0 3508 3509 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3510 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3511 // Pattern complexity = 6 cost = 1 size = 0 3512 3513 unsigned Opc = Subtarget->isThumb() ? 3514 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 3515 SDValue Chain = N->getOperand(0); 3516 SDValue N1 = N->getOperand(1); 3517 SDValue N2 = N->getOperand(2); 3518 SDValue N3 = N->getOperand(3); 3519 SDValue InFlag = N->getOperand(4); 3520 assert(N1.getOpcode() == ISD::BasicBlock); 3521 assert(N2.getOpcode() == ISD::Constant); 3522 assert(N3.getOpcode() == ISD::Register); 3523 3524 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 3525 3526 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3527 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 3528 SDValue Int = InFlag.getOperand(0); 3529 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 3530 3531 // Handle low-overhead loops. 
3532 if (ID == Intrinsic::loop_decrement_reg) { 3533 SDValue Elements = Int.getOperand(2); 3534 SDValue Size = CurDAG->getTargetConstant( 3535 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 3536 MVT::i32); 3537 3538 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 3539 SDNode *LoopDec = 3540 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3541 CurDAG->getVTList(MVT::i32, MVT::Other), 3542 Args); 3543 ReplaceUses(Int.getNode(), LoopDec); 3544 3545 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 3546 SDNode *LoopEnd = 3547 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 3548 3549 ReplaceUses(N, LoopEnd); 3550 CurDAG->RemoveDeadNode(N); 3551 CurDAG->RemoveDeadNode(InFlag.getNode()); 3552 CurDAG->RemoveDeadNode(Int.getNode()); 3553 return; 3554 } 3555 } 3556 3557 bool SwitchEQNEToPLMI; 3558 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3559 InFlag = N->getOperand(4); 3560 3561 if (SwitchEQNEToPLMI) { 3562 switch ((ARMCC::CondCodes)CC) { 3563 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3564 case ARMCC::NE: 3565 CC = (unsigned)ARMCC::MI; 3566 break; 3567 case ARMCC::EQ: 3568 CC = (unsigned)ARMCC::PL; 3569 break; 3570 } 3571 } 3572 } 3573 3574 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 3575 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 3576 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 3577 MVT::Glue, Ops); 3578 Chain = SDValue(ResNode, 0); 3579 if (N->getNumValues() == 2) { 3580 InFlag = SDValue(ResNode, 1); 3581 ReplaceUses(SDValue(N, 1), InFlag); 3582 } 3583 ReplaceUses(SDValue(N, 0), 3584 SDValue(Chain.getNode(), Chain.getResNo())); 3585 CurDAG->RemoveDeadNode(N); 3586 return; 3587 } 3588 3589 case ARMISD::CMPZ: { 3590 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 3591 // This allows us to avoid materializing the expensive negative constant. 3592 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 3593 // for its glue output. 
3594 SDValue X = N->getOperand(0); 3595 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 3596 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 3597 int64_t Addend = -C->getSExtValue(); 3598 3599 SDNode *Add = nullptr; 3600 // ADDS can be better than CMN if the immediate fits in a 3601 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 3602 // Outside that range we can just use a CMN which is 32-bit but has a 3603 // 12-bit immediate range. 3604 if (Addend < 1<<8) { 3605 if (Subtarget->isThumb2()) { 3606 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3607 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3608 CurDAG->getRegister(0, MVT::i32) }; 3609 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 3610 } else { 3611 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8; 3612 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 3613 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3614 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3615 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3616 } 3617 } 3618 if (Add) { 3619 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 3620 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 3621 } 3622 } 3623 // Other cases are autogenerated. 
3624 break; 3625 } 3626 3627 case ARMISD::CMOV: { 3628 SDValue InFlag = N->getOperand(4); 3629 3630 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3631 bool SwitchEQNEToPLMI; 3632 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3633 3634 if (SwitchEQNEToPLMI) { 3635 SDValue ARMcc = N->getOperand(2); 3636 ARMCC::CondCodes CC = 3637 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 3638 3639 switch (CC) { 3640 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3641 case ARMCC::NE: 3642 CC = ARMCC::MI; 3643 break; 3644 case ARMCC::EQ: 3645 CC = ARMCC::PL; 3646 break; 3647 } 3648 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 3649 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 3650 N->getOperand(3), N->getOperand(4)}; 3651 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 3652 } 3653 3654 } 3655 // Other cases are autogenerated. 3656 break; 3657 } 3658 3659 case ARMISD::VZIP: { 3660 unsigned Opc = 0; 3661 EVT VT = N->getValueType(0); 3662 switch (VT.getSimpleVT().SimpleTy) { 3663 default: return; 3664 case MVT::v8i8: Opc = ARM::VZIPd8; break; 3665 case MVT::v4f16: 3666 case MVT::v4i16: Opc = ARM::VZIPd16; break; 3667 case MVT::v2f32: 3668 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
3669 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3670 case MVT::v16i8: Opc = ARM::VZIPq8; break; 3671 case MVT::v8f16: 3672 case MVT::v8i16: Opc = ARM::VZIPq16; break; 3673 case MVT::v4f32: 3674 case MVT::v4i32: Opc = ARM::VZIPq32; break; 3675 } 3676 SDValue Pred = getAL(CurDAG, dl); 3677 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3678 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3679 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3680 return; 3681 } 3682 case ARMISD::VUZP: { 3683 unsigned Opc = 0; 3684 EVT VT = N->getValueType(0); 3685 switch (VT.getSimpleVT().SimpleTy) { 3686 default: return; 3687 case MVT::v8i8: Opc = ARM::VUZPd8; break; 3688 case MVT::v4f16: 3689 case MVT::v4i16: Opc = ARM::VUZPd16; break; 3690 case MVT::v2f32: 3691 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 3692 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3693 case MVT::v16i8: Opc = ARM::VUZPq8; break; 3694 case MVT::v8f16: 3695 case MVT::v8i16: Opc = ARM::VUZPq16; break; 3696 case MVT::v4f32: 3697 case MVT::v4i32: Opc = ARM::VUZPq32; break; 3698 } 3699 SDValue Pred = getAL(CurDAG, dl); 3700 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3701 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3702 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3703 return; 3704 } 3705 case ARMISD::VTRN: { 3706 unsigned Opc = 0; 3707 EVT VT = N->getValueType(0); 3708 switch (VT.getSimpleVT().SimpleTy) { 3709 default: return; 3710 case MVT::v8i8: Opc = ARM::VTRNd8; break; 3711 case MVT::v4f16: 3712 case MVT::v4i16: Opc = ARM::VTRNd16; break; 3713 case MVT::v2f32: 3714 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3715 case MVT::v16i8: Opc = ARM::VTRNq8; break; 3716 case MVT::v8f16: 3717 case MVT::v8i16: Opc = ARM::VTRNq16; break; 3718 case MVT::v4f32: 3719 case MVT::v4i32: Opc = ARM::VTRNq32; break; 3720 } 3721 SDValue Pred = getAL(CurDAG, dl); 3722 SDValue PredReg = CurDAG->getRegister(0, 
MVT::i32); 3723 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3724 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3725 return; 3726 } 3727 case ARMISD::BUILD_VECTOR: { 3728 EVT VecVT = N->getValueType(0); 3729 EVT EltVT = VecVT.getVectorElementType(); 3730 unsigned NumElts = VecVT.getVectorNumElements(); 3731 if (EltVT == MVT::f64) { 3732 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 3733 ReplaceNode( 3734 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3735 return; 3736 } 3737 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 3738 if (NumElts == 2) { 3739 ReplaceNode( 3740 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3741 return; 3742 } 3743 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 3744 ReplaceNode(N, 3745 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 3746 N->getOperand(2), N->getOperand(3))); 3747 return; 3748 } 3749 3750 case ARMISD::VLD1DUP: { 3751 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 3752 ARM::VLD1DUPd32 }; 3753 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 3754 ARM::VLD1DUPq32 }; 3755 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 3756 return; 3757 } 3758 3759 case ARMISD::VLD2DUP: { 3760 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 3761 ARM::VLD2DUPd32 }; 3762 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 3763 return; 3764 } 3765 3766 case ARMISD::VLD3DUP: { 3767 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 3768 ARM::VLD3DUPd16Pseudo, 3769 ARM::VLD3DUPd32Pseudo }; 3770 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 3771 return; 3772 } 3773 3774 case ARMISD::VLD4DUP: { 3775 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 3776 ARM::VLD4DUPd16Pseudo, 3777 ARM::VLD4DUPd32Pseudo }; 3778 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 3779 return; 
3780 } 3781 3782 case ARMISD::VLD1DUP_UPD: { 3783 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 3784 ARM::VLD1DUPd16wb_fixed, 3785 ARM::VLD1DUPd32wb_fixed }; 3786 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 3787 ARM::VLD1DUPq16wb_fixed, 3788 ARM::VLD1DUPq32wb_fixed }; 3789 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 3790 return; 3791 } 3792 3793 case ARMISD::VLD2DUP_UPD: { 3794 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 3795 ARM::VLD2DUPd16wb_fixed, 3796 ARM::VLD2DUPd32wb_fixed }; 3797 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes); 3798 return; 3799 } 3800 3801 case ARMISD::VLD3DUP_UPD: { 3802 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 3803 ARM::VLD3DUPd16Pseudo_UPD, 3804 ARM::VLD3DUPd32Pseudo_UPD }; 3805 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes); 3806 return; 3807 } 3808 3809 case ARMISD::VLD4DUP_UPD: { 3810 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 3811 ARM::VLD4DUPd16Pseudo_UPD, 3812 ARM::VLD4DUPd32Pseudo_UPD }; 3813 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes); 3814 return; 3815 } 3816 3817 case ARMISD::VLD1_UPD: { 3818 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 3819 ARM::VLD1d16wb_fixed, 3820 ARM::VLD1d32wb_fixed, 3821 ARM::VLD1d64wb_fixed }; 3822 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 3823 ARM::VLD1q16wb_fixed, 3824 ARM::VLD1q32wb_fixed, 3825 ARM::VLD1q64wb_fixed }; 3826 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 3827 return; 3828 } 3829 3830 case ARMISD::VLD2_UPD: { 3831 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed, 3832 ARM::VLD2d16wb_fixed, 3833 ARM::VLD2d32wb_fixed, 3834 ARM::VLD1q64wb_fixed}; 3835 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, 3836 ARM::VLD2q16PseudoWB_fixed, 3837 ARM::VLD2q32PseudoWB_fixed }; 3838 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 3839 return; 3840 } 3841 3842 case ARMISD::VLD3_UPD: { 
3843 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 3844 ARM::VLD3d16Pseudo_UPD, 3845 ARM::VLD3d32Pseudo_UPD, 3846 ARM::VLD1d64TPseudoWB_fixed}; 3847 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3848 ARM::VLD3q16Pseudo_UPD, 3849 ARM::VLD3q32Pseudo_UPD }; 3850 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 3851 ARM::VLD3q16oddPseudo_UPD, 3852 ARM::VLD3q32oddPseudo_UPD }; 3853 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3854 return; 3855 } 3856 3857 case ARMISD::VLD4_UPD: { 3858 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, 3859 ARM::VLD4d16Pseudo_UPD, 3860 ARM::VLD4d32Pseudo_UPD, 3861 ARM::VLD1d64QPseudoWB_fixed}; 3862 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3863 ARM::VLD4q16Pseudo_UPD, 3864 ARM::VLD4q32Pseudo_UPD }; 3865 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, 3866 ARM::VLD4q16oddPseudo_UPD, 3867 ARM::VLD4q32oddPseudo_UPD }; 3868 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3869 return; 3870 } 3871 3872 case ARMISD::VLD2LN_UPD: { 3873 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 3874 ARM::VLD2LNd16Pseudo_UPD, 3875 ARM::VLD2LNd32Pseudo_UPD }; 3876 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 3877 ARM::VLD2LNq32Pseudo_UPD }; 3878 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 3879 return; 3880 } 3881 3882 case ARMISD::VLD3LN_UPD: { 3883 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 3884 ARM::VLD3LNd16Pseudo_UPD, 3885 ARM::VLD3LNd32Pseudo_UPD }; 3886 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 3887 ARM::VLD3LNq32Pseudo_UPD }; 3888 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 3889 return; 3890 } 3891 3892 case ARMISD::VLD4LN_UPD: { 3893 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 3894 ARM::VLD4LNd16Pseudo_UPD, 3895 ARM::VLD4LNd32Pseudo_UPD }; 3896 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 3897 ARM::VLD4LNq32Pseudo_UPD }; 
3898 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 3899 return; 3900 } 3901 3902 case ARMISD::VST1_UPD: { 3903 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 3904 ARM::VST1d16wb_fixed, 3905 ARM::VST1d32wb_fixed, 3906 ARM::VST1d64wb_fixed }; 3907 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 3908 ARM::VST1q16wb_fixed, 3909 ARM::VST1q32wb_fixed, 3910 ARM::VST1q64wb_fixed }; 3911 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 3912 return; 3913 } 3914 3915 case ARMISD::VST2_UPD: { 3916 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed, 3917 ARM::VST2d16wb_fixed, 3918 ARM::VST2d32wb_fixed, 3919 ARM::VST1q64wb_fixed}; 3920 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, 3921 ARM::VST2q16PseudoWB_fixed, 3922 ARM::VST2q32PseudoWB_fixed }; 3923 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 3924 return; 3925 } 3926 3927 case ARMISD::VST3_UPD: { 3928 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 3929 ARM::VST3d16Pseudo_UPD, 3930 ARM::VST3d32Pseudo_UPD, 3931 ARM::VST1d64TPseudoWB_fixed}; 3932 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3933 ARM::VST3q16Pseudo_UPD, 3934 ARM::VST3q32Pseudo_UPD }; 3935 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 3936 ARM::VST3q16oddPseudo_UPD, 3937 ARM::VST3q32oddPseudo_UPD }; 3938 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3939 return; 3940 } 3941 3942 case ARMISD::VST4_UPD: { 3943 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD, 3944 ARM::VST4d16Pseudo_UPD, 3945 ARM::VST4d32Pseudo_UPD, 3946 ARM::VST1d64QPseudoWB_fixed}; 3947 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3948 ARM::VST4q16Pseudo_UPD, 3949 ARM::VST4q32Pseudo_UPD }; 3950 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, 3951 ARM::VST4q16oddPseudo_UPD, 3952 ARM::VST4q32oddPseudo_UPD }; 3953 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3954 return; 3955 } 3956 3957 case ARMISD::VST2LN_UPD: { 3958 static 
const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 3959 ARM::VST2LNd16Pseudo_UPD, 3960 ARM::VST2LNd32Pseudo_UPD }; 3961 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 3962 ARM::VST2LNq32Pseudo_UPD }; 3963 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 3964 return; 3965 } 3966 3967 case ARMISD::VST3LN_UPD: { 3968 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 3969 ARM::VST3LNd16Pseudo_UPD, 3970 ARM::VST3LNd32Pseudo_UPD }; 3971 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 3972 ARM::VST3LNq32Pseudo_UPD }; 3973 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 3974 return; 3975 } 3976 3977 case ARMISD::VST4LN_UPD: { 3978 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 3979 ARM::VST4LNd16Pseudo_UPD, 3980 ARM::VST4LNd32Pseudo_UPD }; 3981 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 3982 ARM::VST4LNq32Pseudo_UPD }; 3983 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 3984 return; 3985 } 3986 3987 case ISD::INTRINSIC_VOID: 3988 case ISD::INTRINSIC_W_CHAIN: { 3989 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 3990 switch (IntNo) { 3991 default: 3992 break; 3993 3994 case Intrinsic::arm_mrrc: 3995 case Intrinsic::arm_mrrc2: { 3996 SDLoc dl(N); 3997 SDValue Chain = N->getOperand(0); 3998 unsigned Opc; 3999 4000 if (Subtarget->isThumb()) 4001 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 4002 else 4003 Opc = (IntNo == Intrinsic::arm_mrrc ? 
ARM::MRRC : ARM::MRRC2); 4004 4005 SmallVector<SDValue, 5> Ops; 4006 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 4007 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 4008 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 4009 4010 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 4011 // instruction will always be '1111' but it is possible in assembly language to specify 4012 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 4013 if (Opc != ARM::MRRC2) { 4014 Ops.push_back(getAL(CurDAG, dl)); 4015 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4016 } 4017 4018 Ops.push_back(Chain); 4019 4020 // Writes to two registers. 4021 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 4022 4023 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 4024 return; 4025 } 4026 case Intrinsic::arm_ldaexd: 4027 case Intrinsic::arm_ldrexd: { 4028 SDLoc dl(N); 4029 SDValue Chain = N->getOperand(0); 4030 SDValue MemAddr = N->getOperand(2); 4031 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 4032 4033 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 4034 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 4035 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 4036 4037 // arm_ldrexd returns a i64 value in {i32, i32} 4038 std::vector<EVT> ResTys; 4039 if (isThumb) { 4040 ResTys.push_back(MVT::i32); 4041 ResTys.push_back(MVT::i32); 4042 } else 4043 ResTys.push_back(MVT::Untyped); 4044 ResTys.push_back(MVT::Other); 4045 4046 // Place arguments in the right order. 4047 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 4048 CurDAG->getRegister(0, MVT::i32), Chain}; 4049 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4050 // Transfer memoperands. 
4051 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4052 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4053 4054 // Remap uses. 4055 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 4056 if (!SDValue(N, 0).use_empty()) { 4057 SDValue Result; 4058 if (isThumb) 4059 Result = SDValue(Ld, 0); 4060 else { 4061 SDValue SubRegIdx = 4062 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 4063 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4064 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4065 Result = SDValue(ResNode,0); 4066 } 4067 ReplaceUses(SDValue(N, 0), Result); 4068 } 4069 if (!SDValue(N, 1).use_empty()) { 4070 SDValue Result; 4071 if (isThumb) 4072 Result = SDValue(Ld, 1); 4073 else { 4074 SDValue SubRegIdx = 4075 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 4076 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4077 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4078 Result = SDValue(ResNode,0); 4079 } 4080 ReplaceUses(SDValue(N, 1), Result); 4081 } 4082 ReplaceUses(SDValue(N, 2), OutChain); 4083 CurDAG->RemoveDeadNode(N); 4084 return; 4085 } 4086 case Intrinsic::arm_stlexd: 4087 case Intrinsic::arm_strexd: { 4088 SDLoc dl(N); 4089 SDValue Chain = N->getOperand(0); 4090 SDValue Val0 = N->getOperand(2); 4091 SDValue Val1 = N->getOperand(3); 4092 SDValue MemAddr = N->getOperand(4); 4093 4094 // Store exclusive double return a i32 value which is the return status 4095 // of the issued store. 4096 const EVT ResTys[] = {MVT::i32, MVT::Other}; 4097 4098 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 4099 // Place arguments in the right order. 4100 SmallVector<SDValue, 7> Ops; 4101 if (isThumb) { 4102 Ops.push_back(Val0); 4103 Ops.push_back(Val1); 4104 } else 4105 // arm_strexd uses GPRPair. 
4106 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 4107 Ops.push_back(MemAddr); 4108 Ops.push_back(getAL(CurDAG, dl)); 4109 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4110 Ops.push_back(Chain); 4111 4112 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 4113 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 4114 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 4115 4116 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4117 // Transfer memoperands. 4118 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4119 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4120 4121 ReplaceNode(N, St); 4122 return; 4123 } 4124 4125 case Intrinsic::arm_neon_vld1: { 4126 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 4127 ARM::VLD1d32, ARM::VLD1d64 }; 4128 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4129 ARM::VLD1q32, ARM::VLD1q64}; 4130 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 4131 return; 4132 } 4133 4134 case Intrinsic::arm_neon_vld1x2: { 4135 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4136 ARM::VLD1q32, ARM::VLD1q64 }; 4137 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 4138 ARM::VLD1d16QPseudo, 4139 ARM::VLD1d32QPseudo, 4140 ARM::VLD1d64QPseudo }; 4141 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4142 return; 4143 } 4144 4145 case Intrinsic::arm_neon_vld1x3: { 4146 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 4147 ARM::VLD1d16TPseudo, 4148 ARM::VLD1d32TPseudo, 4149 ARM::VLD1d64TPseudo }; 4150 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD, 4151 ARM::VLD1q16LowTPseudo_UPD, 4152 ARM::VLD1q32LowTPseudo_UPD, 4153 ARM::VLD1q64LowTPseudo_UPD }; 4154 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 4155 ARM::VLD1q16HighTPseudo, 4156 ARM::VLD1q32HighTPseudo, 4157 ARM::VLD1q64HighTPseudo }; 4158 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4159 return; 4160 } 
4161 4162 case Intrinsic::arm_neon_vld1x4: { 4163 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 4164 ARM::VLD1d16QPseudo, 4165 ARM::VLD1d32QPseudo, 4166 ARM::VLD1d64QPseudo }; 4167 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 4168 ARM::VLD1q16LowQPseudo_UPD, 4169 ARM::VLD1q32LowQPseudo_UPD, 4170 ARM::VLD1q64LowQPseudo_UPD }; 4171 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 4172 ARM::VLD1q16HighQPseudo, 4173 ARM::VLD1q32HighQPseudo, 4174 ARM::VLD1q64HighQPseudo }; 4175 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4176 return; 4177 } 4178 4179 case Intrinsic::arm_neon_vld2: { 4180 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 4181 ARM::VLD2d32, ARM::VLD1q64 }; 4182 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 4183 ARM::VLD2q32Pseudo }; 4184 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4185 return; 4186 } 4187 4188 case Intrinsic::arm_neon_vld3: { 4189 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 4190 ARM::VLD3d16Pseudo, 4191 ARM::VLD3d32Pseudo, 4192 ARM::VLD1d64TPseudo }; 4193 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4194 ARM::VLD3q16Pseudo_UPD, 4195 ARM::VLD3q32Pseudo_UPD }; 4196 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 4197 ARM::VLD3q16oddPseudo, 4198 ARM::VLD3q32oddPseudo }; 4199 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4200 return; 4201 } 4202 4203 case Intrinsic::arm_neon_vld4: { 4204 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 4205 ARM::VLD4d16Pseudo, 4206 ARM::VLD4d32Pseudo, 4207 ARM::VLD1d64QPseudo }; 4208 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 4209 ARM::VLD4q16Pseudo_UPD, 4210 ARM::VLD4q32Pseudo_UPD }; 4211 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 4212 ARM::VLD4q16oddPseudo, 4213 ARM::VLD4q32oddPseudo }; 4214 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4215 return; 4216 } 4217 4218 case Intrinsic::arm_neon_vld2dup: 
{ 4219 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4220 ARM::VLD2DUPd32, ARM::VLD1q64 }; 4221 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4222 ARM::VLD2DUPq16EvenPseudo, 4223 ARM::VLD2DUPq32EvenPseudo }; 4224 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 4225 ARM::VLD2DUPq16OddPseudo, 4226 ARM::VLD2DUPq32OddPseudo }; 4227 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 4228 DOpcodes, QOpcodes0, QOpcodes1); 4229 return; 4230 } 4231 4232 case Intrinsic::arm_neon_vld3dup: { 4233 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 4234 ARM::VLD3DUPd16Pseudo, 4235 ARM::VLD3DUPd32Pseudo, 4236 ARM::VLD1d64TPseudo }; 4237 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4238 ARM::VLD3DUPq16EvenPseudo, 4239 ARM::VLD3DUPq32EvenPseudo }; 4240 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 4241 ARM::VLD3DUPq16OddPseudo, 4242 ARM::VLD3DUPq32OddPseudo }; 4243 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 4244 DOpcodes, QOpcodes0, QOpcodes1); 4245 return; 4246 } 4247 4248 case Intrinsic::arm_neon_vld4dup: { 4249 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 4250 ARM::VLD4DUPd16Pseudo, 4251 ARM::VLD4DUPd32Pseudo, 4252 ARM::VLD1d64QPseudo }; 4253 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4254 ARM::VLD4DUPq16EvenPseudo, 4255 ARM::VLD4DUPq32EvenPseudo }; 4256 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 4257 ARM::VLD4DUPq16OddPseudo, 4258 ARM::VLD4DUPq32OddPseudo }; 4259 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 4260 DOpcodes, QOpcodes0, QOpcodes1); 4261 return; 4262 } 4263 4264 case Intrinsic::arm_neon_vld2lane: { 4265 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 4266 ARM::VLD2LNd16Pseudo, 4267 ARM::VLD2LNd32Pseudo }; 4268 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 4269 ARM::VLD2LNq32Pseudo }; 4270 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 4271 return; 4272 } 4273 4274 
case Intrinsic::arm_neon_vld3lane: { 4275 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 4276 ARM::VLD3LNd16Pseudo, 4277 ARM::VLD3LNd32Pseudo }; 4278 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 4279 ARM::VLD3LNq32Pseudo }; 4280 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 4281 return; 4282 } 4283 4284 case Intrinsic::arm_neon_vld4lane: { 4285 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 4286 ARM::VLD4LNd16Pseudo, 4287 ARM::VLD4LNd32Pseudo }; 4288 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 4289 ARM::VLD4LNq32Pseudo }; 4290 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 4291 return; 4292 } 4293 4294 case Intrinsic::arm_neon_vst1: { 4295 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 4296 ARM::VST1d32, ARM::VST1d64 }; 4297 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 4298 ARM::VST1q32, ARM::VST1q64 }; 4299 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 4300 return; 4301 } 4302 4303 case Intrinsic::arm_neon_vst1x2: { 4304 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 4305 ARM::VST1q32, ARM::VST1q64 }; 4306 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 4307 ARM::VST1d16QPseudo, 4308 ARM::VST1d32QPseudo, 4309 ARM::VST1d64QPseudo }; 4310 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 4311 return; 4312 } 4313 4314 case Intrinsic::arm_neon_vst1x3: { 4315 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 4316 ARM::VST1d16TPseudo, 4317 ARM::VST1d32TPseudo, 4318 ARM::VST1d64TPseudo }; 4319 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4320 ARM::VST1q16LowTPseudo_UPD, 4321 ARM::VST1q32LowTPseudo_UPD, 4322 ARM::VST1q64LowTPseudo_UPD }; 4323 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 4324 ARM::VST1q16HighTPseudo, 4325 ARM::VST1q32HighTPseudo, 4326 ARM::VST1q64HighTPseudo }; 4327 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4328 return; 4329 } 4330 4331 case 
Intrinsic::arm_neon_vst1x4: { 4332 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 4333 ARM::VST1d16QPseudo, 4334 ARM::VST1d32QPseudo, 4335 ARM::VST1d64QPseudo }; 4336 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4337 ARM::VST1q16LowQPseudo_UPD, 4338 ARM::VST1q32LowQPseudo_UPD, 4339 ARM::VST1q64LowQPseudo_UPD }; 4340 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 4341 ARM::VST1q16HighQPseudo, 4342 ARM::VST1q32HighQPseudo, 4343 ARM::VST1q64HighQPseudo }; 4344 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4345 return; 4346 } 4347 4348 case Intrinsic::arm_neon_vst2: { 4349 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 4350 ARM::VST2d32, ARM::VST1q64 }; 4351 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 4352 ARM::VST2q32Pseudo }; 4353 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 4354 return; 4355 } 4356 4357 case Intrinsic::arm_neon_vst3: { 4358 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 4359 ARM::VST3d16Pseudo, 4360 ARM::VST3d32Pseudo, 4361 ARM::VST1d64TPseudo }; 4362 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4363 ARM::VST3q16Pseudo_UPD, 4364 ARM::VST3q32Pseudo_UPD }; 4365 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 4366 ARM::VST3q16oddPseudo, 4367 ARM::VST3q32oddPseudo }; 4368 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4369 return; 4370 } 4371 4372 case Intrinsic::arm_neon_vst4: { 4373 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 4374 ARM::VST4d16Pseudo, 4375 ARM::VST4d32Pseudo, 4376 ARM::VST1d64QPseudo }; 4377 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 4378 ARM::VST4q16Pseudo_UPD, 4379 ARM::VST4q32Pseudo_UPD }; 4380 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 4381 ARM::VST4q16oddPseudo, 4382 ARM::VST4q32oddPseudo }; 4383 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4384 return; 4385 } 4386 4387 case Intrinsic::arm_neon_vst2lane: { 4388 static 
const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 4389 ARM::VST2LNd16Pseudo, 4390 ARM::VST2LNd32Pseudo }; 4391 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 4392 ARM::VST2LNq32Pseudo }; 4393 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 4394 return; 4395 } 4396 4397 case Intrinsic::arm_neon_vst3lane: { 4398 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 4399 ARM::VST3LNd16Pseudo, 4400 ARM::VST3LNd32Pseudo }; 4401 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 4402 ARM::VST3LNq32Pseudo }; 4403 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 4404 return; 4405 } 4406 4407 case Intrinsic::arm_neon_vst4lane: { 4408 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 4409 ARM::VST4LNd16Pseudo, 4410 ARM::VST4LNd32Pseudo }; 4411 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 4412 ARM::VST4LNq32Pseudo }; 4413 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 4414 return; 4415 } 4416 4417 case Intrinsic::arm_mve_vldr_gather_base_wb: 4418 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: { 4419 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre, 4420 ARM::MVE_VLDRDU64_qi_pre}; 4421 SelectMVE_WB(N, Opcodes, 4422 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated); 4423 return; 4424 } 4425 4426 case Intrinsic::arm_mve_vld2q: { 4427 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8}; 4428 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4429 ARM::MVE_VLD21_16}; 4430 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4431 ARM::MVE_VLD21_32}; 4432 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4433 SelectMVE_VLD(N, 2, Opcodes); 4434 return; 4435 } 4436 4437 case Intrinsic::arm_mve_vld4q: { 4438 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4439 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8}; 4440 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4441 ARM::MVE_VLD42_16, 4442 
ARM::MVE_VLD43_16}; 4443 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4444 ARM::MVE_VLD42_32, 4445 ARM::MVE_VLD43_32}; 4446 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4447 SelectMVE_VLD(N, 4, Opcodes); 4448 return; 4449 } 4450 } 4451 break; 4452 } 4453 4454 case ISD::INTRINSIC_WO_CHAIN: { 4455 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 4456 switch (IntNo) { 4457 default: 4458 break; 4459 4460 case Intrinsic::arm_mve_urshrl: 4461 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false); 4462 return; 4463 case Intrinsic::arm_mve_uqshll: 4464 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false); 4465 return; 4466 case Intrinsic::arm_mve_srshrl: 4467 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false); 4468 return; 4469 case Intrinsic::arm_mve_sqshll: 4470 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false); 4471 return; 4472 case Intrinsic::arm_mve_uqrshll: 4473 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true); 4474 return; 4475 case Intrinsic::arm_mve_sqrshrl: 4476 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true); 4477 return; 4478 case Intrinsic::arm_mve_lsll: 4479 SelectMVE_LongShift(N, ARM::MVE_LSLLr, false, false); 4480 return; 4481 case Intrinsic::arm_mve_asrl: 4482 SelectMVE_LongShift(N, ARM::MVE_ASRLr, false, false); 4483 return; 4484 4485 case Intrinsic::arm_mve_vadc: 4486 case Intrinsic::arm_mve_vadc_predicated: 4487 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true, 4488 IntNo == Intrinsic::arm_mve_vadc_predicated); 4489 return; 4490 4491 case Intrinsic::arm_mve_vmlldava: 4492 case Intrinsic::arm_mve_vmlldava_predicated: { 4493 static const uint16_t OpcodesU[] = { 4494 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32, 4495 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32, 4496 }; 4497 static const uint16_t OpcodesS[] = { 4498 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32, 4499 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32, 4500 ARM::MVE_VMLALDAVxs16, 
                            ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      // Unsigned forms have no exchanging/subtracting variants, hence the
      // much shorter opcode table than the signed one below.
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  // No custom selection applied: fall back to the TableGen-generated matcher.
  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
// and obtain the integer operands from them, adding these operands to the
// provided vector. If the string does not have this colon-separated shape
// (Fields.size() <= 1), Ops is left untouched and callers fall through to
// the named-register paths.
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      // getAsInteger() returns true on failure, hence the negation; on
      // failure IntField is unmodified (possibly uninitialized), but that is
      // only reachable when the assert below would fire.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    // NOTE(review): malformed field strings are only diagnosed in +Asserts
    // builds; release builds would silently encode garbage operands.
    assert(AllIntFields &&
            "Unexpected non-integer value in special register string.");
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
// mask operand, which expresses which register is to be used, e.g. r8, and in
// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
// was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  // Lookup table is TableGen-generated; names are stored lowercase, so
  // normalize the query the same way.
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Maps MClass special registers string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  // Reject registers that either don't exist or aren't available with the
  // current subtarget's feature set (e.g. MVE-only or security-extension
  // registers on cores lacking them).
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

// Map an A/R-class special register name plus its flags suffix to the mask
// operand used by the MSR instruction. Returns -1 if the combination is
// invalid.
static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contains the fields to be accessed in the special register, set by
  // the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc"
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0; // unknown flag character -> rejected below
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns false when the register string is not recognized, leaving the node
// for generic handling.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // NOTE(review): both dyn_cast results are dereferenced without null checks;
  // this relies on the IR verifier guaranteeing the intrinsic's metadata
  // operand shape — confirm before reusing this pattern elsewhere.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the standard predicate (AL, no pred-reg) and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  // All remaining matches are done on the lower-cased name.
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    // MVFR2 only exists from FP-ARMv8 (ARMv8-A) onwards.
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  // apsr reads as an alias of cpsr here, so both lower to the same MRS form.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string: let generic lowering report the problem.
  return false;
}

// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
// Mirrors tryReadRegister above; returns false if the string is not
// recognized.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // NOTE(review): as in tryReadRegister, the dyn_cast results are used
  // unchecked — assumed valid by construction of the intrinsic call.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // Value to write goes after coprocessor number and opc1.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      // 64-bit write: both halves of the value are separate operands.
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split "<reg>_<flags>" on the LAST underscore: everything after it is the
  // flags suffix (may be empty when no underscore is present).
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    // Note: validated against the full (unsplit) register string, since
    // M-class system register names may themselves contain underscores.
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Rewrite an INLINEASM node so that any "%r" constraint covering a 64-bit
// value (two consecutive GPR operands) uses a single GPRPair virtual register
// instead. Returns false (leaving N untouched) if nothing needed rewriting.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
  // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // OpChanged records, per register operand group, whether it was rewritten
  // to a GPRPair; tied uses consult it via DefIdx below.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Only rewrite untied (or tied-to-rewritten) GPR-class groups of exactly
    // two registers, i.e. a 64-bit value constrained by "%r".
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user so it consumes the glue produced by the
      // new copy chain instead of the asm node directly.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  // Re-attach the (possibly updated) glue operand last.
  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  // NOTE(review): presumably -1 marks the node as newly created for the
  // selection worklist — confirm against SelectionDAGISel's NodeId usage.
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}


// Expand an inline-asm memory constraint: every supported constraint is
// satisfied by simply requiring the address in a register. Returns false on
// success (per the SelectionDAGISel convention for this hook).
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true; // unreachable: all paths return inside the switch
}

/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}