1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the ARM target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ARM.h" 14 #include "ARMBaseInstrInfo.h" 15 #include "ARMTargetMachine.h" 16 #include "MCTargetDesc/ARMAddressingModes.h" 17 #include "Utils/ARMBaseInfo.h" 18 #include "llvm/ADT/APSInt.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/CodeGen/SelectionDAG.h" 25 #include "llvm/CodeGen/SelectionDAGISel.h" 26 #include "llvm/CodeGen/TargetLowering.h" 27 #include "llvm/IR/CallingConv.h" 28 #include "llvm/IR/Constants.h" 29 #include "llvm/IR/DerivedTypes.h" 30 #include "llvm/IR/Function.h" 31 #include "llvm/IR/Intrinsics.h" 32 #include "llvm/IR/IntrinsicsARM.h" 33 #include "llvm/IR/LLVMContext.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Target/TargetOptions.h" 38 39 using namespace llvm; 40 41 #define DEBUG_TYPE "arm-isel" 42 43 static cl::opt<bool> 44 DisableShifterOp("disable-shifter-op", cl::Hidden, 45 cl::desc("Disable isel of shifter-op"), 46 cl::init(false)); 47 48 //===--------------------------------------------------------------------===// 49 /// ARMDAGToDAGISel - ARM specific code to select ARM machine 50 /// instructions for SelectionDAG operations. 
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  /// NOTE(review): left uninitialized by the constructor; it is set on every
  /// runOnMachineFunction call before any selection happens.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true as some complex patterns, like those that call
  /// canExtractShiftFromMul can modify the DAG in place.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    // As above, but only if the shifted value has a single use.
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  // Unpack a condition-code constant into a predicate operand plus the CPSR
  // register operand used by CMOV pseudo-instructions.
  // NOTE(review): N must be a ConstantSDNode (the cast asserts otherwise).
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  // Immediate-encodability predicates used by the generated matcher.
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
  ///                     the accumulator and the immediate operand, i.e. 0
  ///                     for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  // Reject constants that are not an exact multiple of Scale.
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

void ARMDAGToDAGISel::PreprocessISelDAG() {
  // The rewrite below produces a UBFX-shaped pattern, which needs v6t2.
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++;  // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // Canonicalize so that the AND-with-immediate (if any) is in N1.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 is free but others are not.
      // e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After stripping the trailing zeros the mask must be all-ones
    // (x & (x+1)) == 0 iff x is of the form 0...01...1.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

// Return true if folding the shift described by (ShOpcVal, ShAmt) into an
// addressing mode is worthwhile on this subtarget; on A9-like cores and
// Swift, a multi-use shift is only free in a few specific cases.
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of,
  // starting the search at MaxShift and working downwards.
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  // Move M before N in the topological order so that it is visited (and
  // therefore selected) in N's place.
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

// Match a value shifted by a constant amount as a so_reg shifter operand,
// returning the base register and the encoded (shift-op, amount) in Opc.
// NOTE(review): CheckProfitability is currently unused in this overload.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      // HandleSDNode keeps N alive across the in-place DAG mutation below.
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  // Only constant shift amounts are handled here.
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Match a value shifted by a register amount as a so_reg shifter operand,
// returning the base register, the shift-amount register, and the encoded
// shift operation in Opc.
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  // A constant shift amount is the immediate-shift form, handled by
  // SelectImmShifterOperand instead.
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  // OR is equivalent to ADD when the operands share no set bits.
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}

// Match a base register plus/minus a signed 12-bit immediate offset
// (the ARM LDRi12/STRi12 addressing form). Always succeeds, falling back
// to "base with zero offset".
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Look through ARMISD::Wrapper, except around target addresses that
    // must stay wrapped.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}



// Match a base register plus/minus a (possibly shifted) register offset,
// producing the encoded addressing-mode-2 operand in Opc.
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Match the offset of a pre/post-indexed load/store as a (possibly shifted)
// register, producing the encoded addressing-mode-2 operand in Opc.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  // Constant offsets in imm12 range are left for the immediate forms.
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Match the offset of a pre-indexed load/store as a plain signed immediate
// (the sign is applied directly to Val rather than encoded in an AM2 opcode).
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset,
                                                  SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


// Match the offset of a pre/post-indexed load/store as an immediate encoded
// into an addressing-mode-2 opcode (add/sub carried in the encoding).
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

// Match an address with no offset: the operand itself is the base.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

// Match an addressing-mode-3 address (base plus/minus register or 8-bit
// immediate offset). Always succeeds, falling back to reg+reg.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

// Match the offset of a pre/post-indexed load/store for addressing mode 3
// (register or unsigned 8-bit immediate offset).
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
968 Offset = CurDAG->getRegister(0, MVT::i32); 969 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op), 970 MVT::i32); 971 return true; 972 } 973 974 Offset = N; 975 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op), 976 MVT::i32); 977 return true; 978 } 979 980 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, 981 bool FP16) { 982 if (!CurDAG->isBaseWithConstantOffset(N)) { 983 Base = N; 984 if (N.getOpcode() == ISD::FrameIndex) { 985 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 986 Base = CurDAG->getTargetFrameIndex( 987 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 988 } else if (N.getOpcode() == ARMISD::Wrapper && 989 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 990 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 991 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 992 Base = N.getOperand(0); 993 } 994 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 995 SDLoc(N), MVT::i32); 996 return true; 997 } 998 999 // If the RHS is +/- imm8, fold into addr mode. 1000 int RHSC; 1001 const int Scale = FP16 ? 
2 : 4; 1002 1003 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) { 1004 Base = N.getOperand(0); 1005 if (Base.getOpcode() == ISD::FrameIndex) { 1006 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1007 Base = CurDAG->getTargetFrameIndex( 1008 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1009 } 1010 1011 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1012 if (RHSC < 0) { 1013 AddSub = ARM_AM::sub; 1014 RHSC = -RHSC; 1015 } 1016 1017 if (FP16) 1018 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC), 1019 SDLoc(N), MVT::i32); 1020 else 1021 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), 1022 SDLoc(N), MVT::i32); 1023 1024 return true; 1025 } 1026 1027 Base = N; 1028 1029 if (FP16) 1030 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0), 1031 SDLoc(N), MVT::i32); 1032 else 1033 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 1034 SDLoc(N), MVT::i32); 1035 1036 return true; 1037 } 1038 1039 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, 1040 SDValue &Base, SDValue &Offset) { 1041 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false); 1042 } 1043 1044 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N, 1045 SDValue &Base, SDValue &Offset) { 1046 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true); 1047 } 1048 1049 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, 1050 SDValue &Align) { 1051 Addr = N; 1052 1053 unsigned Alignment = 0; 1054 1055 MemSDNode *MemN = cast<MemSDNode>(Parent); 1056 1057 if (isa<LSBaseSDNode>(MemN) || 1058 ((MemN->getOpcode() == ARMISD::VST1_UPD || 1059 MemN->getOpcode() == ARMISD::VLD1_UPD) && 1060 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { 1061 // This case occurs only for VLD1-lane/dup and VST1-lane instructions. 1062 // The maximum alignment is equal to the memory size being referenced. 
1063 unsigned MMOAlign = MemN->getAlignment(); 1064 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; 1065 if (MMOAlign >= MemSize && MemSize > 1) 1066 Alignment = MemSize; 1067 } else { 1068 // All other uses of addrmode6 are for intrinsics. For now just record 1069 // the raw alignment value; it will be refined later based on the legal 1070 // alignment operands for the intrinsic. 1071 Alignment = MemN->getAlignment(); 1072 } 1073 1074 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); 1075 return true; 1076 } 1077 1078 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, 1079 SDValue &Offset) { 1080 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op); 1081 ISD::MemIndexedMode AM = LdSt->getAddressingMode(); 1082 if (AM != ISD::POST_INC) 1083 return false; 1084 Offset = N; 1085 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) { 1086 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) 1087 Offset = CurDAG->getRegister(0, MVT::i32); 1088 } 1089 return true; 1090 } 1091 1092 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, 1093 SDValue &Offset, SDValue &Label) { 1094 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { 1095 Offset = N.getOperand(0); 1096 SDValue N1 = N.getOperand(1); 1097 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), 1098 SDLoc(N), MVT::i32); 1099 return true; 1100 } 1101 1102 return false; 1103 } 1104 1105 1106 //===----------------------------------------------------------------------===// 1107 // Thumb Addressing Modes 1108 //===----------------------------------------------------------------------===// 1109 1110 static bool shouldUseZeroOffsetLdSt(SDValue N) { 1111 // Negative numbers are difficult to materialise in thumb1. If we are 1112 // selecting the add of a negative, instead try to select ri with a zero 1113 // offset, so create the add node directly which will become a sub. 
1114 if (N.getOpcode() != ISD::ADD) 1115 return false; 1116 1117 // Look for an imm which is not legal for ld/st, but is legal for sub. 1118 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) 1119 return C->getSExtValue() < 0 && C->getSExtValue() >= -255; 1120 1121 return false; 1122 } 1123 1124 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, 1125 SDValue &Offset) { 1126 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { 1127 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); 1128 if (!NC || !NC->isNullValue()) 1129 return false; 1130 1131 Base = Offset = N; 1132 return true; 1133 } 1134 1135 Base = N.getOperand(0); 1136 Offset = N.getOperand(1); 1137 return true; 1138 } 1139 1140 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base, 1141 SDValue &Offset) { 1142 if (shouldUseZeroOffsetLdSt(N)) 1143 return false; // Select ri instead 1144 return SelectThumbAddrModeRRSext(N, Base, Offset); 1145 } 1146 1147 bool 1148 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1149 SDValue &Base, SDValue &OffImm) { 1150 if (shouldUseZeroOffsetLdSt(N)) { 1151 Base = N; 1152 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1153 return true; 1154 } 1155 1156 if (!CurDAG->isBaseWithConstantOffset(N)) { 1157 if (N.getOpcode() == ISD::ADD) { 1158 return false; // We want to select register offset instead 1159 } else if (N.getOpcode() == ARMISD::Wrapper && 1160 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1161 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1162 N.getOperand(0).getOpcode() != ISD::TargetConstantPool && 1163 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1164 Base = N.getOperand(0); 1165 } else { 1166 Base = N; 1167 } 1168 1169 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1170 return true; 1171 } 1172 1173 // If the RHS is + imm5 * scale, fold into addr mode. 
1174 int RHSC; 1175 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1176 Base = N.getOperand(0); 1177 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1178 return true; 1179 } 1180 1181 // Offset is too large, so use register offset instead. 1182 return false; 1183 } 1184 1185 bool 1186 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1187 SDValue &OffImm) { 1188 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1189 } 1190 1191 bool 1192 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1193 SDValue &OffImm) { 1194 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1195 } 1196 1197 bool 1198 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1199 SDValue &OffImm) { 1200 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1201 } 1202 1203 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1204 SDValue &Base, SDValue &OffImm) { 1205 if (N.getOpcode() == ISD::FrameIndex) { 1206 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1207 // Only multiples of 4 are allowed for the offset, so the frame object 1208 // alignment must be at least 4. 1209 MachineFrameInfo &MFI = MF->getFrameInfo(); 1210 if (MFI.getObjectAlign(FI) < Align(4)) 1211 MFI.setObjectAlignment(FI, Align(4)); 1212 Base = CurDAG->getTargetFrameIndex( 1213 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1214 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1215 return true; 1216 } 1217 1218 if (!CurDAG->isBaseWithConstantOffset(N)) 1219 return false; 1220 1221 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { 1222 // If the RHS is + imm8 * scale, fold into addr mode. 1223 int RHSC; 1224 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1225 Base = N.getOperand(0); 1226 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1227 // Make sure the offset is inside the object, or we might fail to 1228 // allocate an emergency spill slot. 
(An out-of-range access is UB, but 1229 // it could show up anyway.) 1230 MachineFrameInfo &MFI = MF->getFrameInfo(); 1231 if (RHSC * 4 < MFI.getObjectSize(FI)) { 1232 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1233 // indexed by the LHS must be 4-byte aligned. 1234 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4)) 1235 MFI.setObjectAlignment(FI, Align(4)); 1236 if (MFI.getObjectAlign(FI) >= Align(4)) { 1237 Base = CurDAG->getTargetFrameIndex( 1238 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1239 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1240 return true; 1241 } 1242 } 1243 } 1244 } 1245 1246 return false; 1247 } 1248 1249 template <unsigned Shift> 1250 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, 1251 SDValue &OffImm) { 1252 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1253 int RHSC; 1254 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1255 RHSC)) { 1256 Base = N.getOperand(0); 1257 if (N.getOpcode() == ISD::SUB) 1258 RHSC = -RHSC; 1259 OffImm = 1260 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1261 return true; 1262 } 1263 } 1264 1265 // Base only. 1266 Base = N; 1267 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1268 return true; 1269 } 1270 1271 1272 //===----------------------------------------------------------------------===// 1273 // Thumb 2 Addressing Modes 1274 //===----------------------------------------------------------------------===// 1275 1276 1277 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1278 SDValue &Base, SDValue &OffImm) { 1279 // Match simple R + imm12 operands. 1280 1281 // Base only. 1282 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1283 !CurDAG->isBaseWithConstantOffset(N)) { 1284 if (N.getOpcode() == ISD::FrameIndex) { 1285 // Match frame index. 
1286 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1287 Base = CurDAG->getTargetFrameIndex( 1288 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1289 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1290 return true; 1291 } 1292 1293 if (N.getOpcode() == ARMISD::Wrapper && 1294 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1295 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1296 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1297 Base = N.getOperand(0); 1298 if (Base.getOpcode() == ISD::TargetConstantPool) 1299 return false; // We want to select t2LDRpci instead. 1300 } else 1301 Base = N; 1302 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1303 return true; 1304 } 1305 1306 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1307 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1308 // Let t2LDRi8 handle (R - imm8). 1309 return false; 1310 1311 int RHSC = (int)RHS->getZExtValue(); 1312 if (N.getOpcode() == ISD::SUB) 1313 RHSC = -RHSC; 1314 1315 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1316 Base = N.getOperand(0); 1317 if (Base.getOpcode() == ISD::FrameIndex) { 1318 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1319 Base = CurDAG->getTargetFrameIndex( 1320 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1321 } 1322 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1323 return true; 1324 } 1325 } 1326 1327 // Base only. 
1328 Base = N; 1329 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1330 return true; 1331 } 1332 1333 template <unsigned Shift> 1334 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, 1335 SDValue &OffImm) { 1336 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1337 int RHSC; 1338 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) { 1339 Base = N.getOperand(0); 1340 if (Base.getOpcode() == ISD::FrameIndex) { 1341 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1342 Base = CurDAG->getTargetFrameIndex( 1343 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1344 } 1345 1346 if (N.getOpcode() == ISD::SUB) 1347 RHSC = -RHSC; 1348 OffImm = 1349 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1350 return true; 1351 } 1352 } 1353 1354 // Base only. 1355 Base = N; 1356 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1357 return true; 1358 } 1359 1360 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1361 SDValue &Base, SDValue &OffImm) { 1362 // Match simple R - imm8 operands. 
1363 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1364 !CurDAG->isBaseWithConstantOffset(N)) 1365 return false; 1366 1367 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1368 int RHSC = (int)RHS->getSExtValue(); 1369 if (N.getOpcode() == ISD::SUB) 1370 RHSC = -RHSC; 1371 1372 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1373 Base = N.getOperand(0); 1374 if (Base.getOpcode() == ISD::FrameIndex) { 1375 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1376 Base = CurDAG->getTargetFrameIndex( 1377 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1378 } 1379 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1380 return true; 1381 } 1382 } 1383 1384 return false; 1385 } 1386 1387 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1388 SDValue &OffImm){ 1389 unsigned Opcode = Op->getOpcode(); 1390 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1391 ? cast<LoadSDNode>(Op)->getAddressingMode() 1392 : cast<StoreSDNode>(Op)->getAddressingMode(); 1393 int RHSC; 1394 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1395 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1396 ? 
CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) 1397 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1398 return true; 1399 } 1400 1401 return false; 1402 } 1403 1404 template <unsigned Shift> 1405 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base, 1406 SDValue &OffImm) { 1407 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1408 int RHSC; 1409 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1410 RHSC)) { 1411 Base = N.getOperand(0); 1412 if (Base.getOpcode() == ISD::FrameIndex) { 1413 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1414 Base = CurDAG->getTargetFrameIndex( 1415 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1416 } 1417 1418 if (N.getOpcode() == ISD::SUB) 1419 RHSC = -RHSC; 1420 OffImm = 1421 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1422 return true; 1423 } 1424 } 1425 1426 // Base only. 1427 Base = N; 1428 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1429 return true; 1430 } 1431 1432 template <unsigned Shift> 1433 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1434 SDValue &OffImm) { 1435 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); 1436 } 1437 1438 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1439 SDValue &OffImm, 1440 unsigned Shift) { 1441 unsigned Opcode = Op->getOpcode(); 1442 ISD::MemIndexedMode AM; 1443 switch (Opcode) { 1444 case ISD::LOAD: 1445 AM = cast<LoadSDNode>(Op)->getAddressingMode(); 1446 break; 1447 case ISD::STORE: 1448 AM = cast<StoreSDNode>(Op)->getAddressingMode(); 1449 break; 1450 case ISD::MLOAD: 1451 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode(); 1452 break; 1453 case ISD::MSTORE: 1454 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode(); 1455 break; 1456 default: 1457 llvm_unreachable("Unexpected Opcode for Imm7Offset"); 1458 } 1459 1460 int RHSC; 1461 // 7 bit constant, shifted by Shift. 
1462 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { 1463 OffImm = 1464 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1465 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) 1466 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), 1467 MVT::i32); 1468 return true; 1469 } 1470 return false; 1471 } 1472 1473 template <int Min, int Max> 1474 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) { 1475 int Val; 1476 if (isScaledConstantInRange(N, 1, Min, Max, Val)) { 1477 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32); 1478 return true; 1479 } 1480 return false; 1481 } 1482 1483 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1484 SDValue &Base, 1485 SDValue &OffReg, SDValue &ShImm) { 1486 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1487 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1488 return false; 1489 1490 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 1491 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1492 int RHSC = (int)RHS->getZExtValue(); 1493 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1494 return false; 1495 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1496 return false; 1497 } 1498 1499 // Look for (R + R) or (R + (R << [1,2,3])). 1500 unsigned ShAmt = 0; 1501 Base = N.getOperand(0); 1502 OffReg = N.getOperand(1); 1503 1504 // Swap if it is ((R << c) + R). 1505 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1506 if (ShOpcVal != ARM_AM::lsl) { 1507 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1508 if (ShOpcVal == ARM_AM::lsl) 1509 std::swap(Base, OffReg); 1510 } 1511 1512 if (ShOpcVal == ARM_AM::lsl) { 1513 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1514 // it. 
1515 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1516 ShAmt = Sh->getZExtValue(); 1517 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1518 OffReg = OffReg.getOperand(0); 1519 else { 1520 ShAmt = 0; 1521 } 1522 } 1523 } 1524 1525 // If OffReg is a multiply-by-constant and it's profitable to extract a shift 1526 // and use it in a shifted operand do so. 1527 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { 1528 unsigned PowerOfTwo = 0; 1529 SDValue NewMulConst; 1530 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { 1531 HandleSDNode Handle(OffReg); 1532 replaceDAGValue(OffReg.getOperand(1), NewMulConst); 1533 OffReg = Handle.getValue(); 1534 ShAmt = PowerOfTwo; 1535 } 1536 } 1537 1538 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1539 1540 return true; 1541 } 1542 1543 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1544 SDValue &OffImm) { 1545 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1546 // instructions. 1547 Base = N; 1548 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1549 1550 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1551 return true; 1552 1553 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1554 if (!RHS) 1555 return true; 1556 1557 uint32_t RHSC = (int)RHS->getZExtValue(); 1558 if (RHSC > 1020 || RHSC % 4 != 0) 1559 return true; 1560 1561 Base = N.getOperand(0); 1562 if (Base.getOpcode() == ISD::FrameIndex) { 1563 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1564 Base = CurDAG->getTargetFrameIndex( 1565 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1566 } 1567 1568 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1569 return true; 1570 } 1571 1572 //===--------------------------------------------------------------------===// 1573 1574 /// getAL - Returns a ARMCC::AL immediate node. 
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

/// Copy the memory operand of N onto the machine node Result so alias
/// analysis and scheduling keep working on the selected instruction.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

/// Try to select an ARM-mode pre/post-indexed load for N. On success the
/// load is replaced with the matching LDR*/LDRB*/LDRH*/LDRSB*/LDRSH*
/// machine node and true is returned.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // i16 accesses use addrmode3, which covers both sign- and zero-extending
    // halfword loads.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      // Sign-extending byte loads only exist in addrmode3.
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      // Zero-extending byte loads use addrmode2, mirroring the i32 cases
      // above.
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The *_PRE_IMM forms take no separate offset-register operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

/// Try to select a Thumb1 post-incremented i32 load (base += 4 only).
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

/// Try to select a Thumb2 pre/post-indexed load using the 8-bit offset form.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

/// Try to select an MVE pre/post-indexed vector load (plain or masked/VPT
/// predicated) into one of the MVE_VLDR*_pre/_post instructions.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    // Unmasked load: selected as an unpredicated MVE load.
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    // Masked load: the mask becomes the VPT predicate register.
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // Pick the narrowest element width whose imm7 offset form matches; the
  // Shift argument to SelectT2AddrModeImm7Offset is log2(element bytes).
  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base, NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  // The machine node's results are (writeback, value, chain) while the load's
  // are (value, writeback, chain), so the first two uses are swapped.
  transferMemOperands(N, New);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  // REG_SEQUENCE operands: regclass, then (value, subreg-index) pairs.
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                   SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                    V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers.
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
                                       unsigned NumVecs, bool is64BitVector) {
  // Number of D registers touched; Q vectors use two D registers each
  // (only scaled here for NumVecs < 3 — presumably VLD3/VLD4 of Q regs are
  // split elsewhere; verify against callers).
  unsigned NumRegs = NumVecs;
  if (!is64BitVector && NumVecs < 3)
    NumRegs *= 2;

  // Clamp the requested alignment down to the largest value the instruction
  // encoding supports for this register count; 0 means "no alignment
  // specified".
  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
  if (Alignment >= 32 && NumRegs == 4)
    Alignment = 32;
  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
    Alignment = 16;
  else if (Alignment >= 8)
    Alignment = 8;
  else
    Alignment = 0;

  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
}

/// Returns true if Opc is a fixed-stride writeback VLD opcode (the "[rN]!"
/// form, which has no register-increment operand).
static bool isVLDfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VLD1d8wb_fixed : return true;
  case ARM::VLD1d16wb_fixed : return true;
  case ARM::VLD1d64Qwb_fixed : return true;
  case ARM::VLD1d32wb_fixed : return true;
  case ARM::VLD1d64wb_fixed : return true;
  case ARM::VLD1d8TPseudoWB_fixed : return true;
  case ARM::VLD1d16TPseudoWB_fixed : return true;
  case ARM::VLD1d32TPseudoWB_fixed : return true;
  case ARM::VLD1d64TPseudoWB_fixed : return true;
  case ARM::VLD1d8QPseudoWB_fixed : return true;
  case ARM::VLD1d16QPseudoWB_fixed : return true;
  case ARM::VLD1d32QPseudoWB_fixed : return true;
  case ARM::VLD1d64QPseudoWB_fixed : return true;
  case ARM::VLD1q8wb_fixed : return true;
  case ARM::VLD1q16wb_fixed : return true;
  case ARM::VLD1q32wb_fixed : return true;
  case ARM::VLD1q64wb_fixed : return true;
  case ARM::VLD1DUPd8wb_fixed : return true;
  case ARM::VLD1DUPd16wb_fixed : return true;
  case ARM::VLD1DUPd32wb_fixed : return true;
  case ARM::VLD1DUPq8wb_fixed : return true;
  case ARM::VLD1DUPq16wb_fixed : return true;
  case ARM::VLD1DUPq32wb_fixed : return true;
  case ARM::VLD2d8wb_fixed : return true;
  case ARM::VLD2d16wb_fixed : return true;
  case ARM::VLD2d32wb_fixed : return true;
  case ARM::VLD2q8PseudoWB_fixed : return true;
  case ARM::VLD2q16PseudoWB_fixed : return true;
  case ARM::VLD2q32PseudoWB_fixed : return true;
  case ARM::VLD2DUPd8wb_fixed : return true;
  case ARM::VLD2DUPd16wb_fixed : return true;
  case ARM::VLD2DUPd32wb_fixed : return true;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
  }
}

/// Returns true if Opc is a fixed-stride writeback VST opcode (the "[rN]!"
/// form, which has no register-increment operand).
static bool isVSTfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VST1d8wb_fixed : return true;
  case ARM::VST1d16wb_fixed : return true;
  case ARM::VST1d32wb_fixed : return true;
  case ARM::VST1d64wb_fixed : return true;
  case ARM::VST1q8wb_fixed : return true;
  case ARM::VST1q16wb_fixed : return true;
  case ARM::VST1q32wb_fixed : return true;
  case ARM::VST1q64wb_fixed : return true;
  case ARM::VST1d8TPseudoWB_fixed : return true;
  case ARM::VST1d16TPseudoWB_fixed : return true;
  case ARM::VST1d32TPseudoWB_fixed : return true;
  case ARM::VST1d64TPseudoWB_fixed : return true;
  case ARM::VST1d8QPseudoWB_fixed : return true;
  case ARM::VST1d16QPseudoWB_fixed : return true;
  case ARM::VST1d32QPseudoWB_fixed : return true;
  case ARM::VST1d64QPseudoWB_fixed : return true;
  case ARM::VST2d8wb_fixed : return true;
  case ARM::VST2d16wb_fixed : return true;
  case ARM::VST2d32wb_fixed : return true;
  case ARM::VST2q8PseudoWB_fixed : return true;
  case ARM::VST2q16PseudoWB_fixed : return true;
  case ARM::VST2q32PseudoWB_fixed : return true;
  }
}

// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  auto C = dyn_cast<ConstantSDNode>(Inc);
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}

/// Select a NEON VLDn node. DOpcodes/QOpcodes0/QOpcodes1 are tables of
/// machine opcodes indexed by element size (8/16/32/64); DOpcodes covers
/// 64-bit vectors, QOpcodes0/QOpcodes1 the (possibly split) 128-bit case.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element type onto an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // The machine node produces one wide result covering all NumVecs vectors
  // (3 vectors are rounded up to 4 registers).
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
        // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
        // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Select a NEON VSTn node. Opcode tables mirror SelectVLD: DOpcodes for
/// 64-bit vectors, QOpcodes0/QOpcodes1 for the (possibly split) 128-bit case.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element type onto an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}

/// Select a NEON single-lane load or store (VLDn/VSTn lane forms).
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Compute the legal alignment operand: capped at the total transfer size,
  // a power of two, and 0 when no useful alignment can be encoded.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the element type onto an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
      isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Bundle the input vectors into one super-register for the machine node.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Append an MVE "then" (VPT) predicate operand pair to Ops.
template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
}

/// Append an MVE "then" predicate operand pair plus the inactive-lanes value.
template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask,
                                           SDValue Inactive) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(Inactive);
}

/// Append the unpredicated (ARMVCC::None) MVE predicate operands.
template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
}

/// Append the unpredicated MVE predicate operands plus an undef
/// inactive-lanes value of the given type.
template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                                EVT InactiveTy) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}

/// Select an MVE writeback (base-update) gather/load intrinsic node.
/// Opcodes[0]/Opcodes[1] are the 32- and 64-bit-element opcodes.
void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  uint16_t Opcode;
  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(N->getOperand(2)); // vector of base addresses

  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(N->getOperand(0)); // chain

  // Result order on the machine node is swapped relative to N (value types 1
  // and 0 exchange places), hence the crossed ReplaceUses below.
  SmallVector<EVT, 8> VTs;
  VTs.push_back(N->getValueType(1));
  VTs.push_back(N->getValueType(0));
  VTs.push_back(N->getValueType(2));

  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  transferMemOperands(N, New);
  CurDAG->RemoveDeadNode(N);
}

/// Select an MVE 64-bit scalar shift (two 32-bit halves), optionally with an
/// immediate shift count and an immediate saturation operand.
void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
                                          bool Immediate,
                                          bool HasSaturationOperand) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // Two 32-bit halves of the value to be shifted
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));

  // The shift count
  if (Immediate) {
    int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
  } else {
    Ops.push_back(N->getOperand(3));
  }

  // The immediate saturation operand, if any
  if (HasSaturationOperand) {
    int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
    int SatBit = (SatOp == 64 ? 0 : 1);
    Ops.push_back(getI32Imm(SatBit, Loc));
  }

  // MVE scalar shifts are IT-predicable, so include the standard
  // predicate arguments.
  Ops.push_back(getAL(CurDAG, Loc));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

/// Select an MVE VADC/VSBC. When the incoming carry is a constant with the
/// expected value (bit 29: clear for add, set for subtract), the
/// carry-initializing variant (VADCI/VSBCI) is used and the carry operand is
/// dropped.
void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  uint16_t Opcode;

  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(N->getOperand(FirstInputOp));
  Ops.push_back(N->getOperand(FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
  uint32_t CarryMask = 1 << 29;
  uint32_t CarryExpected = Add ? 0 : CarryMask;
  if (CarryInConstant &&
      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
    Opcode = OpcodeWithNoCarry;
  } else {
    Ops.push_back(CarryIn);
    Opcode = OpcodeWithCarry;
  }

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc,
                         N->getOperand(FirstInputOp + 3), // predicate
                         N->getOperand(FirstInputOp - 1)); // inactive
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

/// Select an MVE VSHLC (whole-vector left shift with carry).
void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // One vector input, followed by a 32-bit word of bits to shift in
  // and then an immediate shift count
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));
  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops));
}

/// Convert a compile-time-constant SDValue holding 0 or 1 into a bool.
static bool SDValueToConstBool(SDValue SDVal) {
  assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
  ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
  uint64_t Value = SDValConstant->getZExtValue();
  assert((Value == 0 || Value == 1) && "expected value 0 or 1");
  return Value;
}

void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
  bool IsSub = SDValueToConstBool(N->getOperand(2));
  bool IsExchange = SDValueToConstBool(N->getOperand(3));
  if (IsUnsigned) {
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  auto OpIsZero = [N](size_t OpNo) {
    if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
      if (OpConst->getZExtValue() == 0)
        return true;
    return false;
  };

  // If the input accumulator value is not zero, select an instruction with
  // accumulator, otherwise select an instruction without accumulator
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  const uint16_t *Opcodes = IsUnsigned ?
OpcodesU : OpcodesS; 2721 if (IsSub) 2722 Opcodes += 4 * Stride; 2723 if (IsExchange) 2724 Opcodes += 2 * Stride; 2725 if (IsAccum) 2726 Opcodes += Stride; 2727 uint16_t Opcode = Opcodes[TySize]; 2728 2729 SDLoc Loc(N); 2730 SmallVector<SDValue, 8> Ops; 2731 // Push the accumulator operands, if they are used 2732 if (IsAccum) { 2733 Ops.push_back(N->getOperand(4)); 2734 Ops.push_back(N->getOperand(5)); 2735 } 2736 // Push the two vector operands 2737 Ops.push_back(N->getOperand(6)); 2738 Ops.push_back(N->getOperand(7)); 2739 2740 if (Predicated) 2741 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8)); 2742 else 2743 AddEmptyMVEPredicateToOps(Ops, Loc); 2744 2745 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2746 } 2747 2748 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated, 2749 const uint16_t *OpcodesS, 2750 const uint16_t *OpcodesU) { 2751 EVT VecTy = N->getOperand(6).getValueType(); 2752 size_t SizeIndex; 2753 switch (VecTy.getVectorElementType().getSizeInBits()) { 2754 case 16: 2755 SizeIndex = 0; 2756 break; 2757 case 32: 2758 SizeIndex = 1; 2759 break; 2760 default: 2761 llvm_unreachable("bad vector element size"); 2762 } 2763 2764 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex); 2765 } 2766 2767 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, 2768 const uint16_t *OpcodesS, 2769 const uint16_t *OpcodesU) { 2770 assert( 2771 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() == 2772 32 && 2773 "bad vector element size"); 2774 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0); 2775 } 2776 2777 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs, 2778 const uint16_t *const *Opcodes, 2779 bool HasWriteback) { 2780 EVT VT = N->getValueType(0); 2781 SDLoc Loc(N); 2782 2783 const uint16_t *OurOpcodes; 2784 switch (VT.getVectorElementType().getSizeInBits()) { 2785 case 8: 2786 OurOpcodes = Opcodes[0]; 2787 break; 2788 case 16: 2789 OurOpcodes 
= Opcodes[1]; 2790 break; 2791 case 32: 2792 OurOpcodes = Opcodes[2]; 2793 break; 2794 default: 2795 llvm_unreachable("bad vector element size in SelectMVE_VLD"); 2796 } 2797 2798 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2); 2799 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other}; 2800 unsigned PtrOperand = HasWriteback ? 1 : 2; 2801 2802 auto Data = SDValue( 2803 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0); 2804 SDValue Chain = N->getOperand(0); 2805 // Add a MVE_VLDn instruction for each Vec, except the last 2806 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) { 2807 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2808 auto LoadInst = 2809 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops); 2810 Data = SDValue(LoadInst, 0); 2811 Chain = SDValue(LoadInst, 1); 2812 transferMemOperands(N, LoadInst); 2813 } 2814 // The last may need a writeback on it 2815 if (HasWriteback) 2816 ResultTys = {DataTy, MVT::i32, MVT::Other}; 2817 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2818 auto LoadInst = 2819 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops); 2820 transferMemOperands(N, LoadInst); 2821 2822 unsigned i; 2823 for (i = 0; i < NumVecs; i++) 2824 ReplaceUses(SDValue(N, i), 2825 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, 2826 SDValue(LoadInst, 0))); 2827 if (HasWriteback) 2828 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1)); 2829 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 
2 : 1)); 2830 CurDAG->RemoveDeadNode(N); 2831 } 2832 2833 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, 2834 bool Wrapping, bool Predicated) { 2835 EVT VT = N->getValueType(0); 2836 SDLoc Loc(N); 2837 2838 uint16_t Opcode; 2839 switch (VT.getScalarSizeInBits()) { 2840 case 8: 2841 Opcode = Opcodes[0]; 2842 break; 2843 case 16: 2844 Opcode = Opcodes[1]; 2845 break; 2846 case 32: 2847 Opcode = Opcodes[2]; 2848 break; 2849 default: 2850 llvm_unreachable("bad vector element size in SelectMVE_VxDUP"); 2851 } 2852 2853 SmallVector<SDValue, 8> Ops; 2854 unsigned OpIdx = 1; 2855 2856 SDValue Inactive; 2857 if (Predicated) 2858 Inactive = N->getOperand(OpIdx++); 2859 2860 Ops.push_back(N->getOperand(OpIdx++)); // base 2861 if (Wrapping) 2862 Ops.push_back(N->getOperand(OpIdx++)); // limit 2863 2864 SDValue ImmOp = N->getOperand(OpIdx++); // step 2865 int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue(); 2866 Ops.push_back(getI32Imm(ImmValue, Loc)); 2867 2868 if (Predicated) 2869 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive); 2870 else 2871 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2872 2873 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2874 } 2875 2876 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode, 2877 size_t NumExtraOps, bool HasAccum) { 2878 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian(); 2879 SDLoc Loc(N); 2880 SmallVector<SDValue, 8> Ops; 2881 2882 unsigned OpIdx = 1; 2883 2884 // Convert and append the immediate operand designating the coprocessor. 2885 SDValue ImmCorpoc = N->getOperand(OpIdx++); 2886 uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue(); 2887 Ops.push_back(getI32Imm(ImmCoprocVal, Loc)); 2888 2889 // For accumulating variants copy the low and high order parts of the 2890 // accumulator into a register pair and add it to the operand vector. 
  if (HasAccum) {
    SDValue AccLo = N->getOperand(OpIdx++);
    SDValue AccHi = N->getOperand(OpIdx++);
    // On big-endian targets the high half occupies the first register of the
    // pair, so swap the two halves before forming it.
    if (IsBigEndian)
      std::swap(AccLo, AccHi);
    Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
  }

  // Copy extra operands as-is.
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(N->getOperand(OpIdx++));

  // Convert and append the immediate operand
  SDValue Imm = N->getOperand(OpIdx);
  uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
  Ops.push_back(getI32Imm(ImmVal, Loc));

  // Accumulating variants are IT-predicable, add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, Loc);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    Ops.push_back(Pred);
    Ops.push_back(PredReg);
  }

  // Create the CDE instruction
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the dual-register
  // CDE instruction is a register pair. We need to extract the two subregisters
  // and replace all uses of the original outputs with the extracted
  // subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  // Mirror the AccLo/AccHi swap above when mapping results back.
  if (IsBigEndian)
    std::swap(SubRegs[0], SubRegs[1]);

  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
                                                    MVT::i32, ResultPair);
    ReplaceUses(SDValue(N, ResIdx), SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}

/// Select a NEON load-and-duplicate (vld1dup..vld4dup) node or intrinsic,
/// optionally with address writeback. DOpcodes covers 64-bit destination
/// vectors; QOpcodes0/QOpcodes1 cover the 128-bit forms.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment operand to what the instruction can actually use
  // (capped at the total transfer size, zeroed when too small to matter).
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                  OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // NumVecs == 3 uses a 4-register result type (the last register unused).
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
                               : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                                : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else if (NumVecs == 2) {
    // Emit a preliminary vld-dup; only its chain is used, the main
    // instruction below re-loads the data.
    const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Chain = SDValue(VLdA, 1);
  } else {
    // NumVecs 3/4: the first instruction's result feeds the second as an
    // extra source operand.
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Ops.push_back(SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Try to lower a pair of adjacent INSERT_VECTOR_ELTs on v8f16/v8i16 with
/// VMOV/VMOVX/VINS instead of two scalar lane moves. Returns true if N was
/// replaced.
bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
  // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  // The inner insert must target an even lane and the outer one the lane
  // directly above it.
  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val1.getOperand(1)) &&
      isa<ConstantSDNode>(Val2.getOperand(1)) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into a f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
          NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
    // extracting odd lanes.
    if (VT == MVT::v8i16) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
3149 if (VT == MVT::v8f16) { 3150 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1); 3151 SDValue NewIns = 3152 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, 3153 Ins2.getOperand(0), SDValue(VINS, 0)); 3154 ReplaceUses(Ins1, NewIns); 3155 return true; 3156 } 3157 3158 return false; 3159 } 3160 3161 bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N, 3162 SDNode *FMul, 3163 bool IsUnsigned, 3164 bool FixedToFloat) { 3165 auto Type = N->getValueType(0); 3166 unsigned ScalarBits = Type.getScalarSizeInBits(); 3167 if (ScalarBits > 32) 3168 return false; 3169 3170 SDNodeFlags FMulFlags = FMul->getFlags(); 3171 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3172 // allowed in 16 bit unsigned floats 3173 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned) 3174 return false; 3175 3176 SDValue ImmNode = FMul->getOperand(1); 3177 SDValue VecVal = FMul->getOperand(0); 3178 if (VecVal->getOpcode() == ISD::UINT_TO_FP || 3179 VecVal->getOpcode() == ISD::SINT_TO_FP) 3180 VecVal = VecVal->getOperand(0); 3181 3182 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits) 3183 return false; 3184 3185 if (ImmNode.getOpcode() == ISD::BITCAST) { 3186 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3187 return false; 3188 ImmNode = ImmNode.getOperand(0); 3189 } 3190 3191 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3192 return false; 3193 3194 APFloat ImmAPF(0.0f); 3195 switch (ImmNode.getOpcode()) { 3196 case ARMISD::VMOVIMM: 3197 case ARMISD::VDUP: { 3198 if (!isa<ConstantSDNode>(ImmNode.getOperand(0))) 3199 return false; 3200 unsigned Imm = ImmNode.getConstantOperandVal(0); 3201 if (ImmNode.getOpcode() == ARMISD::VMOVIMM) 3202 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits); 3203 ImmAPF = 3204 APFloat(ScalarBits == 32 ? 
APFloat::IEEEsingle() : APFloat::IEEEhalf(), 3205 APInt(ScalarBits, Imm)); 3206 break; 3207 } 3208 case ARMISD::VMOVFPIMM: { 3209 ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0))); 3210 break; 3211 } 3212 default: 3213 return false; 3214 } 3215 3216 // Where n is the number of fractional bits, multiplying by 2^n will convert 3217 // from float to fixed and multiplying by 2^-n will convert from fixed to 3218 // float. Taking log2 of the factor (after taking the inverse in the case of 3219 // float to fixed) will give n. 3220 APFloat ToConvert = ImmAPF; 3221 if (FixedToFloat) { 3222 if (!ImmAPF.getExactInverse(&ToConvert)) 3223 return false; 3224 } 3225 APSInt Converted(64, 0); 3226 bool IsExact; 3227 ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, 3228 &IsExact); 3229 if (!IsExact || !Converted.isPowerOf2()) 3230 return false; 3231 3232 unsigned FracBits = Converted.logBase2(); 3233 if (FracBits > ScalarBits) 3234 return false; 3235 3236 SmallVector<SDValue, 3> Ops{ 3237 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)}; 3238 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type); 3239 3240 unsigned int Opcode; 3241 switch (ScalarBits) { 3242 case 16: 3243 if (FixedToFloat) 3244 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; 3245 else 3246 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3247 break; 3248 case 32: 3249 if (FixedToFloat) 3250 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; 3251 else 3252 Opcode = IsUnsigned ? 
ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3253 break; 3254 default: 3255 llvm_unreachable("unexpected number of scalar bits"); 3256 break; 3257 } 3258 3259 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops)); 3260 return true; 3261 } 3262 3263 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) { 3264 // Transform a floating-point to fixed-point conversion to a VCVT 3265 if (!Subtarget->hasMVEFloatOps()) 3266 return false; 3267 EVT Type = N->getValueType(0); 3268 if (!Type.isVector()) 3269 return false; 3270 unsigned int ScalarBits = Type.getScalarSizeInBits(); 3271 3272 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT; 3273 SDNode *Node = N->getOperand(0).getNode(); 3274 3275 // floating-point to fixed-point with one fractional bit gets turned into an 3276 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y)) 3277 if (Node->getOpcode() == ISD::FADD) { 3278 if (Node->getOperand(0) != Node->getOperand(1)) 3279 return false; 3280 SDNodeFlags Flags = Node->getFlags(); 3281 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3282 // allowed in 16 bit unsigned floats 3283 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned) 3284 return false; 3285 3286 unsigned Opcode; 3287 switch (ScalarBits) { 3288 case 16: 3289 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3290 break; 3291 case 32: 3292 Opcode = IsUnsigned ? 
ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3293 break; 3294 } 3295 SmallVector<SDValue, 3> Ops{Node->getOperand(0), 3296 CurDAG->getConstant(1, dl, MVT::i32)}; 3297 AddEmptyMVEPredicateToOps(Ops, dl, Type); 3298 3299 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops)); 3300 return true; 3301 } 3302 3303 if (Node->getOpcode() != ISD::FMUL) 3304 return false; 3305 3306 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false); 3307 } 3308 3309 bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { 3310 // Transform a fixed-point to floating-point conversion to a VCVT 3311 if (!Subtarget->hasMVEFloatOps()) 3312 return false; 3313 auto Type = N->getValueType(0); 3314 if (!Type.isVector()) 3315 return false; 3316 3317 auto LHS = N->getOperand(0); 3318 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP) 3319 return false; 3320 3321 return transformFixedFloatingPointConversion( 3322 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true); 3323 } 3324 3325 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 3326 if (!Subtarget->hasV6T2Ops()) 3327 return false; 3328 3329 unsigned Opc = isSigned 3330 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 3331 : (Subtarget->isThumb() ? 
ARM::t2UBFX : ARM::UBFX); 3332 SDLoc dl(N); 3333 3334 // For unsigned extracts, check for a shift right and mask 3335 unsigned And_imm = 0; 3336 if (N->getOpcode() == ISD::AND) { 3337 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 3338 3339 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 3340 if (And_imm & (And_imm + 1)) 3341 return false; 3342 3343 unsigned Srl_imm = 0; 3344 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 3345 Srl_imm)) { 3346 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3347 3348 // Mask off the unnecessary bits of the AND immediate; normally 3349 // DAGCombine will do this, but that might not happen if 3350 // targetShrinkDemandedConstant chooses a different immediate. 3351 And_imm &= -1U >> Srl_imm; 3352 3353 // Note: The width operand is encoded as width-1. 3354 unsigned Width = countTrailingOnes(And_imm) - 1; 3355 unsigned LSB = Srl_imm; 3356 3357 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3358 3359 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 3360 // It's cheaper to use a right shift to extract the top bits. 3361 if (Subtarget->isThumb()) { 3362 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 3363 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3364 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3365 getAL(CurDAG, dl), Reg0, Reg0 }; 3366 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3367 return true; 3368 } 3369 3370 // ARM models shift instructions as MOVsi with shifter operand. 
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      // A negative LSB means the mask starts below bit 0; not a bitfield
      // extract.
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    // sext_inreg of a right shift: the field width comes from the extension
    // type and the LSB from the shift amount.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}

/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
///   select_cc setg[ge] X,  0, X, -X
///   select_cc setgt    X, -1, X, -X
///   select_cc setl[te] X,  0, -X, X
///   select_cc setlt    X,  1, -X, X
/// which represent Integer ABS into:
///   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
  SDValue XORSrc0 = N->getOperand(0);
  SDValue XORSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  if (Subtarget->isThumb1Only())
    return false;

  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue ADDSrc0 = XORSrc0.getOperand(0);
  SDValue ADDSrc1 = XORSrc0.getOperand(1);
  SDValue SRASrc0 = XORSrc1.getOperand(0);
  SDValue SRASrc1 = XORSrc1.getOperand(1);
  ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  unsigned Size = XType.getSizeInBits() - 1;

  // Match xor (add X, (sra X, size-1)), (sra X, size-1) exactly: the add and
  // the xor must share the same sra operand and the shift amount must be the
  // sign bit.
  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
      XType.isInteger() && SRAConstant != nullptr &&
      Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
    return true;
  }

  return false;
}

/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  // Operands: pointer, expected value, new value, chain.
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  // Only the pseudo's first i32 result (the loaded value) and the chain are
  // used; its second i32 result is skipped here.
  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Return (MSB, LSB) of the set bits of A if they form one contiguous run,
/// otherwise None.
static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
  // A contiguous mask has exactly FirstOne - LastOne + 1 bits set.
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return Optional<std::pair<unsigned,unsigned>>();
  return std::make_pair(FirstOne, LastOne);
}

void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
3544 return; 3545 3546 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 3547 SDValue And = N->getOperand(0); 3548 if (!And->hasOneUse()) 3549 return; 3550 3551 SDValue Zero = N->getOperand(1); 3552 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() || 3553 And->getOpcode() != ISD::AND) 3554 return; 3555 SDValue X = And.getOperand(0); 3556 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 3557 3558 if (!C) 3559 return; 3560 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 3561 if (!Range) 3562 return; 3563 3564 // There are several ways to lower this: 3565 SDNode *NewN; 3566 SDLoc dl(N); 3567 3568 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 3569 if (Subtarget->isThumb2()) { 3570 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri; 3571 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3572 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3573 CurDAG->getRegister(0, MVT::i32) }; 3574 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3575 } else { 3576 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 3577 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3578 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3579 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3580 } 3581 }; 3582 3583 if (Range->second == 0) { 3584 // 1. Mask includes the LSB -> Simply shift the top N bits off 3585 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3586 ReplaceNode(And.getNode(), NewN); 3587 } else if (Range->first == 31) { 3588 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 3589 NewN = EmitShift(ARM::tLSRri, X, Range->second); 3590 ReplaceNode(And.getNode(), NewN); 3591 } else if (Range->first == Range->second) { 3592 // 3. Only one bit is set. We can shift this into the sign bit and use a 3593 // PL/MI comparison. 
3594 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3595 ReplaceNode(And.getNode(), NewN); 3596 3597 SwitchEQNEToPLMI = true; 3598 } else if (!Subtarget->hasV6T2Ops()) { 3599 // 4. Do a double shift to clear bottom and top bits, but only in 3600 // thumb-1 mode as in thumb-2 we can use UBFX. 3601 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3602 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 3603 Range->second + (31 - Range->first)); 3604 ReplaceNode(And.getNode(), NewN); 3605 } 3606 3607 } 3608 3609 void ARMDAGToDAGISel::Select(SDNode *N) { 3610 SDLoc dl(N); 3611 3612 if (N->isMachineOpcode()) { 3613 N->setNodeId(-1); 3614 return; // Already selected. 3615 } 3616 3617 switch (N->getOpcode()) { 3618 default: break; 3619 case ISD::STORE: { 3620 // For Thumb1, match an sp-relative store in C++. This is a little 3621 // unfortunate, but I don't think I can make the chain check work 3622 // otherwise. (The chain of the store has to be the same as the chain 3623 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3624 // a direct reference to "SP".) 3625 // 3626 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3627 // a different addressing mode from other four-byte stores. 3628 // 3629 // This pattern usually comes up with call arguments. 
3630 StoreSDNode *ST = cast<StoreSDNode>(N); 3631 SDValue Ptr = ST->getBasePtr(); 3632 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3633 int RHSC = 0; 3634 if (Ptr.getOpcode() == ISD::ADD && 3635 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3636 Ptr = Ptr.getOperand(0); 3637 3638 if (Ptr.getOpcode() == ISD::CopyFromReg && 3639 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3640 Ptr.getOperand(0) == ST->getChain()) { 3641 SDValue Ops[] = {ST->getValue(), 3642 CurDAG->getRegister(ARM::SP, MVT::i32), 3643 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3644 getAL(CurDAG, dl), 3645 CurDAG->getRegister(0, MVT::i32), 3646 ST->getChain()}; 3647 MachineSDNode *ResNode = 3648 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3649 MachineMemOperand *MemOp = ST->getMemOperand(); 3650 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3651 ReplaceNode(N, ResNode); 3652 return; 3653 } 3654 } 3655 break; 3656 } 3657 case ISD::WRITE_REGISTER: 3658 if (tryWriteRegister(N)) 3659 return; 3660 break; 3661 case ISD::READ_REGISTER: 3662 if (tryReadRegister(N)) 3663 return; 3664 break; 3665 case ISD::INLINEASM: 3666 case ISD::INLINEASM_BR: 3667 if (tryInlineAsm(N)) 3668 return; 3669 break; 3670 case ISD::XOR: 3671 // Select special operations if XOR node forms integer ABS pattern 3672 if (tryABSOp(N)) 3673 return; 3674 // Other cases are autogenerated. 
3675 break; 3676 case ISD::Constant: { 3677 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 3678 // If we can't materialize the constant we need to use a literal pool 3679 if (ConstantMaterializationCost(Val, Subtarget) > 2) { 3680 SDValue CPIdx = CurDAG->getTargetConstantPool( 3681 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3682 TLI->getPointerTy(CurDAG->getDataLayout())); 3683 3684 SDNode *ResNode; 3685 if (Subtarget->isThumb()) { 3686 SDValue Ops[] = { 3687 CPIdx, 3688 getAL(CurDAG, dl), 3689 CurDAG->getRegister(0, MVT::i32), 3690 CurDAG->getEntryNode() 3691 }; 3692 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3693 Ops); 3694 } else { 3695 SDValue Ops[] = { 3696 CPIdx, 3697 CurDAG->getTargetConstant(0, dl, MVT::i32), 3698 getAL(CurDAG, dl), 3699 CurDAG->getRegister(0, MVT::i32), 3700 CurDAG->getEntryNode() 3701 }; 3702 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3703 Ops); 3704 } 3705 // Annotate the Node with memory operand information so that MachineInstr 3706 // queries work properly. This e.g. gives the register allocation the 3707 // required information for rematerialization. 3708 MachineFunction& MF = CurDAG->getMachineFunction(); 3709 MachineMemOperand *MemOp = 3710 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3711 MachineMemOperand::MOLoad, 4, Align(4)); 3712 3713 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3714 3715 ReplaceNode(N, ResNode); 3716 return; 3717 } 3718 3719 // Other cases are autogenerated. 3720 break; 3721 } 3722 case ISD::FrameIndex: { 3723 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
3724 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3725 SDValue TFI = CurDAG->getTargetFrameIndex( 3726 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3727 if (Subtarget->isThumb1Only()) { 3728 // Set the alignment of the frame object to 4, to avoid having to generate 3729 // more than one ADD 3730 MachineFrameInfo &MFI = MF->getFrameInfo(); 3731 if (MFI.getObjectAlign(FI) < Align(4)) 3732 MFI.setObjectAlignment(FI, Align(4)); 3733 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3734 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3735 return; 3736 } else { 3737 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3738 ARM::t2ADDri : ARM::ADDri); 3739 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3740 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3741 CurDAG->getRegister(0, MVT::i32) }; 3742 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3743 return; 3744 } 3745 } 3746 case ISD::INSERT_VECTOR_ELT: { 3747 if (tryInsertVectorElt(N)) 3748 return; 3749 break; 3750 } 3751 case ISD::SRL: 3752 if (tryV6T2BitfieldExtractOp(N, false)) 3753 return; 3754 break; 3755 case ISD::SIGN_EXTEND_INREG: 3756 case ISD::SRA: 3757 if (tryV6T2BitfieldExtractOp(N, true)) 3758 return; 3759 break; 3760 case ISD::FP_TO_UINT: 3761 case ISD::FP_TO_SINT: 3762 if (tryFP_TO_INT(N, dl)) 3763 return; 3764 break; 3765 case ISD::FMUL: 3766 if (tryFMULFixed(N, dl)) 3767 return; 3768 break; 3769 case ISD::MUL: 3770 if (Subtarget->isThumb1Only()) 3771 break; 3772 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3773 unsigned RHSV = C->getZExtValue(); 3774 if (!RHSV) break; 3775 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
3776 unsigned ShImm = Log2_32(RHSV-1); 3777 if (ShImm >= 32) 3778 break; 3779 SDValue V = N->getOperand(0); 3780 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3781 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3782 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3783 if (Subtarget->isThumb()) { 3784 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3785 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3786 return; 3787 } else { 3788 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3789 Reg0 }; 3790 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3791 return; 3792 } 3793 } 3794 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3795 unsigned ShImm = Log2_32(RHSV+1); 3796 if (ShImm >= 32) 3797 break; 3798 SDValue V = N->getOperand(0); 3799 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3800 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3801 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3802 if (Subtarget->isThumb()) { 3803 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3804 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3805 return; 3806 } else { 3807 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3808 Reg0 }; 3809 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3810 return; 3811 } 3812 } 3813 } 3814 break; 3815 case ISD::AND: { 3816 // Check for unsigned bitfield extract 3817 if (tryV6T2BitfieldExtractOp(N, false)) 3818 return; 3819 3820 // If an immediate is used in an AND node, it is possible that the immediate 3821 // can be more optimally materialized when negated. If this is the case we 3822 // can negate the immediate and use a BIC instead. 3823 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3824 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3825 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3826 3827 // In Thumb2 mode, an AND can take a 12-bit immediate. 
If this 3828 // immediate can be negated and fit in the immediate operand of 3829 // a t2BIC, don't do any manual transform here as this can be 3830 // handled by the generic ISel machinery. 3831 bool PreferImmediateEncoding = 3832 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3833 if (!PreferImmediateEncoding && 3834 ConstantMaterializationCost(Imm, Subtarget) > 3835 ConstantMaterializationCost(~Imm, Subtarget)) { 3836 // The current immediate costs more to materialize than a negated 3837 // immediate, so negate the immediate and use a BIC. 3838 SDValue NewImm = 3839 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 3840 // If the new constant didn't exist before, reposition it in the topological 3841 // ordering so it is just before N. Otherwise, don't touch its location. 3842 if (NewImm->getNodeId() == -1) 3843 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3844 3845 if (!Subtarget->hasThumb2()) { 3846 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3847 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3848 CurDAG->getRegister(0, MVT::i32)}; 3849 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3850 return; 3851 } else { 3852 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3853 CurDAG->getRegister(0, MVT::i32), 3854 CurDAG->getRegister(0, MVT::i32)}; 3855 ReplaceNode(N, 3856 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3857 return; 3858 } 3859 } 3860 } 3861 3862 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3863 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 3864 // are entirely contributed by c2 and lower 16-bits are entirely contributed 3865 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 3866 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 3867 EVT VT = N->getValueType(0); 3868 if (VT != MVT::i32) 3869 break; 3870 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3871 ? 
ARM::t2MOVTi16 3872 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 3873 if (!Opc) 3874 break; 3875 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3876 N1C = dyn_cast<ConstantSDNode>(N1); 3877 if (!N1C) 3878 break; 3879 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3880 SDValue N2 = N0.getOperand(1); 3881 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3882 if (!N2C) 3883 break; 3884 unsigned N1CVal = N1C->getZExtValue(); 3885 unsigned N2CVal = N2C->getZExtValue(); 3886 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3887 (N1CVal & 0xffffU) == 0xffffU && 3888 (N2CVal & 0xffffU) == 0x0U) { 3889 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3890 dl, MVT::i32); 3891 SDValue Ops[] = { N0.getOperand(0), Imm16, 3892 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3893 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3894 return; 3895 } 3896 } 3897 3898 break; 3899 } 3900 case ARMISD::UMAAL: { 3901 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3902 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3903 N->getOperand(2), N->getOperand(3), 3904 getAL(CurDAG, dl), 3905 CurDAG->getRegister(0, MVT::i32) }; 3906 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3907 return; 3908 } 3909 case ARMISD::UMLAL:{ 3910 if (Subtarget->isThumb()) { 3911 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3912 N->getOperand(3), getAL(CurDAG, dl), 3913 CurDAG->getRegister(0, MVT::i32)}; 3914 ReplaceNode( 3915 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3916 return; 3917 }else{ 3918 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3919 N->getOperand(3), getAL(CurDAG, dl), 3920 CurDAG->getRegister(0, MVT::i32), 3921 CurDAG->getRegister(0, MVT::i32) }; 3922 ReplaceNode(N, CurDAG->getMachineNode( 3923 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 3924 MVT::i32, MVT::i32, Ops)); 3925 return; 3926 } 3927 } 3928 case ARMISD::SMLAL:{ 3929 if (Subtarget->isThumb()) { 3930 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3931 N->getOperand(3), getAL(CurDAG, dl), 3932 CurDAG->getRegister(0, MVT::i32)}; 3933 ReplaceNode( 3934 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3935 return; 3936 }else{ 3937 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3938 N->getOperand(3), getAL(CurDAG, dl), 3939 CurDAG->getRegister(0, MVT::i32), 3940 CurDAG->getRegister(0, MVT::i32) }; 3941 ReplaceNode(N, CurDAG->getMachineNode( 3942 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3943 MVT::i32, MVT::i32, Ops)); 3944 return; 3945 } 3946 } 3947 case ARMISD::SUBE: { 3948 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3949 break; 3950 // Look for a pattern to match SMMLS 3951 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3952 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3953 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3954 !SDValue(N, 1).use_empty()) 3955 break; 3956 3957 if (Subtarget->isThumb()) 3958 assert(Subtarget->hasThumb2() && 3959 "This pattern should not be generated for Thumb"); 3960 3961 SDValue SmulLoHi = N->getOperand(1); 3962 SDValue Subc = N->getOperand(2); 3963 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 3964 3965 if (!Zero || Zero->getZExtValue() != 0 || 3966 Subc.getOperand(1) != SmulLoHi.getValue(0) || 3967 N->getOperand(1) != SmulLoHi.getValue(1) || 3968 N->getOperand(2) != Subc.getValue(1)) 3969 break; 3970 3971 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 3972 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3973 N->getOperand(0), getAL(CurDAG, dl), 3974 CurDAG->getRegister(0, MVT::i32) }; 3975 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3976 return; 3977 } 3978 case ISD::LOAD: { 3979 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3980 return; 3981 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3982 if (tryT2IndexedLoad(N)) 3983 return; 3984 } else if (Subtarget->isThumb()) { 3985 if (tryT1IndexedLoad(N)) 3986 return; 3987 } else if (tryARMIndexedLoad(N)) 3988 return; 3989 // Other cases are autogenerated. 3990 break; 3991 } 3992 case ISD::MLOAD: 3993 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3994 return; 3995 // Other cases are autogenerated. 3996 break; 3997 case ARMISD::WLSSETUP: { 3998 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, 3999 N->getOperand(0)); 4000 ReplaceUses(N, New); 4001 CurDAG->RemoveDeadNode(N); 4002 return; 4003 } 4004 case ARMISD::WLS: { 4005 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, 4006 N->getOperand(1), N->getOperand(2), 4007 N->getOperand(0)); 4008 ReplaceUses(N, New); 4009 CurDAG->RemoveDeadNode(N); 4010 return; 4011 } 4012 case ARMISD::LE: { 4013 SDValue Ops[] = { N->getOperand(1), 4014 N->getOperand(2), 4015 N->getOperand(0) }; 4016 unsigned Opc = ARM::t2LoopEnd; 4017 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 4018 ReplaceUses(N, New); 4019 CurDAG->RemoveDeadNode(N); 4020 return; 4021 } 4022 case ARMISD::LDRD: { 4023 if (Subtarget->isThumb2()) 4024 break; // TableGen handles isel in this case. 
4025 SDValue Base, RegOffset, ImmOffset; 4026 const SDValue &Chain = N->getOperand(0); 4027 const SDValue &Addr = N->getOperand(1); 4028 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4029 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4030 // The register-offset variant of LDRD mandates that the register 4031 // allocated to RegOffset is not reused in any of the remaining operands. 4032 // This restriction is currently not enforced. Therefore emitting this 4033 // variant is explicitly avoided. 4034 Base = Addr; 4035 RegOffset = CurDAG->getRegister(0, MVT::i32); 4036 } 4037 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 4038 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 4039 {MVT::Untyped, MVT::Other}, Ops); 4040 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4041 SDValue(New, 0)); 4042 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4043 SDValue(New, 0)); 4044 transferMemOperands(N, New); 4045 ReplaceUses(SDValue(N, 0), Lo); 4046 ReplaceUses(SDValue(N, 1), Hi); 4047 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 4048 CurDAG->RemoveDeadNode(N); 4049 return; 4050 } 4051 case ARMISD::STRD: { 4052 if (Subtarget->isThumb2()) 4053 break; // TableGen handles isel in this case. 4054 SDValue Base, RegOffset, ImmOffset; 4055 const SDValue &Chain = N->getOperand(0); 4056 const SDValue &Addr = N->getOperand(3); 4057 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4058 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4059 // The register-offset variant of STRD mandates that the register 4060 // allocated to RegOffset is not reused in any of the remaining operands. 4061 // This restriction is currently not enforced. Therefore emitting this 4062 // variant is explicitly avoided. 
4063 Base = Addr; 4064 RegOffset = CurDAG->getRegister(0, MVT::i32); 4065 } 4066 SDNode *RegPair = 4067 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 4068 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 4069 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 4070 transferMemOperands(N, New); 4071 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 4072 CurDAG->RemoveDeadNode(N); 4073 return; 4074 } 4075 case ARMISD::LOOP_DEC: { 4076 SDValue Ops[] = { N->getOperand(1), 4077 N->getOperand(2), 4078 N->getOperand(0) }; 4079 SDNode *Dec = 4080 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4081 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 4082 ReplaceUses(N, Dec); 4083 CurDAG->RemoveDeadNode(N); 4084 return; 4085 } 4086 case ARMISD::BRCOND: { 4087 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4088 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4089 // Pattern complexity = 6 cost = 1 size = 0 4090 4091 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4092 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 4093 // Pattern complexity = 6 cost = 1 size = 0 4094 4095 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4096 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4097 // Pattern complexity = 6 cost = 1 size = 0 4098 4099 unsigned Opc = Subtarget->isThumb() ? 4100 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 4101 SDValue Chain = N->getOperand(0); 4102 SDValue N1 = N->getOperand(1); 4103 SDValue N2 = N->getOperand(2); 4104 SDValue N3 = N->getOperand(3); 4105 SDValue InFlag = N->getOperand(4); 4106 assert(N1.getOpcode() == ISD::BasicBlock); 4107 assert(N2.getOpcode() == ISD::Constant); 4108 assert(N3.getOpcode() == ISD::Register); 4109 4110 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 4111 4112 if (InFlag.getOpcode() == ARMISD::CMPZ) { 4113 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 4114 SDValue Int = InFlag.getOperand(0); 4115 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 4116 4117 // Handle low-overhead loops. 4118 if (ID == Intrinsic::loop_decrement_reg) { 4119 SDValue Elements = Int.getOperand(2); 4120 SDValue Size = CurDAG->getTargetConstant( 4121 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 4122 MVT::i32); 4123 4124 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 4125 SDNode *LoopDec = 4126 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4127 CurDAG->getVTList(MVT::i32, MVT::Other), 4128 Args); 4129 ReplaceUses(Int.getNode(), LoopDec); 4130 4131 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 4132 SDNode *LoopEnd = 4133 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 4134 4135 ReplaceUses(N, LoopEnd); 4136 CurDAG->RemoveDeadNode(N); 4137 CurDAG->RemoveDeadNode(InFlag.getNode()); 4138 CurDAG->RemoveDeadNode(Int.getNode()); 4139 return; 4140 } 4141 } 4142 4143 bool SwitchEQNEToPLMI; 4144 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 4145 InFlag = N->getOperand(4); 4146 4147 if (SwitchEQNEToPLMI) { 4148 switch ((ARMCC::CondCodes)CC) { 4149 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4150 case ARMCC::NE: 4151 CC = (unsigned)ARMCC::MI; 4152 break; 4153 case ARMCC::EQ: 4154 CC = (unsigned)ARMCC::PL; 4155 break; 4156 } 4157 } 4158 } 4159 4160 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 4161 
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 4162 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 4163 MVT::Glue, Ops); 4164 Chain = SDValue(ResNode, 0); 4165 if (N->getNumValues() == 2) { 4166 InFlag = SDValue(ResNode, 1); 4167 ReplaceUses(SDValue(N, 1), InFlag); 4168 } 4169 ReplaceUses(SDValue(N, 0), 4170 SDValue(Chain.getNode(), Chain.getResNo())); 4171 CurDAG->RemoveDeadNode(N); 4172 return; 4173 } 4174 4175 case ARMISD::CMPZ: { 4176 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 4177 // This allows us to avoid materializing the expensive negative constant. 4178 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 4179 // for its glue output. 4180 SDValue X = N->getOperand(0); 4181 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 4182 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 4183 int64_t Addend = -C->getSExtValue(); 4184 4185 SDNode *Add = nullptr; 4186 // ADDS can be better than CMN if the immediate fits in a 4187 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 4188 // Outside that range we can just use a CMN which is 32-bit but has a 4189 // 12-bit immediate range. 4190 if (Addend < 1<<8) { 4191 if (Subtarget->isThumb2()) { 4192 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4193 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 4194 CurDAG->getRegister(0, MVT::i32) }; 4195 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 4196 } else { 4197 unsigned Opc = (Addend < 1<<3) ? 
ARM::tADDi3 : ARM::tADDi8; 4198 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 4199 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4200 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 4201 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 4202 } 4203 } 4204 if (Add) { 4205 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 4206 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 4207 } 4208 } 4209 // Other cases are autogenerated. 4210 break; 4211 } 4212 4213 case ARMISD::CMOV: { 4214 SDValue InFlag = N->getOperand(4); 4215 4216 if (InFlag.getOpcode() == ARMISD::CMPZ) { 4217 bool SwitchEQNEToPLMI; 4218 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 4219 4220 if (SwitchEQNEToPLMI) { 4221 SDValue ARMcc = N->getOperand(2); 4222 ARMCC::CondCodes CC = 4223 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 4224 4225 switch (CC) { 4226 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4227 case ARMCC::NE: 4228 CC = ARMCC::MI; 4229 break; 4230 case ARMCC::EQ: 4231 CC = ARMCC::PL; 4232 break; 4233 } 4234 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 4235 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 4236 N->getOperand(3), N->getOperand(4)}; 4237 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 4238 } 4239 4240 } 4241 // Other cases are autogenerated. 4242 break; 4243 } 4244 4245 case ARMISD::VZIP: { 4246 unsigned Opc = 0; 4247 EVT VT = N->getValueType(0); 4248 switch (VT.getSimpleVT().SimpleTy) { 4249 default: return; 4250 case MVT::v8i8: Opc = ARM::VZIPd8; break; 4251 case MVT::v4f16: 4252 case MVT::v4i16: Opc = ARM::VZIPd16; break; 4253 case MVT::v2f32: 4254 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
4255 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4256 case MVT::v16i8: Opc = ARM::VZIPq8; break; 4257 case MVT::v8f16: 4258 case MVT::v8i16: Opc = ARM::VZIPq16; break; 4259 case MVT::v4f32: 4260 case MVT::v4i32: Opc = ARM::VZIPq32; break; 4261 } 4262 SDValue Pred = getAL(CurDAG, dl); 4263 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4264 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4265 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4266 return; 4267 } 4268 case ARMISD::VUZP: { 4269 unsigned Opc = 0; 4270 EVT VT = N->getValueType(0); 4271 switch (VT.getSimpleVT().SimpleTy) { 4272 default: return; 4273 case MVT::v8i8: Opc = ARM::VUZPd8; break; 4274 case MVT::v4f16: 4275 case MVT::v4i16: Opc = ARM::VUZPd16; break; 4276 case MVT::v2f32: 4277 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4278 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4279 case MVT::v16i8: Opc = ARM::VUZPq8; break; 4280 case MVT::v8f16: 4281 case MVT::v8i16: Opc = ARM::VUZPq16; break; 4282 case MVT::v4f32: 4283 case MVT::v4i32: Opc = ARM::VUZPq32; break; 4284 } 4285 SDValue Pred = getAL(CurDAG, dl); 4286 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4287 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4288 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4289 return; 4290 } 4291 case ARMISD::VTRN: { 4292 unsigned Opc = 0; 4293 EVT VT = N->getValueType(0); 4294 switch (VT.getSimpleVT().SimpleTy) { 4295 default: return; 4296 case MVT::v8i8: Opc = ARM::VTRNd8; break; 4297 case MVT::v4f16: 4298 case MVT::v4i16: Opc = ARM::VTRNd16; break; 4299 case MVT::v2f32: 4300 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4301 case MVT::v16i8: Opc = ARM::VTRNq8; break; 4302 case MVT::v8f16: 4303 case MVT::v8i16: Opc = ARM::VTRNq16; break; 4304 case MVT::v4f32: 4305 case MVT::v4i32: Opc = ARM::VTRNq32; break; 4306 } 4307 SDValue Pred = getAL(CurDAG, dl); 4308 SDValue PredReg = CurDAG->getRegister(0, 
MVT::i32); 4309 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4310 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4311 return; 4312 } 4313 case ARMISD::BUILD_VECTOR: { 4314 EVT VecVT = N->getValueType(0); 4315 EVT EltVT = VecVT.getVectorElementType(); 4316 unsigned NumElts = VecVT.getVectorNumElements(); 4317 if (EltVT == MVT::f64) { 4318 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 4319 ReplaceNode( 4320 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4321 return; 4322 } 4323 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 4324 if (NumElts == 2) { 4325 ReplaceNode( 4326 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4327 return; 4328 } 4329 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 4330 ReplaceNode(N, 4331 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 4332 N->getOperand(2), N->getOperand(3))); 4333 return; 4334 } 4335 4336 case ARMISD::VLD1DUP: { 4337 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4338 ARM::VLD1DUPd32 }; 4339 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4340 ARM::VLD1DUPq32 }; 4341 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4342 return; 4343 } 4344 4345 case ARMISD::VLD2DUP: { 4346 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4347 ARM::VLD2DUPd32 }; 4348 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4349 return; 4350 } 4351 4352 case ARMISD::VLD3DUP: { 4353 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4354 ARM::VLD3DUPd16Pseudo, 4355 ARM::VLD3DUPd32Pseudo }; 4356 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4357 return; 4358 } 4359 4360 case ARMISD::VLD4DUP: { 4361 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4362 ARM::VLD4DUPd16Pseudo, 4363 ARM::VLD4DUPd32Pseudo }; 4364 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4365 return; 
4366 } 4367 4368 case ARMISD::VLD1DUP_UPD: { 4369 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4370 ARM::VLD1DUPd16wb_fixed, 4371 ARM::VLD1DUPd32wb_fixed }; 4372 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4373 ARM::VLD1DUPq16wb_fixed, 4374 ARM::VLD1DUPq32wb_fixed }; 4375 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4376 return; 4377 } 4378 4379 case ARMISD::VLD2DUP_UPD: { 4380 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed, 4381 ARM::VLD2DUPd16wb_fixed, 4382 ARM::VLD2DUPd32wb_fixed, 4383 ARM::VLD1q64wb_fixed }; 4384 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4385 ARM::VLD2DUPq16EvenPseudo, 4386 ARM::VLD2DUPq32EvenPseudo }; 4387 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed, 4388 ARM::VLD2DUPq16OddPseudoWB_fixed, 4389 ARM::VLD2DUPq32OddPseudoWB_fixed }; 4390 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1); 4391 return; 4392 } 4393 4394 case ARMISD::VLD3DUP_UPD: { 4395 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4396 ARM::VLD3DUPd16Pseudo_UPD, 4397 ARM::VLD3DUPd32Pseudo_UPD, 4398 ARM::VLD1d64TPseudoWB_fixed }; 4399 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4400 ARM::VLD3DUPq16EvenPseudo, 4401 ARM::VLD3DUPq32EvenPseudo }; 4402 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD, 4403 ARM::VLD3DUPq16OddPseudo_UPD, 4404 ARM::VLD3DUPq32OddPseudo_UPD }; 4405 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4406 return; 4407 } 4408 4409 case ARMISD::VLD4DUP_UPD: { 4410 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4411 ARM::VLD4DUPd16Pseudo_UPD, 4412 ARM::VLD4DUPd32Pseudo_UPD, 4413 ARM::VLD1d64QPseudoWB_fixed }; 4414 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4415 ARM::VLD4DUPq16EvenPseudo, 4416 ARM::VLD4DUPq32EvenPseudo }; 4417 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD, 
4418 ARM::VLD4DUPq16OddPseudo_UPD, 4419 ARM::VLD4DUPq32OddPseudo_UPD }; 4420 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4421 return; 4422 } 4423 4424 case ARMISD::VLD1_UPD: { 4425 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4426 ARM::VLD1d16wb_fixed, 4427 ARM::VLD1d32wb_fixed, 4428 ARM::VLD1d64wb_fixed }; 4429 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4430 ARM::VLD1q16wb_fixed, 4431 ARM::VLD1q32wb_fixed, 4432 ARM::VLD1q64wb_fixed }; 4433 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4434 return; 4435 } 4436 4437 case ARMISD::VLD2_UPD: { 4438 if (Subtarget->hasNEON()) { 4439 static const uint16_t DOpcodes[] = { 4440 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4441 ARM::VLD1q64wb_fixed}; 4442 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4443 ARM::VLD2q16PseudoWB_fixed, 4444 ARM::VLD2q32PseudoWB_fixed}; 4445 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4446 } else { 4447 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4448 ARM::MVE_VLD21_8_wb}; 4449 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4450 ARM::MVE_VLD21_16_wb}; 4451 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4452 ARM::MVE_VLD21_32_wb}; 4453 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4454 SelectMVE_VLD(N, 2, Opcodes, true); 4455 } 4456 return; 4457 } 4458 4459 case ARMISD::VLD3_UPD: { 4460 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4461 ARM::VLD3d16Pseudo_UPD, 4462 ARM::VLD3d32Pseudo_UPD, 4463 ARM::VLD1d64TPseudoWB_fixed}; 4464 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4465 ARM::VLD3q16Pseudo_UPD, 4466 ARM::VLD3q32Pseudo_UPD }; 4467 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4468 ARM::VLD3q16oddPseudo_UPD, 4469 ARM::VLD3q32oddPseudo_UPD }; 4470 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4471 return; 4472 } 4473 4474 case ARMISD::VLD4_UPD: { 4475 if 
(Subtarget->hasNEON()) { 4476 static const uint16_t DOpcodes[] = { 4477 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4478 ARM::VLD1d64QPseudoWB_fixed}; 4479 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4480 ARM::VLD4q16Pseudo_UPD, 4481 ARM::VLD4q32Pseudo_UPD}; 4482 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4483 ARM::VLD4q16oddPseudo_UPD, 4484 ARM::VLD4q32oddPseudo_UPD}; 4485 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4486 } else { 4487 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4488 ARM::MVE_VLD42_8, 4489 ARM::MVE_VLD43_8_wb}; 4490 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4491 ARM::MVE_VLD42_16, 4492 ARM::MVE_VLD43_16_wb}; 4493 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4494 ARM::MVE_VLD42_32, 4495 ARM::MVE_VLD43_32_wb}; 4496 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4497 SelectMVE_VLD(N, 4, Opcodes, true); 4498 } 4499 return; 4500 } 4501 4502 case ARMISD::VLD1x2_UPD: { 4503 if (Subtarget->hasNEON()) { 4504 static const uint16_t DOpcodes[] = { 4505 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, 4506 ARM::VLD1q64wb_fixed}; 4507 static const uint16_t QOpcodes[] = { 4508 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4509 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4510 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4511 return; 4512 } 4513 break; 4514 } 4515 4516 case ARMISD::VLD1x3_UPD: { 4517 if (Subtarget->hasNEON()) { 4518 static const uint16_t DOpcodes[] = { 4519 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed, 4520 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed}; 4521 static const uint16_t QOpcodes0[] = { 4522 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD, 4523 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD}; 4524 static const uint16_t QOpcodes1[] = { 4525 
ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD, 4526 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD}; 4527 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4528 return; 4529 } 4530 break; 4531 } 4532 4533 case ARMISD::VLD1x4_UPD: { 4534 if (Subtarget->hasNEON()) { 4535 static const uint16_t DOpcodes[] = { 4536 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4537 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4538 static const uint16_t QOpcodes0[] = { 4539 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD, 4540 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD}; 4541 static const uint16_t QOpcodes1[] = { 4542 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD, 4543 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD}; 4544 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4545 return; 4546 } 4547 break; 4548 } 4549 4550 case ARMISD::VLD2LN_UPD: { 4551 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4552 ARM::VLD2LNd16Pseudo_UPD, 4553 ARM::VLD2LNd32Pseudo_UPD }; 4554 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4555 ARM::VLD2LNq32Pseudo_UPD }; 4556 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4557 return; 4558 } 4559 4560 case ARMISD::VLD3LN_UPD: { 4561 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4562 ARM::VLD3LNd16Pseudo_UPD, 4563 ARM::VLD3LNd32Pseudo_UPD }; 4564 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4565 ARM::VLD3LNq32Pseudo_UPD }; 4566 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4567 return; 4568 } 4569 4570 case ARMISD::VLD4LN_UPD: { 4571 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4572 ARM::VLD4LNd16Pseudo_UPD, 4573 ARM::VLD4LNd32Pseudo_UPD }; 4574 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4575 ARM::VLD4LNq32Pseudo_UPD }; 4576 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4577 return; 4578 } 4579 4580 case ARMISD::VST1_UPD: { 4581 static const uint16_t 
DOpcodes[] = { ARM::VST1d8wb_fixed, 4582 ARM::VST1d16wb_fixed, 4583 ARM::VST1d32wb_fixed, 4584 ARM::VST1d64wb_fixed }; 4585 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4586 ARM::VST1q16wb_fixed, 4587 ARM::VST1q32wb_fixed, 4588 ARM::VST1q64wb_fixed }; 4589 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4590 return; 4591 } 4592 4593 case ARMISD::VST2_UPD: { 4594 if (Subtarget->hasNEON()) { 4595 static const uint16_t DOpcodes[] = { 4596 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4597 ARM::VST1q64wb_fixed}; 4598 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4599 ARM::VST2q16PseudoWB_fixed, 4600 ARM::VST2q32PseudoWB_fixed}; 4601 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4602 return; 4603 } 4604 break; 4605 } 4606 4607 case ARMISD::VST3_UPD: { 4608 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 4609 ARM::VST3d16Pseudo_UPD, 4610 ARM::VST3d32Pseudo_UPD, 4611 ARM::VST1d64TPseudoWB_fixed}; 4612 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4613 ARM::VST3q16Pseudo_UPD, 4614 ARM::VST3q32Pseudo_UPD }; 4615 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4616 ARM::VST3q16oddPseudo_UPD, 4617 ARM::VST3q32oddPseudo_UPD }; 4618 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4619 return; 4620 } 4621 4622 case ARMISD::VST4_UPD: { 4623 if (Subtarget->hasNEON()) { 4624 static const uint16_t DOpcodes[] = { 4625 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4626 ARM::VST1d64QPseudoWB_fixed}; 4627 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4628 ARM::VST4q16Pseudo_UPD, 4629 ARM::VST4q32Pseudo_UPD}; 4630 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4631 ARM::VST4q16oddPseudo_UPD, 4632 ARM::VST4q32oddPseudo_UPD}; 4633 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4634 return; 4635 } 4636 break; 4637 } 4638 4639 case ARMISD::VST1x2_UPD: { 4640 if (Subtarget->hasNEON()) { 4641 static const uint16_t DOpcodes[] 
= { ARM::VST1q8wb_fixed, 4642 ARM::VST1q16wb_fixed, 4643 ARM::VST1q32wb_fixed, 4644 ARM::VST1q64wb_fixed}; 4645 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4646 ARM::VST1d16QPseudoWB_fixed, 4647 ARM::VST1d32QPseudoWB_fixed, 4648 ARM::VST1d64QPseudoWB_fixed }; 4649 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4650 return; 4651 } 4652 break; 4653 } 4654 4655 case ARMISD::VST1x3_UPD: { 4656 if (Subtarget->hasNEON()) { 4657 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed, 4658 ARM::VST1d16TPseudoWB_fixed, 4659 ARM::VST1d32TPseudoWB_fixed, 4660 ARM::VST1d64TPseudoWB_fixed }; 4661 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4662 ARM::VST1q16LowTPseudo_UPD, 4663 ARM::VST1q32LowTPseudo_UPD, 4664 ARM::VST1q64LowTPseudo_UPD }; 4665 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD, 4666 ARM::VST1q16HighTPseudo_UPD, 4667 ARM::VST1q32HighTPseudo_UPD, 4668 ARM::VST1q64HighTPseudo_UPD }; 4669 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4670 return; 4671 } 4672 break; 4673 } 4674 4675 case ARMISD::VST1x4_UPD: { 4676 if (Subtarget->hasNEON()) { 4677 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4678 ARM::VST1d16QPseudoWB_fixed, 4679 ARM::VST1d32QPseudoWB_fixed, 4680 ARM::VST1d64QPseudoWB_fixed }; 4681 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4682 ARM::VST1q16LowQPseudo_UPD, 4683 ARM::VST1q32LowQPseudo_UPD, 4684 ARM::VST1q64LowQPseudo_UPD }; 4685 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD, 4686 ARM::VST1q16HighQPseudo_UPD, 4687 ARM::VST1q32HighQPseudo_UPD, 4688 ARM::VST1q64HighQPseudo_UPD }; 4689 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4690 return; 4691 } 4692 break; 4693 } 4694 case ARMISD::VST2LN_UPD: { 4695 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 4696 ARM::VST2LNd16Pseudo_UPD, 4697 ARM::VST2LNd32Pseudo_UPD }; 4698 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 4699 
ARM::VST2LNq32Pseudo_UPD }; 4700 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 4701 return; 4702 } 4703 4704 case ARMISD::VST3LN_UPD: { 4705 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 4706 ARM::VST3LNd16Pseudo_UPD, 4707 ARM::VST3LNd32Pseudo_UPD }; 4708 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 4709 ARM::VST3LNq32Pseudo_UPD }; 4710 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 4711 return; 4712 } 4713 4714 case ARMISD::VST4LN_UPD: { 4715 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 4716 ARM::VST4LNd16Pseudo_UPD, 4717 ARM::VST4LNd32Pseudo_UPD }; 4718 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 4719 ARM::VST4LNq32Pseudo_UPD }; 4720 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 4721 return; 4722 } 4723 4724 case ISD::INTRINSIC_VOID: 4725 case ISD::INTRINSIC_W_CHAIN: { 4726 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 4727 switch (IntNo) { 4728 default: 4729 break; 4730 4731 case Intrinsic::arm_mrrc: 4732 case Intrinsic::arm_mrrc2: { 4733 SDLoc dl(N); 4734 SDValue Chain = N->getOperand(0); 4735 unsigned Opc; 4736 4737 if (Subtarget->isThumb()) 4738 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 4739 else 4740 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 4741 4742 SmallVector<SDValue, 5> Ops; 4743 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 4744 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 4745 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 4746 4747 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 4748 // instruction will always be '1111' but it is possible in assembly language to specify 4749 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 
4750 if (Opc != ARM::MRRC2) { 4751 Ops.push_back(getAL(CurDAG, dl)); 4752 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4753 } 4754 4755 Ops.push_back(Chain); 4756 4757 // Writes to two registers. 4758 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 4759 4760 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 4761 return; 4762 } 4763 case Intrinsic::arm_ldaexd: 4764 case Intrinsic::arm_ldrexd: { 4765 SDLoc dl(N); 4766 SDValue Chain = N->getOperand(0); 4767 SDValue MemAddr = N->getOperand(2); 4768 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 4769 4770 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 4771 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 4772 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 4773 4774 // arm_ldrexd returns a i64 value in {i32, i32} 4775 std::vector<EVT> ResTys; 4776 if (isThumb) { 4777 ResTys.push_back(MVT::i32); 4778 ResTys.push_back(MVT::i32); 4779 } else 4780 ResTys.push_back(MVT::Untyped); 4781 ResTys.push_back(MVT::Other); 4782 4783 // Place arguments in the right order. 4784 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 4785 CurDAG->getRegister(0, MVT::i32), Chain}; 4786 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4787 // Transfer memoperands. 4788 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4789 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4790 4791 // Remap uses. 4792 SDValue OutChain = isThumb ? 
SDValue(Ld, 2) : SDValue(Ld, 1); 4793 if (!SDValue(N, 0).use_empty()) { 4794 SDValue Result; 4795 if (isThumb) 4796 Result = SDValue(Ld, 0); 4797 else { 4798 SDValue SubRegIdx = 4799 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 4800 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4801 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4802 Result = SDValue(ResNode,0); 4803 } 4804 ReplaceUses(SDValue(N, 0), Result); 4805 } 4806 if (!SDValue(N, 1).use_empty()) { 4807 SDValue Result; 4808 if (isThumb) 4809 Result = SDValue(Ld, 1); 4810 else { 4811 SDValue SubRegIdx = 4812 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 4813 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4814 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4815 Result = SDValue(ResNode,0); 4816 } 4817 ReplaceUses(SDValue(N, 1), Result); 4818 } 4819 ReplaceUses(SDValue(N, 2), OutChain); 4820 CurDAG->RemoveDeadNode(N); 4821 return; 4822 } 4823 case Intrinsic::arm_stlexd: 4824 case Intrinsic::arm_strexd: { 4825 SDLoc dl(N); 4826 SDValue Chain = N->getOperand(0); 4827 SDValue Val0 = N->getOperand(2); 4828 SDValue Val1 = N->getOperand(3); 4829 SDValue MemAddr = N->getOperand(4); 4830 4831 // Store exclusive double return a i32 value which is the return status 4832 // of the issued store. 4833 const EVT ResTys[] = {MVT::i32, MVT::Other}; 4834 4835 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 4836 // Place arguments in the right order. 4837 SmallVector<SDValue, 7> Ops; 4838 if (isThumb) { 4839 Ops.push_back(Val0); 4840 Ops.push_back(Val1); 4841 } else 4842 // arm_strexd uses GPRPair. 4843 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 4844 Ops.push_back(MemAddr); 4845 Ops.push_back(getAL(CurDAG, dl)); 4846 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4847 Ops.push_back(Chain); 4848 4849 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 4850 unsigned NewOpc = isThumb ? (IsRelease ? 
ARM::t2STLEXD : ARM::t2STREXD) 4851 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 4852 4853 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4854 // Transfer memoperands. 4855 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4856 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4857 4858 ReplaceNode(N, St); 4859 return; 4860 } 4861 4862 case Intrinsic::arm_neon_vld1: { 4863 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 4864 ARM::VLD1d32, ARM::VLD1d64 }; 4865 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4866 ARM::VLD1q32, ARM::VLD1q64}; 4867 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 4868 return; 4869 } 4870 4871 case Intrinsic::arm_neon_vld1x2: { 4872 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4873 ARM::VLD1q32, ARM::VLD1q64 }; 4874 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 4875 ARM::VLD1d16QPseudo, 4876 ARM::VLD1d32QPseudo, 4877 ARM::VLD1d64QPseudo }; 4878 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4879 return; 4880 } 4881 4882 case Intrinsic::arm_neon_vld1x3: { 4883 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 4884 ARM::VLD1d16TPseudo, 4885 ARM::VLD1d32TPseudo, 4886 ARM::VLD1d64TPseudo }; 4887 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD, 4888 ARM::VLD1q16LowTPseudo_UPD, 4889 ARM::VLD1q32LowTPseudo_UPD, 4890 ARM::VLD1q64LowTPseudo_UPD }; 4891 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 4892 ARM::VLD1q16HighTPseudo, 4893 ARM::VLD1q32HighTPseudo, 4894 ARM::VLD1q64HighTPseudo }; 4895 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4896 return; 4897 } 4898 4899 case Intrinsic::arm_neon_vld1x4: { 4900 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 4901 ARM::VLD1d16QPseudo, 4902 ARM::VLD1d32QPseudo, 4903 ARM::VLD1d64QPseudo }; 4904 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 4905 ARM::VLD1q16LowQPseudo_UPD, 4906 ARM::VLD1q32LowQPseudo_UPD, 4907 
ARM::VLD1q64LowQPseudo_UPD }; 4908 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 4909 ARM::VLD1q16HighQPseudo, 4910 ARM::VLD1q32HighQPseudo, 4911 ARM::VLD1q64HighQPseudo }; 4912 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4913 return; 4914 } 4915 4916 case Intrinsic::arm_neon_vld2: { 4917 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 4918 ARM::VLD2d32, ARM::VLD1q64 }; 4919 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 4920 ARM::VLD2q32Pseudo }; 4921 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4922 return; 4923 } 4924 4925 case Intrinsic::arm_neon_vld3: { 4926 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 4927 ARM::VLD3d16Pseudo, 4928 ARM::VLD3d32Pseudo, 4929 ARM::VLD1d64TPseudo }; 4930 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4931 ARM::VLD3q16Pseudo_UPD, 4932 ARM::VLD3q32Pseudo_UPD }; 4933 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 4934 ARM::VLD3q16oddPseudo, 4935 ARM::VLD3q32oddPseudo }; 4936 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4937 return; 4938 } 4939 4940 case Intrinsic::arm_neon_vld4: { 4941 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 4942 ARM::VLD4d16Pseudo, 4943 ARM::VLD4d32Pseudo, 4944 ARM::VLD1d64QPseudo }; 4945 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 4946 ARM::VLD4q16Pseudo_UPD, 4947 ARM::VLD4q32Pseudo_UPD }; 4948 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 4949 ARM::VLD4q16oddPseudo, 4950 ARM::VLD4q32oddPseudo }; 4951 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4952 return; 4953 } 4954 4955 case Intrinsic::arm_neon_vld2dup: { 4956 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4957 ARM::VLD2DUPd32, ARM::VLD1q64 }; 4958 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4959 ARM::VLD2DUPq16EvenPseudo, 4960 ARM::VLD2DUPq32EvenPseudo }; 4961 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 4962 
ARM::VLD2DUPq16OddPseudo, 4963 ARM::VLD2DUPq32OddPseudo }; 4964 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 4965 DOpcodes, QOpcodes0, QOpcodes1); 4966 return; 4967 } 4968 4969 case Intrinsic::arm_neon_vld3dup: { 4970 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 4971 ARM::VLD3DUPd16Pseudo, 4972 ARM::VLD3DUPd32Pseudo, 4973 ARM::VLD1d64TPseudo }; 4974 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4975 ARM::VLD3DUPq16EvenPseudo, 4976 ARM::VLD3DUPq32EvenPseudo }; 4977 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 4978 ARM::VLD3DUPq16OddPseudo, 4979 ARM::VLD3DUPq32OddPseudo }; 4980 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 4981 DOpcodes, QOpcodes0, QOpcodes1); 4982 return; 4983 } 4984 4985 case Intrinsic::arm_neon_vld4dup: { 4986 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 4987 ARM::VLD4DUPd16Pseudo, 4988 ARM::VLD4DUPd32Pseudo, 4989 ARM::VLD1d64QPseudo }; 4990 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4991 ARM::VLD4DUPq16EvenPseudo, 4992 ARM::VLD4DUPq32EvenPseudo }; 4993 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 4994 ARM::VLD4DUPq16OddPseudo, 4995 ARM::VLD4DUPq32OddPseudo }; 4996 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 4997 DOpcodes, QOpcodes0, QOpcodes1); 4998 return; 4999 } 5000 5001 case Intrinsic::arm_neon_vld2lane: { 5002 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 5003 ARM::VLD2LNd16Pseudo, 5004 ARM::VLD2LNd32Pseudo }; 5005 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 5006 ARM::VLD2LNq32Pseudo }; 5007 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 5008 return; 5009 } 5010 5011 case Intrinsic::arm_neon_vld3lane: { 5012 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 5013 ARM::VLD3LNd16Pseudo, 5014 ARM::VLD3LNd32Pseudo }; 5015 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 5016 ARM::VLD3LNq32Pseudo }; 5017 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 5018 return; 
5019 } 5020 5021 case Intrinsic::arm_neon_vld4lane: { 5022 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 5023 ARM::VLD4LNd16Pseudo, 5024 ARM::VLD4LNd32Pseudo }; 5025 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 5026 ARM::VLD4LNq32Pseudo }; 5027 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 5028 return; 5029 } 5030 5031 case Intrinsic::arm_neon_vst1: { 5032 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 5033 ARM::VST1d32, ARM::VST1d64 }; 5034 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 5035 ARM::VST1q32, ARM::VST1q64 }; 5036 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 5037 return; 5038 } 5039 5040 case Intrinsic::arm_neon_vst1x2: { 5041 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 5042 ARM::VST1q32, ARM::VST1q64 }; 5043 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 5044 ARM::VST1d16QPseudo, 5045 ARM::VST1d32QPseudo, 5046 ARM::VST1d64QPseudo }; 5047 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 5048 return; 5049 } 5050 5051 case Intrinsic::arm_neon_vst1x3: { 5052 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 5053 ARM::VST1d16TPseudo, 5054 ARM::VST1d32TPseudo, 5055 ARM::VST1d64TPseudo }; 5056 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 5057 ARM::VST1q16LowTPseudo_UPD, 5058 ARM::VST1q32LowTPseudo_UPD, 5059 ARM::VST1q64LowTPseudo_UPD }; 5060 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 5061 ARM::VST1q16HighTPseudo, 5062 ARM::VST1q32HighTPseudo, 5063 ARM::VST1q64HighTPseudo }; 5064 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 5065 return; 5066 } 5067 5068 case Intrinsic::arm_neon_vst1x4: { 5069 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 5070 ARM::VST1d16QPseudo, 5071 ARM::VST1d32QPseudo, 5072 ARM::VST1d64QPseudo }; 5073 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 5074 ARM::VST1q16LowQPseudo_UPD, 5075 ARM::VST1q32LowQPseudo_UPD, 5076 ARM::VST1q64LowQPseudo_UPD 
}; 5077 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 5078 ARM::VST1q16HighQPseudo, 5079 ARM::VST1q32HighQPseudo, 5080 ARM::VST1q64HighQPseudo }; 5081 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 5082 return; 5083 } 5084 5085 case Intrinsic::arm_neon_vst2: { 5086 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 5087 ARM::VST2d32, ARM::VST1q64 }; 5088 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 5089 ARM::VST2q32Pseudo }; 5090 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 5091 return; 5092 } 5093 5094 case Intrinsic::arm_neon_vst3: { 5095 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 5096 ARM::VST3d16Pseudo, 5097 ARM::VST3d32Pseudo, 5098 ARM::VST1d64TPseudo }; 5099 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 5100 ARM::VST3q16Pseudo_UPD, 5101 ARM::VST3q32Pseudo_UPD }; 5102 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 5103 ARM::VST3q16oddPseudo, 5104 ARM::VST3q32oddPseudo }; 5105 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 5106 return; 5107 } 5108 5109 case Intrinsic::arm_neon_vst4: { 5110 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 5111 ARM::VST4d16Pseudo, 5112 ARM::VST4d32Pseudo, 5113 ARM::VST1d64QPseudo }; 5114 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 5115 ARM::VST4q16Pseudo_UPD, 5116 ARM::VST4q32Pseudo_UPD }; 5117 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 5118 ARM::VST4q16oddPseudo, 5119 ARM::VST4q32oddPseudo }; 5120 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 5121 return; 5122 } 5123 5124 case Intrinsic::arm_neon_vst2lane: { 5125 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 5126 ARM::VST2LNd16Pseudo, 5127 ARM::VST2LNd32Pseudo }; 5128 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 5129 ARM::VST2LNq32Pseudo }; 5130 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 5131 return; 5132 } 5133 5134 case Intrinsic::arm_neon_vst3lane: { 5135 static 
const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 5136 ARM::VST3LNd16Pseudo, 5137 ARM::VST3LNd32Pseudo }; 5138 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 5139 ARM::VST3LNq32Pseudo }; 5140 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 5141 return; 5142 } 5143 5144 case Intrinsic::arm_neon_vst4lane: { 5145 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 5146 ARM::VST4LNd16Pseudo, 5147 ARM::VST4LNd32Pseudo }; 5148 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 5149 ARM::VST4LNq32Pseudo }; 5150 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 5151 return; 5152 } 5153 5154 case Intrinsic::arm_mve_vldr_gather_base_wb: 5155 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: { 5156 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre, 5157 ARM::MVE_VLDRDU64_qi_pre}; 5158 SelectMVE_WB(N, Opcodes, 5159 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated); 5160 return; 5161 } 5162 5163 case Intrinsic::arm_mve_vld2q: { 5164 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8}; 5165 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 5166 ARM::MVE_VLD21_16}; 5167 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 5168 ARM::MVE_VLD21_32}; 5169 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 5170 SelectMVE_VLD(N, 2, Opcodes, false); 5171 return; 5172 } 5173 5174 case Intrinsic::arm_mve_vld4q: { 5175 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 5176 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8}; 5177 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 5178 ARM::MVE_VLD42_16, 5179 ARM::MVE_VLD43_16}; 5180 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 5181 ARM::MVE_VLD42_32, 5182 ARM::MVE_VLD43_32}; 5183 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 5184 SelectMVE_VLD(N, 4, Opcodes, false); 5185 return; 5186 } 5187 } 5188 break; 5189 } 5190 5191 case 
ISD::INTRINSIC_WO_CHAIN: { 5192 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 5193 switch (IntNo) { 5194 default: 5195 break; 5196 5197 // Scalar f32 -> bf16 5198 case Intrinsic::arm_neon_vcvtbfp2bf: { 5199 SDLoc dl(N); 5200 const SDValue &Src = N->getOperand(1); 5201 llvm::EVT DestTy = N->getValueType(0); 5202 SDValue Pred = getAL(CurDAG, dl); 5203 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 5204 SDValue Ops[] = { Src, Src, Pred, Reg0 }; 5205 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops); 5206 return; 5207 } 5208 5209 // Vector v4f32 -> v4bf16 5210 case Intrinsic::arm_neon_vcvtfp2bf: { 5211 SDLoc dl(N); 5212 const SDValue &Src = N->getOperand(1); 5213 SDValue Pred = getAL(CurDAG, dl); 5214 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 5215 SDValue Ops[] = { Src, Pred, Reg0 }; 5216 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops); 5217 return; 5218 } 5219 5220 case Intrinsic::arm_mve_urshrl: 5221 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false); 5222 return; 5223 case Intrinsic::arm_mve_uqshll: 5224 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false); 5225 return; 5226 case Intrinsic::arm_mve_srshrl: 5227 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false); 5228 return; 5229 case Intrinsic::arm_mve_sqshll: 5230 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false); 5231 return; 5232 case Intrinsic::arm_mve_uqrshll: 5233 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true); 5234 return; 5235 case Intrinsic::arm_mve_sqrshrl: 5236 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true); 5237 return; 5238 5239 case Intrinsic::arm_mve_vadc: 5240 case Intrinsic::arm_mve_vadc_predicated: 5241 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true, 5242 IntNo == Intrinsic::arm_mve_vadc_predicated); 5243 return; 5244 case Intrinsic::arm_mve_vsbc: 5245 case Intrinsic::arm_mve_vsbc_predicated: 5246 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true, 5247 IntNo == Intrinsic::arm_mve_vsbc_predicated); 5248 return; 
5249 case Intrinsic::arm_mve_vshlc: 5250 case Intrinsic::arm_mve_vshlc_predicated: 5251 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated); 5252 return; 5253 5254 case Intrinsic::arm_mve_vmlldava: 5255 case Intrinsic::arm_mve_vmlldava_predicated: { 5256 static const uint16_t OpcodesU[] = { 5257 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32, 5258 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32, 5259 }; 5260 static const uint16_t OpcodesS[] = { 5261 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32, 5262 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32, 5263 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32, 5264 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32, 5265 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32, 5266 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32, 5267 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32, 5268 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32, 5269 }; 5270 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated, 5271 OpcodesS, OpcodesU); 5272 return; 5273 } 5274 5275 case Intrinsic::arm_mve_vrmlldavha: 5276 case Intrinsic::arm_mve_vrmlldavha_predicated: { 5277 static const uint16_t OpcodesU[] = { 5278 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32, 5279 }; 5280 static const uint16_t OpcodesS[] = { 5281 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32, 5282 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32, 5283 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32, 5284 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32, 5285 }; 5286 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated, 5287 OpcodesS, OpcodesU); 5288 return; 5289 } 5290 5291 case Intrinsic::arm_mve_vidup: 5292 case Intrinsic::arm_mve_vidup_predicated: { 5293 static const uint16_t Opcodes[] = { 5294 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32, 5295 }; 5296 SelectMVE_VxDUP(N, Opcodes, false, 5297 IntNo == Intrinsic::arm_mve_vidup_predicated); 5298 return; 5299 } 5300 5301 case Intrinsic::arm_mve_vddup: 5302 case Intrinsic::arm_mve_vddup_predicated: { 5303 
static const uint16_t Opcodes[] = { 5304 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32, 5305 }; 5306 SelectMVE_VxDUP(N, Opcodes, false, 5307 IntNo == Intrinsic::arm_mve_vddup_predicated); 5308 return; 5309 } 5310 5311 case Intrinsic::arm_mve_viwdup: 5312 case Intrinsic::arm_mve_viwdup_predicated: { 5313 static const uint16_t Opcodes[] = { 5314 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32, 5315 }; 5316 SelectMVE_VxDUP(N, Opcodes, true, 5317 IntNo == Intrinsic::arm_mve_viwdup_predicated); 5318 return; 5319 } 5320 5321 case Intrinsic::arm_mve_vdwdup: 5322 case Intrinsic::arm_mve_vdwdup_predicated: { 5323 static const uint16_t Opcodes[] = { 5324 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32, 5325 }; 5326 SelectMVE_VxDUP(N, Opcodes, true, 5327 IntNo == Intrinsic::arm_mve_vdwdup_predicated); 5328 return; 5329 } 5330 5331 case Intrinsic::arm_cde_cx1d: 5332 case Intrinsic::arm_cde_cx1da: 5333 case Intrinsic::arm_cde_cx2d: 5334 case Intrinsic::arm_cde_cx2da: 5335 case Intrinsic::arm_cde_cx3d: 5336 case Intrinsic::arm_cde_cx3da: { 5337 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da || 5338 IntNo == Intrinsic::arm_cde_cx2da || 5339 IntNo == Intrinsic::arm_cde_cx3da; 5340 size_t NumExtraOps; 5341 uint16_t Opcode; 5342 switch (IntNo) { 5343 case Intrinsic::arm_cde_cx1d: 5344 case Intrinsic::arm_cde_cx1da: 5345 NumExtraOps = 0; 5346 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D; 5347 break; 5348 case Intrinsic::arm_cde_cx2d: 5349 case Intrinsic::arm_cde_cx2da: 5350 NumExtraOps = 1; 5351 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D; 5352 break; 5353 case Intrinsic::arm_cde_cx3d: 5354 case Intrinsic::arm_cde_cx3da: 5355 NumExtraOps = 2; 5356 Opcode = HasAccum ? 
ARM::CDE_CX3DA : ARM::CDE_CX3D; 5357 break; 5358 default: 5359 llvm_unreachable("Unexpected opcode"); 5360 } 5361 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum); 5362 return; 5363 } 5364 } 5365 break; 5366 } 5367 5368 case ISD::ATOMIC_CMP_SWAP: 5369 SelectCMP_SWAP(N); 5370 return; 5371 } 5372 5373 SelectCode(N); 5374 } 5375 5376 // Inspect a register string of the form 5377 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 5378 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 5379 // and obtain the integer operands from them, adding these operands to the 5380 // provided vector. 5381 static void getIntOperandsFromRegisterString(StringRef RegString, 5382 SelectionDAG *CurDAG, 5383 const SDLoc &DL, 5384 std::vector<SDValue> &Ops) { 5385 SmallVector<StringRef, 5> Fields; 5386 RegString.split(Fields, ':'); 5387 5388 if (Fields.size() > 1) { 5389 bool AllIntFields = true; 5390 5391 for (StringRef Field : Fields) { 5392 // Need to trim out leading 'cp' characters and get the integer field. 5393 unsigned IntField; 5394 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 5395 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 5396 } 5397 5398 assert(AllIntFields && 5399 "Unexpected non-integer value in special register string."); 5400 (void)AllIntFields; 5401 } 5402 } 5403 5404 // Maps a Banked Register string to its mask value. The mask value returned is 5405 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 5406 // mask operand, which expresses which register is to be used, e.g. r8, and in 5407 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 5408 // was invalid. 
/// Map a banked-register string (e.g. "r8_usr") to its encoding for use as the
/// banked-register mask operand of MRSbanked/MSRbanked. Returns -1 if the
/// string does not name a valid banked register.
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Maps MClass special registers string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  // Reject both unknown registers and registers whose required subtarget
  // features (e.g. DSP, security extensions) are not available.
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

/// Build the MSR mask operand for the A/R-class registers apsr, cpsr and spsr
/// from the register name and its "_<flags>" suffix. Returns -1 if the
/// register or flag string is invalid.
static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contains the fields to be accessed in the special register, set by
  // the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc"
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns true if the node was replaced, false if the register string could
// not be matched (the generic selector then reports an error).
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // NOTE(review): the dyn_cast results are dereferenced without a null check;
  // presumably read_register always carries !{!"regname"} metadata here —
  // confirm, otherwise these should be cast<>.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // All MRC/MRRC forms are predicable: append the AL condition, the
    // cc-out register placeholder, and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // All VMRS forms need a VFP base; MVFR2 additionally requires FP-ARMv8.
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
// Returns true if the node was replaced, false if the register string could
// not be matched.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // NOTE(review): same unchecked dyn_cast pattern as tryReadRegister —
  // relies on well-formed write_register metadata; confirm or use cast<>.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // Insert the single 32-bit value to write after cp<n> and opc1.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      // 64-bit write: the value arrives as two i32 operands.
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split a possible "<reg>_<flags>" string (e.g. "cpsr_fc") into the
  // register name and the flags suffix for the A/R-class mask lookup below.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

/// Rewrite an INLINEASM node so that pairs of i32 GPR operands constrained to
/// the GPR class are replaced by a single GPRPair virtual register (with the
/// appropriate copies in/out), so that instructions needing an even/odd
/// register pair select correctly. Returns true if the node was rebuilt.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // Tracks, per register operand group, whether it was rewritten to a pair;
  // needed so that tied uses follow their (possibly rewritten) defs.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Copy the leading non-operand entries (chain, asm string, ...) verbatim.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Only rewrite groups of exactly two GPR-class registers (the i64-in-"r"
    // case); everything else is passed through untouched.
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      // Rebuild the flag word: one register of the GPRPair class (or the
      // matching-operand form if this use is tied to a rewritten def).
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Rebuild the inline asm node with the (possibly paired) operand list.
  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}

/// Lower an inline-asm memory operand. Returning false signals success (the
/// operand list in OutOps is used); returning true reports failure.
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}