//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true, as some complex patterns (like those that call
  /// canExtractShiftFromMul) can modify the DAG in place.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check.
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check.
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template <int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
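  /// (Writeback means the instruction also produces the updated base pointer
  /// as an extra result.)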
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  ///   2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  ///   1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array, which contains
  /// multiple opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///        the accumulator and the immediate operand, i.e. 0
  ///        for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
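  /// For example (illustrative), an (and (srl X, 7), 0x1f) subtree can be
  /// selected as 'ubfx Rd, Rn, #7, #5' on v6T2 and later.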
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if the node forms an integer ABS pattern.
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node has a specific
// opcode and an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
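/// For example, with Scale == 4 and the range [0, 256), the constants
/// 0, 4, ..., 1020 are accepted, and ScaledConstant receives the constant
/// divided by 4.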
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operands of 1 / 2 are free but others are not.
      // e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select an FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
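/// Returns true when forming a VMLA/VMLS is safe or irrelevant, i.e. when no
/// hazard-prone use of the result follows.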
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant.
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of.
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better.
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
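  // e.g. (illustrative) (mul X, 20) can become (mul X, 5) with an LSL #2
  // shifter operand, when materializing 5 is cheaper than materializing 20.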
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}

bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
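      // (Fold the frame index into Base and use a zero immediate offset.)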
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = -RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12.
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub
                                                     : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
      ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
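  // i.e. the shift sits on operand 0, so Base and Offset swap roles below.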
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset,
                                                  SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
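    // Register 0 is the ARM ISel idiom for "no offset register"; the
    // immediate itself is carried in the Opc operand.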
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base,
                                        SDValue &Offset, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isZero())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
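  // e.g. with Scale == 4 this accepts byte offsets 0, 4, ..., 124
  // (an imm5 in [0, 32) scaled by 4).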
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
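  // (No encodable imm7 offset; the whole expression becomes the base and any
  // addition is materialized separately.)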
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
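  // Positive R + imm12 offsets are left for SelectT2AddrModeImm12 / t2LDRi12;
  // only strictly negative 8-bit offsets are accepted here.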
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
        ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
        : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7-bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}

template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
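  // Every path below therefore returns true; the zero-offset (Base, #0) form
  // is the universal fallback.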
1545 Base = N; 1546 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1547 1548 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1549 return true; 1550 1551 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1552 if (!RHS) 1553 return true; 1554 1555 uint32_t RHSC = (int)RHS->getZExtValue(); 1556 if (RHSC > 1020 || RHSC % 4 != 0) 1557 return true; 1558 1559 Base = N.getOperand(0); 1560 if (Base.getOpcode() == ISD::FrameIndex) { 1561 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1562 Base = CurDAG->getTargetFrameIndex( 1563 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1564 } 1565 1566 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1567 return true; 1568 } 1569 1570 //===--------------------------------------------------------------------===// 1571 1572 /// getAL - Returns a ARMCC::AL immediate node. 1573 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { 1574 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); 1575 } 1576 1577 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 1578 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1579 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 1580 } 1581 1582 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { 1583 LoadSDNode *LD = cast<LoadSDNode>(N); 1584 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1585 if (AM == ISD::UNINDEXED) 1586 return false; 1587 1588 EVT LoadedVT = LD->getMemoryVT(); 1589 SDValue Offset, AMOpc; 1590 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1591 unsigned Opcode = 0; 1592 bool Match = false; 1593 if (LoadedVT == MVT::i32 && isPre && 1594 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1595 Opcode = ARM::LDR_PRE_IMM; 1596 Match = true; 1597 } else if (LoadedVT == MVT::i32 && !isPre && 1598 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1599 Opcode = ARM::LDR_POST_IMM; 1600 Match = true; 1601 } else if (LoadedVT == MVT::i32 && 1602 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1603 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; 1604 Match = true; 1605 1606 } else if (LoadedVT == MVT::i16 && 1607 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1608 Match = true; 1609 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) 1610 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) 1611 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); 1612 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { 1613 if (LD->getExtensionType() == ISD::SEXTLOAD) { 1614 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1615 Match = true; 1616 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; 1617 } 1618 } else { 1619 if (isPre && 1620 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1621 Match = true; 1622 Opcode = ARM::LDRB_PRE_IMM; 1623 } else if (!isPre && 1624 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1625 Match = true; 1626 Opcode = ARM::LDRB_POST_IMM; 1627 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1628 Match = true; 1629 Opcode = isPre ? 
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
                 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
                 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[] = { Base, getAL(CurDAG, SDLoc(N)),
                    CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}
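// Thumb-2 pre/post-indexed loads all use the same +/-imm8 offset form, so a
// single addressing-mode check (SelectT2AddrModeImm8Offset) covers every load
// width handled below.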
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[] = { Base, Offset, getAL(CurDAG, SDLoc(N)),
                      CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}
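// MVE indexed loads also carry a vector predicate: a plain LoadSDNode becomes
// an unpredicated (ARMVCC::None) instruction, while a MaskedLoadSDNode becomes
// a VPT-predicated (ARMVCC::Then) one with its mask as the predicate operand.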
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
                   PredReg,
                   CurDAG->getRegister(0, MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
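// All of these create*Node helpers emit a single REG_SEQUENCE whose operands
// alternate value and subregister index; the register allocator then assigns
// one register of the requested class that covers all of the inputs.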
/// Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                    V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                    V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                    V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
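// For example, a two-register "vld1.64 {d0, d1}, [r0:128]" can claim at most
// 16-byte alignment in its encoding; GetVLDSTAlign below clamps whatever
// alignment the IR promised down to the largest value the instruction can
// actually encode.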
/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
/// of a NEON VLD or VST instruction. The supported values depend on the
/// number of registers being loaded.
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
                                       unsigned NumVecs, bool is64BitVector) {
  unsigned NumRegs = NumVecs;
  if (!is64BitVector && NumVecs < 3)
    NumRegs *= 2;

  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
  if (Alignment >= 32 && NumRegs == 4)
    Alignment = 32;
  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
    Alignment = 16;
  else if (Alignment >= 8)
    Alignment = 8;
  else
    Alignment = 0;

  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
}
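// NEON writeback loads/stores come in two flavours: "_fixed", where the base
// is post-incremented by the access size (the "vld1.8 {d0}, [r0]!" syntax),
// and "_register", where the increment is an arbitrary register
// ("vld1.8 {d0}, [r0], r2"). The predicates below identify the fixed forms.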
static bool isVLDfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VLD1d8wb_fixed: return true;
  case ARM::VLD1d16wb_fixed: return true;
  case ARM::VLD1d64Qwb_fixed: return true;
  case ARM::VLD1d32wb_fixed: return true;
  case ARM::VLD1d64wb_fixed: return true;
  case ARM::VLD1d8TPseudoWB_fixed: return true;
  case ARM::VLD1d16TPseudoWB_fixed: return true;
  case ARM::VLD1d32TPseudoWB_fixed: return true;
  case ARM::VLD1d64TPseudoWB_fixed: return true;
  case ARM::VLD1d8QPseudoWB_fixed: return true;
  case ARM::VLD1d16QPseudoWB_fixed: return true;
  case ARM::VLD1d32QPseudoWB_fixed: return true;
  case ARM::VLD1d64QPseudoWB_fixed: return true;
  case ARM::VLD1q8wb_fixed: return true;
  case ARM::VLD1q16wb_fixed: return true;
  case ARM::VLD1q32wb_fixed: return true;
  case ARM::VLD1q64wb_fixed: return true;
  case ARM::VLD1DUPd8wb_fixed: return true;
  case ARM::VLD1DUPd16wb_fixed: return true;
  case ARM::VLD1DUPd32wb_fixed: return true;
  case ARM::VLD1DUPq8wb_fixed: return true;
  case ARM::VLD1DUPq16wb_fixed: return true;
  case ARM::VLD1DUPq32wb_fixed: return true;
  case ARM::VLD2d8wb_fixed: return true;
  case ARM::VLD2d16wb_fixed: return true;
  case ARM::VLD2d32wb_fixed: return true;
  case ARM::VLD2q8PseudoWB_fixed: return true;
  case ARM::VLD2q16PseudoWB_fixed: return true;
  case ARM::VLD2q32PseudoWB_fixed: return true;
  case ARM::VLD2DUPd8wb_fixed: return true;
  case ARM::VLD2DUPd16wb_fixed: return true;
  case ARM::VLD2DUPd32wb_fixed: return true;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
  }
}

static bool isVSTfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VST1d8wb_fixed: return true;
  case ARM::VST1d16wb_fixed: return true;
  case ARM::VST1d32wb_fixed: return true;
  case ARM::VST1d64wb_fixed: return true;
  case ARM::VST1q8wb_fixed: return true;
  case ARM::VST1q16wb_fixed: return true;
  case ARM::VST1q32wb_fixed: return true;
  case ARM::VST1q64wb_fixed: return true;
  case ARM::VST1d8TPseudoWB_fixed: return true;
  case ARM::VST1d16TPseudoWB_fixed: return true;
  case ARM::VST1d32TPseudoWB_fixed: return true;
  case ARM::VST1d64TPseudoWB_fixed: return true;
  case ARM::VST1d8QPseudoWB_fixed: return true;
  case ARM::VST1d16QPseudoWB_fixed: return true;
  case ARM::VST1d32QPseudoWB_fixed: return true;
  case ARM::VST1d64QPseudoWB_fixed: return true;
  case ARM::VST2d8wb_fixed: return true;
  case ARM::VST2d16wb_fixed: return true;
  case ARM::VST2d32wb_fixed: return true;
  case ARM::VST2q8PseudoWB_fixed: return true;
  case ARM::VST2q16PseudoWB_fixed: return true;
  case ARM::VST2q32PseudoWB_fixed: return true;
  }
}

// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed: return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed: return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed: return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed: return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed: return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed: return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  auto C = dyn_cast<ConstantSDNode>(Inc);
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}
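// VLD3 and VLD4 of quad registers cannot be encoded as one instruction, so
// SelectVLD emits two instructions in that case: one loading the even D
// subregisters and one loading the odd ones (the QOpcodes0/QOpcodes1 tables).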
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
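// SelectVST mirrors SelectVLD: the vectors to be stored are first glued into
// a single super-register with REG_SEQUENCE, and quad-register VST3/VST4 are
// likewise split into an even-registers store and an odd-registers store.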
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
            ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
            : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
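// Lane loads/stores such as "vld2.16 {d0[1], d1[1]}, [r0]" leave the other
// lanes of each register untouched, so for loads the super-register built
// from the incoming vectors is an input of the machine instruction as well as
// its result.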
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane =
      cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
        ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
        : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask,
                                           SDValue Inactive) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
  Ops.push_back(Inactive);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                                EVT InactiveTy) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
  Ops.push_back(SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}

void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  uint16_t Opcode;
  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(N->getOperand(2)); // vector of base addresses

  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(N->getOperand(0)); // chain

  SmallVector<EVT, 8> VTs;
  VTs.push_back(N->getValueType(1));
  VTs.push_back(N->getValueType(0));
  VTs.push_back(N->getValueType(2));

  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  transferMemOperands(N, New);
  CurDAG->RemoveDeadNode(N);
}
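// MVE scalar long shifts operate on a 64-bit value held in a pair of GPRs,
// which is why the node below carries the value to be shifted as two separate
// i32 operands.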
void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
                                          bool Immediate,
                                          bool HasSaturationOperand) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // Two 32-bit halves of the value to be shifted
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));

  // The shift count
  if (Immediate) {
    int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
  } else {
    Ops.push_back(N->getOperand(3));
  }

  // The immediate saturation operand, if any
  if (HasSaturationOperand) {
    int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
    int SatBit = (SatOp == 64 ? 0 : 1);
    Ops.push_back(getI32Imm(SatBit, Loc));
  }

  // MVE scalar shifts are IT-predicable, so include the standard
  // predicate arguments.
  Ops.push_back(getAL(CurDAG, Loc));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  uint16_t Opcode;

  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(N->getOperand(FirstInputOp));
  Ops.push_back(N->getOperand(FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
  uint32_t CarryMask = 1 << 29;
  uint32_t CarryExpected = Add ? 0 : CarryMask;
  if (CarryInConstant &&
      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
    Opcode = OpcodeWithNoCarry;
  } else {
    Ops.push_back(CarryIn);
    Opcode = OpcodeWithCarry;
  }

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc,
                         N->getOperand(FirstInputOp + 3),  // predicate
                         N->getOperand(FirstInputOp - 1)); // inactive
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // One vector input, followed by a 32-bit word of bits to shift in
  // and then an immediate shift count
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));
  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops));
}

static bool SDValueToConstBool(SDValue SDVal) {
  assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
  ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
  uint64_t Value = SDValConstant->getZExtValue();
  assert((Value == 0 || Value == 1) && "expected value 0 or 1");
  return Value;
}
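// The opcode tables handed to SelectBaseMVE_VMLLDAV are laid out so that the
// subtract, exchange and accumulate variants sit at fixed strides from the
// base opcode; the pointer arithmetic below indexes into that layout.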
void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
  bool IsSub = SDValueToConstBool(N->getOperand(2));
  bool IsExchange = SDValueToConstBool(N->getOperand(3));
  if (IsUnsigned) {
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  auto OpIsZero = [N](size_t OpNo) {
    if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
      if (OpConst->getZExtValue() == 0)
        return true;
    return false;
  };

  // If the input accumulator value is not zero, select an instruction with
  // accumulator, otherwise select an instruction without accumulator
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(N->getOperand(4));
    Ops.push_back(N->getOperand(5));
  }
  // Push the two vector operands
  Ops.push_back(N->getOperand(6));
  Ops.push_back(N->getOperand(7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
                                        const uint16_t *OpcodesS,
                                        const uint16_t *OpcodesU) {
  EVT VecTy = N->getOperand(6).getValueType();
  size_t SizeIndex;
  switch (VecTy.getVectorElementType().getSizeInBits()) {
  case 16:
    SizeIndex = 0;
    break;
  case 32:
    SizeIndex = 1;
    break;
  default:
    llvm_unreachable("bad vector element size");
  }

  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
}

void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                                          const uint16_t *OpcodesS,
                                          const uint16_t *OpcodesU) {
  assert(
      N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
          32 &&
      "bad vector element size");
  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
}
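// MVE VLD2/VLD4 deinterleaving loads are selected as NumVecs "stage"
// instructions (e.g. vld20 followed by vld21) that each read the same base
// pointer and accumulate lanes into one QQ/QQQQ super-register; only the
// final stage may perform the writeback.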
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add an MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
  transferMemOperands(N, LoadInst);

  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  unsigned OpIdx = 1;

  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(OpIdx++);

  Ops.push_back(N->getOperand(OpIdx++));   // base
  if (Wrapping)
    Ops.push_back(N->getOperand(OpIdx++)); // limit

  SDValue ImmOp = N->getOperand(OpIdx++);  // step
  int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}
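// CDE CX*D instructions produce a 64-bit result in a GPR pair. The helper
// below packs the accumulator into such a pair for the accumulating variants
// and afterwards splits the result pair back into the intrinsic's two i32
// outputs, swapping halves on big-endian targets.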
void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCorpoc = N->getOperand(OpIdx++);
  uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue();
  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));

  // For accumulating variants copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(OpIdx++);
    SDValue AccHi = N->getOperand(OpIdx++);
    if (IsBigEndian)
      std::swap(AccLo, AccHi);
    Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
  }

  // Copy extra operands as-is.
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(N->getOperand(OpIdx++));

  // Convert and append the immediate operand
  SDValue Imm = N->getOperand(OpIdx);
  uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
  Ops.push_back(getI32Imm(ImmVal, Loc));

  // Accumulating variants are IT-predicable, add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, Loc);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    Ops.push_back(Pred);
    Ops.push_back(PredReg);
  }

  // Create the CDE instruction
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the dual-register
  // CDE instruction is a register pair. We need to extract the two subregisters
  // and replace all uses of the original outputs with the extracted
  // subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  if (IsBigEndian)
    std::swap(SubRegs[0], SubRegs[1]);

  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
                                                    MVT::i32, ResultPair);
    ReplaceUses(SDValue(N, ResIdx), SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}
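// VLDnDUP loads one element per register and replicates it into every lane,
// e.g. "vld2.8 {d0[], d1[]}, [r0]".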
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                   OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else if (NumVecs == 2) {
    const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Chain = SDValue(VLdA, 1);
  } else {
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Ops.push_back(SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7,
                  "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx + Vec, dl, VT, SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
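// VINS copies the bottom 16 bits of one S register into the top half of
// another, and VMOVX moves the top half down to the bottom; together they let
// a pair of adjacent f16 lane inserts be done as a single 32-bit lane move.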
bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
  // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val1.getOperand(1)) &&
      isa<ConstantSDNode>(Val2.getOperand(1)) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into an f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
          NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with an optional vmovx
    // for extracting odd lanes.
    if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
3154 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) { 3155 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1); 3156 SDValue NewIns = 3157 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, 3158 Ins2.getOperand(0), SDValue(VINS, 0)); 3159 ReplaceUses(Ins1, NewIns); 3160 return true; 3161 } 3162 3163 return false; 3164 } 3165 3166 bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N, 3167 SDNode *FMul, 3168 bool IsUnsigned, 3169 bool FixedToFloat) { 3170 auto Type = N->getValueType(0); 3171 unsigned ScalarBits = Type.getScalarSizeInBits(); 3172 if (ScalarBits > 32) 3173 return false; 3174 3175 SDNodeFlags FMulFlags = FMul->getFlags(); 3176 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3177 // allowed in 16 bit unsigned floats 3178 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned) 3179 return false; 3180 3181 SDValue ImmNode = FMul->getOperand(1); 3182 SDValue VecVal = FMul->getOperand(0); 3183 if (VecVal->getOpcode() == ISD::UINT_TO_FP || 3184 VecVal->getOpcode() == ISD::SINT_TO_FP) 3185 VecVal = VecVal->getOperand(0); 3186 3187 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits) 3188 return false; 3189 3190 if (ImmNode.getOpcode() == ISD::BITCAST) { 3191 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3192 return false; 3193 ImmNode = ImmNode.getOperand(0); 3194 } 3195 3196 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3197 return false; 3198 3199 APFloat ImmAPF(0.0f); 3200 switch (ImmNode.getOpcode()) { 3201 case ARMISD::VMOVIMM: 3202 case ARMISD::VDUP: { 3203 if (!isa<ConstantSDNode>(ImmNode.getOperand(0))) 3204 return false; 3205 unsigned Imm = ImmNode.getConstantOperandVal(0); 3206 if (ImmNode.getOpcode() == ARMISD::VMOVIMM) 3207 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits); 3208 ImmAPF = 3209 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(), 3210 APInt(ScalarBits, Imm)); 3211 break; 3212 } 3213 case ARMISD::VMOVFPIMM: { 3214 ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0))); 3215 break; 3216 } 3217 default: 3218 return false; 3219 } 3220 3221 // Where n is the number of fractional bits, multiplying by 2^n will convert 3222 // from float to fixed and multiplying by 2^-n will convert from fixed to 3223 // float. Taking log2 of the factor (after taking the inverse in the case of 3224 // float to fixed) will give n. 3225 APFloat ToConvert = ImmAPF; 3226 if (FixedToFloat) { 3227 if (!ImmAPF.getExactInverse(&ToConvert)) 3228 return false; 3229 } 3230 APSInt Converted(64, false); 3231 bool IsExact; 3232 ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, 3233 &IsExact); 3234 if (!IsExact || !Converted.isPowerOf2()) 3235 return false; 3236 3237 unsigned FracBits = Converted.logBase2(); 3238 if (FracBits > ScalarBits) 3239 return false; 3240 3241 SmallVector<SDValue, 3> Ops{ 3242 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)}; 3243 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type); 3244 3245 unsigned int Opcode; 3246 switch (ScalarBits) { 3247 case 16: 3248 if (FixedToFloat) 3249 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; 3250 else 3251 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3252 break; 3253 case 32: 3254 if (FixedToFloat) 3255 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; 3256 else 3257 Opcode = IsUnsigned ? 
ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3258 break; 3259 default: 3260 llvm_unreachable("unexpected number of scalar bits"); 3261 break; 3262 } 3263 3264 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops)); 3265 return true; 3266 } 3267 3268 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) { 3269 // Transform a floating-point to fixed-point conversion to a VCVT 3270 if (!Subtarget->hasMVEFloatOps()) 3271 return false; 3272 EVT Type = N->getValueType(0); 3273 if (!Type.isVector()) 3274 return false; 3275 unsigned int ScalarBits = Type.getScalarSizeInBits(); 3276 3277 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT || 3278 N->getOpcode() == ISD::FP_TO_UINT_SAT; 3279 SDNode *Node = N->getOperand(0).getNode(); 3280 3281 // floating-point to fixed-point with one fractional bit gets turned into an 3282 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y)) 3283 if (Node->getOpcode() == ISD::FADD) { 3284 if (Node->getOperand(0) != Node->getOperand(1)) 3285 return false; 3286 SDNodeFlags Flags = Node->getFlags(); 3287 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3288 // allowed in 16 bit unsigned floats 3289 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned) 3290 return false; 3291 3292 unsigned Opcode; 3293 switch (ScalarBits) { 3294 case 16: 3295 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3296 break; 3297 case 32: 3298 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3299 break; 3300 } 3301 SmallVector<SDValue, 3> Ops{Node->getOperand(0), 3302 CurDAG->getConstant(1, dl, MVT::i32)}; 3303 AddEmptyMVEPredicateToOps(Ops, dl, Type); 3304 3305 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops)); 3306 return true; 3307 } 3308 3309 if (Node->getOpcode() != ISD::FMUL) 3310 return false; 3311 3312 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false); 3313 } 3314 3315 bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { 3316 // Transform a fixed-point to floating-point conversion to a VCVT 3317 if (!Subtarget->hasMVEFloatOps()) 3318 return false; 3319 auto Type = N->getValueType(0); 3320 if (!Type.isVector()) 3321 return false; 3322 3323 auto LHS = N->getOperand(0); 3324 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP) 3325 return false; 3326 3327 return transformFixedFloatingPointConversion( 3328 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true); 3329 } 3330 3331 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 3332 if (!Subtarget->hasV6T2Ops()) 3333 return false; 3334 3335 unsigned Opc = isSigned 3336 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 3337 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 3338 SDLoc dl(N); 3339 3340 // For unsigned extracts, check for a shift right and mask 3341 unsigned And_imm = 0; 3342 if (N->getOpcode() == ISD::AND) { 3343 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 3344 3345 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 3346 if (And_imm & (And_imm + 1)) 3347 return false; 3348 3349 unsigned Srl_imm = 0; 3350 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 3351 Srl_imm)) { 3352 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3353 3354 // Mask off the unnecessary bits of the AND immediate; normally 3355 // DAGCombine will do this, but that might not happen if 3356 // targetShrinkDemandedConstant chooses a different immediate. 
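// For example (illustrative): with Srl_imm == 8, only the low 24 bits of the
// AND immediate can still be set after the shift, so the line below clears
// the rest: And_imm &= 0x00ffffff.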
3357 And_imm &= -1U >> Srl_imm; 3358 3359 // Note: The width operand is encoded as width-1. 3360 unsigned Width = countTrailingOnes(And_imm) - 1; 3361 unsigned LSB = Srl_imm; 3362 3363 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3364 3365 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 3366 // It's cheaper to use a right shift to extract the top bits. 3367 if (Subtarget->isThumb()) { 3368 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 3369 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3370 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3371 getAL(CurDAG, dl), Reg0, Reg0 }; 3372 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3373 return true; 3374 } 3375 3376 // ARM models shift instructions as MOVsi with shifter operand. 3377 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 3378 SDValue ShOpc = 3379 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 3380 MVT::i32); 3381 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 3382 getAL(CurDAG, dl), Reg0, Reg0 }; 3383 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 3384 return true; 3385 } 3386 3387 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3388 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3389 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3390 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3391 getAL(CurDAG, dl), Reg0 }; 3392 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3393 return true; 3394 } 3395 } 3396 return false; 3397 } 3398 3399 // Otherwise, we're looking for a shift of a shift 3400 unsigned Shl_imm = 0; 3401 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 3402 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 3403 unsigned Srl_imm = 0; 3404 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 3405 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3406 // Note: The width operand is encoded as width-1. 3407 unsigned Width = 32 - Srl_imm - 1; 3408 int LSB = Srl_imm - Shl_imm; 3409 if (LSB < 0) 3410 return false; 3411 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3412 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3413 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3414 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3415 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3416 getAL(CurDAG, dl), Reg0 }; 3417 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3418 return true; 3419 } 3420 } 3421 3422 // Or we are looking for a shift of an and, with a mask operand 3423 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 3424 isShiftedMask_32(And_imm)) { 3425 unsigned Srl_imm = 0; 3426 unsigned LSB = countTrailingZeros(And_imm); 3427 // Shift must be the same as the ands lsb 3428 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 3429 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3430 unsigned MSB = 31 - countLeadingZeros(And_imm); 3431 // Note: The width operand is encoded as width-1. 
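// A worked example (illustrative): And_imm == 0x0ff0 gives LSB == 4 and
// MSB == 11, so we emit a UBFX with lsb 4 and width operand 7, extracting
// an 8-bit field.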
3432 unsigned Width = MSB - LSB;
3433 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3434 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3435 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3436 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3437 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3438 getAL(CurDAG, dl), Reg0 };
3439 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3440 return true;
3441 }
3442 }
3443
3444 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3445 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3446 unsigned LSB = 0;
3447 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3448 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3449 return false;
3450
3451 if (LSB + Width > 32)
3452 return false;
3453
3454 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3455 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3456 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3457 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3458 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3459 getAL(CurDAG, dl), Reg0 };
3460 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3461 return true;
3462 }
3463
3464 return false;
3465 }
3466
3467 /// Target-specific DAG combining for ISD::SUB.
3468 /// Target-independent combining lowers SELECT_CC nodes of the form
3469 /// select_cc setg[ge] X, 0, X, -X
3470 /// select_cc setgt X, -1, X, -X
3471 /// select_cc setl[te] X, 0, -X, X
3472 /// select_cc setlt X, 1, -X, X
3473 /// which represent Integer ABS into:
3474 /// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
3475 /// ARM instruction selection detects the latter and matches it to an
3476 /// ARM::ABS or ARM::t2ABS machine node.
3477 bool ARMDAGToDAGISel::tryABSOp(SDNode *N) {
3478 SDValue SUBSrc0 = N->getOperand(0);
3479 SDValue SUBSrc1 = N->getOperand(1);
3480 EVT VT = N->getValueType(0);
3481
3482 if (Subtarget->isThumb1Only())
3483 return false;
3484
3485 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
3486 return false;
3487
3488 SDValue XORSrc0 = SUBSrc0.getOperand(0);
3489 SDValue XORSrc1 = SUBSrc0.getOperand(1);
3490 SDValue SRASrc0 = SUBSrc1.getOperand(0);
3491 SDValue SRASrc1 = SUBSrc1.getOperand(1);
3492 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
3493 EVT XType = SRASrc0.getValueType();
3494 unsigned Size = XType.getSizeInBits() - 1;
3495
3496 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3497 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3498 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3499 CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
3500 return true;
3501 }
3502
3503 return false;
3504 }
3505
3506 /// We've got special pseudo-instructions for these compare-and-swap nodes; pick the variant matching the memory access width.
3507 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3508 unsigned Opcode;
3509 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3510 if (MemTy == MVT::i8)
3511 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3512 else if (MemTy == MVT::i16)
3513 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3514 else if (MemTy == MVT::i32)
3515 Opcode = Subtarget->isThumb() ?
ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32; 3516 else 3517 llvm_unreachable("Unknown AtomicCmpSwap type"); 3518 3519 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3520 N->getOperand(0)}; 3521 SDNode *CmpSwap = CurDAG->getMachineNode( 3522 Opcode, SDLoc(N), 3523 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 3524 3525 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3526 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3527 3528 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3529 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3530 CurDAG->RemoveDeadNode(N); 3531 } 3532 3533 static Optional<std::pair<unsigned, unsigned>> 3534 getContiguousRangeOfSetBits(const APInt &A) { 3535 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1; 3536 unsigned LastOne = A.countTrailingZeros(); 3537 if (A.countPopulation() != (FirstOne - LastOne + 1)) 3538 return Optional<std::pair<unsigned,unsigned>>(); 3539 return std::make_pair(FirstOne, LastOne); 3540 } 3541 3542 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { 3543 assert(N->getOpcode() == ARMISD::CMPZ); 3544 SwitchEQNEToPLMI = false; 3545 3546 if (!Subtarget->isThumb()) 3547 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and 3548 // LSR don't exist as standalone instructions - they need the barrel shifter. 3549 return; 3550 3551 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 3552 SDValue And = N->getOperand(0); 3553 if (!And->hasOneUse()) 3554 return; 3555 3556 SDValue Zero = N->getOperand(1); 3557 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isZero() || 3558 And->getOpcode() != ISD::AND) 3559 return; 3560 SDValue X = And.getOperand(0); 3561 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 3562 3563 if (!C) 3564 return; 3565 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 3566 if (!Range) 3567 return; 3568 3569 // There are several ways to lower this: 3570 SDNode *NewN; 3571 SDLoc dl(N); 3572 3573 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 3574 if (Subtarget->isThumb2()) { 3575 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri; 3576 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3577 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3578 CurDAG->getRegister(0, MVT::i32) }; 3579 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3580 } else { 3581 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 3582 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3583 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3584 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3585 } 3586 }; 3587 3588 if (Range->second == 0) { 3589 // 1. Mask includes the LSB -> Simply shift the top N bits off 3590 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3591 ReplaceNode(And.getNode(), NewN); 3592 } else if (Range->first == 31) { 3593 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 3594 NewN = EmitShift(ARM::tLSRri, X, Range->second); 3595 ReplaceNode(And.getNode(), NewN); 3596 } else if (Range->first == Range->second) { 3597 // 3. Only one bit is set. We can shift this into the sign bit and use a 3598 // PL/MI comparison. 3599 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3600 ReplaceNode(And.getNode(), NewN); 3601 3602 SwitchEQNEToPLMI = true; 3603 } else if (!Subtarget->hasV6T2Ops()) { 3604 // 4. 
Do a double shift to clear bottom and top bits, but only in 3605 // thumb-1 mode as in thumb-2 we can use UBFX. 3606 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3607 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 3608 Range->second + (31 - Range->first)); 3609 ReplaceNode(And.getNode(), NewN); 3610 } 3611 3612 } 3613 3614 void ARMDAGToDAGISel::Select(SDNode *N) { 3615 SDLoc dl(N); 3616 3617 if (N->isMachineOpcode()) { 3618 N->setNodeId(-1); 3619 return; // Already selected. 3620 } 3621 3622 switch (N->getOpcode()) { 3623 default: break; 3624 case ISD::STORE: { 3625 // For Thumb1, match an sp-relative store in C++. This is a little 3626 // unfortunate, but I don't think I can make the chain check work 3627 // otherwise. (The chain of the store has to be the same as the chain 3628 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3629 // a direct reference to "SP".) 3630 // 3631 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3632 // a different addressing mode from other four-byte stores. 3633 // 3634 // This pattern usually comes up with call arguments. 3635 StoreSDNode *ST = cast<StoreSDNode>(N); 3636 SDValue Ptr = ST->getBasePtr(); 3637 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3638 int RHSC = 0; 3639 if (Ptr.getOpcode() == ISD::ADD && 3640 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3641 Ptr = Ptr.getOperand(0); 3642 3643 if (Ptr.getOpcode() == ISD::CopyFromReg && 3644 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3645 Ptr.getOperand(0) == ST->getChain()) { 3646 SDValue Ops[] = {ST->getValue(), 3647 CurDAG->getRegister(ARM::SP, MVT::i32), 3648 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3649 getAL(CurDAG, dl), 3650 CurDAG->getRegister(0, MVT::i32), 3651 ST->getChain()}; 3652 MachineSDNode *ResNode = 3653 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3654 MachineMemOperand *MemOp = ST->getMemOperand(); 3655 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3656 ReplaceNode(N, ResNode); 3657 return; 3658 } 3659 } 3660 break; 3661 } 3662 case ISD::WRITE_REGISTER: 3663 if (tryWriteRegister(N)) 3664 return; 3665 break; 3666 case ISD::READ_REGISTER: 3667 if (tryReadRegister(N)) 3668 return; 3669 break; 3670 case ISD::INLINEASM: 3671 case ISD::INLINEASM_BR: 3672 if (tryInlineAsm(N)) 3673 return; 3674 break; 3675 case ISD::SUB: 3676 // Select special operations if SUB node forms integer ABS pattern 3677 if (tryABSOp(N)) 3678 return; 3679 // Other cases are autogenerated. 
3680 break; 3681 case ISD::Constant: { 3682 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 3683 // If we can't materialize the constant we need to use a literal pool 3684 if (ConstantMaterializationCost(Val, Subtarget) > 2) { 3685 SDValue CPIdx = CurDAG->getTargetConstantPool( 3686 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3687 TLI->getPointerTy(CurDAG->getDataLayout())); 3688 3689 SDNode *ResNode; 3690 if (Subtarget->isThumb()) { 3691 SDValue Ops[] = { 3692 CPIdx, 3693 getAL(CurDAG, dl), 3694 CurDAG->getRegister(0, MVT::i32), 3695 CurDAG->getEntryNode() 3696 }; 3697 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3698 Ops); 3699 } else { 3700 SDValue Ops[] = { 3701 CPIdx, 3702 CurDAG->getTargetConstant(0, dl, MVT::i32), 3703 getAL(CurDAG, dl), 3704 CurDAG->getRegister(0, MVT::i32), 3705 CurDAG->getEntryNode() 3706 }; 3707 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3708 Ops); 3709 } 3710 // Annotate the Node with memory operand information so that MachineInstr 3711 // queries work properly. This e.g. gives the register allocation the 3712 // required information for rematerialization. 3713 MachineFunction& MF = CurDAG->getMachineFunction(); 3714 MachineMemOperand *MemOp = 3715 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3716 MachineMemOperand::MOLoad, 4, Align(4)); 3717 3718 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3719 3720 ReplaceNode(N, ResNode); 3721 return; 3722 } 3723 3724 // Other cases are autogenerated. 3725 break; 3726 } 3727 case ISD::FrameIndex: { 3728 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 3729 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3730 SDValue TFI = CurDAG->getTargetFrameIndex( 3731 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3732 if (Subtarget->isThumb1Only()) { 3733 // Set the alignment of the frame object to 4, to avoid having to generate 3734 // more than one ADD 3735 MachineFrameInfo &MFI = MF->getFrameInfo(); 3736 if (MFI.getObjectAlign(FI) < Align(4)) 3737 MFI.setObjectAlignment(FI, Align(4)); 3738 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3739 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3740 return; 3741 } else { 3742 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3743 ARM::t2ADDri : ARM::ADDri); 3744 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3745 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3746 CurDAG->getRegister(0, MVT::i32) }; 3747 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3748 return; 3749 } 3750 } 3751 case ISD::INSERT_VECTOR_ELT: { 3752 if (tryInsertVectorElt(N)) 3753 return; 3754 break; 3755 } 3756 case ISD::SRL: 3757 if (tryV6T2BitfieldExtractOp(N, false)) 3758 return; 3759 break; 3760 case ISD::SIGN_EXTEND_INREG: 3761 case ISD::SRA: 3762 if (tryV6T2BitfieldExtractOp(N, true)) 3763 return; 3764 break; 3765 case ISD::FP_TO_UINT: 3766 case ISD::FP_TO_SINT: 3767 case ISD::FP_TO_UINT_SAT: 3768 case ISD::FP_TO_SINT_SAT: 3769 if (tryFP_TO_INT(N, dl)) 3770 return; 3771 break; 3772 case ISD::FMUL: 3773 if (tryFMULFixed(N, dl)) 3774 return; 3775 break; 3776 case ISD::MUL: 3777 if (Subtarget->isThumb1Only()) 3778 break; 3779 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3780 unsigned RHSV = C->getZExtValue(); 3781 if (!RHSV) break; 3782 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
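// For example (illustrative): mul x, 9 is selected as add x, x, x, lsl #3,
// i.e. x + (x << 3).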
3783 unsigned ShImm = Log2_32(RHSV-1); 3784 if (ShImm >= 32) 3785 break; 3786 SDValue V = N->getOperand(0); 3787 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3788 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3789 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3790 if (Subtarget->isThumb()) { 3791 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3792 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3793 return; 3794 } else { 3795 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3796 Reg0 }; 3797 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3798 return; 3799 } 3800 } 3801 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3802 unsigned ShImm = Log2_32(RHSV+1); 3803 if (ShImm >= 32) 3804 break; 3805 SDValue V = N->getOperand(0); 3806 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3807 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3808 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3809 if (Subtarget->isThumb()) { 3810 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3811 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3812 return; 3813 } else { 3814 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3815 Reg0 }; 3816 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3817 return; 3818 } 3819 } 3820 } 3821 break; 3822 case ISD::AND: { 3823 // Check for unsigned bitfield extract 3824 if (tryV6T2BitfieldExtractOp(N, false)) 3825 return; 3826 3827 // If an immediate is used in an AND node, it is possible that the immediate 3828 // can be more optimally materialized when negated. If this is the case we 3829 // can negate the immediate and use a BIC instead. 3830 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3831 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3832 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3833 3834 // In Thumb2 mode, an AND can take a 12-bit immediate. If this 3835 // immediate can be negated and fit in the immediate operand of 3836 // a t2BIC, don't do any manual transform here as this can be 3837 // handled by the generic ISel machinery. 3838 bool PreferImmediateEncoding = 3839 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3840 if (!PreferImmediateEncoding && 3841 ConstantMaterializationCost(Imm, Subtarget) > 3842 ConstantMaterializationCost(~Imm, Subtarget)) { 3843 // The current immediate costs more to materialize than a negated 3844 // immediate, so negate the immediate and use a BIC. 3845 SDValue NewImm = 3846 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 3847 // If the new constant didn't exist before, reposition it in the topological 3848 // ordering so it is just before N. Otherwise, don't touch its location. 3849 if (NewImm->getNodeId() == -1) 3850 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3851 3852 if (!Subtarget->hasThumb2()) { 3853 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3854 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3855 CurDAG->getRegister(0, MVT::i32)}; 3856 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3857 return; 3858 } else { 3859 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3860 CurDAG->getRegister(0, MVT::i32), 3861 CurDAG->getRegister(0, MVT::i32)}; 3862 ReplaceNode(N, 3863 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3864 return; 3865 } 3866 } 3867 } 3868 3869 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3870 // of c1 are 0xffff, and lower 16-bit of c2 are 0. 
That is, the top 16-bits
3871 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3872 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3873 // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
3874 EVT VT = N->getValueType(0);
3875 if (VT != MVT::i32)
3876 break;
3877 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3878 ? ARM::t2MOVTi16
3879 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3880 if (!Opc)
3881 break;
3882 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3883 N1C = dyn_cast<ConstantSDNode>(N1);
3884 if (!N1C)
3885 break;
3886 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3887 SDValue N2 = N0.getOperand(1);
3888 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3889 if (!N2C)
3890 break;
3891 unsigned N1CVal = N1C->getZExtValue();
3892 unsigned N2CVal = N2C->getZExtValue();
3893 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3894 (N1CVal & 0xffffU) == 0xffffU &&
3895 (N2CVal & 0xffffU) == 0x0U) {
3896 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3897 dl, MVT::i32);
3898 SDValue Ops[] = { N0.getOperand(0), Imm16,
3899 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3900 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3901 return;
3902 }
3903 }
3904
3905 break;
3906 }
3907 case ARMISD::UMAAL: {
3908 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3909 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3910 N->getOperand(2), N->getOperand(3),
3911 getAL(CurDAG, dl),
3912 CurDAG->getRegister(0, MVT::i32) };
3913 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3914 return;
3915 }
3916 case ARMISD::UMLAL: {
3917 if (Subtarget->isThumb()) {
3918 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3919 N->getOperand(3), getAL(CurDAG, dl),
3920 CurDAG->getRegister(0, MVT::i32)};
3921 ReplaceNode(
3922 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3923 return;
3924 } else {
3925 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3926 N->getOperand(3), getAL(CurDAG, dl),
3927 CurDAG->getRegister(0, MVT::i32),
3928 CurDAG->getRegister(0, MVT::i32) };
3929 ReplaceNode(N, CurDAG->getMachineNode(
3930 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3931 MVT::i32, MVT::i32, Ops));
3932 return;
3933 }
3934 }
3935 case ARMISD::SMLAL: {
3936 if (Subtarget->isThumb()) {
3937 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3938 N->getOperand(3), getAL(CurDAG, dl),
3939 CurDAG->getRegister(0, MVT::i32)};
3940 ReplaceNode(
3941 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3942 return;
3943 } else {
3944 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3945 N->getOperand(3), getAL(CurDAG, dl),
3946 CurDAG->getRegister(0, MVT::i32),
3947 CurDAG->getRegister(0, MVT::i32) };
3948 ReplaceNode(N, CurDAG->getMachineNode(
3949 Subtarget->hasV6Ops() ?
ARM::SMLAL : ARM::SMLALv5, dl, 3950 MVT::i32, MVT::i32, Ops)); 3951 return; 3952 } 3953 } 3954 case ARMISD::SUBE: { 3955 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3956 break; 3957 // Look for a pattern to match SMMLS 3958 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3959 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3960 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3961 !SDValue(N, 1).use_empty()) 3962 break; 3963 3964 if (Subtarget->isThumb()) 3965 assert(Subtarget->hasThumb2() && 3966 "This pattern should not be generated for Thumb"); 3967 3968 SDValue SmulLoHi = N->getOperand(1); 3969 SDValue Subc = N->getOperand(2); 3970 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 3971 3972 if (!Zero || Zero->getZExtValue() != 0 || 3973 Subc.getOperand(1) != SmulLoHi.getValue(0) || 3974 N->getOperand(1) != SmulLoHi.getValue(1) || 3975 N->getOperand(2) != Subc.getValue(1)) 3976 break; 3977 3978 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS; 3979 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3980 N->getOperand(0), getAL(CurDAG, dl), 3981 CurDAG->getRegister(0, MVT::i32) }; 3982 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3983 return; 3984 } 3985 case ISD::LOAD: { 3986 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3987 return; 3988 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3989 if (tryT2IndexedLoad(N)) 3990 return; 3991 } else if (Subtarget->isThumb()) { 3992 if (tryT1IndexedLoad(N)) 3993 return; 3994 } else if (tryARMIndexedLoad(N)) 3995 return; 3996 // Other cases are autogenerated. 3997 break; 3998 } 3999 case ISD::MLOAD: 4000 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 4001 return; 4002 // Other cases are autogenerated. 4003 break; 4004 case ARMISD::WLSSETUP: { 4005 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, 4006 N->getOperand(0)); 4007 ReplaceUses(N, New); 4008 CurDAG->RemoveDeadNode(N); 4009 return; 4010 } 4011 case ARMISD::WLS: { 4012 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, 4013 N->getOperand(1), N->getOperand(2), 4014 N->getOperand(0)); 4015 ReplaceUses(N, New); 4016 CurDAG->RemoveDeadNode(N); 4017 return; 4018 } 4019 case ARMISD::LE: { 4020 SDValue Ops[] = { N->getOperand(1), 4021 N->getOperand(2), 4022 N->getOperand(0) }; 4023 unsigned Opc = ARM::t2LoopEnd; 4024 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 4025 ReplaceUses(N, New); 4026 CurDAG->RemoveDeadNode(N); 4027 return; 4028 } 4029 case ARMISD::LDRD: { 4030 if (Subtarget->isThumb2()) 4031 break; // TableGen handles isel in this case. 4032 SDValue Base, RegOffset, ImmOffset; 4033 const SDValue &Chain = N->getOperand(0); 4034 const SDValue &Addr = N->getOperand(1); 4035 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4036 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4037 // The register-offset variant of LDRD mandates that the register 4038 // allocated to RegOffset is not reused in any of the remaining operands. 4039 // This restriction is currently not enforced. Therefore emitting this 4040 // variant is explicitly avoided. 
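// Fall back to the immediate-offset form instead: use the whole address
// computation as the base and clear the register offset.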
4041 Base = Addr; 4042 RegOffset = CurDAG->getRegister(0, MVT::i32); 4043 } 4044 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 4045 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 4046 {MVT::Untyped, MVT::Other}, Ops); 4047 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4048 SDValue(New, 0)); 4049 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4050 SDValue(New, 0)); 4051 transferMemOperands(N, New); 4052 ReplaceUses(SDValue(N, 0), Lo); 4053 ReplaceUses(SDValue(N, 1), Hi); 4054 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 4055 CurDAG->RemoveDeadNode(N); 4056 return; 4057 } 4058 case ARMISD::STRD: { 4059 if (Subtarget->isThumb2()) 4060 break; // TableGen handles isel in this case. 4061 SDValue Base, RegOffset, ImmOffset; 4062 const SDValue &Chain = N->getOperand(0); 4063 const SDValue &Addr = N->getOperand(3); 4064 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4065 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4066 // The register-offset variant of STRD mandates that the register 4067 // allocated to RegOffset is not reused in any of the remaining operands. 4068 // This restriction is currently not enforced. Therefore emitting this 4069 // variant is explicitly avoided. 4070 Base = Addr; 4071 RegOffset = CurDAG->getRegister(0, MVT::i32); 4072 } 4073 SDNode *RegPair = 4074 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 4075 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 4076 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 4077 transferMemOperands(N, New); 4078 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 4079 CurDAG->RemoveDeadNode(N); 4080 return; 4081 } 4082 case ARMISD::LOOP_DEC: { 4083 SDValue Ops[] = { N->getOperand(1), 4084 N->getOperand(2), 4085 N->getOperand(0) }; 4086 SDNode *Dec = 4087 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4088 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 4089 ReplaceUses(N, Dec); 4090 CurDAG->RemoveDeadNode(N); 4091 return; 4092 } 4093 case ARMISD::BRCOND: { 4094 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4095 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4096 // Pattern complexity = 6 cost = 1 size = 0 4097 4098 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4099 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 4100 // Pattern complexity = 6 cost = 1 size = 0 4101 4102 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4103 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4104 // Pattern complexity = 6 cost = 1 size = 0 4105 4106 unsigned Opc = Subtarget->isThumb() ? 4107 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 4108 SDValue Chain = N->getOperand(0); 4109 SDValue N1 = N->getOperand(1); 4110 SDValue N2 = N->getOperand(2); 4111 SDValue N3 = N->getOperand(3); 4112 SDValue InFlag = N->getOperand(4); 4113 assert(N1.getOpcode() == ISD::BasicBlock); 4114 assert(N2.getOpcode() == ISD::Constant); 4115 assert(N3.getOpcode() == ISD::Register); 4116 4117 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 4118 4119 if (InFlag.getOpcode() == ARMISD::CMPZ) { 4120 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 4121 SDValue Int = InFlag.getOperand(0); 4122 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 4123 4124 // Handle low-overhead loops. 
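// A loop_decrement_reg intrinsic feeding this CMPZ is selected as a
// t2LoopDec / t2LoopEnd pseudo pair; later passes either form an actual
// low-overhead loop from these pseudos or revert them to ordinary
// subtract-and-branch code.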
4125 if (ID == Intrinsic::loop_decrement_reg) { 4126 SDValue Elements = Int.getOperand(2); 4127 SDValue Size = CurDAG->getTargetConstant( 4128 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 4129 MVT::i32); 4130 4131 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 4132 SDNode *LoopDec = 4133 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4134 CurDAG->getVTList(MVT::i32, MVT::Other), 4135 Args); 4136 ReplaceUses(Int.getNode(), LoopDec); 4137 4138 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 4139 SDNode *LoopEnd = 4140 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 4141 4142 ReplaceUses(N, LoopEnd); 4143 CurDAG->RemoveDeadNode(N); 4144 CurDAG->RemoveDeadNode(InFlag.getNode()); 4145 CurDAG->RemoveDeadNode(Int.getNode()); 4146 return; 4147 } 4148 } 4149 4150 bool SwitchEQNEToPLMI; 4151 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 4152 InFlag = N->getOperand(4); 4153 4154 if (SwitchEQNEToPLMI) { 4155 switch ((ARMCC::CondCodes)CC) { 4156 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4157 case ARMCC::NE: 4158 CC = (unsigned)ARMCC::MI; 4159 break; 4160 case ARMCC::EQ: 4161 CC = (unsigned)ARMCC::PL; 4162 break; 4163 } 4164 } 4165 } 4166 4167 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 4168 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 4169 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 4170 MVT::Glue, Ops); 4171 Chain = SDValue(ResNode, 0); 4172 if (N->getNumValues() == 2) { 4173 InFlag = SDValue(ResNode, 1); 4174 ReplaceUses(SDValue(N, 1), InFlag); 4175 } 4176 ReplaceUses(SDValue(N, 0), 4177 SDValue(Chain.getNode(), Chain.getResNo())); 4178 CurDAG->RemoveDeadNode(N); 4179 return; 4180 } 4181 4182 case ARMISD::CMPZ: { 4183 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 4184 // This allows us to avoid materializing the expensive negative constant. 4185 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 4186 // for its glue output. 4187 SDValue X = N->getOperand(0); 4188 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 4189 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 4190 int64_t Addend = -C->getSExtValue(); 4191 4192 SDNode *Add = nullptr; 4193 // ADDS can be better than CMN if the immediate fits in a 4194 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 4195 // Outside that range we can just use a CMN which is 32-bit but has a 4196 // 12-bit immediate range. 4197 if (Addend < 1<<8) { 4198 if (Subtarget->isThumb2()) { 4199 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4200 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 4201 CurDAG->getRegister(0, MVT::i32) }; 4202 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 4203 } else { 4204 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8; 4205 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 4206 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4207 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 4208 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 4209 } 4210 } 4211 if (Add) { 4212 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 4213 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 4214 } 4215 } 4216 // Other cases are autogenerated. 
4217 break; 4218 } 4219 4220 case ARMISD::CMOV: { 4221 SDValue InFlag = N->getOperand(4); 4222 4223 if (InFlag.getOpcode() == ARMISD::CMPZ) { 4224 bool SwitchEQNEToPLMI; 4225 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 4226 4227 if (SwitchEQNEToPLMI) { 4228 SDValue ARMcc = N->getOperand(2); 4229 ARMCC::CondCodes CC = 4230 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 4231 4232 switch (CC) { 4233 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4234 case ARMCC::NE: 4235 CC = ARMCC::MI; 4236 break; 4237 case ARMCC::EQ: 4238 CC = ARMCC::PL; 4239 break; 4240 } 4241 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 4242 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 4243 N->getOperand(3), N->getOperand(4)}; 4244 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 4245 } 4246 4247 } 4248 // Other cases are autogenerated. 4249 break; 4250 } 4251 4252 case ARMISD::VZIP: { 4253 unsigned Opc = 0; 4254 EVT VT = N->getValueType(0); 4255 switch (VT.getSimpleVT().SimpleTy) { 4256 default: return; 4257 case MVT::v8i8: Opc = ARM::VZIPd8; break; 4258 case MVT::v4f16: 4259 case MVT::v4i16: Opc = ARM::VZIPd16; break; 4260 case MVT::v2f32: 4261 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4262 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4263 case MVT::v16i8: Opc = ARM::VZIPq8; break; 4264 case MVT::v8f16: 4265 case MVT::v8i16: Opc = ARM::VZIPq16; break; 4266 case MVT::v4f32: 4267 case MVT::v4i32: Opc = ARM::VZIPq32; break; 4268 } 4269 SDValue Pred = getAL(CurDAG, dl); 4270 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4271 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4272 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4273 return; 4274 } 4275 case ARMISD::VUZP: { 4276 unsigned Opc = 0; 4277 EVT VT = N->getValueType(0); 4278 switch (VT.getSimpleVT().SimpleTy) { 4279 default: return; 4280 case MVT::v8i8: Opc = ARM::VUZPd8; break; 4281 case MVT::v4f16: 4282 case MVT::v4i16: Opc = ARM::VUZPd16; break; 4283 case MVT::v2f32: 4284 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
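// (With only two 32-bit elements per D register, unzip and transpose are
// the same permutation, so the VTRN opcode is reused here as well.)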
4285 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4286 case MVT::v16i8: Opc = ARM::VUZPq8; break; 4287 case MVT::v8f16: 4288 case MVT::v8i16: Opc = ARM::VUZPq16; break; 4289 case MVT::v4f32: 4290 case MVT::v4i32: Opc = ARM::VUZPq32; break; 4291 } 4292 SDValue Pred = getAL(CurDAG, dl); 4293 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4294 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4295 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4296 return; 4297 } 4298 case ARMISD::VTRN: { 4299 unsigned Opc = 0; 4300 EVT VT = N->getValueType(0); 4301 switch (VT.getSimpleVT().SimpleTy) { 4302 default: return; 4303 case MVT::v8i8: Opc = ARM::VTRNd8; break; 4304 case MVT::v4f16: 4305 case MVT::v4i16: Opc = ARM::VTRNd16; break; 4306 case MVT::v2f32: 4307 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4308 case MVT::v16i8: Opc = ARM::VTRNq8; break; 4309 case MVT::v8f16: 4310 case MVT::v8i16: Opc = ARM::VTRNq16; break; 4311 case MVT::v4f32: 4312 case MVT::v4i32: Opc = ARM::VTRNq32; break; 4313 } 4314 SDValue Pred = getAL(CurDAG, dl); 4315 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4316 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4317 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4318 return; 4319 } 4320 case ARMISD::BUILD_VECTOR: { 4321 EVT VecVT = N->getValueType(0); 4322 EVT EltVT = VecVT.getVectorElementType(); 4323 unsigned NumElts = VecVT.getVectorNumElements(); 4324 if (EltVT == MVT::f64) { 4325 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 4326 ReplaceNode( 4327 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4328 return; 4329 } 4330 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 4331 if (NumElts == 2) { 4332 ReplaceNode( 4333 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4334 return; 4335 } 4336 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 4337 ReplaceNode(N, 4338 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 4339 N->getOperand(2), N->getOperand(3))); 4340 return; 4341 } 4342 4343 case ARMISD::VLD1DUP: { 4344 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4345 ARM::VLD1DUPd32 }; 4346 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4347 ARM::VLD1DUPq32 }; 4348 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4349 return; 4350 } 4351 4352 case ARMISD::VLD2DUP: { 4353 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4354 ARM::VLD2DUPd32 }; 4355 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4356 return; 4357 } 4358 4359 case ARMISD::VLD3DUP: { 4360 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4361 ARM::VLD3DUPd16Pseudo, 4362 ARM::VLD3DUPd32Pseudo }; 4363 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4364 return; 4365 } 4366 4367 case ARMISD::VLD4DUP: { 4368 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4369 ARM::VLD4DUPd16Pseudo, 4370 ARM::VLD4DUPd32Pseudo }; 4371 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4372 return; 4373 } 4374 4375 case ARMISD::VLD1DUP_UPD: { 4376 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4377 ARM::VLD1DUPd16wb_fixed, 4378 ARM::VLD1DUPd32wb_fixed }; 4379 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4380 ARM::VLD1DUPq16wb_fixed, 4381 ARM::VLD1DUPq32wb_fixed }; 4382 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4383 return; 4384 } 4385 4386 case ARMISD::VLD2DUP_UPD: 
{ 4387 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed, 4388 ARM::VLD2DUPd16wb_fixed, 4389 ARM::VLD2DUPd32wb_fixed, 4390 ARM::VLD1q64wb_fixed }; 4391 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4392 ARM::VLD2DUPq16EvenPseudo, 4393 ARM::VLD2DUPq32EvenPseudo }; 4394 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed, 4395 ARM::VLD2DUPq16OddPseudoWB_fixed, 4396 ARM::VLD2DUPq32OddPseudoWB_fixed }; 4397 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1); 4398 return; 4399 } 4400 4401 case ARMISD::VLD3DUP_UPD: { 4402 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4403 ARM::VLD3DUPd16Pseudo_UPD, 4404 ARM::VLD3DUPd32Pseudo_UPD, 4405 ARM::VLD1d64TPseudoWB_fixed }; 4406 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4407 ARM::VLD3DUPq16EvenPseudo, 4408 ARM::VLD3DUPq32EvenPseudo }; 4409 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD, 4410 ARM::VLD3DUPq16OddPseudo_UPD, 4411 ARM::VLD3DUPq32OddPseudo_UPD }; 4412 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4413 return; 4414 } 4415 4416 case ARMISD::VLD4DUP_UPD: { 4417 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4418 ARM::VLD4DUPd16Pseudo_UPD, 4419 ARM::VLD4DUPd32Pseudo_UPD, 4420 ARM::VLD1d64QPseudoWB_fixed }; 4421 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4422 ARM::VLD4DUPq16EvenPseudo, 4423 ARM::VLD4DUPq32EvenPseudo }; 4424 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD, 4425 ARM::VLD4DUPq16OddPseudo_UPD, 4426 ARM::VLD4DUPq32OddPseudo_UPD }; 4427 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4428 return; 4429 } 4430 4431 case ARMISD::VLD1_UPD: { 4432 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4433 ARM::VLD1d16wb_fixed, 4434 ARM::VLD1d32wb_fixed, 4435 ARM::VLD1d64wb_fixed }; 4436 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4437 ARM::VLD1q16wb_fixed, 4438 ARM::VLD1q32wb_fixed, 4439 ARM::VLD1q64wb_fixed }; 4440 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4441 return; 4442 } 4443 4444 case ARMISD::VLD2_UPD: { 4445 if (Subtarget->hasNEON()) { 4446 static const uint16_t DOpcodes[] = { 4447 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4448 ARM::VLD1q64wb_fixed}; 4449 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4450 ARM::VLD2q16PseudoWB_fixed, 4451 ARM::VLD2q32PseudoWB_fixed}; 4452 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4453 } else { 4454 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4455 ARM::MVE_VLD21_8_wb}; 4456 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4457 ARM::MVE_VLD21_16_wb}; 4458 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4459 ARM::MVE_VLD21_32_wb}; 4460 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4461 SelectMVE_VLD(N, 2, Opcodes, true); 4462 } 4463 return; 4464 } 4465 4466 case ARMISD::VLD3_UPD: { 4467 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4468 ARM::VLD3d16Pseudo_UPD, 4469 ARM::VLD3d32Pseudo_UPD, 4470 ARM::VLD1d64TPseudoWB_fixed}; 4471 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4472 ARM::VLD3q16Pseudo_UPD, 4473 ARM::VLD3q32Pseudo_UPD }; 4474 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4475 ARM::VLD3q16oddPseudo_UPD, 4476 ARM::VLD3q32oddPseudo_UPD }; 4477 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4478 return; 4479 } 4480 4481 case ARMISD::VLD4_UPD: { 4482 if 
(Subtarget->hasNEON()) { 4483 static const uint16_t DOpcodes[] = { 4484 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4485 ARM::VLD1d64QPseudoWB_fixed}; 4486 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4487 ARM::VLD4q16Pseudo_UPD, 4488 ARM::VLD4q32Pseudo_UPD}; 4489 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4490 ARM::VLD4q16oddPseudo_UPD, 4491 ARM::VLD4q32oddPseudo_UPD}; 4492 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4493 } else { 4494 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4495 ARM::MVE_VLD42_8, 4496 ARM::MVE_VLD43_8_wb}; 4497 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4498 ARM::MVE_VLD42_16, 4499 ARM::MVE_VLD43_16_wb}; 4500 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4501 ARM::MVE_VLD42_32, 4502 ARM::MVE_VLD43_32_wb}; 4503 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4504 SelectMVE_VLD(N, 4, Opcodes, true); 4505 } 4506 return; 4507 } 4508 4509 case ARMISD::VLD1x2_UPD: { 4510 if (Subtarget->hasNEON()) { 4511 static const uint16_t DOpcodes[] = { 4512 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, 4513 ARM::VLD1q64wb_fixed}; 4514 static const uint16_t QOpcodes[] = { 4515 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4516 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4517 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4518 return; 4519 } 4520 break; 4521 } 4522 4523 case ARMISD::VLD1x3_UPD: { 4524 if (Subtarget->hasNEON()) { 4525 static const uint16_t DOpcodes[] = { 4526 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed, 4527 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed}; 4528 static const uint16_t QOpcodes0[] = { 4529 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD, 4530 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD}; 4531 static const uint16_t QOpcodes1[] = { 4532 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD, 4533 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD}; 4534 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4535 return; 4536 } 4537 break; 4538 } 4539 4540 case ARMISD::VLD1x4_UPD: { 4541 if (Subtarget->hasNEON()) { 4542 static const uint16_t DOpcodes[] = { 4543 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4544 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4545 static const uint16_t QOpcodes0[] = { 4546 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD, 4547 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD}; 4548 static const uint16_t QOpcodes1[] = { 4549 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD, 4550 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD}; 4551 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4552 return; 4553 } 4554 break; 4555 } 4556 4557 case ARMISD::VLD2LN_UPD: { 4558 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4559 ARM::VLD2LNd16Pseudo_UPD, 4560 ARM::VLD2LNd32Pseudo_UPD }; 4561 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4562 ARM::VLD2LNq32Pseudo_UPD }; 4563 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4564 return; 4565 } 4566 4567 case ARMISD::VLD3LN_UPD: { 4568 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4569 ARM::VLD3LNd16Pseudo_UPD, 4570 ARM::VLD3LNd32Pseudo_UPD }; 4571 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4572 ARM::VLD3LNq32Pseudo_UPD }; 4573 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4574 return; 
4575 } 4576 4577 case ARMISD::VLD4LN_UPD: { 4578 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4579 ARM::VLD4LNd16Pseudo_UPD, 4580 ARM::VLD4LNd32Pseudo_UPD }; 4581 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4582 ARM::VLD4LNq32Pseudo_UPD }; 4583 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4584 return; 4585 } 4586 4587 case ARMISD::VST1_UPD: { 4588 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 4589 ARM::VST1d16wb_fixed, 4590 ARM::VST1d32wb_fixed, 4591 ARM::VST1d64wb_fixed }; 4592 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4593 ARM::VST1q16wb_fixed, 4594 ARM::VST1q32wb_fixed, 4595 ARM::VST1q64wb_fixed }; 4596 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4597 return; 4598 } 4599 4600 case ARMISD::VST2_UPD: { 4601 if (Subtarget->hasNEON()) { 4602 static const uint16_t DOpcodes[] = { 4603 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4604 ARM::VST1q64wb_fixed}; 4605 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4606 ARM::VST2q16PseudoWB_fixed, 4607 ARM::VST2q32PseudoWB_fixed}; 4608 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4609 return; 4610 } 4611 break; 4612 } 4613 4614 case ARMISD::VST3_UPD: { 4615 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 4616 ARM::VST3d16Pseudo_UPD, 4617 ARM::VST3d32Pseudo_UPD, 4618 ARM::VST1d64TPseudoWB_fixed}; 4619 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4620 ARM::VST3q16Pseudo_UPD, 4621 ARM::VST3q32Pseudo_UPD }; 4622 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4623 ARM::VST3q16oddPseudo_UPD, 4624 ARM::VST3q32oddPseudo_UPD }; 4625 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4626 return; 4627 } 4628 4629 case ARMISD::VST4_UPD: { 4630 if (Subtarget->hasNEON()) { 4631 static const uint16_t DOpcodes[] = { 4632 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4633 ARM::VST1d64QPseudoWB_fixed}; 4634 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4635 ARM::VST4q16Pseudo_UPD, 4636 ARM::VST4q32Pseudo_UPD}; 4637 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4638 ARM::VST4q16oddPseudo_UPD, 4639 ARM::VST4q32oddPseudo_UPD}; 4640 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4641 return; 4642 } 4643 break; 4644 } 4645 4646 case ARMISD::VST1x2_UPD: { 4647 if (Subtarget->hasNEON()) { 4648 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed, 4649 ARM::VST1q16wb_fixed, 4650 ARM::VST1q32wb_fixed, 4651 ARM::VST1q64wb_fixed}; 4652 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4653 ARM::VST1d16QPseudoWB_fixed, 4654 ARM::VST1d32QPseudoWB_fixed, 4655 ARM::VST1d64QPseudoWB_fixed }; 4656 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4657 return; 4658 } 4659 break; 4660 } 4661 4662 case ARMISD::VST1x3_UPD: { 4663 if (Subtarget->hasNEON()) { 4664 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed, 4665 ARM::VST1d16TPseudoWB_fixed, 4666 ARM::VST1d32TPseudoWB_fixed, 4667 ARM::VST1d64TPseudoWB_fixed }; 4668 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4669 ARM::VST1q16LowTPseudo_UPD, 4670 ARM::VST1q32LowTPseudo_UPD, 4671 ARM::VST1q64LowTPseudo_UPD }; 4672 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD, 4673 ARM::VST1q16HighTPseudo_UPD, 4674 ARM::VST1q32HighTPseudo_UPD, 4675 ARM::VST1q64HighTPseudo_UPD }; 4676 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4677 return; 4678 } 4679 break; 4680 } 4681 4682 case ARMISD::VST1x4_UPD: { 4683 if (Subtarget->hasNEON()) { 
4684 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4685 ARM::VST1d16QPseudoWB_fixed,
4686 ARM::VST1d32QPseudoWB_fixed,
4687 ARM::VST1d64QPseudoWB_fixed };
4688 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4689 ARM::VST1q16LowQPseudo_UPD,
4690 ARM::VST1q32LowQPseudo_UPD,
4691 ARM::VST1q64LowQPseudo_UPD };
4692 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4693 ARM::VST1q16HighQPseudo_UPD,
4694 ARM::VST1q32HighQPseudo_UPD,
4695 ARM::VST1q64HighQPseudo_UPD };
4696 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4697 return;
4698 }
4699 break;
4700 }
4701 case ARMISD::VST2LN_UPD: {
4702 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4703 ARM::VST2LNd16Pseudo_UPD,
4704 ARM::VST2LNd32Pseudo_UPD };
4705 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4706 ARM::VST2LNq32Pseudo_UPD };
4707 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4708 return;
4709 }
4710
4711 case ARMISD::VST3LN_UPD: {
4712 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4713 ARM::VST3LNd16Pseudo_UPD,
4714 ARM::VST3LNd32Pseudo_UPD };
4715 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4716 ARM::VST3LNq32Pseudo_UPD };
4717 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4718 return;
4719 }
4720
4721 case ARMISD::VST4LN_UPD: {
4722 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4723 ARM::VST4LNd16Pseudo_UPD,
4724 ARM::VST4LNd32Pseudo_UPD };
4725 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4726 ARM::VST4LNq32Pseudo_UPD };
4727 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4728 return;
4729 }
4730
4731 case ISD::INTRINSIC_VOID:
4732 case ISD::INTRINSIC_W_CHAIN: {
4733 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
4734 switch (IntNo) {
4735 default:
4736 break;
4737
4738 case Intrinsic::arm_mrrc:
4739 case Intrinsic::arm_mrrc2: {
4740 SDLoc dl(N);
4741 SDValue Chain = N->getOperand(0);
4742 unsigned Opc;
4743
4744 if (Subtarget->isThumb())
4745 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4746 else
4747 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4748
4749 SmallVector<SDValue, 5> Ops;
4750 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
4751 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
4752 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
4753
4754 // The ARM mrrc2 instruction doesn't allow predicates: the top 4 bits of the encoded
4755 // instruction are always '1111'. Assembly language does allow AL to be specified as a
4756 // predicate to mrrc2, but that makes no difference to the encoded instruction.
4757 if (Opc != ARM::MRRC2) {
4758 Ops.push_back(getAL(CurDAG, dl));
4759 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4760 }
4761
4762 Ops.push_back(Chain);
4763
4764 // Writes to two registers.
4765 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4766
4767 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4768 return;
4769 }
4770 case Intrinsic::arm_ldaexd:
4771 case Intrinsic::arm_ldrexd: {
4772 SDLoc dl(N);
4773 SDValue Chain = N->getOperand(0);
4774 SDValue MemAddr = N->getOperand(2);
4775 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4776
4777 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4778 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4779 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4780
4781 // arm_ldrexd returns an i64 value in {i32, i32}
4782 std::vector<EVT> ResTys;
4783 if (isThumb) {
4784 ResTys.push_back(MVT::i32);
4785 ResTys.push_back(MVT::i32);
4786 } else
4787 ResTys.push_back(MVT::Untyped);
4788 ResTys.push_back(MVT::Other);
4789
4790 // Place arguments in the right order.
4791 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4792 CurDAG->getRegister(0, MVT::i32), Chain};
4793 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4794 // Transfer memoperands.
4795 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4796 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4797
4798 // Remap uses.
4799 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4800 if (!SDValue(N, 0).use_empty()) {
4801 SDValue Result;
4802 if (isThumb)
4803 Result = SDValue(Ld, 0);
4804 else {
4805 SDValue SubRegIdx =
4806 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4807 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4808 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4809 Result = SDValue(ResNode, 0);
4810 }
4811 ReplaceUses(SDValue(N, 0), Result);
4812 }
4813 if (!SDValue(N, 1).use_empty()) {
4814 SDValue Result;
4815 if (isThumb)
4816 Result = SDValue(Ld, 1);
4817 else {
4818 SDValue SubRegIdx =
4819 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4820 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4821 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4822 Result = SDValue(ResNode, 0);
4823 }
4824 ReplaceUses(SDValue(N, 1), Result);
4825 }
4826 ReplaceUses(SDValue(N, 2), OutChain);
4827 CurDAG->RemoveDeadNode(N);
4828 return;
4829 }
4830 case Intrinsic::arm_stlexd:
4831 case Intrinsic::arm_strexd: {
4832 SDLoc dl(N);
4833 SDValue Chain = N->getOperand(0);
4834 SDValue Val0 = N->getOperand(2);
4835 SDValue Val1 = N->getOperand(3);
4836 SDValue MemAddr = N->getOperand(4);
4837
4838 // Store exclusive double returns an i32 value which is the return status
4839 // of the issued store.
4840 const EVT ResTys[] = {MVT::i32, MVT::Other};
4841
4842 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4843 // Place arguments in the right order.
4844 SmallVector<SDValue, 7> Ops;
4845 if (isThumb) {
4846 Ops.push_back(Val0);
4847 Ops.push_back(Val1);
4848 } else
4849 // arm_strexd uses GPRPair.
4850 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4851 Ops.push_back(MemAddr);
4852 Ops.push_back(getAL(CurDAG, dl));
4853 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4854 Ops.push_back(Chain);
4855
4856 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4857 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4858 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4859
4860 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4861 // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }

    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64};
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_mve_vldr_gather_base_wb:
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
      static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
                                         ARM::MVE_VLDRDU64_qi_pre};
      SelectMVE_WB(N, Opcodes,
                   IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
      return;
    }

    case Intrinsic::arm_mve_vld2q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, false);
      return;
    }

    case Intrinsic::arm_mve_vld4q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, false);
      return;
    }
    }
    break;
  }

  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    // Scalar f32 -> bf16
    case Intrinsic::arm_neon_vcvtbfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      llvm::EVT DestTy = N->getValueType(0);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
      return;
    }

    // Vector v4f32 -> v4bf16
    case Intrinsic::arm_neon_vcvtfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
      return;
    }

    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;

    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;
    case Intrinsic::arm_mve_vsbc:
    case Intrinsic::arm_mve_vsbc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
                        IntNo == Intrinsic::arm_mve_vsbc_predicated);
      return;
    case Intrinsic::arm_mve_vshlc:
    case Intrinsic::arm_mve_vshlc_predicated:
      SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
      return;

    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vidup:
    case Intrinsic::arm_mve_vidup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vidup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vddup:
    case Intrinsic::arm_mve_vddup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vddup_predicated);
      return;
    }

    case Intrinsic::arm_mve_viwdup:
    case Intrinsic::arm_mve_viwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_viwdup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vdwdup:
    case Intrinsic::arm_mve_vdwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_vdwdup_predicated);
      return;
    }

    case Intrinsic::arm_cde_cx1d:
    case Intrinsic::arm_cde_cx1da:
    case Intrinsic::arm_cde_cx2d:
    case Intrinsic::arm_cde_cx2da:
    case Intrinsic::arm_cde_cx3d:
    case Intrinsic::arm_cde_cx3da: {
      bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
                      IntNo == Intrinsic::arm_cde_cx2da ||
                      IntNo == Intrinsic::arm_cde_cx3da;
      size_t NumExtraOps;
      uint16_t Opcode;
      switch (IntNo) {
      case Intrinsic::arm_cde_cx1d:
      case Intrinsic::arm_cde_cx1da:
        NumExtraOps = 0;
        Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
        break;
      case Intrinsic::arm_cde_cx2d:
      case Intrinsic::arm_cde_cx2da:
        NumExtraOps = 1;
        Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
        break;
      case Intrinsic::arm_cde_cx3d:
      case Intrinsic::arm_cde_cx3da:
        NumExtraOps = 2;
        Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
        break;
      default:
        llvm_unreachable("Unexpected opcode");
      }
      SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

// Inspect a register string of the form
//   cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32 bit) or
//   cp<coprocessor>:<opc1>:c<CRm> (64 bit),
// parse the fields of the string, and add the integer operands obtained from
// them to the provided vector.
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
    (void)AllIntFields;
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
// mask operand, which expresses which register is to be used, e.g. r8, and in
// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
// was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Maps an M Class special register string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contain the fields to be accessed in the special register, set by
  // the flags provided with the register.
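  //
  // Illustrative mappings (a sketch derived from the rules below, not an
  // exhaustive list):
  //   "apsr" with flags "nzcvq" -> 0x2 << 2 = 0x8
  //   "cpsr" with flags "fc"    -> 0x8 | 0x1 = 0x9 (same as "" or "all")
  //   "spsr" with flags "fc"    -> 0x9 | 0x10 = 0x19 (R bit set for spsr)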
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc".
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE), we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
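  //
  // For example (a sketch, not an exhaustive mapping), an IR-level read of
  // "fpscr" such as
  //   %v = call i32 @llvm.read_register.i32(metadata !{!"fpscr"})
  // selects ARM::VMRS below.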
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class, we need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE), we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target is M Class, we need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred to by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
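      // Schematically, the nodes built below form the following chain
      // (a sketch of the dataflow, not additional selection logic):
      //   RegCopy = CopyFromReg GPVR                       ; whole pair
      //   Sub0 = EXTRACT_SUBREG RegCopy, gsub_0 -> CopyToReg Reg0 (low half)
      //   Sub1 = EXTRACT_SUBREG RegCopy, gsub_1 -> CopyToReg Reg1 (high half)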
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum */);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}


bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}
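
// Typical use of this factory (a sketch; the exact hook lives in the target's
// pass configuration, e.g. ARMPassConfig::addInstSelector() in
// ARMTargetMachine.cpp):
//   addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));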