1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the ARM target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ARM.h" 14 #include "ARMBaseInstrInfo.h" 15 #include "ARMTargetMachine.h" 16 #include "MCTargetDesc/ARMAddressingModes.h" 17 #include "Utils/ARMBaseInfo.h" 18 #include "llvm/ADT/APSInt.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/CodeGen/SelectionDAG.h" 25 #include "llvm/CodeGen/SelectionDAGISel.h" 26 #include "llvm/CodeGen/TargetLowering.h" 27 #include "llvm/IR/CallingConv.h" 28 #include "llvm/IR/Constants.h" 29 #include "llvm/IR/DerivedTypes.h" 30 #include "llvm/IR/Function.h" 31 #include "llvm/IR/Intrinsics.h" 32 #include "llvm/IR/IntrinsicsARM.h" 33 #include "llvm/IR/LLVMContext.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Target/TargetOptions.h" 38 #include <optional> 39 40 using namespace llvm; 41 42 #define DEBUG_TYPE "arm-isel" 43 #define PASS_NAME "ARM Instruction Selection" 44 45 static cl::opt<bool> 46 DisableShifterOp("disable-shifter-op", cl::Hidden, 47 cl::desc("Disable isel of shifter-op"), 48 cl::init(false)); 49 50 //===--------------------------------------------------------------------===// 51 /// ARMDAGToDAGISel - ARM specific code to select ARM machine 52 /// instructions for SelectionDAG operations. 53 /// 54 namespace { 55 56 class ARMDAGToDAGISel : public SelectionDAGISel { 57 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can 58 /// make the right decision when generating code for different targets. 59 const ARMSubtarget *Subtarget; 60 61 public: 62 ARMDAGToDAGISel() = delete; 63 64 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel) 65 : SelectionDAGISel(tm, OptLevel) {} 66 67 bool runOnMachineFunction(MachineFunction &MF) override { 68 // Reset the subtarget each time through. 69 Subtarget = &MF.getSubtarget<ARMSubtarget>(); 70 SelectionDAGISel::runOnMachineFunction(MF); 71 return true; 72 } 73 74 void PreprocessISelDAG() override; 75 76 /// getI32Imm - Return a target constant of type i32 with the specified 77 /// value. 78 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 79 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 80 } 81 82 void Select(SDNode *N) override; 83 84 /// Return true as some complex patterns, like those that call 85 /// canExtractShiftFromMul can modify the DAG inplace. 86 bool ComplexPatternFuncMutatesDAG() const override { return true; } 87 88 bool hasNoVMLxHazardUse(SDNode *N) const; 89 bool isShifterOpProfitable(const SDValue &Shift, 90 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); 91 bool SelectRegShifterOperand(SDValue N, SDValue &A, 92 SDValue &B, SDValue &C, 93 bool CheckProfitability = true); 94 bool SelectImmShifterOperand(SDValue N, SDValue &A, 95 SDValue &B, bool CheckProfitability = true); 96 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B, 97 SDValue &C) { 98 // Don't apply the profitability check 99 return SelectRegShifterOperand(N, A, B, C, false); 100 } 101 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) { 102 // Don't apply the profitability check 103 return SelectImmShifterOperand(N, A, B, false); 104 } 105 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) { 106 if (!N.hasOneUse()) 107 return false; 108 return SelectImmShifterOperand(N, A, B, false); 109 } 110 111 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out); 112 113 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 114 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); 115 116 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { 117 const ConstantSDNode *CN = cast<ConstantSDNode>(N); 118 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32); 119 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); 120 return true; 121 } 122 123 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 124 SDValue &Offset, SDValue &Opc); 125 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 126 SDValue &Offset, SDValue &Opc); 127 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 128 SDValue &Offset, SDValue &Opc); 129 bool SelectAddrOffsetNone(SDValue N, SDValue &Base); 130 bool SelectAddrMode3(SDValue N, SDValue &Base, 131 SDValue &Offset, SDValue &Opc); 132 bool SelectAddrMode3Offset(SDNode *Op, SDValue N, 133 SDValue &Offset, SDValue &Opc); 134 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16); 135 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); 136 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset); 137 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); 138 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); 139 140 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); 141 142 // Thumb Addressing Modes: 143 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); 144 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset); 145 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, 146 SDValue &OffImm); 147 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 148 SDValue &OffImm); 149 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 150 SDValue &OffImm); 151 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 152 SDValue &OffImm); 153 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); 154 template <unsigned Shift> 155 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 156 157 // Thumb 2 Addressing Modes: 158 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 159 template <unsigned Shift> 160 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); 161 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, 162 SDValue &OffImm); 163 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 164 SDValue &OffImm); 165 template <unsigned Shift> 166 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm); 167 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, 168 unsigned Shift); 169 template <unsigned Shift> 170 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 171 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, 172 SDValue &OffReg, SDValue &ShImm); 173 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); 174 175 template<int Min, int Max> 176 bool SelectImmediateInRange(SDValue N, SDValue &OffImm); 177 178 inline bool is_so_imm(unsigned Imm) const { 179 return ARM_AM::getSOImmVal(Imm) != -1; 180 } 181 182 inline bool is_so_imm_not(unsigned Imm) const { 183 return ARM_AM::getSOImmVal(~Imm) != -1; 184 } 185 186 inline bool is_t2_so_imm(unsigned Imm) const { 187 return ARM_AM::getT2SOImmVal(Imm) != -1; 188 } 189 190 inline bool is_t2_so_imm_not(unsigned Imm) const { 191 return ARM_AM::getT2SOImmVal(~Imm) != -1; 192 } 193 194 // Include the pieces autogenerated from the target description. 195 #include "ARMGenDAGISel.inc" 196 197 private: 198 void transferMemOperands(SDNode *Src, SDNode *Dst); 199 200 /// Indexed (pre/post inc/dec) load matching code for ARM. 201 bool tryARMIndexedLoad(SDNode *N); 202 bool tryT1IndexedLoad(SDNode *N); 203 bool tryT2IndexedLoad(SDNode *N); 204 bool tryMVEIndexedLoad(SDNode *N); 205 bool tryFMULFixed(SDNode *N, SDLoc dl); 206 bool tryFP_TO_INT(SDNode *N, SDLoc dl); 207 bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul, 208 bool IsUnsigned, 209 bool FixedToFloat); 210 211 /// SelectVLD - Select NEON load intrinsics. NumVecs should be 212 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for 213 /// loads of D registers and even subregs and odd subregs of Q registers. 214 /// For NumVecs <= 2, QOpcodes1 is not used. 215 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 216 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 217 const uint16_t *QOpcodes1); 218 219 /// SelectVST - Select NEON store intrinsics. NumVecs should 220 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for 221 /// stores of D registers and even subregs and odd subregs of Q registers. 222 /// For NumVecs <= 2, QOpcodes1 is not used. 223 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 224 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 225 const uint16_t *QOpcodes1); 226 227 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should 228 /// be 2, 3 or 4. The opcode arrays specify the instructions used for 229 /// load/store of D registers and Q registers. 230 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 231 unsigned NumVecs, const uint16_t *DOpcodes, 232 const uint16_t *QOpcodes); 233 234 /// Helper functions for setting up clusters of MVE predication operands. 235 template <typename SDValueVector> 236 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 237 SDValue PredicateMask); 238 template <typename SDValueVector> 239 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 240 SDValue PredicateMask, SDValue Inactive); 241 242 template <typename SDValueVector> 243 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc); 244 template <typename SDValueVector> 245 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy); 246 247 /// SelectMVE_WB - Select MVE writeback load/store intrinsics. 248 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated); 249 250 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics. 251 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate, 252 bool HasSaturationOperand); 253 254 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics. 255 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 256 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated); 257 258 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between 259 /// vector lanes. 260 void SelectMVE_VSHLC(SDNode *N, bool Predicated); 261 262 /// Select long MVE vector reductions with two vector operands 263 /// Stride is the number of vector element widths the instruction can operate 264 /// on: 265 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32] 266 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32] 267 /// Stride is used when addressing the OpcodesS array which contains multiple 268 /// opcodes for each element width. 269 /// TySize is the index into the list of element types listed above 270 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 271 const uint16_t *OpcodesS, const uint16_t *OpcodesU, 272 size_t Stride, size_t TySize); 273 274 /// Select a 64-bit MVE vector reduction with two vector operands 275 /// arm_mve_vmlldava_[predicated] 276 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS, 277 const uint16_t *OpcodesU); 278 /// Select a 72-bit MVE vector rounding reduction with two vector operands 279 /// int_arm_mve_vrmlldavha[_predicated] 280 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS, 281 const uint16_t *OpcodesU); 282 283 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs 284 /// should be 2 or 4. The opcode array specifies the instructions 285 /// used for 8, 16 and 32-bit lane sizes respectively, and each 286 /// pointer points to a set of NumVecs sub-opcodes used for the 287 /// different stages (e.g. VLD20 versus VLD21) of each load family. 288 void SelectMVE_VLD(SDNode *N, unsigned NumVecs, 289 const uint16_t *const *Opcodes, bool HasWriteback); 290 291 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an 292 /// array of 3 elements for the 8, 16 and 32-bit lane sizes. 293 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, 294 bool Wrapping, bool Predicated); 295 296 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D, 297 /// CX1DA, CX2D, CX2DA, CX3, CX3DA). 298 /// \arg \c NumExtraOps number of extra operands besides the coprocossor, 299 /// the accumulator and the immediate operand, i.e. 0 300 /// for CX1*, 1 for CX2*, 2 for CX3* 301 /// \arg \c HasAccum whether the instruction has an accumulator operand 302 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps, 303 bool HasAccum); 304 305 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs 306 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used 307 /// for loading D registers. 308 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating, 309 unsigned NumVecs, const uint16_t *DOpcodes, 310 const uint16_t *QOpcodes0 = nullptr, 311 const uint16_t *QOpcodes1 = nullptr); 312 313 /// Try to select SBFX/UBFX instructions for ARM. 314 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); 315 316 bool tryInsertVectorElt(SDNode *N); 317 318 // Select special operations if node forms integer ABS pattern 319 bool tryABSOp(SDNode *N); 320 321 bool tryReadRegister(SDNode *N); 322 bool tryWriteRegister(SDNode *N); 323 324 bool tryInlineAsm(SDNode *N); 325 326 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); 327 328 void SelectCMP_SWAP(SDNode *N); 329 330 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 331 /// inline asm expressions. 332 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 333 InlineAsm::ConstraintCode ConstraintID, 334 std::vector<SDValue> &OutOps) override; 335 336 // Form pairs of consecutive R, S, D, or Q registers. 337 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); 338 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); 339 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); 340 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); 341 342 // Form sequences of 4 consecutive S, D, or Q registers. 343 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 344 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 345 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 346 347 // Get the alignment operand for a NEON VLD or VST instruction. 348 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, 349 bool is64BitVector); 350 351 /// Checks if N is a multiplication by a constant where we can extract out a 352 /// power of two from the constant so that it can be used in a shift, but only 353 /// if it simplifies the materialization of the constant. Returns true if it 354 /// is, and assigns to PowerOfTwo the power of two that should be extracted 355 /// out and to NewMulConst the new constant to be multiplied by. 356 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, 357 unsigned &PowerOfTwo, SDValue &NewMulConst) const; 358 359 /// Replace N with M in CurDAG, in a way that also ensures that M gets 360 /// selected when N would have been selected. 361 void replaceDAGValue(const SDValue &N, SDValue M); 362 }; 363 364 class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy { 365 public: 366 static char ID; 367 ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel) 368 : SelectionDAGISelLegacy( 369 ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {} 370 }; 371 } 372 373 char ARMDAGToDAGISelLegacy::ID = 0; 374 375 INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false) 376 377 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant 378 /// operand. If so Imm will receive the 32-bit value. 379 static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 380 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 381 Imm = N->getAsZExtVal(); 382 return true; 383 } 384 return false; 385 } 386 387 // isInt32Immediate - This method tests to see if a constant operand. 388 // If so Imm will receive the 32 bit value. 389 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 390 return isInt32Immediate(N.getNode(), Imm); 391 } 392 393 // isOpcWithIntImmediate - This method tests to see if the node is a specific 394 // opcode and that it has a immediate integer right operand. 395 // If so Imm will receive the 32 bit value. 396 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 397 return N->getOpcode() == Opc && 398 isInt32Immediate(N->getOperand(1).getNode(), Imm); 399 } 400 401 /// Check whether a particular node is a constant value representable as 402 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax). 403 /// 404 /// \param ScaledConstant [out] - On success, the pre-scaled constant value. 405 static bool isScaledConstantInRange(SDValue Node, int Scale, 406 int RangeMin, int RangeMax, 407 int &ScaledConstant) { 408 assert(Scale > 0 && "Invalid scale!"); 409 410 // Check that this is a constant. 411 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); 412 if (!C) 413 return false; 414 415 ScaledConstant = (int) C->getZExtValue(); 416 if ((ScaledConstant % Scale) != 0) 417 return false; 418 419 ScaledConstant /= Scale; 420 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; 421 } 422 423 void ARMDAGToDAGISel::PreprocessISelDAG() { 424 if (!Subtarget->hasV6T2Ops()) 425 return; 426 427 bool isThumb2 = Subtarget->isThumb(); 428 // We use make_early_inc_range to avoid invalidation issues. 429 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) { 430 if (N.getOpcode() != ISD::ADD) 431 continue; 432 433 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with 434 // leading zeros, followed by consecutive set bits, followed by 1 or 2 435 // trailing zeros, e.g. 1020. 436 // Transform the expression to 437 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number 438 // of trailing zeros of c2. The left shift would be folded as an shifter 439 // operand of 'add' and the 'and' and 'srl' would become a bits extraction 440 // node (UBFX). 441 442 SDValue N0 = N.getOperand(0); 443 SDValue N1 = N.getOperand(1); 444 unsigned And_imm = 0; 445 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { 446 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) 447 std::swap(N0, N1); 448 } 449 if (!And_imm) 450 continue; 451 452 // Check if the AND mask is an immediate of the form: 000.....1111111100 453 unsigned TZ = llvm::countr_zero(And_imm); 454 if (TZ != 1 && TZ != 2) 455 // Be conservative here. Shifter operands aren't always free. e.g. On 456 // Swift, left shifter operand of 1 / 2 for free but others are not. 457 // e.g. 458 // ubfx r3, r1, #16, #8 459 // ldr.w r3, [r0, r3, lsl #2] 460 // vs. 461 // mov.w r9, #1020 462 // and.w r2, r9, r1, lsr #14 463 // ldr r2, [r0, r2] 464 continue; 465 And_imm >>= TZ; 466 if (And_imm & (And_imm + 1)) 467 continue; 468 469 // Look for (and (srl X, c1), c2). 470 SDValue Srl = N1.getOperand(0); 471 unsigned Srl_imm = 0; 472 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || 473 (Srl_imm <= 2)) 474 continue; 475 476 // Make sure first operand is not a shifter operand which would prevent 477 // folding of the left shift. 478 SDValue CPTmp0; 479 SDValue CPTmp1; 480 SDValue CPTmp2; 481 if (isThumb2) { 482 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1)) 483 continue; 484 } else { 485 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || 486 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) 487 continue; 488 } 489 490 // Now make the transformation. 491 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, 492 Srl.getOperand(0), 493 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl), 494 MVT::i32)); 495 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, 496 Srl, 497 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32)); 498 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, 499 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32)); 500 CurDAG->UpdateNodeOperands(&N, N0, N1); 501 } 502 } 503 504 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS 505 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at 506 /// least on current ARM implementations) which should be avoidded. 507 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { 508 if (OptLevel == CodeGenOptLevel::None) 509 return true; 510 511 if (!Subtarget->hasVMLxHazards()) 512 return true; 513 514 if (!N->hasOneUse()) 515 return false; 516 517 SDNode *Use = *N->use_begin(); 518 if (Use->getOpcode() == ISD::CopyToReg) 519 return true; 520 if (Use->isMachineOpcode()) { 521 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 522 CurDAG->getSubtarget().getInstrInfo()); 523 524 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); 525 if (MCID.mayStore()) 526 return true; 527 unsigned Opcode = MCID.getOpcode(); 528 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 529 return true; 530 // vmlx feeding into another vmlx. We actually want to unfold 531 // the use later in the MLxExpansion pass. e.g. 532 // vmla 533 // vmla (stall 8 cycles) 534 // 535 // vmul (5 cycles) 536 // vadd (5 cycles) 537 // vmla 538 // This adds up to about 18 - 19 cycles. 539 // 540 // vmla 541 // vmul (stall 4 cycles) 542 // vadd adds up to about 14 cycles. 543 return TII->isFpMLxInstruction(Opcode); 544 } 545 546 return false; 547 } 548 549 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, 550 ARM_AM::ShiftOpc ShOpcVal, 551 unsigned ShAmt) { 552 if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) 553 return true; 554 if (Shift.hasOneUse()) 555 return true; 556 // R << 2 is free. 557 return ShOpcVal == ARM_AM::lsl && 558 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); 559 } 560 561 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, 562 unsigned MaxShift, 563 unsigned &PowerOfTwo, 564 SDValue &NewMulConst) const { 565 assert(N.getOpcode() == ISD::MUL); 566 assert(MaxShift > 0); 567 568 // If the multiply is used in more than one place then changing the constant 569 // will make other uses incorrect, so don't. 570 if (!N.hasOneUse()) return false; 571 // Check if the multiply is by a constant 572 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); 573 if (!MulConst) return false; 574 // If the constant is used in more than one place then modifying it will mean 575 // we need to materialize two constants instead of one, which is a bad idea. 576 if (!MulConst->hasOneUse()) return false; 577 unsigned MulConstVal = MulConst->getZExtValue(); 578 if (MulConstVal == 0) return false; 579 580 // Find the largest power of 2 that MulConstVal is a multiple of 581 PowerOfTwo = MaxShift; 582 while ((MulConstVal % (1 << PowerOfTwo)) != 0) { 583 --PowerOfTwo; 584 if (PowerOfTwo == 0) return false; 585 } 586 587 // Only optimise if the new cost is better 588 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); 589 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); 590 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); 591 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); 592 return NewCost < OldCost; 593 } 594 595 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { 596 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); 597 ReplaceUses(N, M); 598 } 599 600 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, 601 SDValue &BaseReg, 602 SDValue &Opc, 603 bool CheckProfitability) { 604 if (DisableShifterOp) 605 return false; 606 607 // If N is a multiply-by-constant and it's profitable to extract a shift and 608 // use it in a shifted operand do so. 609 if (N.getOpcode() == ISD::MUL) { 610 unsigned PowerOfTwo = 0; 611 SDValue NewMulConst; 612 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { 613 HandleSDNode Handle(N); 614 SDLoc Loc(N); 615 replaceDAGValue(N.getOperand(1), NewMulConst); 616 BaseReg = Handle.getValue(); 617 Opc = CurDAG->getTargetConstant( 618 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); 619 return true; 620 } 621 } 622 623 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 624 625 // Don't match base register only case. That is matched to a separate 626 // lower complexity pattern with explicit register operand. 627 if (ShOpcVal == ARM_AM::no_shift) return false; 628 629 BaseReg = N.getOperand(0); 630 unsigned ShImmVal = 0; 631 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 632 if (!RHS) return false; 633 ShImmVal = RHS->getZExtValue() & 31; 634 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 635 SDLoc(N), MVT::i32); 636 return true; 637 } 638 639 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, 640 SDValue &BaseReg, 641 SDValue &ShReg, 642 SDValue &Opc, 643 bool CheckProfitability) { 644 if (DisableShifterOp) 645 return false; 646 647 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 648 649 // Don't match base register only case. That is matched to a separate 650 // lower complexity pattern with explicit register operand. 651 if (ShOpcVal == ARM_AM::no_shift) return false; 652 653 BaseReg = N.getOperand(0); 654 unsigned ShImmVal = 0; 655 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 656 if (RHS) return false; 657 658 ShReg = N.getOperand(1); 659 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) 660 return false; 661 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 662 SDLoc(N), MVT::i32); 663 return true; 664 } 665 666 // Determine whether an ISD::OR's operands are suitable to turn the operation 667 // into an addition, which often has more compact encodings. 668 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { 669 assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); 670 Out = N; 671 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); 672 } 673 674 675 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, 676 SDValue &Base, 677 SDValue &OffImm) { 678 // Match simple R + imm12 operands. 679 680 // Base only. 681 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 682 !CurDAG->isBaseWithConstantOffset(N)) { 683 if (N.getOpcode() == ISD::FrameIndex) { 684 // Match frame index. 685 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 686 Base = CurDAG->getTargetFrameIndex( 687 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 688 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 689 return true; 690 } 691 692 if (N.getOpcode() == ARMISD::Wrapper && 693 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 694 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 695 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 696 Base = N.getOperand(0); 697 } else 698 Base = N; 699 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 700 return true; 701 } 702 703 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 704 int RHSC = (int)RHS->getSExtValue(); 705 if (N.getOpcode() == ISD::SUB) 706 RHSC = -RHSC; 707 708 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits 709 Base = N.getOperand(0); 710 if (Base.getOpcode() == ISD::FrameIndex) { 711 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 712 Base = CurDAG->getTargetFrameIndex( 713 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 714 } 715 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 716 return true; 717 } 718 } 719 720 // Base only. 721 Base = N; 722 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 723 return true; 724 } 725 726 727 728 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, 729 SDValue &Opc) { 730 if (N.getOpcode() == ISD::MUL && 731 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { 732 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 733 // X * [3,5,9] -> X + X * [2,4,8] etc. 734 int RHSC = (int)RHS->getZExtValue(); 735 if (RHSC & 1) { 736 RHSC = RHSC & ~1; 737 ARM_AM::AddrOpc AddSub = ARM_AM::add; 738 if (RHSC < 0) { 739 AddSub = ARM_AM::sub; 740 RHSC = - RHSC; 741 } 742 if (isPowerOf2_32(RHSC)) { 743 unsigned ShAmt = Log2_32(RHSC); 744 Base = Offset = N.getOperand(0); 745 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 746 ARM_AM::lsl), 747 SDLoc(N), MVT::i32); 748 return true; 749 } 750 } 751 } 752 } 753 754 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 755 // ISD::OR that is equivalent to an ISD::ADD. 756 !CurDAG->isBaseWithConstantOffset(N)) 757 return false; 758 759 // Leave simple R +/- imm12 operands for LDRi12 760 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { 761 int RHSC; 762 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 763 -0x1000+1, 0x1000, RHSC)) // 12 bits. 764 return false; 765 } 766 767 // Otherwise this is R +/- [possibly shifted] R. 768 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; 769 ARM_AM::ShiftOpc ShOpcVal = 770 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 771 unsigned ShAmt = 0; 772 773 Base = N.getOperand(0); 774 Offset = N.getOperand(1); 775 776 if (ShOpcVal != ARM_AM::no_shift) { 777 // Check to see if the RHS of the shift is a constant, if not, we can't fold 778 // it. 779 if (ConstantSDNode *Sh = 780 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 781 ShAmt = Sh->getZExtValue(); 782 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 783 Offset = N.getOperand(1).getOperand(0); 784 else { 785 ShAmt = 0; 786 ShOpcVal = ARM_AM::no_shift; 787 } 788 } else { 789 ShOpcVal = ARM_AM::no_shift; 790 } 791 } 792 793 // Try matching (R shl C) + (R). 794 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && 795 !(Subtarget->isLikeA9() || Subtarget->isSwift() || 796 N.getOperand(0).hasOneUse())) { 797 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); 798 if (ShOpcVal != ARM_AM::no_shift) { 799 // Check to see if the RHS of the shift is a constant, if not, we can't 800 // fold it. 801 if (ConstantSDNode *Sh = 802 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { 803 ShAmt = Sh->getZExtValue(); 804 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { 805 Offset = N.getOperand(0).getOperand(0); 806 Base = N.getOperand(1); 807 } else { 808 ShAmt = 0; 809 ShOpcVal = ARM_AM::no_shift; 810 } 811 } else { 812 ShOpcVal = ARM_AM::no_shift; 813 } 814 } 815 } 816 817 // If Offset is a multiply-by-constant and it's profitable to extract a shift 818 // and use it in a shifted operand do so. 819 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { 820 unsigned PowerOfTwo = 0; 821 SDValue NewMulConst; 822 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { 823 HandleSDNode Handle(Offset); 824 replaceDAGValue(Offset.getOperand(1), NewMulConst); 825 Offset = Handle.getValue(); 826 ShAmt = PowerOfTwo; 827 ShOpcVal = ARM_AM::lsl; 828 } 829 } 830 831 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 832 SDLoc(N), MVT::i32); 833 return true; 834 } 835 836 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 837 SDValue &Offset, SDValue &Opc) { 838 unsigned Opcode = Op->getOpcode(); 839 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 840 ? cast<LoadSDNode>(Op)->getAddressingMode() 841 : cast<StoreSDNode>(Op)->getAddressingMode(); 842 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 843 ? ARM_AM::add : ARM_AM::sub; 844 int Val; 845 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) 846 return false; 847 848 Offset = N; 849 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 850 unsigned ShAmt = 0; 851 if (ShOpcVal != ARM_AM::no_shift) { 852 // Check to see if the RHS of the shift is a constant, if not, we can't fold 853 // it. 854 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 855 ShAmt = Sh->getZExtValue(); 856 if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) 857 Offset = N.getOperand(0); 858 else { 859 ShAmt = 0; 860 ShOpcVal = ARM_AM::no_shift; 861 } 862 } else { 863 ShOpcVal = ARM_AM::no_shift; 864 } 865 } 866 867 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 868 SDLoc(N), MVT::i32); 869 return true; 870 } 871 872 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 873 SDValue &Offset, SDValue &Opc) { 874 unsigned Opcode = Op->getOpcode(); 875 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 876 ? cast<LoadSDNode>(Op)->getAddressingMode() 877 : cast<StoreSDNode>(Op)->getAddressingMode(); 878 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 879 ? ARM_AM::add : ARM_AM::sub; 880 int Val; 881 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 882 if (AddSub == ARM_AM::sub) Val *= -1; 883 Offset = CurDAG->getRegister(0, MVT::i32); 884 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32); 885 return true; 886 } 887 888 return false; 889 } 890 891 892 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 893 SDValue &Offset, SDValue &Opc) { 894 unsigned Opcode = Op->getOpcode(); 895 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 896 ? cast<LoadSDNode>(Op)->getAddressingMode() 897 : cast<StoreSDNode>(Op)->getAddressingMode(); 898 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 899 ? ARM_AM::add : ARM_AM::sub; 900 int Val; 901 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 902 Offset = CurDAG->getRegister(0, MVT::i32); 903 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, 904 ARM_AM::no_shift), 905 SDLoc(Op), MVT::i32); 906 return true; 907 } 908 909 return false; 910 } 911 912 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { 913 Base = N; 914 return true; 915 } 916 917 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, 918 SDValue &Base, SDValue &Offset, 919 SDValue &Opc) { 920 if (N.getOpcode() == ISD::SUB) { 921 // X - C is canonicalize to X + -C, no need to handle it here. 922 Base = N.getOperand(0); 923 Offset = N.getOperand(1); 924 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N), 925 MVT::i32); 926 return true; 927 } 928 929 if (!CurDAG->isBaseWithConstantOffset(N)) { 930 Base = N; 931 if (N.getOpcode() == ISD::FrameIndex) { 932 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 933 Base = CurDAG->getTargetFrameIndex( 934 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 935 } 936 Offset = CurDAG->getRegister(0, MVT::i32); 937 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 938 MVT::i32); 939 return true; 940 } 941 942 // If the RHS is +/- imm8, fold into addr mode. 943 int RHSC; 944 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 945 -256 + 1, 256, RHSC)) { // 8 bits. 946 Base = N.getOperand(0); 947 if (Base.getOpcode() == ISD::FrameIndex) { 948 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 949 Base = CurDAG->getTargetFrameIndex( 950 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 951 } 952 Offset = CurDAG->getRegister(0, MVT::i32); 953 954 ARM_AM::AddrOpc AddSub = ARM_AM::add; 955 if (RHSC < 0) { 956 AddSub = ARM_AM::sub; 957 RHSC = -RHSC; 958 } 959 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N), 960 MVT::i32); 961 return true; 962 } 963 964 Base = N.getOperand(0); 965 Offset = N.getOperand(1); 966 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 967 MVT::i32); 968 return true; 969 } 970 971 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, 972 SDValue &Offset, SDValue &Opc) { 973 unsigned Opcode = Op->getOpcode(); 974 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 975 ? cast<LoadSDNode>(Op)->getAddressingMode() 976 : cast<StoreSDNode>(Op)->getAddressingMode(); 977 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 978 ? ARM_AM::add : ARM_AM::sub; 979 int Val; 980 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits. 981 Offset = CurDAG->getRegister(0, MVT::i32); 982 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op), 983 MVT::i32); 984 return true; 985 } 986 987 Offset = N; 988 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op), 989 MVT::i32); 990 return true; 991 } 992 993 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, 994 bool FP16) { 995 if (!CurDAG->isBaseWithConstantOffset(N)) { 996 Base = N; 997 if (N.getOpcode() == ISD::FrameIndex) { 998 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 999 Base = CurDAG->getTargetFrameIndex( 1000 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1001 } else if (N.getOpcode() == ARMISD::Wrapper && 1002 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1003 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1004 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1005 Base = N.getOperand(0); 1006 } 1007 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 1008 SDLoc(N), MVT::i32); 1009 return true; 1010 } 1011 1012 // If the RHS is +/- imm8, fold into addr mode. 1013 int RHSC; 1014 const int Scale = FP16 ? 2 : 4; 1015 1016 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) { 1017 Base = N.getOperand(0); 1018 if (Base.getOpcode() == ISD::FrameIndex) { 1019 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1020 Base = CurDAG->getTargetFrameIndex( 1021 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1022 } 1023 1024 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1025 if (RHSC < 0) { 1026 AddSub = ARM_AM::sub; 1027 RHSC = -RHSC; 1028 } 1029 1030 if (FP16) 1031 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC), 1032 SDLoc(N), MVT::i32); 1033 else 1034 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), 1035 SDLoc(N), MVT::i32); 1036 1037 return true; 1038 } 1039 1040 Base = N; 1041 1042 if (FP16) 1043 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0), 1044 SDLoc(N), MVT::i32); 1045 else 1046 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 1047 SDLoc(N), MVT::i32); 1048 1049 return true; 1050 } 1051 1052 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, 1053 SDValue &Base, SDValue &Offset) { 1054 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false); 1055 } 1056 1057 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N, 1058 SDValue &Base, SDValue &Offset) { 1059 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true); 1060 } 1061 1062 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, 1063 SDValue &Align) { 1064 Addr = N; 1065 1066 unsigned Alignment = 0; 1067 1068 MemSDNode *MemN = cast<MemSDNode>(Parent); 1069 1070 if (isa<LSBaseSDNode>(MemN) || 1071 ((MemN->getOpcode() == ARMISD::VST1_UPD || 1072 MemN->getOpcode() == ARMISD::VLD1_UPD) && 1073 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { 1074 // This case occurs only for VLD1-lane/dup and VST1-lane instructions. 1075 // The maximum alignment is equal to the memory size being referenced. 1076 llvm::Align MMOAlign = MemN->getAlign(); 1077 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; 1078 if (MMOAlign.value() >= MemSize && MemSize > 1) 1079 Alignment = MemSize; 1080 } else { 1081 // All other uses of addrmode6 are for intrinsics. For now just record 1082 // the raw alignment value; it will be refined later based on the legal 1083 // alignment operands for the intrinsic. 1084 Alignment = MemN->getAlign().value(); 1085 } 1086 1087 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); 1088 return true; 1089 } 1090 1091 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, 1092 SDValue &Offset) { 1093 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op); 1094 ISD::MemIndexedMode AM = LdSt->getAddressingMode(); 1095 if (AM != ISD::POST_INC) 1096 return false; 1097 Offset = N; 1098 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) { 1099 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) 1100 Offset = CurDAG->getRegister(0, MVT::i32); 1101 } 1102 return true; 1103 } 1104 1105 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, 1106 SDValue &Offset, SDValue &Label) { 1107 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { 1108 Offset = N.getOperand(0); 1109 SDValue N1 = N.getOperand(1); 1110 Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32); 1111 return true; 1112 } 1113 1114 return false; 1115 } 1116 1117 1118 //===----------------------------------------------------------------------===// 1119 // Thumb Addressing Modes 1120 //===----------------------------------------------------------------------===// 1121 1122 static bool shouldUseZeroOffsetLdSt(SDValue N) { 1123 // Negative numbers are difficult to materialise in thumb1. If we are 1124 // selecting the add of a negative, instead try to select ri with a zero 1125 // offset, so create the add node directly which will become a sub. 1126 if (N.getOpcode() != ISD::ADD) 1127 return false; 1128 1129 // Look for an imm which is not legal for ld/st, but is legal for sub. 1130 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) 1131 return C->getSExtValue() < 0 && C->getSExtValue() >= -255; 1132 1133 return false; 1134 } 1135 1136 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, 1137 SDValue &Offset) { 1138 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { 1139 if (!isNullConstant(N)) 1140 return false; 1141 1142 Base = Offset = N; 1143 return true; 1144 } 1145 1146 Base = N.getOperand(0); 1147 Offset = N.getOperand(1); 1148 return true; 1149 } 1150 1151 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base, 1152 SDValue &Offset) { 1153 if (shouldUseZeroOffsetLdSt(N)) 1154 return false; // Select ri instead 1155 return SelectThumbAddrModeRRSext(N, Base, Offset); 1156 } 1157 1158 bool 1159 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1160 SDValue &Base, SDValue &OffImm) { 1161 if (shouldUseZeroOffsetLdSt(N)) { 1162 Base = N; 1163 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1164 return true; 1165 } 1166 1167 if (!CurDAG->isBaseWithConstantOffset(N)) { 1168 if (N.getOpcode() == ISD::ADD) { 1169 return false; // We want to select register offset instead 1170 } else if (N.getOpcode() == ARMISD::Wrapper && 1171 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1172 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1173 N.getOperand(0).getOpcode() != ISD::TargetConstantPool && 1174 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1175 Base = N.getOperand(0); 1176 } else { 1177 Base = N; 1178 } 1179 1180 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1181 return true; 1182 } 1183 1184 // If the RHS is + imm5 * scale, fold into addr mode. 1185 int RHSC; 1186 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1187 Base = N.getOperand(0); 1188 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1189 return true; 1190 } 1191 1192 // Offset is too large, so use register offset instead. 1193 return false; 1194 } 1195 1196 bool 1197 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1198 SDValue &OffImm) { 1199 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1200 } 1201 1202 bool 1203 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1204 SDValue &OffImm) { 1205 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1206 } 1207 1208 bool 1209 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1210 SDValue &OffImm) { 1211 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1212 } 1213 1214 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1215 SDValue &Base, SDValue &OffImm) { 1216 if (N.getOpcode() == ISD::FrameIndex) { 1217 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1218 // Only multiples of 4 are allowed for the offset, so the frame object 1219 // alignment must be at least 4. 1220 MachineFrameInfo &MFI = MF->getFrameInfo(); 1221 if (MFI.getObjectAlign(FI) < Align(4)) 1222 MFI.setObjectAlignment(FI, Align(4)); 1223 Base = CurDAG->getTargetFrameIndex( 1224 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1225 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1226 return true; 1227 } 1228 1229 if (!CurDAG->isBaseWithConstantOffset(N)) 1230 return false; 1231 1232 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { 1233 // If the RHS is + imm8 * scale, fold into addr mode. 1234 int RHSC; 1235 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1236 Base = N.getOperand(0); 1237 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1238 // Make sure the offset is inside the object, or we might fail to 1239 // allocate an emergency spill slot. (An out-of-range access is UB, but 1240 // it could show up anyway.) 1241 MachineFrameInfo &MFI = MF->getFrameInfo(); 1242 if (RHSC * 4 < MFI.getObjectSize(FI)) { 1243 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1244 // indexed by the LHS must be 4-byte aligned. 1245 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4)) 1246 MFI.setObjectAlignment(FI, Align(4)); 1247 if (MFI.getObjectAlign(FI) >= Align(4)) { 1248 Base = CurDAG->getTargetFrameIndex( 1249 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1250 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1251 return true; 1252 } 1253 } 1254 } 1255 } 1256 1257 return false; 1258 } 1259 1260 template <unsigned Shift> 1261 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, 1262 SDValue &OffImm) { 1263 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1264 int RHSC; 1265 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1266 RHSC)) { 1267 Base = N.getOperand(0); 1268 if (N.getOpcode() == ISD::SUB) 1269 RHSC = -RHSC; 1270 OffImm = 1271 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1272 return true; 1273 } 1274 } 1275 1276 // Base only. 1277 Base = N; 1278 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1279 return true; 1280 } 1281 1282 1283 //===----------------------------------------------------------------------===// 1284 // Thumb 2 Addressing Modes 1285 //===----------------------------------------------------------------------===// 1286 1287 1288 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1289 SDValue &Base, SDValue &OffImm) { 1290 // Match simple R + imm12 operands. 1291 1292 // Base only. 1293 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1294 !CurDAG->isBaseWithConstantOffset(N)) { 1295 if (N.getOpcode() == ISD::FrameIndex) { 1296 // Match frame index. 1297 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1298 Base = CurDAG->getTargetFrameIndex( 1299 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1300 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1301 return true; 1302 } 1303 1304 if (N.getOpcode() == ARMISD::Wrapper && 1305 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1306 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1307 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1308 Base = N.getOperand(0); 1309 if (Base.getOpcode() == ISD::TargetConstantPool) 1310 return false; // We want to select t2LDRpci instead. 1311 } else 1312 Base = N; 1313 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1314 return true; 1315 } 1316 1317 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1318 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1319 // Let t2LDRi8 handle (R - imm8). 1320 return false; 1321 1322 int RHSC = (int)RHS->getZExtValue(); 1323 if (N.getOpcode() == ISD::SUB) 1324 RHSC = -RHSC; 1325 1326 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1327 Base = N.getOperand(0); 1328 if (Base.getOpcode() == ISD::FrameIndex) { 1329 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1330 Base = CurDAG->getTargetFrameIndex( 1331 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1332 } 1333 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1334 return true; 1335 } 1336 } 1337 1338 // Base only. 1339 Base = N; 1340 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1341 return true; 1342 } 1343 1344 template <unsigned Shift> 1345 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, 1346 SDValue &OffImm) { 1347 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1348 int RHSC; 1349 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) { 1350 Base = N.getOperand(0); 1351 if (Base.getOpcode() == ISD::FrameIndex) { 1352 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1353 Base = CurDAG->getTargetFrameIndex( 1354 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1355 } 1356 1357 if (N.getOpcode() == ISD::SUB) 1358 RHSC = -RHSC; 1359 OffImm = 1360 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1361 return true; 1362 } 1363 } 1364 1365 // Base only. 1366 Base = N; 1367 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1368 return true; 1369 } 1370 1371 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1372 SDValue &Base, SDValue &OffImm) { 1373 // Match simple R - imm8 operands. 1374 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1375 !CurDAG->isBaseWithConstantOffset(N)) 1376 return false; 1377 1378 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1379 int RHSC = (int)RHS->getSExtValue(); 1380 if (N.getOpcode() == ISD::SUB) 1381 RHSC = -RHSC; 1382 1383 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1384 Base = N.getOperand(0); 1385 if (Base.getOpcode() == ISD::FrameIndex) { 1386 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1387 Base = CurDAG->getTargetFrameIndex( 1388 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1389 } 1390 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1391 return true; 1392 } 1393 } 1394 1395 return false; 1396 } 1397 1398 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1399 SDValue &OffImm){ 1400 unsigned Opcode = Op->getOpcode(); 1401 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1402 ? cast<LoadSDNode>(Op)->getAddressingMode() 1403 : cast<StoreSDNode>(Op)->getAddressingMode(); 1404 int RHSC; 1405 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1406 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1407 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) 1408 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1409 return true; 1410 } 1411 1412 return false; 1413 } 1414 1415 template <unsigned Shift> 1416 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base, 1417 SDValue &OffImm) { 1418 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1419 int RHSC; 1420 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1421 RHSC)) { 1422 Base = N.getOperand(0); 1423 if (Base.getOpcode() == ISD::FrameIndex) { 1424 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1425 Base = CurDAG->getTargetFrameIndex( 1426 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1427 } 1428 1429 if (N.getOpcode() == ISD::SUB) 1430 RHSC = -RHSC; 1431 OffImm = 1432 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1433 return true; 1434 } 1435 } 1436 1437 // Base only. 1438 Base = N; 1439 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1440 return true; 1441 } 1442 1443 template <unsigned Shift> 1444 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1445 SDValue &OffImm) { 1446 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); 1447 } 1448 1449 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1450 SDValue &OffImm, 1451 unsigned Shift) { 1452 unsigned Opcode = Op->getOpcode(); 1453 ISD::MemIndexedMode AM; 1454 switch (Opcode) { 1455 case ISD::LOAD: 1456 AM = cast<LoadSDNode>(Op)->getAddressingMode(); 1457 break; 1458 case ISD::STORE: 1459 AM = cast<StoreSDNode>(Op)->getAddressingMode(); 1460 break; 1461 case ISD::MLOAD: 1462 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode(); 1463 break; 1464 case ISD::MSTORE: 1465 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode(); 1466 break; 1467 default: 1468 llvm_unreachable("Unexpected Opcode for Imm7Offset"); 1469 } 1470 1471 int RHSC; 1472 // 7 bit constant, shifted by Shift. 1473 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { 1474 OffImm = 1475 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1476 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) 1477 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), 1478 MVT::i32); 1479 return true; 1480 } 1481 return false; 1482 } 1483 1484 template <int Min, int Max> 1485 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) { 1486 int Val; 1487 if (isScaledConstantInRange(N, 1, Min, Max, Val)) { 1488 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32); 1489 return true; 1490 } 1491 return false; 1492 } 1493 1494 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1495 SDValue &Base, 1496 SDValue &OffReg, SDValue &ShImm) { 1497 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1498 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1499 return false; 1500 1501 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 1502 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1503 int RHSC = (int)RHS->getZExtValue(); 1504 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1505 return false; 1506 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1507 return false; 1508 } 1509 1510 // Look for (R + R) or (R + (R << [1,2,3])). 1511 unsigned ShAmt = 0; 1512 Base = N.getOperand(0); 1513 OffReg = N.getOperand(1); 1514 1515 // Swap if it is ((R << c) + R). 1516 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1517 if (ShOpcVal != ARM_AM::lsl) { 1518 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1519 if (ShOpcVal == ARM_AM::lsl) 1520 std::swap(Base, OffReg); 1521 } 1522 1523 if (ShOpcVal == ARM_AM::lsl) { 1524 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1525 // it. 1526 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1527 ShAmt = Sh->getZExtValue(); 1528 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1529 OffReg = OffReg.getOperand(0); 1530 else { 1531 ShAmt = 0; 1532 } 1533 } 1534 } 1535 1536 // If OffReg is a multiply-by-constant and it's profitable to extract a shift 1537 // and use it in a shifted operand do so. 1538 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { 1539 unsigned PowerOfTwo = 0; 1540 SDValue NewMulConst; 1541 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { 1542 HandleSDNode Handle(OffReg); 1543 replaceDAGValue(OffReg.getOperand(1), NewMulConst); 1544 OffReg = Handle.getValue(); 1545 ShAmt = PowerOfTwo; 1546 } 1547 } 1548 1549 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1550 1551 return true; 1552 } 1553 1554 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1555 SDValue &OffImm) { 1556 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1557 // instructions. 1558 Base = N; 1559 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1560 1561 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1562 return true; 1563 1564 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1565 if (!RHS) 1566 return true; 1567 1568 uint32_t RHSC = (int)RHS->getZExtValue(); 1569 if (RHSC > 1020 || RHSC % 4 != 0) 1570 return true; 1571 1572 Base = N.getOperand(0); 1573 if (Base.getOpcode() == ISD::FrameIndex) { 1574 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1575 Base = CurDAG->getTargetFrameIndex( 1576 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1577 } 1578 1579 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1580 return true; 1581 } 1582 1583 //===--------------------------------------------------------------------===// 1584 1585 /// getAL - Returns a ARMCC::AL immediate node. 1586 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { 1587 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); 1588 } 1589 1590 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 1591 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1592 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 1593 } 1594 1595 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { 1596 LoadSDNode *LD = cast<LoadSDNode>(N); 1597 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1598 if (AM == ISD::UNINDEXED) 1599 return false; 1600 1601 EVT LoadedVT = LD->getMemoryVT(); 1602 SDValue Offset, AMOpc; 1603 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1604 unsigned Opcode = 0; 1605 bool Match = false; 1606 if (LoadedVT == MVT::i32 && isPre && 1607 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1608 Opcode = ARM::LDR_PRE_IMM; 1609 Match = true; 1610 } else if (LoadedVT == MVT::i32 && !isPre && 1611 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1612 Opcode = ARM::LDR_POST_IMM; 1613 Match = true; 1614 } else if (LoadedVT == MVT::i32 && 1615 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1616 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; 1617 Match = true; 1618 1619 } else if (LoadedVT == MVT::i16 && 1620 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1621 Match = true; 1622 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) 1623 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) 1624 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); 1625 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { 1626 if (LD->getExtensionType() == ISD::SEXTLOAD) { 1627 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1628 Match = true; 1629 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; 1630 } 1631 } else { 1632 if (isPre && 1633 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1634 Match = true; 1635 Opcode = ARM::LDRB_PRE_IMM; 1636 } else if (!isPre && 1637 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1638 Match = true; 1639 Opcode = ARM::LDRB_POST_IMM; 1640 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1641 Match = true; 1642 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; 1643 } 1644 } 1645 } 1646 1647 if (Match) { 1648 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { 1649 SDValue Chain = LD->getChain(); 1650 SDValue Base = LD->getBasePtr(); 1651 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), 1652 CurDAG->getRegister(0, MVT::i32), Chain }; 1653 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1654 MVT::Other, Ops); 1655 transferMemOperands(N, New); 1656 ReplaceNode(N, New); 1657 return true; 1658 } else { 1659 SDValue Chain = LD->getChain(); 1660 SDValue Base = LD->getBasePtr(); 1661 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), 1662 CurDAG->getRegister(0, MVT::i32), Chain }; 1663 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1664 MVT::Other, Ops); 1665 transferMemOperands(N, New); 1666 ReplaceNode(N, New); 1667 return true; 1668 } 1669 } 1670 1671 return false; 1672 } 1673 1674 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { 1675 LoadSDNode *LD = cast<LoadSDNode>(N); 1676 EVT LoadedVT = LD->getMemoryVT(); 1677 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1678 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD || 1679 LoadedVT.getSimpleVT().SimpleTy != MVT::i32) 1680 return false; 1681 1682 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset()); 1683 if (!COffs || COffs->getZExtValue() != 4) 1684 return false; 1685 1686 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}. 1687 // The encoding of LDM is not how the rest of ISel expects a post-inc load to 1688 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after 1689 // ISel. 1690 SDValue Chain = LD->getChain(); 1691 SDValue Base = LD->getBasePtr(); 1692 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)), 1693 CurDAG->getRegister(0, MVT::i32), Chain }; 1694 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, 1695 MVT::i32, MVT::Other, Ops); 1696 transferMemOperands(N, New); 1697 ReplaceNode(N, New); 1698 return true; 1699 } 1700 1701 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { 1702 LoadSDNode *LD = cast<LoadSDNode>(N); 1703 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1704 if (AM == ISD::UNINDEXED) 1705 return false; 1706 1707 EVT LoadedVT = LD->getMemoryVT(); 1708 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1709 SDValue Offset; 1710 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1711 unsigned Opcode = 0; 1712 bool Match = false; 1713 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { 1714 switch (LoadedVT.getSimpleVT().SimpleTy) { 1715 case MVT::i32: 1716 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; 1717 break; 1718 case MVT::i16: 1719 if (isSExtLd) 1720 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; 1721 else 1722 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; 1723 break; 1724 case MVT::i8: 1725 case MVT::i1: 1726 if (isSExtLd) 1727 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; 1728 else 1729 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; 1730 break; 1731 default: 1732 return false; 1733 } 1734 Match = true; 1735 } 1736 1737 if (Match) { 1738 SDValue Chain = LD->getChain(); 1739 SDValue Base = LD->getBasePtr(); 1740 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)), 1741 CurDAG->getRegister(0, MVT::i32), Chain }; 1742 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1743 MVT::Other, Ops); 1744 transferMemOperands(N, New); 1745 ReplaceNode(N, New); 1746 return true; 1747 } 1748 1749 return false; 1750 } 1751 1752 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) { 1753 EVT LoadedVT; 1754 unsigned Opcode = 0; 1755 bool isSExtLd, isPre; 1756 Align Alignment; 1757 ARMVCC::VPTCodes Pred; 1758 SDValue PredReg; 1759 SDValue Chain, Base, Offset; 1760 1761 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1762 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1763 if (AM == ISD::UNINDEXED) 1764 return false; 1765 LoadedVT = LD->getMemoryVT(); 1766 if (!LoadedVT.isVector()) 1767 return false; 1768 1769 Chain = LD->getChain(); 1770 Base = LD->getBasePtr(); 1771 Offset = LD->getOffset(); 1772 Alignment = LD->getAlign(); 1773 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1774 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1775 Pred = ARMVCC::None; 1776 PredReg = CurDAG->getRegister(0, MVT::i32); 1777 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) { 1778 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1779 if (AM == ISD::UNINDEXED) 1780 return false; 1781 LoadedVT = LD->getMemoryVT(); 1782 if (!LoadedVT.isVector()) 1783 return false; 1784 1785 Chain = LD->getChain(); 1786 Base = LD->getBasePtr(); 1787 Offset = LD->getOffset(); 1788 Alignment = LD->getAlign(); 1789 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1790 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1791 Pred = ARMVCC::Then; 1792 PredReg = LD->getMask(); 1793 } else 1794 llvm_unreachable("Expected a Load or a Masked Load!"); 1795 1796 // We allow LE non-masked loads to change the type (for example use a vldrb.8 1797 // as opposed to a vldrw.32). This can allow extra addressing modes or 1798 // alignments for what is otherwise an equivalent instruction. 1799 bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N); 1800 1801 SDValue NewOffset; 1802 if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 && 1803 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) { 1804 if (isSExtLd) 1805 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post; 1806 else 1807 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post; 1808 } else if (LoadedVT == MVT::v8i8 && 1809 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) { 1810 if (isSExtLd) 1811 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post; 1812 else 1813 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post; 1814 } else if (LoadedVT == MVT::v4i8 && 1815 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) { 1816 if (isSExtLd) 1817 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post; 1818 else 1819 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post; 1820 } else if (Alignment >= Align(4) && 1821 (CanChangeType || LoadedVT == MVT::v4i32 || 1822 LoadedVT == MVT::v4f32) && 1823 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2)) 1824 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post; 1825 else if (Alignment >= Align(2) && 1826 (CanChangeType || LoadedVT == MVT::v8i16 || 1827 LoadedVT == MVT::v8f16) && 1828 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) 1829 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post; 1830 else if ((CanChangeType || LoadedVT == MVT::v16i8) && 1831 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) 1832 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post; 1833 else 1834 return false; 1835 1836 SDValue Ops[] = {Base, 1837 NewOffset, 1838 CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), 1839 PredReg, 1840 CurDAG->getRegister(0, MVT::i32), // tp_reg 1841 Chain}; 1842 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, 1843 N->getValueType(0), MVT::Other, Ops); 1844 transferMemOperands(N, New); 1845 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 1846 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 1847 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 1848 CurDAG->RemoveDeadNode(N); 1849 return true; 1850 } 1851 1852 /// Form a GPRPair pseudo register from a pair of GPR regs. 1853 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { 1854 SDLoc dl(V0.getNode()); 1855 SDValue RegClass = 1856 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); 1857 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 1858 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 1859 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1860 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1861 } 1862 1863 /// Form a D register from a pair of S registers. 1864 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1865 SDLoc dl(V0.getNode()); 1866 SDValue RegClass = 1867 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32); 1868 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1869 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1870 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1871 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1872 } 1873 1874 /// Form a quad register from a pair of D registers. 1875 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1876 SDLoc dl(V0.getNode()); 1877 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1878 MVT::i32); 1879 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1880 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1881 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1882 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1883 } 1884 1885 /// Form 4 consecutive D registers from a pair of Q registers. 1886 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1887 SDLoc dl(V0.getNode()); 1888 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1889 MVT::i32); 1890 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1891 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1892 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1893 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1894 } 1895 1896 /// Form 4 consecutive S registers. 1897 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1898 SDValue V2, SDValue V3) { 1899 SDLoc dl(V0.getNode()); 1900 SDValue RegClass = 1901 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1902 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1903 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1904 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1905 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1906 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1907 V2, SubReg2, V3, SubReg3 }; 1908 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1909 } 1910 1911 /// Form 4 consecutive D registers. 1912 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1913 SDValue V2, SDValue V3) { 1914 SDLoc dl(V0.getNode()); 1915 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1916 MVT::i32); 1917 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1918 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1919 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1920 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1921 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1922 V2, SubReg2, V3, SubReg3 }; 1923 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1924 } 1925 1926 /// Form 4 consecutive Q registers. 1927 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1928 SDValue V2, SDValue V3) { 1929 SDLoc dl(V0.getNode()); 1930 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1931 MVT::i32); 1932 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1933 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1934 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1935 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1936 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1937 V2, SubReg2, V3, SubReg3 }; 1938 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1939 } 1940 1941 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1942 /// of a NEON VLD or VST instruction. The supported values depend on the 1943 /// number of registers being loaded. 1944 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1945 unsigned NumVecs, bool is64BitVector) { 1946 unsigned NumRegs = NumVecs; 1947 if (!is64BitVector && NumVecs < 3) 1948 NumRegs *= 2; 1949 1950 unsigned Alignment = Align->getAsZExtVal(); 1951 if (Alignment >= 32 && NumRegs == 4) 1952 Alignment = 32; 1953 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1954 Alignment = 16; 1955 else if (Alignment >= 8) 1956 Alignment = 8; 1957 else 1958 Alignment = 0; 1959 1960 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1961 } 1962 1963 static bool isVLDfixed(unsigned Opc) 1964 { 1965 switch (Opc) { 1966 default: return false; 1967 case ARM::VLD1d8wb_fixed : return true; 1968 case ARM::VLD1d16wb_fixed : return true; 1969 case ARM::VLD1d64Qwb_fixed : return true; 1970 case ARM::VLD1d32wb_fixed : return true; 1971 case ARM::VLD1d64wb_fixed : return true; 1972 case ARM::VLD1d8TPseudoWB_fixed : return true; 1973 case ARM::VLD1d16TPseudoWB_fixed : return true; 1974 case ARM::VLD1d32TPseudoWB_fixed : return true; 1975 case ARM::VLD1d64TPseudoWB_fixed : return true; 1976 case ARM::VLD1d8QPseudoWB_fixed : return true; 1977 case ARM::VLD1d16QPseudoWB_fixed : return true; 1978 case ARM::VLD1d32QPseudoWB_fixed : return true; 1979 case ARM::VLD1d64QPseudoWB_fixed : return true; 1980 case ARM::VLD1q8wb_fixed : return true; 1981 case ARM::VLD1q16wb_fixed : return true; 1982 case ARM::VLD1q32wb_fixed : return true; 1983 case ARM::VLD1q64wb_fixed : return true; 1984 case ARM::VLD1DUPd8wb_fixed : return true; 1985 case ARM::VLD1DUPd16wb_fixed : return true; 1986 case ARM::VLD1DUPd32wb_fixed : return true; 1987 case ARM::VLD1DUPq8wb_fixed : return true; 1988 case ARM::VLD1DUPq16wb_fixed : return true; 1989 case ARM::VLD1DUPq32wb_fixed : return true; 1990 case ARM::VLD2d8wb_fixed : return true; 1991 case ARM::VLD2d16wb_fixed : return true; 1992 case ARM::VLD2d32wb_fixed : return true; 1993 case ARM::VLD2q8PseudoWB_fixed : return true; 1994 case ARM::VLD2q16PseudoWB_fixed : return true; 1995 case ARM::VLD2q32PseudoWB_fixed : return true; 1996 case ARM::VLD2DUPd8wb_fixed : return true; 1997 case ARM::VLD2DUPd16wb_fixed : return true; 1998 case ARM::VLD2DUPd32wb_fixed : return true; 1999 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true; 2000 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true; 2001 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true; 2002 } 2003 } 2004 2005 static bool isVSTfixed(unsigned Opc) 2006 { 2007 switch (Opc) { 2008 default: return false; 2009 case ARM::VST1d8wb_fixed : return true; 2010 case ARM::VST1d16wb_fixed : return true; 2011 case ARM::VST1d32wb_fixed : return true; 2012 case ARM::VST1d64wb_fixed : return true; 2013 case ARM::VST1q8wb_fixed : return true; 2014 case ARM::VST1q16wb_fixed : return true; 2015 case ARM::VST1q32wb_fixed : return true; 2016 case ARM::VST1q64wb_fixed : return true; 2017 case ARM::VST1d8TPseudoWB_fixed : return true; 2018 case ARM::VST1d16TPseudoWB_fixed : return true; 2019 case ARM::VST1d32TPseudoWB_fixed : return true; 2020 case ARM::VST1d64TPseudoWB_fixed : return true; 2021 case ARM::VST1d8QPseudoWB_fixed : return true; 2022 case ARM::VST1d16QPseudoWB_fixed : return true; 2023 case ARM::VST1d32QPseudoWB_fixed : return true; 2024 case ARM::VST1d64QPseudoWB_fixed : return true; 2025 case ARM::VST2d8wb_fixed : return true; 2026 case ARM::VST2d16wb_fixed : return true; 2027 case ARM::VST2d32wb_fixed : return true; 2028 case ARM::VST2q8PseudoWB_fixed : return true; 2029 case ARM::VST2q16PseudoWB_fixed : return true; 2030 case ARM::VST2q32PseudoWB_fixed : return true; 2031 } 2032 } 2033 2034 // Get the register stride update opcode of a VLD/VST instruction that 2035 // is otherwise equivalent to the given fixed stride updating instruction. 2036 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 2037 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 2038 && "Incorrect fixed stride updating instruction."); 2039 switch (Opc) { 2040 default: break; 2041 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 2042 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 2043 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 2044 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 2045 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 2046 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 2047 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 2048 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 2049 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 2050 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 2051 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register; 2052 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register; 2053 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register; 2054 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 2055 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register; 2056 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register; 2057 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register; 2058 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 2059 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; 2060 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; 2061 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; 2062 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; 2063 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; 2064 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; 2065 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register; 2066 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register; 2067 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register; 2068 2069 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 2070 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 2071 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 2072 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 2073 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 2074 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 2075 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 2076 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 2077 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register; 2078 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register; 2079 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register; 2080 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; 2081 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register; 2082 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register; 2083 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register; 2084 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 2085 2086 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 2087 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 2088 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 2089 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 2090 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 2091 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 2092 2093 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 2094 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 2095 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 2096 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 2097 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 2098 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 2099 2100 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 2101 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 2102 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 2103 } 2104 return Opc; // If not one we handle, return it unchanged. 2105 } 2106 2107 /// Returns true if the given increment is a Constant known to be equal to the 2108 /// access size performed by a NEON load/store. This means the "[rN]!" form can 2109 /// be used. 2110 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { 2111 auto C = dyn_cast<ConstantSDNode>(Inc); 2112 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; 2113 } 2114 2115 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 2116 const uint16_t *DOpcodes, 2117 const uint16_t *QOpcodes0, 2118 const uint16_t *QOpcodes1) { 2119 assert(Subtarget->hasNEON()); 2120 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 2121 SDLoc dl(N); 2122 2123 SDValue MemAddr, Align; 2124 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2125 // nodes are not intrinsics. 2126 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2127 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2128 return; 2129 2130 SDValue Chain = N->getOperand(0); 2131 EVT VT = N->getValueType(0); 2132 bool is64BitVector = VT.is64BitVector(); 2133 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2134 2135 unsigned OpcodeIndex; 2136 switch (VT.getSimpleVT().SimpleTy) { 2137 default: llvm_unreachable("unhandled vld type"); 2138 // Double-register operations: 2139 case MVT::v8i8: OpcodeIndex = 0; break; 2140 case MVT::v4f16: 2141 case MVT::v4bf16: 2142 case MVT::v4i16: OpcodeIndex = 1; break; 2143 case MVT::v2f32: 2144 case MVT::v2i32: OpcodeIndex = 2; break; 2145 case MVT::v1i64: OpcodeIndex = 3; break; 2146 // Quad-register operations: 2147 case MVT::v16i8: OpcodeIndex = 0; break; 2148 case MVT::v8f16: 2149 case MVT::v8bf16: 2150 case MVT::v8i16: OpcodeIndex = 1; break; 2151 case MVT::v4f32: 2152 case MVT::v4i32: OpcodeIndex = 2; break; 2153 case MVT::v2f64: 2154 case MVT::v2i64: OpcodeIndex = 3; break; 2155 } 2156 2157 EVT ResTy; 2158 if (NumVecs == 1) 2159 ResTy = VT; 2160 else { 2161 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2162 if (!is64BitVector) 2163 ResTyElts *= 2; 2164 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2165 } 2166 std::vector<EVT> ResTys; 2167 ResTys.push_back(ResTy); 2168 if (isUpdating) 2169 ResTys.push_back(MVT::i32); 2170 ResTys.push_back(MVT::Other); 2171 2172 SDValue Pred = getAL(CurDAG, dl); 2173 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2174 SDNode *VLd; 2175 SmallVector<SDValue, 7> Ops; 2176 2177 // Double registers and VLD1/VLD2 quad registers are directly supported. 2178 if (is64BitVector || NumVecs <= 2) { 2179 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2180 QOpcodes0[OpcodeIndex]); 2181 Ops.push_back(MemAddr); 2182 Ops.push_back(Align); 2183 if (isUpdating) { 2184 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2185 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2186 if (!IsImmUpdate) { 2187 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 2188 // check for the opcode rather than the number of vector elements. 2189 if (isVLDfixed(Opc)) 2190 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2191 Ops.push_back(Inc); 2192 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in 2193 // the operands if not such an opcode. 2194 } else if (!isVLDfixed(Opc)) 2195 Ops.push_back(Reg0); 2196 } 2197 Ops.push_back(Pred); 2198 Ops.push_back(Reg0); 2199 Ops.push_back(Chain); 2200 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2201 2202 } else { 2203 // Otherwise, quad registers are loaded with two separate instructions, 2204 // where one loads the even registers and the other loads the odd registers. 2205 EVT AddrTy = MemAddr.getValueType(); 2206 2207 // Load the even subregs. This is always an updating load, so that it 2208 // provides the address to the second load for the odd subregs. 2209 SDValue ImplDef = 2210 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2211 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 2212 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2213 ResTy, AddrTy, MVT::Other, OpsA); 2214 Chain = SDValue(VLdA, 2); 2215 2216 // Load the odd subregs. 2217 Ops.push_back(SDValue(VLdA, 1)); 2218 Ops.push_back(Align); 2219 if (isUpdating) { 2220 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2221 assert(isa<ConstantSDNode>(Inc.getNode()) && 2222 "only constant post-increment update allowed for VLD3/4"); 2223 (void)Inc; 2224 Ops.push_back(Reg0); 2225 } 2226 Ops.push_back(SDValue(VLdA, 0)); 2227 Ops.push_back(Pred); 2228 Ops.push_back(Reg0); 2229 Ops.push_back(Chain); 2230 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 2231 } 2232 2233 // Transfer memoperands. 2234 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2235 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp}); 2236 2237 if (NumVecs == 1) { 2238 ReplaceNode(N, VLd); 2239 return; 2240 } 2241 2242 // Extract out the subregisters. 2243 SDValue SuperReg = SDValue(VLd, 0); 2244 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2245 ARM::qsub_3 == ARM::qsub_0 + 3, 2246 "Unexpected subreg numbering"); 2247 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0); 2248 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2249 ReplaceUses(SDValue(N, Vec), 2250 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2251 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 2252 if (isUpdating) 2253 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 2254 CurDAG->RemoveDeadNode(N); 2255 } 2256 2257 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 2258 const uint16_t *DOpcodes, 2259 const uint16_t *QOpcodes0, 2260 const uint16_t *QOpcodes1) { 2261 assert(Subtarget->hasNEON()); 2262 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 2263 SDLoc dl(N); 2264 2265 SDValue MemAddr, Align; 2266 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2267 // nodes are not intrinsics. 2268 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2269 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2270 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2271 return; 2272 2273 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2274 2275 SDValue Chain = N->getOperand(0); 2276 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2277 bool is64BitVector = VT.is64BitVector(); 2278 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2279 2280 unsigned OpcodeIndex; 2281 switch (VT.getSimpleVT().SimpleTy) { 2282 default: llvm_unreachable("unhandled vst type"); 2283 // Double-register operations: 2284 case MVT::v8i8: OpcodeIndex = 0; break; 2285 case MVT::v4f16: 2286 case MVT::v4bf16: 2287 case MVT::v4i16: OpcodeIndex = 1; break; 2288 case MVT::v2f32: 2289 case MVT::v2i32: OpcodeIndex = 2; break; 2290 case MVT::v1i64: OpcodeIndex = 3; break; 2291 // Quad-register operations: 2292 case MVT::v16i8: OpcodeIndex = 0; break; 2293 case MVT::v8f16: 2294 case MVT::v8bf16: 2295 case MVT::v8i16: OpcodeIndex = 1; break; 2296 case MVT::v4f32: 2297 case MVT::v4i32: OpcodeIndex = 2; break; 2298 case MVT::v2f64: 2299 case MVT::v2i64: OpcodeIndex = 3; break; 2300 } 2301 2302 std::vector<EVT> ResTys; 2303 if (isUpdating) 2304 ResTys.push_back(MVT::i32); 2305 ResTys.push_back(MVT::Other); 2306 2307 SDValue Pred = getAL(CurDAG, dl); 2308 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2309 SmallVector<SDValue, 7> Ops; 2310 2311 // Double registers and VST1/VST2 quad registers are directly supported. 2312 if (is64BitVector || NumVecs <= 2) { 2313 SDValue SrcReg; 2314 if (NumVecs == 1) { 2315 SrcReg = N->getOperand(Vec0Idx); 2316 } else if (is64BitVector) { 2317 // Form a REG_SEQUENCE to force register allocation. 2318 SDValue V0 = N->getOperand(Vec0Idx + 0); 2319 SDValue V1 = N->getOperand(Vec0Idx + 1); 2320 if (NumVecs == 2) 2321 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2322 else { 2323 SDValue V2 = N->getOperand(Vec0Idx + 2); 2324 // If it's a vst3, form a quad D-register and leave the last part as 2325 // an undef. 2326 SDValue V3 = (NumVecs == 3) 2327 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2328 : N->getOperand(Vec0Idx + 3); 2329 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2330 } 2331 } else { 2332 // Form a QQ register. 2333 SDValue Q0 = N->getOperand(Vec0Idx); 2334 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2335 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2336 } 2337 2338 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2339 QOpcodes0[OpcodeIndex]); 2340 Ops.push_back(MemAddr); 2341 Ops.push_back(Align); 2342 if (isUpdating) { 2343 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2344 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2345 if (!IsImmUpdate) { 2346 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so 2347 // check for the opcode rather than the number of vector elements. 2348 if (isVSTfixed(Opc)) 2349 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2350 Ops.push_back(Inc); 2351 } 2352 // VST1/VST2 fixed increment does not need Reg0 so only include it in 2353 // the operands if not such an opcode. 2354 else if (!isVSTfixed(Opc)) 2355 Ops.push_back(Reg0); 2356 } 2357 Ops.push_back(SrcReg); 2358 Ops.push_back(Pred); 2359 Ops.push_back(Reg0); 2360 Ops.push_back(Chain); 2361 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2362 2363 // Transfer memoperands. 2364 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp}); 2365 2366 ReplaceNode(N, VSt); 2367 return; 2368 } 2369 2370 // Otherwise, quad registers are stored with two separate instructions, 2371 // where one stores the even registers and the other stores the odd registers. 2372 2373 // Form the QQQQ REG_SEQUENCE. 2374 SDValue V0 = N->getOperand(Vec0Idx + 0); 2375 SDValue V1 = N->getOperand(Vec0Idx + 1); 2376 SDValue V2 = N->getOperand(Vec0Idx + 2); 2377 SDValue V3 = (NumVecs == 3) 2378 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2379 : N->getOperand(Vec0Idx + 3); 2380 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2381 2382 // Store the even D registers. This is always an updating store, so that it 2383 // provides the address to the second store for the odd subregs. 2384 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2385 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2386 MemAddr.getValueType(), 2387 MVT::Other, OpsA); 2388 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp}); 2389 Chain = SDValue(VStA, 1); 2390 2391 // Store the odd D registers. 2392 Ops.push_back(SDValue(VStA, 0)); 2393 Ops.push_back(Align); 2394 if (isUpdating) { 2395 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2396 assert(isa<ConstantSDNode>(Inc.getNode()) && 2397 "only constant post-increment update allowed for VST3/4"); 2398 (void)Inc; 2399 Ops.push_back(Reg0); 2400 } 2401 Ops.push_back(RegSeq); 2402 Ops.push_back(Pred); 2403 Ops.push_back(Reg0); 2404 Ops.push_back(Chain); 2405 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2406 Ops); 2407 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp}); 2408 ReplaceNode(N, VStB); 2409 } 2410 2411 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2412 unsigned NumVecs, 2413 const uint16_t *DOpcodes, 2414 const uint16_t *QOpcodes) { 2415 assert(Subtarget->hasNEON()); 2416 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2417 SDLoc dl(N); 2418 2419 SDValue MemAddr, Align; 2420 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2421 // nodes are not intrinsics. 2422 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2423 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2424 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2425 return; 2426 2427 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2428 2429 SDValue Chain = N->getOperand(0); 2430 unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs); 2431 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2432 bool is64BitVector = VT.is64BitVector(); 2433 2434 unsigned Alignment = 0; 2435 if (NumVecs != 3) { 2436 Alignment = Align->getAsZExtVal(); 2437 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2438 if (Alignment > NumBytes) 2439 Alignment = NumBytes; 2440 if (Alignment < 8 && Alignment < NumBytes) 2441 Alignment = 0; 2442 // Alignment must be a power of two; make sure of that. 2443 Alignment = (Alignment & -Alignment); 2444 if (Alignment == 1) 2445 Alignment = 0; 2446 } 2447 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2448 2449 unsigned OpcodeIndex; 2450 switch (VT.getSimpleVT().SimpleTy) { 2451 default: llvm_unreachable("unhandled vld/vst lane type"); 2452 // Double-register operations: 2453 case MVT::v8i8: OpcodeIndex = 0; break; 2454 case MVT::v4f16: 2455 case MVT::v4bf16: 2456 case MVT::v4i16: OpcodeIndex = 1; break; 2457 case MVT::v2f32: 2458 case MVT::v2i32: OpcodeIndex = 2; break; 2459 // Quad-register operations: 2460 case MVT::v8f16: 2461 case MVT::v8bf16: 2462 case MVT::v8i16: OpcodeIndex = 0; break; 2463 case MVT::v4f32: 2464 case MVT::v4i32: OpcodeIndex = 1; break; 2465 } 2466 2467 std::vector<EVT> ResTys; 2468 if (IsLoad) { 2469 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2470 if (!is64BitVector) 2471 ResTyElts *= 2; 2472 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2473 MVT::i64, ResTyElts)); 2474 } 2475 if (isUpdating) 2476 ResTys.push_back(MVT::i32); 2477 ResTys.push_back(MVT::Other); 2478 2479 SDValue Pred = getAL(CurDAG, dl); 2480 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2481 2482 SmallVector<SDValue, 8> Ops; 2483 Ops.push_back(MemAddr); 2484 Ops.push_back(Align); 2485 if (isUpdating) { 2486 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2487 bool IsImmUpdate = 2488 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2489 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 2490 } 2491 2492 SDValue SuperReg; 2493 SDValue V0 = N->getOperand(Vec0Idx + 0); 2494 SDValue V1 = N->getOperand(Vec0Idx + 1); 2495 if (NumVecs == 2) { 2496 if (is64BitVector) 2497 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2498 else 2499 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2500 } else { 2501 SDValue V2 = N->getOperand(Vec0Idx + 2); 2502 SDValue V3 = (NumVecs == 3) 2503 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2504 : N->getOperand(Vec0Idx + 3); 2505 if (is64BitVector) 2506 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2507 else 2508 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2509 } 2510 Ops.push_back(SuperReg); 2511 Ops.push_back(getI32Imm(Lane, dl)); 2512 Ops.push_back(Pred); 2513 Ops.push_back(Reg0); 2514 Ops.push_back(Chain); 2515 2516 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2517 QOpcodes[OpcodeIndex]); 2518 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2519 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp}); 2520 if (!IsLoad) { 2521 ReplaceNode(N, VLdLn); 2522 return; 2523 } 2524 2525 // Extract the subregisters. 2526 SuperReg = SDValue(VLdLn, 0); 2527 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2528 ARM::qsub_3 == ARM::qsub_0 + 3, 2529 "Unexpected subreg numbering"); 2530 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2531 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2532 ReplaceUses(SDValue(N, Vec), 2533 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2534 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2535 if (isUpdating) 2536 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2537 CurDAG->RemoveDeadNode(N); 2538 } 2539 2540 template <typename SDValueVector> 2541 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2542 SDValue PredicateMask) { 2543 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2544 Ops.push_back(PredicateMask); 2545 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2546 } 2547 2548 template <typename SDValueVector> 2549 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2550 SDValue PredicateMask, 2551 SDValue Inactive) { 2552 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2553 Ops.push_back(PredicateMask); 2554 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2555 Ops.push_back(Inactive); 2556 } 2557 2558 template <typename SDValueVector> 2559 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) { 2560 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2561 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2562 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2563 } 2564 2565 template <typename SDValueVector> 2566 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2567 EVT InactiveTy) { 2568 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2569 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2570 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2571 Ops.push_back(SDValue( 2572 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0)); 2573 } 2574 2575 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, 2576 bool Predicated) { 2577 SDLoc Loc(N); 2578 SmallVector<SDValue, 8> Ops; 2579 2580 uint16_t Opcode; 2581 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) { 2582 case 32: 2583 Opcode = Opcodes[0]; 2584 break; 2585 case 64: 2586 Opcode = Opcodes[1]; 2587 break; 2588 default: 2589 llvm_unreachable("bad vector element size in SelectMVE_WB"); 2590 } 2591 2592 Ops.push_back(N->getOperand(2)); // vector of base addresses 2593 2594 int32_t ImmValue = N->getConstantOperandVal(3); 2595 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset 2596 2597 if (Predicated) 2598 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2599 else 2600 AddEmptyMVEPredicateToOps(Ops, Loc); 2601 2602 Ops.push_back(N->getOperand(0)); // chain 2603 2604 SmallVector<EVT, 8> VTs; 2605 VTs.push_back(N->getValueType(1)); 2606 VTs.push_back(N->getValueType(0)); 2607 VTs.push_back(N->getValueType(2)); 2608 2609 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops); 2610 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 2611 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 2612 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 2613 transferMemOperands(N, New); 2614 CurDAG->RemoveDeadNode(N); 2615 } 2616 2617 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, 2618 bool Immediate, 2619 bool HasSaturationOperand) { 2620 SDLoc Loc(N); 2621 SmallVector<SDValue, 8> Ops; 2622 2623 // Two 32-bit halves of the value to be shifted 2624 Ops.push_back(N->getOperand(1)); 2625 Ops.push_back(N->getOperand(2)); 2626 2627 // The shift count 2628 if (Immediate) { 2629 int32_t ImmValue = N->getConstantOperandVal(3); 2630 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2631 } else { 2632 Ops.push_back(N->getOperand(3)); 2633 } 2634 2635 // The immediate saturation operand, if any 2636 if (HasSaturationOperand) { 2637 int32_t SatOp = N->getConstantOperandVal(4); 2638 int SatBit = (SatOp == 64 ? 0 : 1); 2639 Ops.push_back(getI32Imm(SatBit, Loc)); 2640 } 2641 2642 // MVE scalar shifts are IT-predicable, so include the standard 2643 // predicate arguments. 2644 Ops.push_back(getAL(CurDAG, Loc)); 2645 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2646 2647 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2648 } 2649 2650 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 2651 uint16_t OpcodeWithNoCarry, 2652 bool Add, bool Predicated) { 2653 SDLoc Loc(N); 2654 SmallVector<SDValue, 8> Ops; 2655 uint16_t Opcode; 2656 2657 unsigned FirstInputOp = Predicated ? 2 : 1; 2658 2659 // Two input vectors and the input carry flag 2660 Ops.push_back(N->getOperand(FirstInputOp)); 2661 Ops.push_back(N->getOperand(FirstInputOp + 1)); 2662 SDValue CarryIn = N->getOperand(FirstInputOp + 2); 2663 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn); 2664 uint32_t CarryMask = 1 << 29; 2665 uint32_t CarryExpected = Add ? 0 : CarryMask; 2666 if (CarryInConstant && 2667 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) { 2668 Opcode = OpcodeWithNoCarry; 2669 } else { 2670 Ops.push_back(CarryIn); 2671 Opcode = OpcodeWithCarry; 2672 } 2673 2674 if (Predicated) 2675 AddMVEPredicateToOps(Ops, Loc, 2676 N->getOperand(FirstInputOp + 3), // predicate 2677 N->getOperand(FirstInputOp - 1)); // inactive 2678 else 2679 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2680 2681 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2682 } 2683 2684 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) { 2685 SDLoc Loc(N); 2686 SmallVector<SDValue, 8> Ops; 2687 2688 // One vector input, followed by a 32-bit word of bits to shift in 2689 // and then an immediate shift count 2690 Ops.push_back(N->getOperand(1)); 2691 Ops.push_back(N->getOperand(2)); 2692 int32_t ImmValue = N->getConstantOperandVal(3); 2693 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2694 2695 if (Predicated) 2696 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2697 else 2698 AddEmptyMVEPredicateToOps(Ops, Loc); 2699 2700 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops)); 2701 } 2702 2703 static bool SDValueToConstBool(SDValue SDVal) { 2704 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant"); 2705 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal); 2706 uint64_t Value = SDValConstant->getZExtValue(); 2707 assert((Value == 0 || Value == 1) && "expected value 0 or 1"); 2708 return Value; 2709 } 2710 2711 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 2712 const uint16_t *OpcodesS, 2713 const uint16_t *OpcodesU, 2714 size_t Stride, size_t TySize) { 2715 assert(TySize < Stride && "Invalid TySize"); 2716 bool IsUnsigned = SDValueToConstBool(N->getOperand(1)); 2717 bool IsSub = SDValueToConstBool(N->getOperand(2)); 2718 bool IsExchange = SDValueToConstBool(N->getOperand(3)); 2719 if (IsUnsigned) { 2720 assert(!IsSub && 2721 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist"); 2722 assert(!IsExchange && 2723 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist"); 2724 } 2725 2726 auto OpIsZero = [N](size_t OpNo) { 2727 return isNullConstant(N->getOperand(OpNo)); 2728 }; 2729 2730 // If the input accumulator value is not zero, select an instruction with 2731 // accumulator, otherwise select an instruction without accumulator 2732 bool IsAccum = !(OpIsZero(4) && OpIsZero(5)); 2733 2734 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS; 2735 if (IsSub) 2736 Opcodes += 4 * Stride; 2737 if (IsExchange) 2738 Opcodes += 2 * Stride; 2739 if (IsAccum) 2740 Opcodes += Stride; 2741 uint16_t Opcode = Opcodes[TySize]; 2742 2743 SDLoc Loc(N); 2744 SmallVector<SDValue, 8> Ops; 2745 // Push the accumulator operands, if they are used 2746 if (IsAccum) { 2747 Ops.push_back(N->getOperand(4)); 2748 Ops.push_back(N->getOperand(5)); 2749 } 2750 // Push the two vector operands 2751 Ops.push_back(N->getOperand(6)); 2752 Ops.push_back(N->getOperand(7)); 2753 2754 if (Predicated) 2755 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8)); 2756 else 2757 AddEmptyMVEPredicateToOps(Ops, Loc); 2758 2759 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2760 } 2761 2762 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated, 2763 const uint16_t *OpcodesS, 2764 const uint16_t *OpcodesU) { 2765 EVT VecTy = N->getOperand(6).getValueType(); 2766 size_t SizeIndex; 2767 switch (VecTy.getVectorElementType().getSizeInBits()) { 2768 case 16: 2769 SizeIndex = 0; 2770 break; 2771 case 32: 2772 SizeIndex = 1; 2773 break; 2774 default: 2775 llvm_unreachable("bad vector element size"); 2776 } 2777 2778 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex); 2779 } 2780 2781 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, 2782 const uint16_t *OpcodesS, 2783 const uint16_t *OpcodesU) { 2784 assert( 2785 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() == 2786 32 && 2787 "bad vector element size"); 2788 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0); 2789 } 2790 2791 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs, 2792 const uint16_t *const *Opcodes, 2793 bool HasWriteback) { 2794 EVT VT = N->getValueType(0); 2795 SDLoc Loc(N); 2796 2797 const uint16_t *OurOpcodes; 2798 switch (VT.getVectorElementType().getSizeInBits()) { 2799 case 8: 2800 OurOpcodes = Opcodes[0]; 2801 break; 2802 case 16: 2803 OurOpcodes = Opcodes[1]; 2804 break; 2805 case 32: 2806 OurOpcodes = Opcodes[2]; 2807 break; 2808 default: 2809 llvm_unreachable("bad vector element size in SelectMVE_VLD"); 2810 } 2811 2812 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2); 2813 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other}; 2814 unsigned PtrOperand = HasWriteback ? 1 : 2; 2815 2816 auto Data = SDValue( 2817 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0); 2818 SDValue Chain = N->getOperand(0); 2819 // Add a MVE_VLDn instruction for each Vec, except the last 2820 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) { 2821 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2822 auto LoadInst = 2823 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops); 2824 Data = SDValue(LoadInst, 0); 2825 Chain = SDValue(LoadInst, 1); 2826 transferMemOperands(N, LoadInst); 2827 } 2828 // The last may need a writeback on it 2829 if (HasWriteback) 2830 ResultTys = {DataTy, MVT::i32, MVT::Other}; 2831 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2832 auto LoadInst = 2833 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops); 2834 transferMemOperands(N, LoadInst); 2835 2836 unsigned i; 2837 for (i = 0; i < NumVecs; i++) 2838 ReplaceUses(SDValue(N, i), 2839 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, 2840 SDValue(LoadInst, 0))); 2841 if (HasWriteback) 2842 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1)); 2843 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1)); 2844 CurDAG->RemoveDeadNode(N); 2845 } 2846 2847 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, 2848 bool Wrapping, bool Predicated) { 2849 EVT VT = N->getValueType(0); 2850 SDLoc Loc(N); 2851 2852 uint16_t Opcode; 2853 switch (VT.getScalarSizeInBits()) { 2854 case 8: 2855 Opcode = Opcodes[0]; 2856 break; 2857 case 16: 2858 Opcode = Opcodes[1]; 2859 break; 2860 case 32: 2861 Opcode = Opcodes[2]; 2862 break; 2863 default: 2864 llvm_unreachable("bad vector element size in SelectMVE_VxDUP"); 2865 } 2866 2867 SmallVector<SDValue, 8> Ops; 2868 unsigned OpIdx = 1; 2869 2870 SDValue Inactive; 2871 if (Predicated) 2872 Inactive = N->getOperand(OpIdx++); 2873 2874 Ops.push_back(N->getOperand(OpIdx++)); // base 2875 if (Wrapping) 2876 Ops.push_back(N->getOperand(OpIdx++)); // limit 2877 2878 SDValue ImmOp = N->getOperand(OpIdx++); // step 2879 int ImmValue = ImmOp->getAsZExtVal(); 2880 Ops.push_back(getI32Imm(ImmValue, Loc)); 2881 2882 if (Predicated) 2883 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive); 2884 else 2885 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2886 2887 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2888 } 2889 2890 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode, 2891 size_t NumExtraOps, bool HasAccum) { 2892 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian(); 2893 SDLoc Loc(N); 2894 SmallVector<SDValue, 8> Ops; 2895 2896 unsigned OpIdx = 1; 2897 2898 // Convert and append the immediate operand designating the coprocessor. 2899 SDValue ImmCorpoc = N->getOperand(OpIdx++); 2900 uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal(); 2901 Ops.push_back(getI32Imm(ImmCoprocVal, Loc)); 2902 2903 // For accumulating variants copy the low and high order parts of the 2904 // accumulator into a register pair and add it to the operand vector. 2905 if (HasAccum) { 2906 SDValue AccLo = N->getOperand(OpIdx++); 2907 SDValue AccHi = N->getOperand(OpIdx++); 2908 if (IsBigEndian) 2909 std::swap(AccLo, AccHi); 2910 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0)); 2911 } 2912 2913 // Copy extra operands as-is. 2914 for (size_t I = 0; I < NumExtraOps; I++) 2915 Ops.push_back(N->getOperand(OpIdx++)); 2916 2917 // Convert and append the immediate operand 2918 SDValue Imm = N->getOperand(OpIdx); 2919 uint32_t ImmVal = Imm->getAsZExtVal(); 2920 Ops.push_back(getI32Imm(ImmVal, Loc)); 2921 2922 // Accumulating variants are IT-predicable, add predicate operands. 2923 if (HasAccum) { 2924 SDValue Pred = getAL(CurDAG, Loc); 2925 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2926 Ops.push_back(Pred); 2927 Ops.push_back(PredReg); 2928 } 2929 2930 // Create the CDE intruction 2931 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops); 2932 SDValue ResultPair = SDValue(InstrNode, 0); 2933 2934 // The original intrinsic had two outputs, and the output of the dual-register 2935 // CDE instruction is a register pair. We need to extract the two subregisters 2936 // and replace all uses of the original outputs with the extracted 2937 // subregisters. 2938 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1}; 2939 if (IsBigEndian) 2940 std::swap(SubRegs[0], SubRegs[1]); 2941 2942 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) { 2943 if (SDValue(N, ResIdx).use_empty()) 2944 continue; 2945 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc, 2946 MVT::i32, ResultPair); 2947 ReplaceUses(SDValue(N, ResIdx), SubReg); 2948 } 2949 2950 CurDAG->RemoveDeadNode(N); 2951 } 2952 2953 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, 2954 bool isUpdating, unsigned NumVecs, 2955 const uint16_t *DOpcodes, 2956 const uint16_t *QOpcodes0, 2957 const uint16_t *QOpcodes1) { 2958 assert(Subtarget->hasNEON()); 2959 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2960 SDLoc dl(N); 2961 2962 SDValue MemAddr, Align; 2963 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2964 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2965 return; 2966 2967 SDValue Chain = N->getOperand(0); 2968 EVT VT = N->getValueType(0); 2969 bool is64BitVector = VT.is64BitVector(); 2970 2971 unsigned Alignment = 0; 2972 if (NumVecs != 3) { 2973 Alignment = Align->getAsZExtVal(); 2974 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2975 if (Alignment > NumBytes) 2976 Alignment = NumBytes; 2977 if (Alignment < 8 && Alignment < NumBytes) 2978 Alignment = 0; 2979 // Alignment must be a power of two; make sure of that. 2980 Alignment = (Alignment & -Alignment); 2981 if (Alignment == 1) 2982 Alignment = 0; 2983 } 2984 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2985 2986 unsigned OpcodeIndex; 2987 switch (VT.getSimpleVT().SimpleTy) { 2988 default: llvm_unreachable("unhandled vld-dup type"); 2989 case MVT::v8i8: 2990 case MVT::v16i8: OpcodeIndex = 0; break; 2991 case MVT::v4i16: 2992 case MVT::v8i16: 2993 case MVT::v4f16: 2994 case MVT::v8f16: 2995 case MVT::v4bf16: 2996 case MVT::v8bf16: 2997 OpcodeIndex = 1; break; 2998 case MVT::v2f32: 2999 case MVT::v2i32: 3000 case MVT::v4f32: 3001 case MVT::v4i32: OpcodeIndex = 2; break; 3002 case MVT::v1f64: 3003 case MVT::v1i64: OpcodeIndex = 3; break; 3004 } 3005 3006 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 3007 if (!is64BitVector) 3008 ResTyElts *= 2; 3009 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 3010 3011 std::vector<EVT> ResTys; 3012 ResTys.push_back(ResTy); 3013 if (isUpdating) 3014 ResTys.push_back(MVT::i32); 3015 ResTys.push_back(MVT::Other); 3016 3017 SDValue Pred = getAL(CurDAG, dl); 3018 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3019 3020 SmallVector<SDValue, 6> Ops; 3021 Ops.push_back(MemAddr); 3022 Ops.push_back(Align); 3023 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] 3024 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex] 3025 : QOpcodes1[OpcodeIndex]; 3026 if (isUpdating) { 3027 SDValue Inc = N->getOperand(2); 3028 bool IsImmUpdate = 3029 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 3030 if (IsImmUpdate) { 3031 if (!isVLDfixed(Opc)) 3032 Ops.push_back(Reg0); 3033 } else { 3034 if (isVLDfixed(Opc)) 3035 Opc = getVLDSTRegisterUpdateOpcode(Opc); 3036 Ops.push_back(Inc); 3037 } 3038 } 3039 if (is64BitVector || NumVecs == 1) { 3040 // Double registers and VLD1 quad registers are directly supported. 3041 } else { 3042 SDValue ImplDef = SDValue( 3043 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 3044 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain}; 3045 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy, 3046 MVT::Other, OpsA); 3047 Ops.push_back(SDValue(VLdA, 0)); 3048 Chain = SDValue(VLdA, 1); 3049 } 3050 3051 Ops.push_back(Pred); 3052 Ops.push_back(Reg0); 3053 Ops.push_back(Chain); 3054 3055 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 3056 3057 // Transfer memoperands. 3058 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3059 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp}); 3060 3061 // Extract the subregisters. 3062 if (NumVecs == 1) { 3063 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0)); 3064 } else { 3065 SDValue SuperReg = SDValue(VLdDup, 0); 3066 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); 3067 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 3068 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) { 3069 ReplaceUses(SDValue(N, Vec), 3070 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 3071 } 3072 } 3073 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 3074 if (isUpdating) 3075 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 3076 CurDAG->RemoveDeadNode(N); 3077 } 3078 3079 bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) { 3080 if (!Subtarget->hasMVEIntegerOps()) 3081 return false; 3082 3083 SDLoc dl(N); 3084 3085 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and 3086 // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent 3087 // inserts of the correct type: 3088 SDValue Ins1 = SDValue(N, 0); 3089 SDValue Ins2 = N->getOperand(0); 3090 EVT VT = Ins1.getValueType(); 3091 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() || 3092 !isa<ConstantSDNode>(Ins1.getOperand(2)) || 3093 !isa<ConstantSDNode>(Ins2.getOperand(2)) || 3094 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT)) 3095 return false; 3096 3097 unsigned Lane1 = Ins1.getConstantOperandVal(2); 3098 unsigned Lane2 = Ins2.getConstantOperandVal(2); 3099 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1) 3100 return false; 3101 3102 // If the inserted values will be able to use T/B already, leave it to the 3103 // existing tablegen patterns. For example VCVTT/VCVTB. 3104 SDValue Val1 = Ins1.getOperand(1); 3105 SDValue Val2 = Ins2.getOperand(1); 3106 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND) 3107 return false; 3108 3109 // Check if the inserted values are both extracts. 3110 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 3111 Val1.getOpcode() == ARMISD::VGETLANEu) && 3112 (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 3113 Val2.getOpcode() == ARMISD::VGETLANEu) && 3114 isa<ConstantSDNode>(Val1.getOperand(1)) && 3115 isa<ConstantSDNode>(Val2.getOperand(1)) && 3116 (Val1.getOperand(0).getValueType() == MVT::v8f16 || 3117 Val1.getOperand(0).getValueType() == MVT::v8i16) && 3118 (Val2.getOperand(0).getValueType() == MVT::v8f16 || 3119 Val2.getOperand(0).getValueType() == MVT::v8i16)) { 3120 unsigned ExtractLane1 = Val1.getConstantOperandVal(1); 3121 unsigned ExtractLane2 = Val2.getConstantOperandVal(1); 3122 3123 // If the two extracted lanes are from the same place and adjacent, this 3124 // simplifies into a f32 lane move. 3125 if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 && 3126 ExtractLane1 == ExtractLane2 + 1) { 3127 SDValue NewExt = CurDAG->getTargetExtractSubreg( 3128 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0)); 3129 SDValue NewIns = CurDAG->getTargetInsertSubreg( 3130 ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0), 3131 NewExt); 3132 ReplaceUses(Ins1, NewIns); 3133 return true; 3134 } 3135 3136 // Else v8i16 pattern of an extract and an insert, with a optional vmovx for 3137 // extracting odd lanes. 3138 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) { 3139 SDValue Inp1 = CurDAG->getTargetExtractSubreg( 3140 ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0)); 3141 SDValue Inp2 = CurDAG->getTargetExtractSubreg( 3142 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0)); 3143 if (ExtractLane1 % 2 != 0) 3144 Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0); 3145 if (ExtractLane2 % 2 != 0) 3146 Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0); 3147 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1); 3148 SDValue NewIns = 3149 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, 3150 Ins2.getOperand(0), SDValue(VINS, 0)); 3151 ReplaceUses(Ins1, NewIns); 3152 return true; 3153 } 3154 } 3155 3156 // The inserted values are not extracted - if they are f16 then insert them 3157 // directly using a VINS. 3158 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) { 3159 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1); 3160 SDValue NewIns = 3161 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, 3162 Ins2.getOperand(0), SDValue(VINS, 0)); 3163 ReplaceUses(Ins1, NewIns); 3164 return true; 3165 } 3166 3167 return false; 3168 } 3169 3170 bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N, 3171 SDNode *FMul, 3172 bool IsUnsigned, 3173 bool FixedToFloat) { 3174 auto Type = N->getValueType(0); 3175 unsigned ScalarBits = Type.getScalarSizeInBits(); 3176 if (ScalarBits > 32) 3177 return false; 3178 3179 SDNodeFlags FMulFlags = FMul->getFlags(); 3180 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3181 // allowed in 16 bit unsigned floats 3182 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned) 3183 return false; 3184 3185 SDValue ImmNode = FMul->getOperand(1); 3186 SDValue VecVal = FMul->getOperand(0); 3187 if (VecVal->getOpcode() == ISD::UINT_TO_FP || 3188 VecVal->getOpcode() == ISD::SINT_TO_FP) 3189 VecVal = VecVal->getOperand(0); 3190 3191 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits) 3192 return false; 3193 3194 if (ImmNode.getOpcode() == ISD::BITCAST) { 3195 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3196 return false; 3197 ImmNode = ImmNode.getOperand(0); 3198 } 3199 3200 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3201 return false; 3202 3203 APFloat ImmAPF(0.0f); 3204 switch (ImmNode.getOpcode()) { 3205 case ARMISD::VMOVIMM: 3206 case ARMISD::VDUP: { 3207 if (!isa<ConstantSDNode>(ImmNode.getOperand(0))) 3208 return false; 3209 unsigned Imm = ImmNode.getConstantOperandVal(0); 3210 if (ImmNode.getOpcode() == ARMISD::VMOVIMM) 3211 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits); 3212 ImmAPF = 3213 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(), 3214 APInt(ScalarBits, Imm)); 3215 break; 3216 } 3217 case ARMISD::VMOVFPIMM: { 3218 ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0))); 3219 break; 3220 } 3221 default: 3222 return false; 3223 } 3224 3225 // Where n is the number of fractional bits, multiplying by 2^n will convert 3226 // from float to fixed and multiplying by 2^-n will convert from fixed to 3227 // float. Taking log2 of the factor (after taking the inverse in the case of 3228 // float to fixed) will give n. 3229 APFloat ToConvert = ImmAPF; 3230 if (FixedToFloat) { 3231 if (!ImmAPF.getExactInverse(&ToConvert)) 3232 return false; 3233 } 3234 APSInt Converted(64, false); 3235 bool IsExact; 3236 ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, 3237 &IsExact); 3238 if (!IsExact || !Converted.isPowerOf2()) 3239 return false; 3240 3241 unsigned FracBits = Converted.logBase2(); 3242 if (FracBits > ScalarBits) 3243 return false; 3244 3245 SmallVector<SDValue, 3> Ops{ 3246 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)}; 3247 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type); 3248 3249 unsigned int Opcode; 3250 switch (ScalarBits) { 3251 case 16: 3252 if (FixedToFloat) 3253 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; 3254 else 3255 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3256 break; 3257 case 32: 3258 if (FixedToFloat) 3259 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; 3260 else 3261 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3262 break; 3263 default: 3264 llvm_unreachable("unexpected number of scalar bits"); 3265 break; 3266 } 3267 3268 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops)); 3269 return true; 3270 } 3271 3272 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) { 3273 // Transform a floating-point to fixed-point conversion to a VCVT 3274 if (!Subtarget->hasMVEFloatOps()) 3275 return false; 3276 EVT Type = N->getValueType(0); 3277 if (!Type.isVector()) 3278 return false; 3279 unsigned int ScalarBits = Type.getScalarSizeInBits(); 3280 3281 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT || 3282 N->getOpcode() == ISD::FP_TO_UINT_SAT; 3283 SDNode *Node = N->getOperand(0).getNode(); 3284 3285 // floating-point to fixed-point with one fractional bit gets turned into an 3286 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y)) 3287 if (Node->getOpcode() == ISD::FADD) { 3288 if (Node->getOperand(0) != Node->getOperand(1)) 3289 return false; 3290 SDNodeFlags Flags = Node->getFlags(); 3291 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3292 // allowed in 16 bit unsigned floats 3293 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned) 3294 return false; 3295 3296 unsigned Opcode; 3297 switch (ScalarBits) { 3298 case 16: 3299 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3300 break; 3301 case 32: 3302 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3303 break; 3304 } 3305 SmallVector<SDValue, 3> Ops{Node->getOperand(0), 3306 CurDAG->getConstant(1, dl, MVT::i32)}; 3307 AddEmptyMVEPredicateToOps(Ops, dl, Type); 3308 3309 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops)); 3310 return true; 3311 } 3312 3313 if (Node->getOpcode() != ISD::FMUL) 3314 return false; 3315 3316 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false); 3317 } 3318 3319 bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { 3320 // Transform a fixed-point to floating-point conversion to a VCVT 3321 if (!Subtarget->hasMVEFloatOps()) 3322 return false; 3323 auto Type = N->getValueType(0); 3324 if (!Type.isVector()) 3325 return false; 3326 3327 auto LHS = N->getOperand(0); 3328 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP) 3329 return false; 3330 3331 return transformFixedFloatingPointConversion( 3332 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true); 3333 } 3334 3335 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 3336 if (!Subtarget->hasV6T2Ops()) 3337 return false; 3338 3339 unsigned Opc = isSigned 3340 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 3341 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 3342 SDLoc dl(N); 3343 3344 // For unsigned extracts, check for a shift right and mask 3345 unsigned And_imm = 0; 3346 if (N->getOpcode() == ISD::AND) { 3347 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 3348 3349 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 3350 if (And_imm & (And_imm + 1)) 3351 return false; 3352 3353 unsigned Srl_imm = 0; 3354 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 3355 Srl_imm)) { 3356 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3357 3358 // Mask off the unnecessary bits of the AND immediate; normally 3359 // DAGCombine will do this, but that might not happen if 3360 // targetShrinkDemandedConstant chooses a different immediate. 3361 And_imm &= -1U >> Srl_imm; 3362 3363 // Note: The width operand is encoded as width-1. 3364 unsigned Width = llvm::countr_one(And_imm) - 1; 3365 unsigned LSB = Srl_imm; 3366 3367 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3368 3369 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 3370 // It's cheaper to use a right shift to extract the top bits. 3371 if (Subtarget->isThumb()) { 3372 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 3373 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3374 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3375 getAL(CurDAG, dl), Reg0, Reg0 }; 3376 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3377 return true; 3378 } 3379 3380 // ARM models shift instructions as MOVsi with shifter operand. 3381 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 3382 SDValue ShOpc = 3383 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 3384 MVT::i32); 3385 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 3386 getAL(CurDAG, dl), Reg0, Reg0 }; 3387 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 3388 return true; 3389 } 3390 3391 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3392 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3393 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3394 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3395 getAL(CurDAG, dl), Reg0 }; 3396 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3397 return true; 3398 } 3399 } 3400 return false; 3401 } 3402 3403 // Otherwise, we're looking for a shift of a shift 3404 unsigned Shl_imm = 0; 3405 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 3406 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 3407 unsigned Srl_imm = 0; 3408 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 3409 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3410 // Note: The width operand is encoded as width-1. 3411 unsigned Width = 32 - Srl_imm - 1; 3412 int LSB = Srl_imm - Shl_imm; 3413 if (LSB < 0) 3414 return false; 3415 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3416 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3417 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3418 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3419 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3420 getAL(CurDAG, dl), Reg0 }; 3421 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3422 return true; 3423 } 3424 } 3425 3426 // Or we are looking for a shift of an and, with a mask operand 3427 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 3428 isShiftedMask_32(And_imm)) { 3429 unsigned Srl_imm = 0; 3430 unsigned LSB = llvm::countr_zero(And_imm); 3431 // Shift must be the same as the ands lsb 3432 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 3433 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3434 unsigned MSB = llvm::Log2_32(And_imm); 3435 // Note: The width operand is encoded as width-1. 3436 unsigned Width = MSB - LSB; 3437 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3438 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3439 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3440 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), 3441 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3442 getAL(CurDAG, dl), Reg0 }; 3443 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3444 return true; 3445 } 3446 } 3447 3448 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 3449 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 3450 unsigned LSB = 0; 3451 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 3452 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 3453 return false; 3454 3455 if (LSB + Width > 32) 3456 return false; 3457 3458 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3459 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx"); 3460 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3461 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3462 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 3463 getAL(CurDAG, dl), Reg0 }; 3464 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3465 return true; 3466 } 3467 3468 return false; 3469 } 3470 3471 /// Target-specific DAG combining for ISD::SUB. 3472 /// Target-independent combining lowers SELECT_CC nodes of the form 3473 /// select_cc setg[ge] X, 0, X, -X 3474 /// select_cc setgt X, -1, X, -X 3475 /// select_cc setl[te] X, 0, -X, X 3476 /// select_cc setlt X, 1, -X, X 3477 /// which represent Integer ABS into: 3478 /// Y = sra (X, size(X)-1); sub (xor (X, Y), Y) 3479 /// ARM instruction selection detects the latter and matches it to 3480 /// ARM::ABS or ARM::t2ABS machine node. 3481 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ 3482 SDValue SUBSrc0 = N->getOperand(0); 3483 SDValue SUBSrc1 = N->getOperand(1); 3484 EVT VT = N->getValueType(0); 3485 3486 if (Subtarget->isThumb1Only()) 3487 return false; 3488 3489 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA) 3490 return false; 3491 3492 SDValue XORSrc0 = SUBSrc0.getOperand(0); 3493 SDValue XORSrc1 = SUBSrc0.getOperand(1); 3494 SDValue SRASrc0 = SUBSrc1.getOperand(0); 3495 SDValue SRASrc1 = SUBSrc1.getOperand(1); 3496 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 3497 EVT XType = SRASrc0.getValueType(); 3498 unsigned Size = XType.getSizeInBits() - 1; 3499 3500 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() && 3501 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) { 3502 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 3503 CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0); 3504 return true; 3505 } 3506 3507 return false; 3508 } 3509 3510 /// We've got special pseudo-instructions for these 3511 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3512 unsigned Opcode; 3513 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3514 if (MemTy == MVT::i8) 3515 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8; 3516 else if (MemTy == MVT::i16) 3517 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16; 3518 else if (MemTy == MVT::i32) 3519 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32; 3520 else 3521 llvm_unreachable("Unknown AtomicCmpSwap type"); 3522 3523 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3524 N->getOperand(0)}; 3525 SDNode *CmpSwap = CurDAG->getMachineNode( 3526 Opcode, SDLoc(N), 3527 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 3528 3529 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3530 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3531 3532 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3533 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3534 CurDAG->RemoveDeadNode(N); 3535 } 3536 3537 static std::optional<std::pair<unsigned, unsigned>> 3538 getContiguousRangeOfSetBits(const APInt &A) { 3539 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1; 3540 unsigned LastOne = A.countr_zero(); 3541 if (A.popcount() != (FirstOne - LastOne + 1)) 3542 return std::nullopt; 3543 return std::make_pair(FirstOne, LastOne); 3544 } 3545 3546 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { 3547 assert(N->getOpcode() == ARMISD::CMPZ); 3548 SwitchEQNEToPLMI = false; 3549 3550 if (!Subtarget->isThumb()) 3551 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and 3552 // LSR don't exist as standalone instructions - they need the barrel shifter. 3553 return; 3554 3555 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 3556 SDValue And = N->getOperand(0); 3557 if (!And->hasOneUse()) 3558 return; 3559 3560 SDValue Zero = N->getOperand(1); 3561 if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND) 3562 return; 3563 SDValue X = And.getOperand(0); 3564 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 3565 3566 if (!C) 3567 return; 3568 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 3569 if (!Range) 3570 return; 3571 3572 // There are several ways to lower this: 3573 SDNode *NewN; 3574 SDLoc dl(N); 3575 3576 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 3577 if (Subtarget->isThumb2()) { 3578 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri; 3579 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3580 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3581 CurDAG->getRegister(0, MVT::i32) }; 3582 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3583 } else { 3584 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 3585 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3586 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3587 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3588 } 3589 }; 3590 3591 if (Range->second == 0) { 3592 // 1. Mask includes the LSB -> Simply shift the top N bits off 3593 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3594 ReplaceNode(And.getNode(), NewN); 3595 } else if (Range->first == 31) { 3596 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 3597 NewN = EmitShift(ARM::tLSRri, X, Range->second); 3598 ReplaceNode(And.getNode(), NewN); 3599 } else if (Range->first == Range->second) { 3600 // 3. Only one bit is set. We can shift this into the sign bit and use a 3601 // PL/MI comparison. 3602 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3603 ReplaceNode(And.getNode(), NewN); 3604 3605 SwitchEQNEToPLMI = true; 3606 } else if (!Subtarget->hasV6T2Ops()) { 3607 // 4. Do a double shift to clear bottom and top bits, but only in 3608 // thumb-1 mode as in thumb-2 we can use UBFX. 3609 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3610 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 3611 Range->second + (31 - Range->first)); 3612 ReplaceNode(And.getNode(), NewN); 3613 } 3614 } 3615 3616 static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3], 3617 unsigned Opc128[3]) { 3618 assert((VT.is64BitVector() || VT.is128BitVector()) && 3619 "Unexpected vector shuffle length"); 3620 switch (VT.getScalarSizeInBits()) { 3621 default: 3622 llvm_unreachable("Unexpected vector shuffle element size"); 3623 case 8: 3624 return VT.is64BitVector() ? Opc64[0] : Opc128[0]; 3625 case 16: 3626 return VT.is64BitVector() ? Opc64[1] : Opc128[1]; 3627 case 32: 3628 return VT.is64BitVector() ? Opc64[2] : Opc128[2]; 3629 } 3630 } 3631 3632 void ARMDAGToDAGISel::Select(SDNode *N) { 3633 SDLoc dl(N); 3634 3635 if (N->isMachineOpcode()) { 3636 N->setNodeId(-1); 3637 return; // Already selected. 3638 } 3639 3640 switch (N->getOpcode()) { 3641 default: break; 3642 case ISD::STORE: { 3643 // For Thumb1, match an sp-relative store in C++. This is a little 3644 // unfortunate, but I don't think I can make the chain check work 3645 // otherwise. (The chain of the store has to be the same as the chain 3646 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3647 // a direct reference to "SP".) 3648 // 3649 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3650 // a different addressing mode from other four-byte stores. 3651 // 3652 // This pattern usually comes up with call arguments. 3653 StoreSDNode *ST = cast<StoreSDNode>(N); 3654 SDValue Ptr = ST->getBasePtr(); 3655 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3656 int RHSC = 0; 3657 if (Ptr.getOpcode() == ISD::ADD && 3658 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3659 Ptr = Ptr.getOperand(0); 3660 3661 if (Ptr.getOpcode() == ISD::CopyFromReg && 3662 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3663 Ptr.getOperand(0) == ST->getChain()) { 3664 SDValue Ops[] = {ST->getValue(), 3665 CurDAG->getRegister(ARM::SP, MVT::i32), 3666 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3667 getAL(CurDAG, dl), 3668 CurDAG->getRegister(0, MVT::i32), 3669 ST->getChain()}; 3670 MachineSDNode *ResNode = 3671 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3672 MachineMemOperand *MemOp = ST->getMemOperand(); 3673 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3674 ReplaceNode(N, ResNode); 3675 return; 3676 } 3677 } 3678 break; 3679 } 3680 case ISD::WRITE_REGISTER: 3681 if (tryWriteRegister(N)) 3682 return; 3683 break; 3684 case ISD::READ_REGISTER: 3685 if (tryReadRegister(N)) 3686 return; 3687 break; 3688 case ISD::INLINEASM: 3689 case ISD::INLINEASM_BR: 3690 if (tryInlineAsm(N)) 3691 return; 3692 break; 3693 case ISD::SUB: 3694 // Select special operations if SUB node forms integer ABS pattern 3695 if (tryABSOp(N)) 3696 return; 3697 // Other cases are autogenerated. 3698 break; 3699 case ISD::Constant: { 3700 unsigned Val = N->getAsZExtVal(); 3701 // If we can't materialize the constant we need to use a literal pool 3702 if (ConstantMaterializationCost(Val, Subtarget) > 2 && 3703 !Subtarget->genExecuteOnly()) { 3704 SDValue CPIdx = CurDAG->getTargetConstantPool( 3705 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3706 TLI->getPointerTy(CurDAG->getDataLayout())); 3707 3708 SDNode *ResNode; 3709 if (Subtarget->isThumb()) { 3710 SDValue Ops[] = { 3711 CPIdx, 3712 getAL(CurDAG, dl), 3713 CurDAG->getRegister(0, MVT::i32), 3714 CurDAG->getEntryNode() 3715 }; 3716 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3717 Ops); 3718 } else { 3719 SDValue Ops[] = { 3720 CPIdx, 3721 CurDAG->getTargetConstant(0, dl, MVT::i32), 3722 getAL(CurDAG, dl), 3723 CurDAG->getRegister(0, MVT::i32), 3724 CurDAG->getEntryNode() 3725 }; 3726 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3727 Ops); 3728 } 3729 // Annotate the Node with memory operand information so that MachineInstr 3730 // queries work properly. This e.g. gives the register allocation the 3731 // required information for rematerialization. 3732 MachineFunction& MF = CurDAG->getMachineFunction(); 3733 MachineMemOperand *MemOp = 3734 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3735 MachineMemOperand::MOLoad, 4, Align(4)); 3736 3737 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3738 3739 ReplaceNode(N, ResNode); 3740 return; 3741 } 3742 3743 // Other cases are autogenerated. 3744 break; 3745 } 3746 case ISD::FrameIndex: { 3747 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 3748 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3749 SDValue TFI = CurDAG->getTargetFrameIndex( 3750 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3751 if (Subtarget->isThumb1Only()) { 3752 // Set the alignment of the frame object to 4, to avoid having to generate 3753 // more than one ADD 3754 MachineFrameInfo &MFI = MF->getFrameInfo(); 3755 if (MFI.getObjectAlign(FI) < Align(4)) 3756 MFI.setObjectAlignment(FI, Align(4)); 3757 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3758 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3759 return; 3760 } else { 3761 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3762 ARM::t2ADDri : ARM::ADDri); 3763 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3764 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3765 CurDAG->getRegister(0, MVT::i32) }; 3766 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3767 return; 3768 } 3769 } 3770 case ISD::INSERT_VECTOR_ELT: { 3771 if (tryInsertVectorElt(N)) 3772 return; 3773 break; 3774 } 3775 case ISD::SRL: 3776 if (tryV6T2BitfieldExtractOp(N, false)) 3777 return; 3778 break; 3779 case ISD::SIGN_EXTEND_INREG: 3780 case ISD::SRA: 3781 if (tryV6T2BitfieldExtractOp(N, true)) 3782 return; 3783 break; 3784 case ISD::FP_TO_UINT: 3785 case ISD::FP_TO_SINT: 3786 case ISD::FP_TO_UINT_SAT: 3787 case ISD::FP_TO_SINT_SAT: 3788 if (tryFP_TO_INT(N, dl)) 3789 return; 3790 break; 3791 case ISD::FMUL: 3792 if (tryFMULFixed(N, dl)) 3793 return; 3794 break; 3795 case ISD::MUL: 3796 if (Subtarget->isThumb1Only()) 3797 break; 3798 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3799 unsigned RHSV = C->getZExtValue(); 3800 if (!RHSV) break; 3801 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 3802 unsigned ShImm = Log2_32(RHSV-1); 3803 if (ShImm >= 32) 3804 break; 3805 SDValue V = N->getOperand(0); 3806 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3807 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3808 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3809 if (Subtarget->isThumb()) { 3810 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3811 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3812 return; 3813 } else { 3814 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3815 Reg0 }; 3816 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3817 return; 3818 } 3819 } 3820 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3821 unsigned ShImm = Log2_32(RHSV+1); 3822 if (ShImm >= 32) 3823 break; 3824 SDValue V = N->getOperand(0); 3825 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3826 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3827 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3828 if (Subtarget->isThumb()) { 3829 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3830 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3831 return; 3832 } else { 3833 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3834 Reg0 }; 3835 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3836 return; 3837 } 3838 } 3839 } 3840 break; 3841 case ISD::AND: { 3842 // Check for unsigned bitfield extract 3843 if (tryV6T2BitfieldExtractOp(N, false)) 3844 return; 3845 3846 // If an immediate is used in an AND node, it is possible that the immediate 3847 // can be more optimally materialized when negated. If this is the case we 3848 // can negate the immediate and use a BIC instead. 3849 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3850 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3851 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3852 3853 // In Thumb2 mode, an AND can take a 12-bit immediate. If this 3854 // immediate can be negated and fit in the immediate operand of 3855 // a t2BIC, don't do any manual transform here as this can be 3856 // handled by the generic ISel machinery. 3857 bool PreferImmediateEncoding = 3858 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3859 if (!PreferImmediateEncoding && 3860 ConstantMaterializationCost(Imm, Subtarget) > 3861 ConstantMaterializationCost(~Imm, Subtarget)) { 3862 // The current immediate costs more to materialize than a negated 3863 // immediate, so negate the immediate and use a BIC. 3864 SDValue NewImm = 3865 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 3866 // If the new constant didn't exist before, reposition it in the topological 3867 // ordering so it is just before N. Otherwise, don't touch its location. 3868 if (NewImm->getNodeId() == -1) 3869 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3870 3871 if (!Subtarget->hasThumb2()) { 3872 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3873 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3874 CurDAG->getRegister(0, MVT::i32)}; 3875 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3876 return; 3877 } else { 3878 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3879 CurDAG->getRegister(0, MVT::i32), 3880 CurDAG->getRegister(0, MVT::i32)}; 3881 ReplaceNode(N, 3882 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3883 return; 3884 } 3885 } 3886 } 3887 3888 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3889 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 3890 // are entirely contributed by c2 and lower 16-bits are entirely contributed 3891 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 3892 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 3893 EVT VT = N->getValueType(0); 3894 if (VT != MVT::i32) 3895 break; 3896 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3897 ? ARM::t2MOVTi16 3898 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 3899 if (!Opc) 3900 break; 3901 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3902 N1C = dyn_cast<ConstantSDNode>(N1); 3903 if (!N1C) 3904 break; 3905 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3906 SDValue N2 = N0.getOperand(1); 3907 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3908 if (!N2C) 3909 break; 3910 unsigned N1CVal = N1C->getZExtValue(); 3911 unsigned N2CVal = N2C->getZExtValue(); 3912 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3913 (N1CVal & 0xffffU) == 0xffffU && 3914 (N2CVal & 0xffffU) == 0x0U) { 3915 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3916 dl, MVT::i32); 3917 SDValue Ops[] = { N0.getOperand(0), Imm16, 3918 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3919 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3920 return; 3921 } 3922 } 3923 3924 break; 3925 } 3926 case ARMISD::UMAAL: { 3927 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3928 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3929 N->getOperand(2), N->getOperand(3), 3930 getAL(CurDAG, dl), 3931 CurDAG->getRegister(0, MVT::i32) }; 3932 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3933 return; 3934 } 3935 case ARMISD::UMLAL:{ 3936 if (Subtarget->isThumb()) { 3937 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3938 N->getOperand(3), getAL(CurDAG, dl), 3939 CurDAG->getRegister(0, MVT::i32)}; 3940 ReplaceNode( 3941 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3942 return; 3943 }else{ 3944 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3945 N->getOperand(3), getAL(CurDAG, dl), 3946 CurDAG->getRegister(0, MVT::i32), 3947 CurDAG->getRegister(0, MVT::i32) }; 3948 ReplaceNode(N, CurDAG->getMachineNode( 3949 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl, 3950 MVT::i32, MVT::i32, Ops)); 3951 return; 3952 } 3953 } 3954 case ARMISD::SMLAL:{ 3955 if (Subtarget->isThumb()) { 3956 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3957 N->getOperand(3), getAL(CurDAG, dl), 3958 CurDAG->getRegister(0, MVT::i32)}; 3959 ReplaceNode( 3960 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3961 return; 3962 }else{ 3963 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3964 N->getOperand(3), getAL(CurDAG, dl), 3965 CurDAG->getRegister(0, MVT::i32), 3966 CurDAG->getRegister(0, MVT::i32) }; 3967 ReplaceNode(N, CurDAG->getMachineNode( 3968 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3969 MVT::i32, MVT::i32, Ops)); 3970 return; 3971 } 3972 } 3973 case ARMISD::SUBE: { 3974 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3975 break; 3976 // Look for a pattern to match SMMLS 3977 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3978 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3979 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3980 !SDValue(N, 1).use_empty()) 3981 break; 3982 3983 if (Subtarget->isThumb()) 3984 assert(Subtarget->hasThumb2() && 3985 "This pattern should not be generated for Thumb"); 3986 3987 SDValue SmulLoHi = N->getOperand(1); 3988 SDValue Subc = N->getOperand(2); 3989 SDValue Zero = Subc.getOperand(0); 3990 3991 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) || 3992 N->getOperand(1) != SmulLoHi.getValue(1) || 3993 N->getOperand(2) != Subc.getValue(1)) 3994 break; 3995 3996 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS; 3997 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3998 N->getOperand(0), getAL(CurDAG, dl), 3999 CurDAG->getRegister(0, MVT::i32) }; 4000 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 4001 return; 4002 } 4003 case ISD::LOAD: { 4004 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 4005 return; 4006 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 4007 if (tryT2IndexedLoad(N)) 4008 return; 4009 } else if (Subtarget->isThumb()) { 4010 if (tryT1IndexedLoad(N)) 4011 return; 4012 } else if (tryARMIndexedLoad(N)) 4013 return; 4014 // Other cases are autogenerated. 4015 break; 4016 } 4017 case ISD::MLOAD: 4018 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 4019 return; 4020 // Other cases are autogenerated. 4021 break; 4022 case ARMISD::WLSSETUP: { 4023 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, 4024 N->getOperand(0)); 4025 ReplaceUses(N, New); 4026 CurDAG->RemoveDeadNode(N); 4027 return; 4028 } 4029 case ARMISD::WLS: { 4030 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, 4031 N->getOperand(1), N->getOperand(2), 4032 N->getOperand(0)); 4033 ReplaceUses(N, New); 4034 CurDAG->RemoveDeadNode(N); 4035 return; 4036 } 4037 case ARMISD::LE: { 4038 SDValue Ops[] = { N->getOperand(1), 4039 N->getOperand(2), 4040 N->getOperand(0) }; 4041 unsigned Opc = ARM::t2LoopEnd; 4042 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 4043 ReplaceUses(N, New); 4044 CurDAG->RemoveDeadNode(N); 4045 return; 4046 } 4047 case ARMISD::LDRD: { 4048 if (Subtarget->isThumb2()) 4049 break; // TableGen handles isel in this case. 4050 SDValue Base, RegOffset, ImmOffset; 4051 const SDValue &Chain = N->getOperand(0); 4052 const SDValue &Addr = N->getOperand(1); 4053 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4054 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4055 // The register-offset variant of LDRD mandates that the register 4056 // allocated to RegOffset is not reused in any of the remaining operands. 4057 // This restriction is currently not enforced. Therefore emitting this 4058 // variant is explicitly avoided. 4059 Base = Addr; 4060 RegOffset = CurDAG->getRegister(0, MVT::i32); 4061 } 4062 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 4063 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 4064 {MVT::Untyped, MVT::Other}, Ops); 4065 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4066 SDValue(New, 0)); 4067 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4068 SDValue(New, 0)); 4069 transferMemOperands(N, New); 4070 ReplaceUses(SDValue(N, 0), Lo); 4071 ReplaceUses(SDValue(N, 1), Hi); 4072 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 4073 CurDAG->RemoveDeadNode(N); 4074 return; 4075 } 4076 case ARMISD::STRD: { 4077 if (Subtarget->isThumb2()) 4078 break; // TableGen handles isel in this case. 4079 SDValue Base, RegOffset, ImmOffset; 4080 const SDValue &Chain = N->getOperand(0); 4081 const SDValue &Addr = N->getOperand(3); 4082 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4083 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4084 // The register-offset variant of STRD mandates that the register 4085 // allocated to RegOffset is not reused in any of the remaining operands. 4086 // This restriction is currently not enforced. Therefore emitting this 4087 // variant is explicitly avoided. 4088 Base = Addr; 4089 RegOffset = CurDAG->getRegister(0, MVT::i32); 4090 } 4091 SDNode *RegPair = 4092 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 4093 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 4094 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 4095 transferMemOperands(N, New); 4096 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 4097 CurDAG->RemoveDeadNode(N); 4098 return; 4099 } 4100 case ARMISD::LOOP_DEC: { 4101 SDValue Ops[] = { N->getOperand(1), 4102 N->getOperand(2), 4103 N->getOperand(0) }; 4104 SDNode *Dec = 4105 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4106 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 4107 ReplaceUses(N, Dec); 4108 CurDAG->RemoveDeadNode(N); 4109 return; 4110 } 4111 case ARMISD::BRCOND: { 4112 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4113 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4114 // Pattern complexity = 6 cost = 1 size = 0 4115 4116 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4117 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 4118 // Pattern complexity = 6 cost = 1 size = 0 4119 4120 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4121 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4122 // Pattern complexity = 6 cost = 1 size = 0 4123 4124 unsigned Opc = Subtarget->isThumb() ? 4125 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 4126 SDValue Chain = N->getOperand(0); 4127 SDValue N1 = N->getOperand(1); 4128 SDValue N2 = N->getOperand(2); 4129 SDValue N3 = N->getOperand(3); 4130 SDValue InGlue = N->getOperand(4); 4131 assert(N1.getOpcode() == ISD::BasicBlock); 4132 assert(N2.getOpcode() == ISD::Constant); 4133 assert(N3.getOpcode() == ISD::Register); 4134 4135 unsigned CC = (unsigned)N2->getAsZExtVal(); 4136 4137 if (InGlue.getOpcode() == ARMISD::CMPZ) { 4138 if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 4139 SDValue Int = InGlue.getOperand(0); 4140 uint64_t ID = Int->getConstantOperandVal(1); 4141 4142 // Handle low-overhead loops. 4143 if (ID == Intrinsic::loop_decrement_reg) { 4144 SDValue Elements = Int.getOperand(2); 4145 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3), 4146 dl, MVT::i32); 4147 4148 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 4149 SDNode *LoopDec = 4150 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4151 CurDAG->getVTList(MVT::i32, MVT::Other), 4152 Args); 4153 ReplaceUses(Int.getNode(), LoopDec); 4154 4155 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 4156 SDNode *LoopEnd = 4157 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 4158 4159 ReplaceUses(N, LoopEnd); 4160 CurDAG->RemoveDeadNode(N); 4161 CurDAG->RemoveDeadNode(InGlue.getNode()); 4162 CurDAG->RemoveDeadNode(Int.getNode()); 4163 return; 4164 } 4165 } 4166 4167 bool SwitchEQNEToPLMI; 4168 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI); 4169 InGlue = N->getOperand(4); 4170 4171 if (SwitchEQNEToPLMI) { 4172 switch ((ARMCC::CondCodes)CC) { 4173 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4174 case ARMCC::NE: 4175 CC = (unsigned)ARMCC::MI; 4176 break; 4177 case ARMCC::EQ: 4178 CC = (unsigned)ARMCC::PL; 4179 break; 4180 } 4181 } 4182 } 4183 4184 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 4185 SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue }; 4186 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 4187 MVT::Glue, Ops); 4188 Chain = SDValue(ResNode, 0); 4189 if (N->getNumValues() == 2) { 4190 InGlue = SDValue(ResNode, 1); 4191 ReplaceUses(SDValue(N, 1), InGlue); 4192 } 4193 ReplaceUses(SDValue(N, 0), 4194 SDValue(Chain.getNode(), Chain.getResNo())); 4195 CurDAG->RemoveDeadNode(N); 4196 return; 4197 } 4198 4199 case ARMISD::CMPZ: { 4200 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 4201 // This allows us to avoid materializing the expensive negative constant. 4202 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 4203 // for its glue output. 4204 SDValue X = N->getOperand(0); 4205 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 4206 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 4207 int64_t Addend = -C->getSExtValue(); 4208 4209 SDNode *Add = nullptr; 4210 // ADDS can be better than CMN if the immediate fits in a 4211 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 4212 // Outside that range we can just use a CMN which is 32-bit but has a 4213 // 12-bit immediate range. 4214 if (Addend < 1<<8) { 4215 if (Subtarget->isThumb2()) { 4216 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4217 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 4218 CurDAG->getRegister(0, MVT::i32) }; 4219 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 4220 } else { 4221 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8; 4222 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 4223 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4224 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 4225 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 4226 } 4227 } 4228 if (Add) { 4229 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 4230 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 4231 } 4232 } 4233 // Other cases are autogenerated. 4234 break; 4235 } 4236 4237 case ARMISD::CMOV: { 4238 SDValue InGlue = N->getOperand(4); 4239 4240 if (InGlue.getOpcode() == ARMISD::CMPZ) { 4241 bool SwitchEQNEToPLMI; 4242 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI); 4243 4244 if (SwitchEQNEToPLMI) { 4245 SDValue ARMcc = N->getOperand(2); 4246 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal(); 4247 4248 switch (CC) { 4249 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4250 case ARMCC::NE: 4251 CC = ARMCC::MI; 4252 break; 4253 case ARMCC::EQ: 4254 CC = ARMCC::PL; 4255 break; 4256 } 4257 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 4258 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 4259 N->getOperand(3), N->getOperand(4)}; 4260 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 4261 } 4262 4263 } 4264 // Other cases are autogenerated. 4265 break; 4266 } 4267 case ARMISD::VZIP: { 4268 EVT VT = N->getValueType(0); 4269 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4270 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32}; 4271 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32}; 4272 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4273 SDValue Pred = getAL(CurDAG, dl); 4274 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4275 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4276 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4277 return; 4278 } 4279 case ARMISD::VUZP: { 4280 EVT VT = N->getValueType(0); 4281 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4282 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32}; 4283 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32}; 4284 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4285 SDValue Pred = getAL(CurDAG, dl); 4286 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4287 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4288 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4289 return; 4290 } 4291 case ARMISD::VTRN: { 4292 EVT VT = N->getValueType(0); 4293 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32}; 4294 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32}; 4295 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4296 SDValue Pred = getAL(CurDAG, dl); 4297 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4298 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4299 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4300 return; 4301 } 4302 case ARMISD::BUILD_VECTOR: { 4303 EVT VecVT = N->getValueType(0); 4304 EVT EltVT = VecVT.getVectorElementType(); 4305 unsigned NumElts = VecVT.getVectorNumElements(); 4306 if (EltVT == MVT::f64) { 4307 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 4308 ReplaceNode( 4309 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4310 return; 4311 } 4312 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 4313 if (NumElts == 2) { 4314 ReplaceNode( 4315 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4316 return; 4317 } 4318 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 4319 ReplaceNode(N, 4320 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 4321 N->getOperand(2), N->getOperand(3))); 4322 return; 4323 } 4324 4325 case ARMISD::VLD1DUP: { 4326 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4327 ARM::VLD1DUPd32 }; 4328 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4329 ARM::VLD1DUPq32 }; 4330 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4331 return; 4332 } 4333 4334 case ARMISD::VLD2DUP: { 4335 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4336 ARM::VLD2DUPd32 }; 4337 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4338 return; 4339 } 4340 4341 case ARMISD::VLD3DUP: { 4342 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4343 ARM::VLD3DUPd16Pseudo, 4344 ARM::VLD3DUPd32Pseudo }; 4345 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4346 return; 4347 } 4348 4349 case ARMISD::VLD4DUP: { 4350 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4351 ARM::VLD4DUPd16Pseudo, 4352 ARM::VLD4DUPd32Pseudo }; 4353 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4354 return; 4355 } 4356 4357 case ARMISD::VLD1DUP_UPD: { 4358 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4359 ARM::VLD1DUPd16wb_fixed, 4360 ARM::VLD1DUPd32wb_fixed }; 4361 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4362 ARM::VLD1DUPq16wb_fixed, 4363 ARM::VLD1DUPq32wb_fixed }; 4364 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4365 return; 4366 } 4367 4368 case ARMISD::VLD2DUP_UPD: { 4369 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed, 4370 ARM::VLD2DUPd16wb_fixed, 4371 ARM::VLD2DUPd32wb_fixed, 4372 ARM::VLD1q64wb_fixed }; 4373 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4374 ARM::VLD2DUPq16EvenPseudo, 4375 ARM::VLD2DUPq32EvenPseudo }; 4376 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed, 4377 ARM::VLD2DUPq16OddPseudoWB_fixed, 4378 ARM::VLD2DUPq32OddPseudoWB_fixed }; 4379 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1); 4380 return; 4381 } 4382 4383 case ARMISD::VLD3DUP_UPD: { 4384 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4385 ARM::VLD3DUPd16Pseudo_UPD, 4386 ARM::VLD3DUPd32Pseudo_UPD, 4387 ARM::VLD1d64TPseudoWB_fixed }; 4388 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4389 ARM::VLD3DUPq16EvenPseudo, 4390 ARM::VLD3DUPq32EvenPseudo }; 4391 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD, 4392 ARM::VLD3DUPq16OddPseudo_UPD, 4393 ARM::VLD3DUPq32OddPseudo_UPD }; 4394 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4395 return; 4396 } 4397 4398 case ARMISD::VLD4DUP_UPD: { 4399 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4400 ARM::VLD4DUPd16Pseudo_UPD, 4401 ARM::VLD4DUPd32Pseudo_UPD, 4402 ARM::VLD1d64QPseudoWB_fixed }; 4403 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4404 ARM::VLD4DUPq16EvenPseudo, 4405 ARM::VLD4DUPq32EvenPseudo }; 4406 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD, 4407 ARM::VLD4DUPq16OddPseudo_UPD, 4408 ARM::VLD4DUPq32OddPseudo_UPD }; 4409 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4410 return; 4411 } 4412 4413 case ARMISD::VLD1_UPD: { 4414 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4415 ARM::VLD1d16wb_fixed, 4416 ARM::VLD1d32wb_fixed, 4417 ARM::VLD1d64wb_fixed }; 4418 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4419 ARM::VLD1q16wb_fixed, 4420 ARM::VLD1q32wb_fixed, 4421 ARM::VLD1q64wb_fixed }; 4422 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4423 return; 4424 } 4425 4426 case ARMISD::VLD2_UPD: { 4427 if (Subtarget->hasNEON()) { 4428 static const uint16_t DOpcodes[] = { 4429 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4430 ARM::VLD1q64wb_fixed}; 4431 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4432 ARM::VLD2q16PseudoWB_fixed, 4433 ARM::VLD2q32PseudoWB_fixed}; 4434 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4435 } else { 4436 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4437 ARM::MVE_VLD21_8_wb}; 4438 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4439 ARM::MVE_VLD21_16_wb}; 4440 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4441 ARM::MVE_VLD21_32_wb}; 4442 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4443 SelectMVE_VLD(N, 2, Opcodes, true); 4444 } 4445 return; 4446 } 4447 4448 case ARMISD::VLD3_UPD: { 4449 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4450 ARM::VLD3d16Pseudo_UPD, 4451 ARM::VLD3d32Pseudo_UPD, 4452 ARM::VLD1d64TPseudoWB_fixed}; 4453 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4454 ARM::VLD3q16Pseudo_UPD, 4455 ARM::VLD3q32Pseudo_UPD }; 4456 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4457 ARM::VLD3q16oddPseudo_UPD, 4458 ARM::VLD3q32oddPseudo_UPD }; 4459 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4460 return; 4461 } 4462 4463 case ARMISD::VLD4_UPD: { 4464 if (Subtarget->hasNEON()) { 4465 static const uint16_t DOpcodes[] = { 4466 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4467 ARM::VLD1d64QPseudoWB_fixed}; 4468 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4469 ARM::VLD4q16Pseudo_UPD, 4470 ARM::VLD4q32Pseudo_UPD}; 4471 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4472 ARM::VLD4q16oddPseudo_UPD, 4473 ARM::VLD4q32oddPseudo_UPD}; 4474 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4475 } else { 4476 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4477 ARM::MVE_VLD42_8, 4478 ARM::MVE_VLD43_8_wb}; 4479 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4480 ARM::MVE_VLD42_16, 4481 ARM::MVE_VLD43_16_wb}; 4482 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4483 ARM::MVE_VLD42_32, 4484 ARM::MVE_VLD43_32_wb}; 4485 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4486 SelectMVE_VLD(N, 4, Opcodes, true); 4487 } 4488 return; 4489 } 4490 4491 case ARMISD::VLD1x2_UPD: { 4492 if (Subtarget->hasNEON()) { 4493 static const uint16_t DOpcodes[] = { 4494 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, 4495 ARM::VLD1q64wb_fixed}; 4496 static const uint16_t QOpcodes[] = { 4497 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4498 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4499 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4500 return; 4501 } 4502 break; 4503 } 4504 4505 case ARMISD::VLD1x3_UPD: { 4506 if (Subtarget->hasNEON()) { 4507 static const uint16_t DOpcodes[] = { 4508 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed, 4509 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed}; 4510 static const uint16_t QOpcodes0[] = { 4511 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD, 4512 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD}; 4513 static const uint16_t QOpcodes1[] = { 4514 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD, 4515 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD}; 4516 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4517 return; 4518 } 4519 break; 4520 } 4521 4522 case ARMISD::VLD1x4_UPD: { 4523 if (Subtarget->hasNEON()) { 4524 static const uint16_t DOpcodes[] = { 4525 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4526 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4527 static const uint16_t QOpcodes0[] = { 4528 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD, 4529 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD}; 4530 static const uint16_t QOpcodes1[] = { 4531 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD, 4532 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD}; 4533 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4534 return; 4535 } 4536 break; 4537 } 4538 4539 case ARMISD::VLD2LN_UPD: { 4540 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4541 ARM::VLD2LNd16Pseudo_UPD, 4542 ARM::VLD2LNd32Pseudo_UPD }; 4543 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4544 ARM::VLD2LNq32Pseudo_UPD }; 4545 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4546 return; 4547 } 4548 4549 case ARMISD::VLD3LN_UPD: { 4550 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4551 ARM::VLD3LNd16Pseudo_UPD, 4552 ARM::VLD3LNd32Pseudo_UPD }; 4553 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4554 ARM::VLD3LNq32Pseudo_UPD }; 4555 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4556 return; 4557 } 4558 4559 case ARMISD::VLD4LN_UPD: { 4560 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4561 ARM::VLD4LNd16Pseudo_UPD, 4562 ARM::VLD4LNd32Pseudo_UPD }; 4563 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4564 ARM::VLD4LNq32Pseudo_UPD }; 4565 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4566 return; 4567 } 4568 4569 case ARMISD::VST1_UPD: { 4570 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 4571 ARM::VST1d16wb_fixed, 4572 ARM::VST1d32wb_fixed, 4573 ARM::VST1d64wb_fixed }; 4574 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4575 ARM::VST1q16wb_fixed, 4576 ARM::VST1q32wb_fixed, 4577 ARM::VST1q64wb_fixed }; 4578 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4579 return; 4580 } 4581 4582 case ARMISD::VST2_UPD: { 4583 if (Subtarget->hasNEON()) { 4584 static const uint16_t DOpcodes[] = { 4585 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4586 ARM::VST1q64wb_fixed}; 4587 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4588 ARM::VST2q16PseudoWB_fixed, 4589 ARM::VST2q32PseudoWB_fixed}; 4590 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4591 return; 4592 } 4593 break; 4594 } 4595 4596 case ARMISD::VST3_UPD: { 4597 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 4598 ARM::VST3d16Pseudo_UPD, 4599 ARM::VST3d32Pseudo_UPD, 4600 ARM::VST1d64TPseudoWB_fixed}; 4601 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4602 ARM::VST3q16Pseudo_UPD, 4603 ARM::VST3q32Pseudo_UPD }; 4604 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4605 ARM::VST3q16oddPseudo_UPD, 4606 ARM::VST3q32oddPseudo_UPD }; 4607 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4608 return; 4609 } 4610 4611 case ARMISD::VST4_UPD: { 4612 if (Subtarget->hasNEON()) { 4613 static const uint16_t DOpcodes[] = { 4614 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4615 ARM::VST1d64QPseudoWB_fixed}; 4616 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4617 ARM::VST4q16Pseudo_UPD, 4618 ARM::VST4q32Pseudo_UPD}; 4619 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4620 ARM::VST4q16oddPseudo_UPD, 4621 ARM::VST4q32oddPseudo_UPD}; 4622 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4623 return; 4624 } 4625 break; 4626 } 4627 4628 case ARMISD::VST1x2_UPD: { 4629 if (Subtarget->hasNEON()) { 4630 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed, 4631 ARM::VST1q16wb_fixed, 4632 ARM::VST1q32wb_fixed, 4633 ARM::VST1q64wb_fixed}; 4634 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4635 ARM::VST1d16QPseudoWB_fixed, 4636 ARM::VST1d32QPseudoWB_fixed, 4637 ARM::VST1d64QPseudoWB_fixed }; 4638 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4639 return; 4640 } 4641 break; 4642 } 4643 4644 case ARMISD::VST1x3_UPD: { 4645 if (Subtarget->hasNEON()) { 4646 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed, 4647 ARM::VST1d16TPseudoWB_fixed, 4648 ARM::VST1d32TPseudoWB_fixed, 4649 ARM::VST1d64TPseudoWB_fixed }; 4650 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4651 ARM::VST1q16LowTPseudo_UPD, 4652 ARM::VST1q32LowTPseudo_UPD, 4653 ARM::VST1q64LowTPseudo_UPD }; 4654 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD, 4655 ARM::VST1q16HighTPseudo_UPD, 4656 ARM::VST1q32HighTPseudo_UPD, 4657 ARM::VST1q64HighTPseudo_UPD }; 4658 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4659 return; 4660 } 4661 break; 4662 } 4663 4664 case ARMISD::VST1x4_UPD: { 4665 if (Subtarget->hasNEON()) { 4666 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4667 ARM::VST1d16QPseudoWB_fixed, 4668 ARM::VST1d32QPseudoWB_fixed, 4669 ARM::VST1d64QPseudoWB_fixed }; 4670 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4671 ARM::VST1q16LowQPseudo_UPD, 4672 ARM::VST1q32LowQPseudo_UPD, 4673 ARM::VST1q64LowQPseudo_UPD }; 4674 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD, 4675 ARM::VST1q16HighQPseudo_UPD, 4676 ARM::VST1q32HighQPseudo_UPD, 4677 ARM::VST1q64HighQPseudo_UPD }; 4678 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4679 return; 4680 } 4681 break; 4682 } 4683 case ARMISD::VST2LN_UPD: { 4684 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 4685 ARM::VST2LNd16Pseudo_UPD, 4686 ARM::VST2LNd32Pseudo_UPD }; 4687 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 4688 ARM::VST2LNq32Pseudo_UPD }; 4689 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 4690 return; 4691 } 4692 4693 case ARMISD::VST3LN_UPD: { 4694 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 4695 ARM::VST3LNd16Pseudo_UPD, 4696 ARM::VST3LNd32Pseudo_UPD }; 4697 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 4698 ARM::VST3LNq32Pseudo_UPD }; 4699 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 4700 return; 4701 } 4702 4703 case ARMISD::VST4LN_UPD: { 4704 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 4705 ARM::VST4LNd16Pseudo_UPD, 4706 ARM::VST4LNd32Pseudo_UPD }; 4707 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 4708 ARM::VST4LNq32Pseudo_UPD }; 4709 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 4710 return; 4711 } 4712 4713 case ISD::INTRINSIC_VOID: 4714 case ISD::INTRINSIC_W_CHAIN: { 4715 unsigned IntNo = N->getConstantOperandVal(1); 4716 switch (IntNo) { 4717 default: 4718 break; 4719 4720 case Intrinsic::arm_mrrc: 4721 case Intrinsic::arm_mrrc2: { 4722 SDLoc dl(N); 4723 SDValue Chain = N->getOperand(0); 4724 unsigned Opc; 4725 4726 if (Subtarget->isThumb()) 4727 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 4728 else 4729 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 4730 4731 SmallVector<SDValue, 5> Ops; 4732 Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */ 4733 Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */ 4734 Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */ 4735 4736 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 4737 // instruction will always be '1111' but it is possible in assembly language to specify 4738 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 4739 if (Opc != ARM::MRRC2) { 4740 Ops.push_back(getAL(CurDAG, dl)); 4741 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4742 } 4743 4744 Ops.push_back(Chain); 4745 4746 // Writes to two registers. 4747 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 4748 4749 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 4750 return; 4751 } 4752 case Intrinsic::arm_ldaexd: 4753 case Intrinsic::arm_ldrexd: { 4754 SDLoc dl(N); 4755 SDValue Chain = N->getOperand(0); 4756 SDValue MemAddr = N->getOperand(2); 4757 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 4758 4759 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 4760 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 4761 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 4762 4763 // arm_ldrexd returns a i64 value in {i32, i32} 4764 std::vector<EVT> ResTys; 4765 if (isThumb) { 4766 ResTys.push_back(MVT::i32); 4767 ResTys.push_back(MVT::i32); 4768 } else 4769 ResTys.push_back(MVT::Untyped); 4770 ResTys.push_back(MVT::Other); 4771 4772 // Place arguments in the right order. 4773 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 4774 CurDAG->getRegister(0, MVT::i32), Chain}; 4775 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4776 // Transfer memoperands. 4777 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4778 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4779 4780 // Remap uses. 4781 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 4782 if (!SDValue(N, 0).use_empty()) { 4783 SDValue Result; 4784 if (isThumb) 4785 Result = SDValue(Ld, 0); 4786 else { 4787 SDValue SubRegIdx = 4788 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 4789 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4790 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4791 Result = SDValue(ResNode,0); 4792 } 4793 ReplaceUses(SDValue(N, 0), Result); 4794 } 4795 if (!SDValue(N, 1).use_empty()) { 4796 SDValue Result; 4797 if (isThumb) 4798 Result = SDValue(Ld, 1); 4799 else { 4800 SDValue SubRegIdx = 4801 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 4802 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4803 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4804 Result = SDValue(ResNode,0); 4805 } 4806 ReplaceUses(SDValue(N, 1), Result); 4807 } 4808 ReplaceUses(SDValue(N, 2), OutChain); 4809 CurDAG->RemoveDeadNode(N); 4810 return; 4811 } 4812 case Intrinsic::arm_stlexd: 4813 case Intrinsic::arm_strexd: { 4814 SDLoc dl(N); 4815 SDValue Chain = N->getOperand(0); 4816 SDValue Val0 = N->getOperand(2); 4817 SDValue Val1 = N->getOperand(3); 4818 SDValue MemAddr = N->getOperand(4); 4819 4820 // Store exclusive double return a i32 value which is the return status 4821 // of the issued store. 4822 const EVT ResTys[] = {MVT::i32, MVT::Other}; 4823 4824 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 4825 // Place arguments in the right order. 4826 SmallVector<SDValue, 7> Ops; 4827 if (isThumb) { 4828 Ops.push_back(Val0); 4829 Ops.push_back(Val1); 4830 } else 4831 // arm_strexd uses GPRPair. 4832 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 4833 Ops.push_back(MemAddr); 4834 Ops.push_back(getAL(CurDAG, dl)); 4835 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4836 Ops.push_back(Chain); 4837 4838 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 4839 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 4840 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 4841 4842 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4843 // Transfer memoperands. 4844 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4845 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4846 4847 ReplaceNode(N, St); 4848 return; 4849 } 4850 4851 case Intrinsic::arm_neon_vld1: { 4852 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 4853 ARM::VLD1d32, ARM::VLD1d64 }; 4854 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4855 ARM::VLD1q32, ARM::VLD1q64}; 4856 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 4857 return; 4858 } 4859 4860 case Intrinsic::arm_neon_vld1x2: { 4861 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4862 ARM::VLD1q32, ARM::VLD1q64 }; 4863 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 4864 ARM::VLD1d16QPseudo, 4865 ARM::VLD1d32QPseudo, 4866 ARM::VLD1d64QPseudo }; 4867 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4868 return; 4869 } 4870 4871 case Intrinsic::arm_neon_vld1x3: { 4872 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 4873 ARM::VLD1d16TPseudo, 4874 ARM::VLD1d32TPseudo, 4875 ARM::VLD1d64TPseudo }; 4876 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD, 4877 ARM::VLD1q16LowTPseudo_UPD, 4878 ARM::VLD1q32LowTPseudo_UPD, 4879 ARM::VLD1q64LowTPseudo_UPD }; 4880 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 4881 ARM::VLD1q16HighTPseudo, 4882 ARM::VLD1q32HighTPseudo, 4883 ARM::VLD1q64HighTPseudo }; 4884 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4885 return; 4886 } 4887 4888 case Intrinsic::arm_neon_vld1x4: { 4889 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 4890 ARM::VLD1d16QPseudo, 4891 ARM::VLD1d32QPseudo, 4892 ARM::VLD1d64QPseudo }; 4893 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 4894 ARM::VLD1q16LowQPseudo_UPD, 4895 ARM::VLD1q32LowQPseudo_UPD, 4896 ARM::VLD1q64LowQPseudo_UPD }; 4897 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 4898 ARM::VLD1q16HighQPseudo, 4899 ARM::VLD1q32HighQPseudo, 4900 ARM::VLD1q64HighQPseudo }; 4901 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4902 return; 4903 } 4904 4905 case Intrinsic::arm_neon_vld2: { 4906 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 4907 ARM::VLD2d32, ARM::VLD1q64 }; 4908 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 4909 ARM::VLD2q32Pseudo }; 4910 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4911 return; 4912 } 4913 4914 case Intrinsic::arm_neon_vld3: { 4915 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 4916 ARM::VLD3d16Pseudo, 4917 ARM::VLD3d32Pseudo, 4918 ARM::VLD1d64TPseudo }; 4919 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4920 ARM::VLD3q16Pseudo_UPD, 4921 ARM::VLD3q32Pseudo_UPD }; 4922 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 4923 ARM::VLD3q16oddPseudo, 4924 ARM::VLD3q32oddPseudo }; 4925 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4926 return; 4927 } 4928 4929 case Intrinsic::arm_neon_vld4: { 4930 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 4931 ARM::VLD4d16Pseudo, 4932 ARM::VLD4d32Pseudo, 4933 ARM::VLD1d64QPseudo }; 4934 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 4935 ARM::VLD4q16Pseudo_UPD, 4936 ARM::VLD4q32Pseudo_UPD }; 4937 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 4938 ARM::VLD4q16oddPseudo, 4939 ARM::VLD4q32oddPseudo }; 4940 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4941 return; 4942 } 4943 4944 case Intrinsic::arm_neon_vld2dup: { 4945 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4946 ARM::VLD2DUPd32, ARM::VLD1q64 }; 4947 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4948 ARM::VLD2DUPq16EvenPseudo, 4949 ARM::VLD2DUPq32EvenPseudo }; 4950 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 4951 ARM::VLD2DUPq16OddPseudo, 4952 ARM::VLD2DUPq32OddPseudo }; 4953 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 4954 DOpcodes, QOpcodes0, QOpcodes1); 4955 return; 4956 } 4957 4958 case Intrinsic::arm_neon_vld3dup: { 4959 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 4960 ARM::VLD3DUPd16Pseudo, 4961 ARM::VLD3DUPd32Pseudo, 4962 ARM::VLD1d64TPseudo }; 4963 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4964 ARM::VLD3DUPq16EvenPseudo, 4965 ARM::VLD3DUPq32EvenPseudo }; 4966 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 4967 ARM::VLD3DUPq16OddPseudo, 4968 ARM::VLD3DUPq32OddPseudo }; 4969 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 4970 DOpcodes, QOpcodes0, QOpcodes1); 4971 return; 4972 } 4973 4974 case Intrinsic::arm_neon_vld4dup: { 4975 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 4976 ARM::VLD4DUPd16Pseudo, 4977 ARM::VLD4DUPd32Pseudo, 4978 ARM::VLD1d64QPseudo }; 4979 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4980 ARM::VLD4DUPq16EvenPseudo, 4981 ARM::VLD4DUPq32EvenPseudo }; 4982 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 4983 ARM::VLD4DUPq16OddPseudo, 4984 ARM::VLD4DUPq32OddPseudo }; 4985 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 4986 DOpcodes, QOpcodes0, QOpcodes1); 4987 return; 4988 } 4989 4990 case Intrinsic::arm_neon_vld2lane: { 4991 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 4992 ARM::VLD2LNd16Pseudo, 4993 ARM::VLD2LNd32Pseudo }; 4994 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 4995 ARM::VLD2LNq32Pseudo }; 4996 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 4997 return; 4998 } 4999 5000 case Intrinsic::arm_neon_vld3lane: { 5001 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 5002 ARM::VLD3LNd16Pseudo, 5003 ARM::VLD3LNd32Pseudo }; 5004 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 5005 ARM::VLD3LNq32Pseudo }; 5006 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 5007 return; 5008 } 5009 5010 case Intrinsic::arm_neon_vld4lane: { 5011 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 5012 ARM::VLD4LNd16Pseudo, 5013 ARM::VLD4LNd32Pseudo }; 5014 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 5015 ARM::VLD4LNq32Pseudo }; 5016 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 5017 return; 5018 } 5019 5020 case Intrinsic::arm_neon_vst1: { 5021 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 5022 ARM::VST1d32, ARM::VST1d64 }; 5023 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 5024 ARM::VST1q32, ARM::VST1q64 }; 5025 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 5026 return; 5027 } 5028 5029 case Intrinsic::arm_neon_vst1x2: { 5030 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 5031 ARM::VST1q32, ARM::VST1q64 }; 5032 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 5033 ARM::VST1d16QPseudo, 5034 ARM::VST1d32QPseudo, 5035 ARM::VST1d64QPseudo }; 5036 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 5037 return; 5038 } 5039 5040 case Intrinsic::arm_neon_vst1x3: { 5041 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 5042 ARM::VST1d16TPseudo, 5043 ARM::VST1d32TPseudo, 5044 ARM::VST1d64TPseudo }; 5045 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 5046 ARM::VST1q16LowTPseudo_UPD, 5047 ARM::VST1q32LowTPseudo_UPD, 5048 ARM::VST1q64LowTPseudo_UPD }; 5049 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 5050 ARM::VST1q16HighTPseudo, 5051 ARM::VST1q32HighTPseudo, 5052 ARM::VST1q64HighTPseudo }; 5053 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 5054 return; 5055 } 5056 5057 case Intrinsic::arm_neon_vst1x4: { 5058 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 5059 ARM::VST1d16QPseudo, 5060 ARM::VST1d32QPseudo, 5061 ARM::VST1d64QPseudo }; 5062 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 5063 ARM::VST1q16LowQPseudo_UPD, 5064 ARM::VST1q32LowQPseudo_UPD, 5065 ARM::VST1q64LowQPseudo_UPD }; 5066 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 5067 ARM::VST1q16HighQPseudo, 5068 ARM::VST1q32HighQPseudo, 5069 ARM::VST1q64HighQPseudo }; 5070 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 5071 return; 5072 } 5073 5074 case Intrinsic::arm_neon_vst2: { 5075 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 5076 ARM::VST2d32, ARM::VST1q64 }; 5077 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 5078 ARM::VST2q32Pseudo }; 5079 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 5080 return; 5081 } 5082 5083 case Intrinsic::arm_neon_vst3: { 5084 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 5085 ARM::VST3d16Pseudo, 5086 ARM::VST3d32Pseudo, 5087 ARM::VST1d64TPseudo }; 5088 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 5089 ARM::VST3q16Pseudo_UPD, 5090 ARM::VST3q32Pseudo_UPD }; 5091 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 5092 ARM::VST3q16oddPseudo, 5093 ARM::VST3q32oddPseudo }; 5094 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 5095 return; 5096 } 5097 5098 case Intrinsic::arm_neon_vst4: { 5099 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 5100 ARM::VST4d16Pseudo, 5101 ARM::VST4d32Pseudo, 5102 ARM::VST1d64QPseudo }; 5103 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 5104 ARM::VST4q16Pseudo_UPD, 5105 ARM::VST4q32Pseudo_UPD }; 5106 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 5107 ARM::VST4q16oddPseudo, 5108 ARM::VST4q32oddPseudo }; 5109 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 5110 return; 5111 } 5112 5113 case Intrinsic::arm_neon_vst2lane: { 5114 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 5115 ARM::VST2LNd16Pseudo, 5116 ARM::VST2LNd32Pseudo }; 5117 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 5118 ARM::VST2LNq32Pseudo }; 5119 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 5120 return; 5121 } 5122 5123 case Intrinsic::arm_neon_vst3lane: { 5124 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 5125 ARM::VST3LNd16Pseudo, 5126 ARM::VST3LNd32Pseudo }; 5127 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 5128 ARM::VST3LNq32Pseudo }; 5129 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 5130 return; 5131 } 5132 5133 case Intrinsic::arm_neon_vst4lane: { 5134 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 5135 ARM::VST4LNd16Pseudo, 5136 ARM::VST4LNd32Pseudo }; 5137 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 5138 ARM::VST4LNq32Pseudo }; 5139 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 5140 return; 5141 } 5142 5143 case Intrinsic::arm_mve_vldr_gather_base_wb: 5144 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: { 5145 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre, 5146 ARM::MVE_VLDRDU64_qi_pre}; 5147 SelectMVE_WB(N, Opcodes, 5148 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated); 5149 return; 5150 } 5151 5152 case Intrinsic::arm_mve_vld2q: { 5153 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8}; 5154 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 5155 ARM::MVE_VLD21_16}; 5156 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 5157 ARM::MVE_VLD21_32}; 5158 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 5159 SelectMVE_VLD(N, 2, Opcodes, false); 5160 return; 5161 } 5162 5163 case Intrinsic::arm_mve_vld4q: { 5164 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 5165 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8}; 5166 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 5167 ARM::MVE_VLD42_16, 5168 ARM::MVE_VLD43_16}; 5169 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 5170 ARM::MVE_VLD42_32, 5171 ARM::MVE_VLD43_32}; 5172 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 5173 SelectMVE_VLD(N, 4, Opcodes, false); 5174 return; 5175 } 5176 } 5177 break; 5178 } 5179 5180 case ISD::INTRINSIC_WO_CHAIN: { 5181 unsigned IntNo = N->getConstantOperandVal(0); 5182 switch (IntNo) { 5183 default: 5184 break; 5185 5186 // Scalar f32 -> bf16 5187 case Intrinsic::arm_neon_vcvtbfp2bf: { 5188 SDLoc dl(N); 5189 const SDValue &Src = N->getOperand(1); 5190 llvm::EVT DestTy = N->getValueType(0); 5191 SDValue Pred = getAL(CurDAG, dl); 5192 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 5193 SDValue Ops[] = { Src, Src, Pred, Reg0 }; 5194 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops); 5195 return; 5196 } 5197 5198 // Vector v4f32 -> v4bf16 5199 case Intrinsic::arm_neon_vcvtfp2bf: { 5200 SDLoc dl(N); 5201 const SDValue &Src = N->getOperand(1); 5202 SDValue Pred = getAL(CurDAG, dl); 5203 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 5204 SDValue Ops[] = { Src, Pred, Reg0 }; 5205 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops); 5206 return; 5207 } 5208 5209 case Intrinsic::arm_mve_urshrl: 5210 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false); 5211 return; 5212 case Intrinsic::arm_mve_uqshll: 5213 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false); 5214 return; 5215 case Intrinsic::arm_mve_srshrl: 5216 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false); 5217 return; 5218 case Intrinsic::arm_mve_sqshll: 5219 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false); 5220 return; 5221 case Intrinsic::arm_mve_uqrshll: 5222 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true); 5223 return; 5224 case Intrinsic::arm_mve_sqrshrl: 5225 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true); 5226 return; 5227 5228 case Intrinsic::arm_mve_vadc: 5229 case Intrinsic::arm_mve_vadc_predicated: 5230 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true, 5231 IntNo == Intrinsic::arm_mve_vadc_predicated); 5232 return; 5233 case Intrinsic::arm_mve_vsbc: 5234 case Intrinsic::arm_mve_vsbc_predicated: 5235 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true, 5236 IntNo == Intrinsic::arm_mve_vsbc_predicated); 5237 return; 5238 case Intrinsic::arm_mve_vshlc: 5239 case Intrinsic::arm_mve_vshlc_predicated: 5240 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated); 5241 return; 5242 5243 case Intrinsic::arm_mve_vmlldava: 5244 case Intrinsic::arm_mve_vmlldava_predicated: { 5245 static const uint16_t OpcodesU[] = { 5246 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32, 5247 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32, 5248 }; 5249 static const uint16_t OpcodesS[] = { 5250 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32, 5251 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32, 5252 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32, 5253 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32, 5254 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32, 5255 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32, 5256 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32, 5257 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32, 5258 }; 5259 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated, 5260 OpcodesS, OpcodesU); 5261 return; 5262 } 5263 5264 case Intrinsic::arm_mve_vrmlldavha: 5265 case Intrinsic::arm_mve_vrmlldavha_predicated: { 5266 static const uint16_t OpcodesU[] = { 5267 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32, 5268 }; 5269 static const uint16_t OpcodesS[] = { 5270 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32, 5271 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32, 5272 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32, 5273 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32, 5274 }; 5275 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated, 5276 OpcodesS, OpcodesU); 5277 return; 5278 } 5279 5280 case Intrinsic::arm_mve_vidup: 5281 case Intrinsic::arm_mve_vidup_predicated: { 5282 static const uint16_t Opcodes[] = { 5283 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32, 5284 }; 5285 SelectMVE_VxDUP(N, Opcodes, false, 5286 IntNo == Intrinsic::arm_mve_vidup_predicated); 5287 return; 5288 } 5289 5290 case Intrinsic::arm_mve_vddup: 5291 case Intrinsic::arm_mve_vddup_predicated: { 5292 static const uint16_t Opcodes[] = { 5293 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32, 5294 }; 5295 SelectMVE_VxDUP(N, Opcodes, false, 5296 IntNo == Intrinsic::arm_mve_vddup_predicated); 5297 return; 5298 } 5299 5300 case Intrinsic::arm_mve_viwdup: 5301 case Intrinsic::arm_mve_viwdup_predicated: { 5302 static const uint16_t Opcodes[] = { 5303 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32, 5304 }; 5305 SelectMVE_VxDUP(N, Opcodes, true, 5306 IntNo == Intrinsic::arm_mve_viwdup_predicated); 5307 return; 5308 } 5309 5310 case Intrinsic::arm_mve_vdwdup: 5311 case Intrinsic::arm_mve_vdwdup_predicated: { 5312 static const uint16_t Opcodes[] = { 5313 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32, 5314 }; 5315 SelectMVE_VxDUP(N, Opcodes, true, 5316 IntNo == Intrinsic::arm_mve_vdwdup_predicated); 5317 return; 5318 } 5319 5320 case Intrinsic::arm_cde_cx1d: 5321 case Intrinsic::arm_cde_cx1da: 5322 case Intrinsic::arm_cde_cx2d: 5323 case Intrinsic::arm_cde_cx2da: 5324 case Intrinsic::arm_cde_cx3d: 5325 case Intrinsic::arm_cde_cx3da: { 5326 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da || 5327 IntNo == Intrinsic::arm_cde_cx2da || 5328 IntNo == Intrinsic::arm_cde_cx3da; 5329 size_t NumExtraOps; 5330 uint16_t Opcode; 5331 switch (IntNo) { 5332 case Intrinsic::arm_cde_cx1d: 5333 case Intrinsic::arm_cde_cx1da: 5334 NumExtraOps = 0; 5335 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D; 5336 break; 5337 case Intrinsic::arm_cde_cx2d: 5338 case Intrinsic::arm_cde_cx2da: 5339 NumExtraOps = 1; 5340 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D; 5341 break; 5342 case Intrinsic::arm_cde_cx3d: 5343 case Intrinsic::arm_cde_cx3da: 5344 NumExtraOps = 2; 5345 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D; 5346 break; 5347 default: 5348 llvm_unreachable("Unexpected opcode"); 5349 } 5350 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum); 5351 return; 5352 } 5353 } 5354 break; 5355 } 5356 5357 case ISD::ATOMIC_CMP_SWAP: 5358 SelectCMP_SWAP(N); 5359 return; 5360 } 5361 5362 SelectCode(N); 5363 } 5364 5365 // Inspect a register string of the form 5366 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 5367 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 5368 // and obtain the integer operands from them, adding these operands to the 5369 // provided vector. 5370 static void getIntOperandsFromRegisterString(StringRef RegString, 5371 SelectionDAG *CurDAG, 5372 const SDLoc &DL, 5373 std::vector<SDValue> &Ops) { 5374 SmallVector<StringRef, 5> Fields; 5375 RegString.split(Fields, ':'); 5376 5377 if (Fields.size() > 1) { 5378 bool AllIntFields = true; 5379 5380 for (StringRef Field : Fields) { 5381 // Need to trim out leading 'cp' characters and get the integer field. 5382 unsigned IntField; 5383 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 5384 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 5385 } 5386 5387 assert(AllIntFields && 5388 "Unexpected non-integer value in special register string."); 5389 (void)AllIntFields; 5390 } 5391 } 5392 5393 // Maps a Banked Register string to its mask value. The mask value returned is 5394 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 5395 // mask operand, which expresses which register is to be used, e.g. r8, and in 5396 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 5397 // was invalid. 5398 static inline int getBankedRegisterMask(StringRef RegString) { 5399 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower()); 5400 if (!TheReg) 5401 return -1; 5402 return TheReg->Encoding; 5403 } 5404 5405 // The flags here are common to those allowed for apsr in the A class cores and 5406 // those allowed for the special registers in the M class cores. Returns a 5407 // value representing which flags were present, -1 if invalid. 5408 static inline int getMClassFlagsMask(StringRef Flags) { 5409 return StringSwitch<int>(Flags) 5410 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is 5411 // correct when flags are not permitted 5412 .Case("g", 0x1) 5413 .Case("nzcvq", 0x2) 5414 .Case("nzcvqg", 0x3) 5415 .Default(-1); 5416 } 5417 5418 // Maps MClass special registers string to its value for use in the 5419 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand. 5420 // Returns -1 to signify that the string was invalid. 5421 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) { 5422 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg); 5423 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits(); 5424 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits)) 5425 return -1; 5426 return (int)(TheReg->Encoding & 0xFFF); // SYSm value 5427 } 5428 5429 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { 5430 // The mask operand contains the special register (R Bit) in bit 4, whether 5431 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and 5432 // bits 3-0 contains the fields to be accessed in the special register, set by 5433 // the flags provided with the register. 5434 int Mask = 0; 5435 if (Reg == "apsr") { 5436 // The flags permitted for apsr are the same flags that are allowed in 5437 // M class registers. We get the flag value and then shift the flags into 5438 // the correct place to combine with the mask. 5439 Mask = getMClassFlagsMask(Flags); 5440 if (Mask == -1) 5441 return -1; 5442 return Mask << 2; 5443 } 5444 5445 if (Reg != "cpsr" && Reg != "spsr") { 5446 return -1; 5447 } 5448 5449 // This is the same as if the flags were "fc" 5450 if (Flags.empty() || Flags == "all") 5451 return Mask | 0x9; 5452 5453 // Inspect the supplied flags string and set the bits in the mask for 5454 // the relevant and valid flags allowed for cpsr and spsr. 5455 for (char Flag : Flags) { 5456 int FlagVal; 5457 switch (Flag) { 5458 case 'c': 5459 FlagVal = 0x1; 5460 break; 5461 case 'x': 5462 FlagVal = 0x2; 5463 break; 5464 case 's': 5465 FlagVal = 0x4; 5466 break; 5467 case 'f': 5468 FlagVal = 0x8; 5469 break; 5470 default: 5471 FlagVal = 0; 5472 } 5473 5474 // This avoids allowing strings where the same flag bit appears twice. 5475 if (!FlagVal || (Mask & FlagVal)) 5476 return -1; 5477 Mask |= FlagVal; 5478 } 5479 5480 // If the register is spsr then we need to set the R bit. 5481 if (Reg == "spsr") 5482 Mask |= 0x10; 5483 5484 return Mask; 5485 } 5486 5487 // Lower the read_register intrinsic to ARM specific DAG nodes 5488 // using the supplied metadata string to select the instruction node to use 5489 // and the registers/masks to construct as operands for the node. 5490 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ 5491 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1)); 5492 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0)); 5493 bool IsThumb2 = Subtarget->isThumb2(); 5494 SDLoc DL(N); 5495 5496 std::vector<SDValue> Ops; 5497 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5498 5499 if (!Ops.empty()) { 5500 // If the special register string was constructed of fields (as defined 5501 // in the ACLE) then need to lower to MRC node (32 bit) or 5502 // MRRC node(64 bit), we can make the distinction based on the number of 5503 // operands we have. 5504 unsigned Opcode; 5505 SmallVector<EVT, 3> ResTypes; 5506 if (Ops.size() == 5){ 5507 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; 5508 ResTypes.append({ MVT::i32, MVT::Other }); 5509 } else { 5510 assert(Ops.size() == 3 && 5511 "Invalid number of fields in special register string."); 5512 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; 5513 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); 5514 } 5515 5516 Ops.push_back(getAL(CurDAG, DL)); 5517 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5518 Ops.push_back(N->getOperand(0)); 5519 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); 5520 return true; 5521 } 5522 5523 std::string SpecialReg = RegString->getString().lower(); 5524 5525 int BankedReg = getBankedRegisterMask(SpecialReg); 5526 if (BankedReg != -1) { 5527 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), 5528 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5529 N->getOperand(0) }; 5530 ReplaceNode( 5531 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, 5532 DL, MVT::i32, MVT::Other, Ops)); 5533 return true; 5534 } 5535 5536 // The VFP registers are read by creating SelectionDAG nodes with opcodes 5537 // corresponding to the register that is being read from. So we switch on the 5538 // string to find which opcode we need to use. 5539 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5540 .Case("fpscr", ARM::VMRS) 5541 .Case("fpexc", ARM::VMRS_FPEXC) 5542 .Case("fpsid", ARM::VMRS_FPSID) 5543 .Case("mvfr0", ARM::VMRS_MVFR0) 5544 .Case("mvfr1", ARM::VMRS_MVFR1) 5545 .Case("mvfr2", ARM::VMRS_MVFR2) 5546 .Case("fpinst", ARM::VMRS_FPINST) 5547 .Case("fpinst2", ARM::VMRS_FPINST2) 5548 .Default(0); 5549 5550 // If an opcode was found then we can lower the read to a VFP instruction. 5551 if (Opcode) { 5552 if (!Subtarget->hasVFP2Base()) 5553 return false; 5554 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base()) 5555 return false; 5556 5557 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5558 N->getOperand(0) }; 5559 ReplaceNode(N, 5560 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); 5561 return true; 5562 } 5563 5564 // If the target is M Class then need to validate that the register string 5565 // is an acceptable value, so check that a mask can be constructed from the 5566 // string. 5567 if (Subtarget->isMClass()) { 5568 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5569 if (SYSmValue == -1) 5570 return false; 5571 5572 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5573 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5574 N->getOperand(0) }; 5575 ReplaceNode( 5576 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); 5577 return true; 5578 } 5579 5580 // Here we know the target is not M Class so we need to check if it is one 5581 // of the remaining possible values which are apsr, cpsr or spsr. 5582 if (SpecialReg == "apsr" || SpecialReg == "cpsr") { 5583 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5584 N->getOperand(0) }; 5585 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, 5586 DL, MVT::i32, MVT::Other, Ops)); 5587 return true; 5588 } 5589 5590 if (SpecialReg == "spsr") { 5591 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5592 N->getOperand(0) }; 5593 ReplaceNode( 5594 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL, 5595 MVT::i32, MVT::Other, Ops)); 5596 return true; 5597 } 5598 5599 return false; 5600 } 5601 5602 // Lower the write_register intrinsic to ARM specific DAG nodes 5603 // using the supplied metadata string to select the instruction node to use 5604 // and the registers/masks to use in the nodes 5605 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ 5606 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1)); 5607 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0)); 5608 bool IsThumb2 = Subtarget->isThumb2(); 5609 SDLoc DL(N); 5610 5611 std::vector<SDValue> Ops; 5612 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5613 5614 if (!Ops.empty()) { 5615 // If the special register string was constructed of fields (as defined 5616 // in the ACLE) then need to lower to MCR node (32 bit) or 5617 // MCRR node(64 bit), we can make the distinction based on the number of 5618 // operands we have. 5619 unsigned Opcode; 5620 if (Ops.size() == 5) { 5621 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; 5622 Ops.insert(Ops.begin()+2, N->getOperand(2)); 5623 } else { 5624 assert(Ops.size() == 3 && 5625 "Invalid number of fields in special register string."); 5626 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR; 5627 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; 5628 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); 5629 } 5630 5631 Ops.push_back(getAL(CurDAG, DL)); 5632 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5633 Ops.push_back(N->getOperand(0)); 5634 5635 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5636 return true; 5637 } 5638 5639 std::string SpecialReg = RegString->getString().lower(); 5640 int BankedReg = getBankedRegisterMask(SpecialReg); 5641 if (BankedReg != -1) { 5642 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), 5643 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5644 N->getOperand(0) }; 5645 ReplaceNode( 5646 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, 5647 DL, MVT::Other, Ops)); 5648 return true; 5649 } 5650 5651 // The VFP registers are written to by creating SelectionDAG nodes with 5652 // opcodes corresponding to the register that is being written. So we switch 5653 // on the string to find which opcode we need to use. 5654 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5655 .Case("fpscr", ARM::VMSR) 5656 .Case("fpexc", ARM::VMSR_FPEXC) 5657 .Case("fpsid", ARM::VMSR_FPSID) 5658 .Case("fpinst", ARM::VMSR_FPINST) 5659 .Case("fpinst2", ARM::VMSR_FPINST2) 5660 .Default(0); 5661 5662 if (Opcode) { 5663 if (!Subtarget->hasVFP2Base()) 5664 return false; 5665 Ops = { N->getOperand(2), getAL(CurDAG, DL), 5666 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5667 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5668 return true; 5669 } 5670 5671 std::pair<StringRef, StringRef> Fields; 5672 Fields = StringRef(SpecialReg).rsplit('_'); 5673 std::string Reg = Fields.first.str(); 5674 StringRef Flags = Fields.second; 5675 5676 // If the target was M Class then need to validate the special register value 5677 // and retrieve the mask for use in the instruction node. 5678 if (Subtarget->isMClass()) { 5679 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5680 if (SYSmValue == -1) 5681 return false; 5682 5683 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5684 N->getOperand(2), getAL(CurDAG, DL), 5685 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5686 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); 5687 return true; 5688 } 5689 5690 // We then check to see if a valid mask can be constructed for one of the 5691 // register string values permitted for the A and R class cores. These values 5692 // are apsr, spsr and cpsr; these are also valid on older cores. 5693 int Mask = getARClassRegisterMask(Reg, Flags); 5694 if (Mask != -1) { 5695 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), 5696 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5697 N->getOperand(0) }; 5698 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, 5699 DL, MVT::Other, Ops)); 5700 return true; 5701 } 5702 5703 return false; 5704 } 5705 5706 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ 5707 std::vector<SDValue> AsmNodeOperands; 5708 InlineAsm::Flag Flag; 5709 bool Changed = false; 5710 unsigned NumOps = N->getNumOperands(); 5711 5712 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 5713 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 5714 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 5715 // respectively. Since there is no constraint to explicitly specify a 5716 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 5717 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 5718 // them into a GPRPair. 5719 5720 SDLoc dl(N); 5721 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue(); 5722 5723 SmallVector<bool, 8> OpChanged; 5724 // Glue node will be appended late. 5725 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 5726 SDValue op = N->getOperand(i); 5727 AsmNodeOperands.push_back(op); 5728 5729 if (i < InlineAsm::Op_FirstOperand) 5730 continue; 5731 5732 if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) 5733 Flag = InlineAsm::Flag(C->getZExtValue()); 5734 else 5735 continue; 5736 5737 // Immediate operands to inline asm in the SelectionDAG are modeled with 5738 // two operands. The first is a constant of value InlineAsm::Kind::Imm, and 5739 // the second is a constant with the value of the immediate. If we get here 5740 // and we have a Kind::Imm, skip the next operand, and continue. 5741 if (Flag.isImmKind()) { 5742 SDValue op = N->getOperand(++i); 5743 AsmNodeOperands.push_back(op); 5744 continue; 5745 } 5746 5747 const unsigned NumRegs = Flag.getNumOperandRegisters(); 5748 if (NumRegs) 5749 OpChanged.push_back(false); 5750 5751 unsigned DefIdx = 0; 5752 bool IsTiedToChangedOp = false; 5753 // If it's a use that is tied with a previous def, it has no 5754 // reg class constraint. 5755 if (Changed && Flag.isUseOperandTiedToDef(DefIdx)) 5756 IsTiedToChangedOp = OpChanged[DefIdx]; 5757 5758 // Memory operands to inline asm in the SelectionDAG are modeled with two 5759 // operands: a constant of value InlineAsm::Kind::Mem followed by the input 5760 // operand. If we get here and we have a Kind::Mem, skip the next operand 5761 // (so it doesn't get misinterpreted), and continue. We do this here because 5762 // it's important to update the OpChanged array correctly before moving on. 5763 if (Flag.isMemKind()) { 5764 SDValue op = N->getOperand(++i); 5765 AsmNodeOperands.push_back(op); 5766 continue; 5767 } 5768 5769 if (!Flag.isRegUseKind() && !Flag.isRegDefKind() && 5770 !Flag.isRegDefEarlyClobberKind()) 5771 continue; 5772 5773 unsigned RC; 5774 const bool HasRC = Flag.hasRegClassConstraint(RC); 5775 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 5776 || NumRegs != 2) 5777 continue; 5778 5779 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 5780 SDValue V0 = N->getOperand(i+1); 5781 SDValue V1 = N->getOperand(i+2); 5782 Register Reg0 = cast<RegisterSDNode>(V0)->getReg(); 5783 Register Reg1 = cast<RegisterSDNode>(V1)->getReg(); 5784 SDValue PairedReg; 5785 MachineRegisterInfo &MRI = MF->getRegInfo(); 5786 5787 if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) { 5788 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 5789 // the original GPRs. 5790 5791 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5792 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5793 SDValue Chain = SDValue(N,0); 5794 5795 SDNode *GU = N->getGluedUser(); 5796 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 5797 Chain.getValue(1)); 5798 5799 // Extract values from a GPRPair reg and copy to the original GPR reg. 5800 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 5801 RegCopy); 5802 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 5803 RegCopy); 5804 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 5805 RegCopy.getValue(1)); 5806 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 5807 5808 // Update the original glue user. 5809 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 5810 Ops.push_back(T1.getValue(1)); 5811 CurDAG->UpdateNodeOperands(GU, Ops); 5812 } else { 5813 // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a 5814 // GPRPair and then pass the GPRPair to the inline asm. 5815 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 5816 5817 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 5818 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 5819 Chain.getValue(1)); 5820 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 5821 T0.getValue(1)); 5822 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 5823 5824 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 5825 // i32 VRs of inline asm with it. 5826 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5827 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5828 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 5829 5830 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 5831 Glue = Chain.getValue(1); 5832 } 5833 5834 Changed = true; 5835 5836 if(PairedReg.getNode()) { 5837 OpChanged[OpChanged.size() -1 ] = true; 5838 Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/); 5839 if (IsTiedToChangedOp) 5840 Flag.setMatchingOp(DefIdx); 5841 else 5842 Flag.setRegClass(ARM::GPRPairRegClassID); 5843 // Replace the current flag. 5844 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 5845 Flag, dl, MVT::i32); 5846 // Add the new register node and skip the original two GPRs. 5847 AsmNodeOperands.push_back(PairedReg); 5848 // Skip the next two GPRs. 5849 i += 2; 5850 } 5851 } 5852 5853 if (Glue.getNode()) 5854 AsmNodeOperands.push_back(Glue); 5855 if (!Changed) 5856 return false; 5857 5858 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N), 5859 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 5860 New->setNodeId(-1); 5861 ReplaceNode(N, New.getNode()); 5862 return true; 5863 } 5864 5865 bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand( 5866 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, 5867 std::vector<SDValue> &OutOps) { 5868 switch(ConstraintID) { 5869 default: 5870 llvm_unreachable("Unexpected asm memory constraint"); 5871 case InlineAsm::ConstraintCode::m: 5872 case InlineAsm::ConstraintCode::o: 5873 case InlineAsm::ConstraintCode::Q: 5874 case InlineAsm::ConstraintCode::Um: 5875 case InlineAsm::ConstraintCode::Un: 5876 case InlineAsm::ConstraintCode::Uq: 5877 case InlineAsm::ConstraintCode::Us: 5878 case InlineAsm::ConstraintCode::Ut: 5879 case InlineAsm::ConstraintCode::Uv: 5880 case InlineAsm::ConstraintCode::Uy: 5881 // Require the address to be in a register. That is safe for all ARM 5882 // variants and it is hard to do anything much smarter without knowing 5883 // how the operand is used. 5884 OutOps.push_back(Op); 5885 return false; 5886 } 5887 return true; 5888 } 5889 5890 /// createARMISelDag - This pass converts a legalized DAG into a 5891 /// ARM-specific DAG, ready for instruction scheduling. 5892 /// 5893 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 5894 CodeGenOptLevel OptLevel) { 5895 return new ARMDAGToDAGISelLegacy(TM, OptLevel); 5896 } 5897