//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "arm-isel"
#define PASS_NAME "ARM Instruction Selection"

// Hidden command-line switch, off by default. When set, the shifter-operand
// complex patterns in this file (SelectImmShifterOperand and
// SelectRegShifterOperand) refuse to match.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
53 /// 54 namespace { 55 56 class ARMDAGToDAGISel : public SelectionDAGISel { 57 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can 58 /// make the right decision when generating code for different targets. 59 const ARMSubtarget *Subtarget; 60 61 public: 62 static char ID; 63 64 ARMDAGToDAGISel() = delete; 65 66 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) 67 : SelectionDAGISel(ID, tm, OptLevel) {} 68 69 bool runOnMachineFunction(MachineFunction &MF) override { 70 // Reset the subtarget each time through. 71 Subtarget = &MF.getSubtarget<ARMSubtarget>(); 72 SelectionDAGISel::runOnMachineFunction(MF); 73 return true; 74 } 75 76 void PreprocessISelDAG() override; 77 78 /// getI32Imm - Return a target constant of type i32 with the specified 79 /// value. 80 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 81 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 82 } 83 84 void Select(SDNode *N) override; 85 86 /// Return true as some complex patterns, like those that call 87 /// canExtractShiftFromMul can modify the DAG inplace. 
88 bool ComplexPatternFuncMutatesDAG() const override { return true; } 89 90 bool hasNoVMLxHazardUse(SDNode *N) const; 91 bool isShifterOpProfitable(const SDValue &Shift, 92 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); 93 bool SelectRegShifterOperand(SDValue N, SDValue &A, 94 SDValue &B, SDValue &C, 95 bool CheckProfitability = true); 96 bool SelectImmShifterOperand(SDValue N, SDValue &A, 97 SDValue &B, bool CheckProfitability = true); 98 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B, 99 SDValue &C) { 100 // Don't apply the profitability check 101 return SelectRegShifterOperand(N, A, B, C, false); 102 } 103 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) { 104 // Don't apply the profitability check 105 return SelectImmShifterOperand(N, A, B, false); 106 } 107 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) { 108 if (!N.hasOneUse()) 109 return false; 110 return SelectImmShifterOperand(N, A, B, false); 111 } 112 113 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out); 114 115 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 116 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); 117 118 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { 119 const ConstantSDNode *CN = cast<ConstantSDNode>(N); 120 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32); 121 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); 122 return true; 123 } 124 125 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 126 SDValue &Offset, SDValue &Opc); 127 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 128 SDValue &Offset, SDValue &Opc); 129 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 130 SDValue &Offset, SDValue &Opc); 131 bool SelectAddrOffsetNone(SDValue N, SDValue &Base); 132 bool SelectAddrMode3(SDValue N, SDValue &Base, 133 SDValue &Offset, SDValue &Opc); 134 bool SelectAddrMode3Offset(SDNode *Op, SDValue N, 135 SDValue 
&Offset, SDValue &Opc); 136 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16); 137 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); 138 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset); 139 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); 140 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); 141 142 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); 143 144 // Thumb Addressing Modes: 145 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); 146 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset); 147 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, 148 SDValue &OffImm); 149 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 150 SDValue &OffImm); 151 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 152 SDValue &OffImm); 153 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 154 SDValue &OffImm); 155 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); 156 template <unsigned Shift> 157 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 158 159 // Thumb 2 Addressing Modes: 160 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 161 template <unsigned Shift> 162 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); 163 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, 164 SDValue &OffImm); 165 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 166 SDValue &OffImm); 167 template <unsigned Shift> 168 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm); 169 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, 170 unsigned Shift); 171 template <unsigned Shift> 172 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 173 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, 174 SDValue &OffReg, SDValue &ShImm); 175 bool 
SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); 176 177 template<int Min, int Max> 178 bool SelectImmediateInRange(SDValue N, SDValue &OffImm); 179 180 inline bool is_so_imm(unsigned Imm) const { 181 return ARM_AM::getSOImmVal(Imm) != -1; 182 } 183 184 inline bool is_so_imm_not(unsigned Imm) const { 185 return ARM_AM::getSOImmVal(~Imm) != -1; 186 } 187 188 inline bool is_t2_so_imm(unsigned Imm) const { 189 return ARM_AM::getT2SOImmVal(Imm) != -1; 190 } 191 192 inline bool is_t2_so_imm_not(unsigned Imm) const { 193 return ARM_AM::getT2SOImmVal(~Imm) != -1; 194 } 195 196 // Include the pieces autogenerated from the target description. 197 #include "ARMGenDAGISel.inc" 198 199 private: 200 void transferMemOperands(SDNode *Src, SDNode *Dst); 201 202 /// Indexed (pre/post inc/dec) load matching code for ARM. 203 bool tryARMIndexedLoad(SDNode *N); 204 bool tryT1IndexedLoad(SDNode *N); 205 bool tryT2IndexedLoad(SDNode *N); 206 bool tryMVEIndexedLoad(SDNode *N); 207 bool tryFMULFixed(SDNode *N, SDLoc dl); 208 bool tryFP_TO_INT(SDNode *N, SDLoc dl); 209 bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul, 210 bool IsUnsigned, 211 bool FixedToFloat); 212 213 /// SelectVLD - Select NEON load intrinsics. NumVecs should be 214 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for 215 /// loads of D registers and even subregs and odd subregs of Q registers. 216 /// For NumVecs <= 2, QOpcodes1 is not used. 217 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 218 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 219 const uint16_t *QOpcodes1); 220 221 /// SelectVST - Select NEON store intrinsics. NumVecs should 222 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for 223 /// stores of D registers and even subregs and odd subregs of Q registers. 224 /// For NumVecs <= 2, QOpcodes1 is not used. 
225 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 226 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 227 const uint16_t *QOpcodes1); 228 229 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should 230 /// be 2, 3 or 4. The opcode arrays specify the instructions used for 231 /// load/store of D registers and Q registers. 232 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 233 unsigned NumVecs, const uint16_t *DOpcodes, 234 const uint16_t *QOpcodes); 235 236 /// Helper functions for setting up clusters of MVE predication operands. 237 template <typename SDValueVector> 238 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 239 SDValue PredicateMask); 240 template <typename SDValueVector> 241 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 242 SDValue PredicateMask, SDValue Inactive); 243 244 template <typename SDValueVector> 245 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc); 246 template <typename SDValueVector> 247 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy); 248 249 /// SelectMVE_WB - Select MVE writeback load/store intrinsics. 250 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated); 251 252 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics. 253 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate, 254 bool HasSaturationOperand); 255 256 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics. 257 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 258 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated); 259 260 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between 261 /// vector lanes. 
262 void SelectMVE_VSHLC(SDNode *N, bool Predicated); 263 264 /// Select long MVE vector reductions with two vector operands 265 /// Stride is the number of vector element widths the instruction can operate 266 /// on: 267 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32] 268 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32] 269 /// Stride is used when addressing the OpcodesS array which contains multiple 270 /// opcodes for each element width. 271 /// TySize is the index into the list of element types listed above 272 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 273 const uint16_t *OpcodesS, const uint16_t *OpcodesU, 274 size_t Stride, size_t TySize); 275 276 /// Select a 64-bit MVE vector reduction with two vector operands 277 /// arm_mve_vmlldava_[predicated] 278 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS, 279 const uint16_t *OpcodesU); 280 /// Select a 72-bit MVE vector rounding reduction with two vector operands 281 /// int_arm_mve_vrmlldavha[_predicated] 282 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS, 283 const uint16_t *OpcodesU); 284 285 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs 286 /// should be 2 or 4. The opcode array specifies the instructions 287 /// used for 8, 16 and 32-bit lane sizes respectively, and each 288 /// pointer points to a set of NumVecs sub-opcodes used for the 289 /// different stages (e.g. VLD20 versus VLD21) of each load family. 290 void SelectMVE_VLD(SDNode *N, unsigned NumVecs, 291 const uint16_t *const *Opcodes, bool HasWriteback); 292 293 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an 294 /// array of 3 elements for the 8, 16 and 32-bit lane sizes. 295 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, 296 bool Wrapping, bool Predicated); 297 298 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D, 299 /// CX1DA, CX2D, CX2DA, CX3, CX3DA). 
300 /// \arg \c NumExtraOps number of extra operands besides the coprocossor, 301 /// the accumulator and the immediate operand, i.e. 0 302 /// for CX1*, 1 for CX2*, 2 for CX3* 303 /// \arg \c HasAccum whether the instruction has an accumulator operand 304 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps, 305 bool HasAccum); 306 307 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs 308 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used 309 /// for loading D registers. 310 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating, 311 unsigned NumVecs, const uint16_t *DOpcodes, 312 const uint16_t *QOpcodes0 = nullptr, 313 const uint16_t *QOpcodes1 = nullptr); 314 315 /// Try to select SBFX/UBFX instructions for ARM. 316 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); 317 318 bool tryInsertVectorElt(SDNode *N); 319 320 // Select special operations if node forms integer ABS pattern 321 bool tryABSOp(SDNode *N); 322 323 bool tryReadRegister(SDNode *N); 324 bool tryWriteRegister(SDNode *N); 325 326 bool tryInlineAsm(SDNode *N); 327 328 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); 329 330 void SelectCMP_SWAP(SDNode *N); 331 332 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 333 /// inline asm expressions. 334 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 335 std::vector<SDValue> &OutOps) override; 336 337 // Form pairs of consecutive R, S, D, or Q registers. 338 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); 339 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); 340 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); 341 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); 342 343 // Form sequences of 4 consecutive S, D, or Q registers. 
344 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 345 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 346 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 347 348 // Get the alignment operand for a NEON VLD or VST instruction. 349 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, 350 bool is64BitVector); 351 352 /// Checks if N is a multiplication by a constant where we can extract out a 353 /// power of two from the constant so that it can be used in a shift, but only 354 /// if it simplifies the materialization of the constant. Returns true if it 355 /// is, and assigns to PowerOfTwo the power of two that should be extracted 356 /// out and to NewMulConst the new constant to be multiplied by. 357 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, 358 unsigned &PowerOfTwo, SDValue &NewMulConst) const; 359 360 /// Replace N with M in CurDAG, in a way that also ensures that M gets 361 /// selected when N would have been selected. 362 void replaceDAGValue(const SDValue &N, SDValue M); 363 }; 364 } 365 366 char ARMDAGToDAGISel::ID = 0; 367 368 INITIALIZE_PASS(ARMDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) 369 370 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant 371 /// operand. If so Imm will receive the 32-bit value. 372 static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 373 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 374 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 375 return true; 376 } 377 return false; 378 } 379 380 // isInt32Immediate - This method tests to see if a constant operand. 381 // If so Imm will receive the 32 bit value. 
382 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 383 return isInt32Immediate(N.getNode(), Imm); 384 } 385 386 // isOpcWithIntImmediate - This method tests to see if the node is a specific 387 // opcode and that it has a immediate integer right operand. 388 // If so Imm will receive the 32 bit value. 389 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 390 return N->getOpcode() == Opc && 391 isInt32Immediate(N->getOperand(1).getNode(), Imm); 392 } 393 394 /// Check whether a particular node is a constant value representable as 395 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax). 396 /// 397 /// \param ScaledConstant [out] - On success, the pre-scaled constant value. 398 static bool isScaledConstantInRange(SDValue Node, int Scale, 399 int RangeMin, int RangeMax, 400 int &ScaledConstant) { 401 assert(Scale > 0 && "Invalid scale!"); 402 403 // Check that this is a constant. 404 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); 405 if (!C) 406 return false; 407 408 ScaledConstant = (int) C->getZExtValue(); 409 if ((ScaledConstant % Scale) != 0) 410 return false; 411 412 ScaledConstant /= Scale; 413 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; 414 } 415 416 void ARMDAGToDAGISel::PreprocessISelDAG() { 417 if (!Subtarget->hasV6T2Ops()) 418 return; 419 420 bool isThumb2 = Subtarget->isThumb(); 421 // We use make_early_inc_range to avoid invalidation issues. 422 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) { 423 if (N.getOpcode() != ISD::ADD) 424 continue; 425 426 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with 427 // leading zeros, followed by consecutive set bits, followed by 1 or 2 428 // trailing zeros, e.g. 1020. 429 // Transform the expression to 430 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number 431 // of trailing zeros of c2. 
The left shift would be folded as an shifter 432 // operand of 'add' and the 'and' and 'srl' would become a bits extraction 433 // node (UBFX). 434 435 SDValue N0 = N.getOperand(0); 436 SDValue N1 = N.getOperand(1); 437 unsigned And_imm = 0; 438 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { 439 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) 440 std::swap(N0, N1); 441 } 442 if (!And_imm) 443 continue; 444 445 // Check if the AND mask is an immediate of the form: 000.....1111111100 446 unsigned TZ = llvm::countr_zero(And_imm); 447 if (TZ != 1 && TZ != 2) 448 // Be conservative here. Shifter operands aren't always free. e.g. On 449 // Swift, left shifter operand of 1 / 2 for free but others are not. 450 // e.g. 451 // ubfx r3, r1, #16, #8 452 // ldr.w r3, [r0, r3, lsl #2] 453 // vs. 454 // mov.w r9, #1020 455 // and.w r2, r9, r1, lsr #14 456 // ldr r2, [r0, r2] 457 continue; 458 And_imm >>= TZ; 459 if (And_imm & (And_imm + 1)) 460 continue; 461 462 // Look for (and (srl X, c1), c2). 463 SDValue Srl = N1.getOperand(0); 464 unsigned Srl_imm = 0; 465 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || 466 (Srl_imm <= 2)) 467 continue; 468 469 // Make sure first operand is not a shifter operand which would prevent 470 // folding of the left shift. 471 SDValue CPTmp0; 472 SDValue CPTmp1; 473 SDValue CPTmp2; 474 if (isThumb2) { 475 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1)) 476 continue; 477 } else { 478 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || 479 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) 480 continue; 481 } 482 483 // Now make the transformation. 
484 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, 485 Srl.getOperand(0), 486 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl), 487 MVT::i32)); 488 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, 489 Srl, 490 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32)); 491 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, 492 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32)); 493 CurDAG->UpdateNodeOperands(&N, N0, N1); 494 } 495 } 496 497 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS 498 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at 499 /// least on current ARM implementations) which should be avoidded. 500 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { 501 if (OptLevel == CodeGenOpt::None) 502 return true; 503 504 if (!Subtarget->hasVMLxHazards()) 505 return true; 506 507 if (!N->hasOneUse()) 508 return false; 509 510 SDNode *Use = *N->use_begin(); 511 if (Use->getOpcode() == ISD::CopyToReg) 512 return true; 513 if (Use->isMachineOpcode()) { 514 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 515 CurDAG->getSubtarget().getInstrInfo()); 516 517 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); 518 if (MCID.mayStore()) 519 return true; 520 unsigned Opcode = MCID.getOpcode(); 521 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 522 return true; 523 // vmlx feeding into another vmlx. We actually want to unfold 524 // the use later in the MLxExpansion pass. e.g. 525 // vmla 526 // vmla (stall 8 cycles) 527 // 528 // vmul (5 cycles) 529 // vadd (5 cycles) 530 // vmla 531 // This adds up to about 18 - 19 cycles. 532 // 533 // vmla 534 // vmul (stall 4 cycles) 535 // vadd adds up to about 14 cycles. 
536 return TII->isFpMLxInstruction(Opcode); 537 } 538 539 return false; 540 } 541 542 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, 543 ARM_AM::ShiftOpc ShOpcVal, 544 unsigned ShAmt) { 545 if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) 546 return true; 547 if (Shift.hasOneUse()) 548 return true; 549 // R << 2 is free. 550 return ShOpcVal == ARM_AM::lsl && 551 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); 552 } 553 554 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, 555 unsigned MaxShift, 556 unsigned &PowerOfTwo, 557 SDValue &NewMulConst) const { 558 assert(N.getOpcode() == ISD::MUL); 559 assert(MaxShift > 0); 560 561 // If the multiply is used in more than one place then changing the constant 562 // will make other uses incorrect, so don't. 563 if (!N.hasOneUse()) return false; 564 // Check if the multiply is by a constant 565 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); 566 if (!MulConst) return false; 567 // If the constant is used in more than one place then modifying it will mean 568 // we need to materialize two constants instead of one, which is a bad idea. 
569 if (!MulConst->hasOneUse()) return false; 570 unsigned MulConstVal = MulConst->getZExtValue(); 571 if (MulConstVal == 0) return false; 572 573 // Find the largest power of 2 that MulConstVal is a multiple of 574 PowerOfTwo = MaxShift; 575 while ((MulConstVal % (1 << PowerOfTwo)) != 0) { 576 --PowerOfTwo; 577 if (PowerOfTwo == 0) return false; 578 } 579 580 // Only optimise if the new cost is better 581 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); 582 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); 583 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); 584 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); 585 return NewCost < OldCost; 586 } 587 588 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { 589 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); 590 ReplaceUses(N, M); 591 } 592 593 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, 594 SDValue &BaseReg, 595 SDValue &Opc, 596 bool CheckProfitability) { 597 if (DisableShifterOp) 598 return false; 599 600 // If N is a multiply-by-constant and it's profitable to extract a shift and 601 // use it in a shifted operand do so. 602 if (N.getOpcode() == ISD::MUL) { 603 unsigned PowerOfTwo = 0; 604 SDValue NewMulConst; 605 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { 606 HandleSDNode Handle(N); 607 SDLoc Loc(N); 608 replaceDAGValue(N.getOperand(1), NewMulConst); 609 BaseReg = Handle.getValue(); 610 Opc = CurDAG->getTargetConstant( 611 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); 612 return true; 613 } 614 } 615 616 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 617 618 // Don't match base register only case. That is matched to a separate 619 // lower complexity pattern with explicit register operand. 
620 if (ShOpcVal == ARM_AM::no_shift) return false; 621 622 BaseReg = N.getOperand(0); 623 unsigned ShImmVal = 0; 624 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 625 if (!RHS) return false; 626 ShImmVal = RHS->getZExtValue() & 31; 627 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 628 SDLoc(N), MVT::i32); 629 return true; 630 } 631 632 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, 633 SDValue &BaseReg, 634 SDValue &ShReg, 635 SDValue &Opc, 636 bool CheckProfitability) { 637 if (DisableShifterOp) 638 return false; 639 640 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 641 642 // Don't match base register only case. That is matched to a separate 643 // lower complexity pattern with explicit register operand. 644 if (ShOpcVal == ARM_AM::no_shift) return false; 645 646 BaseReg = N.getOperand(0); 647 unsigned ShImmVal = 0; 648 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 649 if (RHS) return false; 650 651 ShReg = N.getOperand(1); 652 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) 653 return false; 654 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 655 SDLoc(N), MVT::i32); 656 return true; 657 } 658 659 // Determine whether an ISD::OR's operands are suitable to turn the operation 660 // into an addition, which often has more compact encodings. 661 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { 662 assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); 663 Out = N; 664 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); 665 } 666 667 668 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, 669 SDValue &Base, 670 SDValue &OffImm) { 671 // Match simple R + imm12 operands. 672 673 // Base only. 674 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 675 !CurDAG->isBaseWithConstantOffset(N)) { 676 if (N.getOpcode() == ISD::FrameIndex) { 677 // Match frame index. 
678 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 679 Base = CurDAG->getTargetFrameIndex( 680 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 681 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 682 return true; 683 } 684 685 if (N.getOpcode() == ARMISD::Wrapper && 686 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 687 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 688 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 689 Base = N.getOperand(0); 690 } else 691 Base = N; 692 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 693 return true; 694 } 695 696 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 697 int RHSC = (int)RHS->getSExtValue(); 698 if (N.getOpcode() == ISD::SUB) 699 RHSC = -RHSC; 700 701 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits 702 Base = N.getOperand(0); 703 if (Base.getOpcode() == ISD::FrameIndex) { 704 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 705 Base = CurDAG->getTargetFrameIndex( 706 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 707 } 708 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 709 return true; 710 } 711 } 712 713 // Base only. 714 Base = N; 715 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 716 return true; 717 } 718 719 720 721 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, 722 SDValue &Opc) { 723 if (N.getOpcode() == ISD::MUL && 724 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { 725 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 726 // X * [3,5,9] -> X + X * [2,4,8] etc. 
727 int RHSC = (int)RHS->getZExtValue(); 728 if (RHSC & 1) { 729 RHSC = RHSC & ~1; 730 ARM_AM::AddrOpc AddSub = ARM_AM::add; 731 if (RHSC < 0) { 732 AddSub = ARM_AM::sub; 733 RHSC = - RHSC; 734 } 735 if (isPowerOf2_32(RHSC)) { 736 unsigned ShAmt = Log2_32(RHSC); 737 Base = Offset = N.getOperand(0); 738 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 739 ARM_AM::lsl), 740 SDLoc(N), MVT::i32); 741 return true; 742 } 743 } 744 } 745 } 746 747 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 748 // ISD::OR that is equivalent to an ISD::ADD. 749 !CurDAG->isBaseWithConstantOffset(N)) 750 return false; 751 752 // Leave simple R +/- imm12 operands for LDRi12 753 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { 754 int RHSC; 755 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 756 -0x1000+1, 0x1000, RHSC)) // 12 bits. 757 return false; 758 } 759 760 // Otherwise this is R +/- [possibly shifted] R. 761 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; 762 ARM_AM::ShiftOpc ShOpcVal = 763 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 764 unsigned ShAmt = 0; 765 766 Base = N.getOperand(0); 767 Offset = N.getOperand(1); 768 769 if (ShOpcVal != ARM_AM::no_shift) { 770 // Check to see if the RHS of the shift is a constant, if not, we can't fold 771 // it. 772 if (ConstantSDNode *Sh = 773 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 774 ShAmt = Sh->getZExtValue(); 775 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 776 Offset = N.getOperand(1).getOperand(0); 777 else { 778 ShAmt = 0; 779 ShOpcVal = ARM_AM::no_shift; 780 } 781 } else { 782 ShOpcVal = ARM_AM::no_shift; 783 } 784 } 785 786 // Try matching (R shl C) + (R). 
// Not a SUB and no shift selected so far: check whether operand 0 is a
// foldable shift, in which case operand 1 becomes the base.
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

/// Match the offset operand \p N of the indexed load/store \p Op as a
/// register, optionally shifted by a constant amount.
///
/// Returns false when \p N is a constant accepted by
/// isScaledConstantInRange(N, 1, 0, 0x1000, ...). Otherwise \p Offset is set
/// to \p N (or to the shift's input when the shift is folded) and \p Opc to
/// the ARM_AM::getAM2Opc encoding of add/sub, shift amount and shift kind.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  // Any opcode other than ISD::LOAD is cast to a store here.
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // Incrementing modes add the offset; every other mode subtracts it.
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        // Not profitable: keep the whole shift node as a plain register.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

/// Match the offset operand \p N of the indexed load/store \p Op as an
/// immediate, producing the raw signed value.
///
/// On success \p Offset is register 0 and \p Opc is the constant itself,
/// negated when the addressing mode is not an incrementing one. Unlike
/// SelectAddrMode2OffsetImm the value is not packed through getAM2Opc.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


/// Match the offset operand \p N of the indexed load/store \p Op as an
/// immediate, producing a packed addressing-mode-2 operand.
///
/// On success \p Offset is register 0 and \p Opc is
/// ARM_AM::getAM2Opc(add-or-sub, value, no_shift).
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

/// Accept any address as a bare base with no offset. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

/// Split address \p N into base, register offset and an ARM_AM::getAM3Opc
/// operand. Always succeeds.
///
/// Cases, in order: SUB (base - register), anything that is not
/// base+constant (base only, frame indices turned into target frame
/// indices), base + constant accepted by the imm8 range check, and finally
/// base + register.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    // Register 0 stands for "no offset register".
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // The operand stores a magnitude plus an add/sub flag, so a negative
    // constant is flipped to positive and marked as sub.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant out of range: treat it as a register offset.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

/// Match the offset operand \p N of the indexed load/store \p Op for
/// addressing mode 3. Always succeeds.
///
/// A constant accepted by the range check becomes an immediate (with
/// \p Offset set to register 0); anything else is used as a register offset.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

/// Shared implementation of SelectAddrMode5 and SelectAddrMode5FP16.
///
/// Splits \p N into \p Base and an encoded \p Offset operand. Constant
/// offsets are checked with a scale of 2 when \p FP16 is set and 4
/// otherwise. Always succeeds; an unmatched address is used whole as the
/// base with a zero offset.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Look through the wrapper unless it wraps a global, external symbol
      // or TLS address.
      Base = N.getOperand(0);
    }
    // NOTE(review): this path uses getAM5Opc even when FP16 is set, while the
    // fall-through at the end of the function picks getAM5FP16Opc — confirm
    // the two encodings agree for a zero offset.
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // Encode the sign as an add/sub flag and keep only the magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Constant does not fit: use the whole expression as the base.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

/// Addressing mode 5 with the non-FP16 scale (4).
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

/// Addressing mode 5 with the FP16 scale (2).
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

/// Produce the address and alignment operands for addressing mode 6.
/// Always succeeds.
///
/// \p Parent must be a MemSDNode (enforced by cast<>). \p Addr is \p N
/// unchanged; \p Align is a target constant derived from the memory
/// operand's alignment as described in the branches below.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    // Otherwise Alignment stays 0.
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

/// Match the post-increment offset of the load/store \p Op.
///
/// Fails unless the addressing mode is POST_INC. A constant offset whose
/// byte count equals the size of the accessed memory type is represented as
/// register 0; any other offset is passed through as-is.
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

/// Match a single-use ARMISD::PIC_ADD, returning its first operand as
/// \p Offset and its constant second operand as the target constant
/// \p Label. The second operand must be a ConstantSDNode (cast<> asserts).
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

/// Return true when \p N is an ADD whose second operand is a constant in
/// [-255, -1].
static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

/// Match a register + register address.
///
/// An ADD or base-with-constant-offset node is split into its two operands.
/// Otherwise the only accepted input is the constant zero, which is used as
/// both base and offset.
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isZero())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

/// Same as SelectThumbAddrModeRRSext, except that it declines addresses for
/// which shouldUseZeroOffsetLdSt() holds.
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

/// Match base + (imm5 * \p Scale).
///
/// \p OffImm receives the already-scaled-down constant produced by
/// isScaledConstantInRange. Returns false for a non-constant ADD and for
/// constants outside the range, so that a register-offset form is used.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    // Keep the whole add as the base and use a zero offset.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

/// SelectThumbAddrModeImm5S with a scale of 4.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

/// SelectThumbAddrModeImm5S with a scale of 2.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

/// SelectThumbAddrModeImm5S with a scale of 1.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

/// Match a frame index, optionally plus a constant that is a multiple of 4.
///
/// Side effect: may raise the alignment of the referenced frame object to 4
/// in the MachineFrameInfo.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        // Fixed objects are not realigned above, so re-check before matching.
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

/// Match base +/- (imm7 << Shift). Always succeeds; an unmatched address is
/// used whole as the base with a zero offset.
///
/// \p OffImm holds the byte offset (the scaled constant multiplied back by
/// 1 << Shift), negated when \p N is a SUB.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}


//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//


/// Match base + unsigned imm12.
///
/// Returns false in two cases: the base is a wrapped TargetConstantPool, or
/// the non-template SelectT2AddrModeImm8 matches the address. In every other
/// case it succeeds, falling back to base-only with a zero offset.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    // Note: on a match the callee has already written Base and OffImm, but
    // this selector still reports failure.
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

/// Match base +/- (imm8 << Shift). Always succeeds; an unmatched address is
/// used whole as the base with a zero offset.
///
/// \p OffImm holds the byte offset, negated when \p N is a SUB. Frame-index
/// bases are converted to target frame indices.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

/// Match base - imm8: succeeds only when the effective constant offset lies
/// in [-255, -1]. \p OffImm receives that negative value.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// Match the constant offset \p N of the indexed load/store \p Op.
///
/// \p OffImm is the constant for incrementing addressing modes and its
/// negation otherwise. Fails when \p N is not a constant in range.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

/// Match base +/- (imm7 << Shift). Always succeeds; an unmatched address is
/// used whole as the base with a zero offset.
///
/// Differs from SelectTAddrModeImm7 in that frame-index bases are converted
/// to target frame indices.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

/// Template front end that forwards the compile-time \p Shift to the
/// runtime-shift overload.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

/// Match the constant offset \p N of an indexed (masked) load/store \p Op as
/// imm7 << \p Shift.
///
/// \p OffImm is the byte offset for incrementing addressing modes and its
/// negation otherwise. \p Op must be a LOAD, STORE, MLOAD or MSTORE.
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}

/// Match \p N as a constant accepted by
/// isScaledConstantInRange(N, 1, Min, Max, ...) and return it as a target
/// constant in \p OffImm.
template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

/// Match base + (register << ShImm) with ShImm in [0, 3].
///
/// Declines constant offsets that the imm12 / imm8 forms cover. May rewrite
/// a multiply-by-constant offset in place (via replaceDAGValue) to expose a
/// shift.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        // Keep the shift node itself as the offset register.
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      // The handle tracks OffReg across the in-place DAG replacement.
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

/// Match base + (imm * 4) with the byte offset in [0, 1020]. Always
/// succeeds; anything else is used whole as the base with a zero offset.
///
/// \p OffImm holds the offset divided by 4.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  // Unsigned on purpose: a negative constant wraps to a large value and is
  // rejected by the range check below.
  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns a ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

/// Copy the single memory operand of \p N onto the machine node \p Result.
/// \p N must be a MemSDNode and \p Result a MachineSDNode (cast<> asserts).
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

/// Try to select the indexed load \p N as an ARM pre/post-indexed load.
///
/// Returns false for unindexed loads and when no addressing form matches the
/// loaded type. On success \p N is replaced by the new machine node.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // The order of the attempts matters: immediate forms are tried before the
  // register form for each type.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      // Sign-extending byte loads go through the addressing-mode-3 selector.
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The pre-indexed immediate forms take no separate offset operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

/// Try to select \p N as a Thumb1 post-indexed load.
///
/// Only a non-extending, post-incremented i32 load with a constant offset of
/// exactly 4 is accepted; it becomes the tLDR_postidx pseudo.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

/// Try to select the indexed load \p N as a Thumb2 pre/post-indexed load.
///
/// Requires the offset to match SelectT2AddrModeImm8Offset and the memory
/// type to be i32, i16, i8 or i1. On success \p N is replaced.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

/// Try to select the indexed vector load or masked load \p N as an MVE
/// pre/post-indexed VLDR.
///
/// Returns false for unindexed or non-vector loads and when no opcode fits
/// the memory type, alignment and offset. On success the uses of \p N are
/// rewired to the new node and \p N is removed.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // A plain load carries no predicate.
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // A masked load uses its mask as the predicate register.
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  SDValue NewOffset;
  // Extending loads of narrow vectors are tried first, then the full-width
  // forms from widest to narrowest element.
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
                   PredReg,
                   CurDAG->getRegister(0, MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  // The machine node's first two results are in the opposite order from N's,
  // so results 0 and 1 are swapped; result 2 maps straight across.
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
///
/// Builds a REG_SEQUENCE of type \p VT in the GPRPair register class with
/// \p V0 at gsub_0 and \p V1 at gsub_1. The debug location is taken from V0.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
///
/// Builds a REG_SEQUENCE of type \p VT in the DPR_VFP2 register class with
/// \p V0 at ssub_0 and \p V1 at ssub_1. The debug location is taken from V0.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
1870 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1871 SDLoc dl(V0.getNode()); 1872 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1873 MVT::i32); 1874 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1875 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1876 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1877 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1878 } 1879 1880 /// Form 4 consecutive D registers from a pair of Q registers. 1881 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1882 SDLoc dl(V0.getNode()); 1883 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1884 MVT::i32); 1885 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1886 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1887 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1888 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1889 } 1890 1891 /// Form 4 consecutive S registers. 1892 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1893 SDValue V2, SDValue V3) { 1894 SDLoc dl(V0.getNode()); 1895 SDValue RegClass = 1896 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1897 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1898 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1899 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1900 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1901 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1902 V2, SubReg2, V3, SubReg3 }; 1903 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1904 } 1905 1906 /// Form 4 consecutive D registers. 
1907 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1908 SDValue V2, SDValue V3) { 1909 SDLoc dl(V0.getNode()); 1910 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1911 MVT::i32); 1912 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1913 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1914 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1915 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1916 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1917 V2, SubReg2, V3, SubReg3 }; 1918 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1919 } 1920 1921 /// Form 4 consecutive Q registers. 1922 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1923 SDValue V2, SDValue V3) { 1924 SDLoc dl(V0.getNode()); 1925 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1926 MVT::i32); 1927 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1928 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1929 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1930 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1931 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1932 V2, SubReg2, V3, SubReg3 }; 1933 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1934 } 1935 1936 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1937 /// of a NEON VLD or VST instruction. The supported values depend on the 1938 /// number of registers being loaded. 
1939 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1940 unsigned NumVecs, bool is64BitVector) { 1941 unsigned NumRegs = NumVecs; 1942 if (!is64BitVector && NumVecs < 3) 1943 NumRegs *= 2; 1944 1945 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1946 if (Alignment >= 32 && NumRegs == 4) 1947 Alignment = 32; 1948 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1949 Alignment = 16; 1950 else if (Alignment >= 8) 1951 Alignment = 8; 1952 else 1953 Alignment = 0; 1954 1955 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1956 } 1957 1958 static bool isVLDfixed(unsigned Opc) 1959 { 1960 switch (Opc) { 1961 default: return false; 1962 case ARM::VLD1d8wb_fixed : return true; 1963 case ARM::VLD1d16wb_fixed : return true; 1964 case ARM::VLD1d64Qwb_fixed : return true; 1965 case ARM::VLD1d32wb_fixed : return true; 1966 case ARM::VLD1d64wb_fixed : return true; 1967 case ARM::VLD1d8TPseudoWB_fixed : return true; 1968 case ARM::VLD1d16TPseudoWB_fixed : return true; 1969 case ARM::VLD1d32TPseudoWB_fixed : return true; 1970 case ARM::VLD1d64TPseudoWB_fixed : return true; 1971 case ARM::VLD1d8QPseudoWB_fixed : return true; 1972 case ARM::VLD1d16QPseudoWB_fixed : return true; 1973 case ARM::VLD1d32QPseudoWB_fixed : return true; 1974 case ARM::VLD1d64QPseudoWB_fixed : return true; 1975 case ARM::VLD1q8wb_fixed : return true; 1976 case ARM::VLD1q16wb_fixed : return true; 1977 case ARM::VLD1q32wb_fixed : return true; 1978 case ARM::VLD1q64wb_fixed : return true; 1979 case ARM::VLD1DUPd8wb_fixed : return true; 1980 case ARM::VLD1DUPd16wb_fixed : return true; 1981 case ARM::VLD1DUPd32wb_fixed : return true; 1982 case ARM::VLD1DUPq8wb_fixed : return true; 1983 case ARM::VLD1DUPq16wb_fixed : return true; 1984 case ARM::VLD1DUPq32wb_fixed : return true; 1985 case ARM::VLD2d8wb_fixed : return true; 1986 case ARM::VLD2d16wb_fixed : return true; 1987 case ARM::VLD2d32wb_fixed : return true; 1988 case ARM::VLD2q8PseudoWB_fixed : 
return true; 1989 case ARM::VLD2q16PseudoWB_fixed : return true; 1990 case ARM::VLD2q32PseudoWB_fixed : return true; 1991 case ARM::VLD2DUPd8wb_fixed : return true; 1992 case ARM::VLD2DUPd16wb_fixed : return true; 1993 case ARM::VLD2DUPd32wb_fixed : return true; 1994 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true; 1995 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true; 1996 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true; 1997 } 1998 } 1999 2000 static bool isVSTfixed(unsigned Opc) 2001 { 2002 switch (Opc) { 2003 default: return false; 2004 case ARM::VST1d8wb_fixed : return true; 2005 case ARM::VST1d16wb_fixed : return true; 2006 case ARM::VST1d32wb_fixed : return true; 2007 case ARM::VST1d64wb_fixed : return true; 2008 case ARM::VST1q8wb_fixed : return true; 2009 case ARM::VST1q16wb_fixed : return true; 2010 case ARM::VST1q32wb_fixed : return true; 2011 case ARM::VST1q64wb_fixed : return true; 2012 case ARM::VST1d8TPseudoWB_fixed : return true; 2013 case ARM::VST1d16TPseudoWB_fixed : return true; 2014 case ARM::VST1d32TPseudoWB_fixed : return true; 2015 case ARM::VST1d64TPseudoWB_fixed : return true; 2016 case ARM::VST1d8QPseudoWB_fixed : return true; 2017 case ARM::VST1d16QPseudoWB_fixed : return true; 2018 case ARM::VST1d32QPseudoWB_fixed : return true; 2019 case ARM::VST1d64QPseudoWB_fixed : return true; 2020 case ARM::VST2d8wb_fixed : return true; 2021 case ARM::VST2d16wb_fixed : return true; 2022 case ARM::VST2d32wb_fixed : return true; 2023 case ARM::VST2q8PseudoWB_fixed : return true; 2024 case ARM::VST2q16PseudoWB_fixed : return true; 2025 case ARM::VST2q32PseudoWB_fixed : return true; 2026 } 2027 } 2028 2029 // Get the register stride update opcode of a VLD/VST instruction that 2030 // is otherwise equivalent to the given fixed stride updating instruction. 
2031 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 2032 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 2033 && "Incorrect fixed stride updating instruction."); 2034 switch (Opc) { 2035 default: break; 2036 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 2037 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 2038 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 2039 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 2040 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 2041 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 2042 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 2043 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 2044 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 2045 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 2046 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register; 2047 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register; 2048 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register; 2049 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 2050 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register; 2051 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register; 2052 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register; 2053 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 2054 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; 2055 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; 2056 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; 2057 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; 2058 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; 2059 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; 2060 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register; 2061 case 
ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register; 2062 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register; 2063 2064 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 2065 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 2066 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 2067 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 2068 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 2069 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 2070 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 2071 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 2072 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register; 2073 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register; 2074 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register; 2075 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; 2076 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register; 2077 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register; 2078 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register; 2079 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 2080 2081 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 2082 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 2083 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 2084 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 2085 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 2086 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 2087 2088 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 2089 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 2090 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 2091 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 2092 case 
ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 2093 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 2094 2095 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 2096 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 2097 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 2098 } 2099 return Opc; // If not one we handle, return it unchanged. 2100 } 2101 2102 /// Returns true if the given increment is a Constant known to be equal to the 2103 /// access size performed by a NEON load/store. This means the "[rN]!" form can 2104 /// be used. 2105 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { 2106 auto C = dyn_cast<ConstantSDNode>(Inc); 2107 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; 2108 } 2109 2110 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 2111 const uint16_t *DOpcodes, 2112 const uint16_t *QOpcodes0, 2113 const uint16_t *QOpcodes1) { 2114 assert(Subtarget->hasNEON()); 2115 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 2116 SDLoc dl(N); 2117 2118 SDValue MemAddr, Align; 2119 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2120 // nodes are not intrinsics. 2121 unsigned AddrOpIdx = IsIntrinsic ? 
2 : 1; 2122 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2123 return; 2124 2125 SDValue Chain = N->getOperand(0); 2126 EVT VT = N->getValueType(0); 2127 bool is64BitVector = VT.is64BitVector(); 2128 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2129 2130 unsigned OpcodeIndex; 2131 switch (VT.getSimpleVT().SimpleTy) { 2132 default: llvm_unreachable("unhandled vld type"); 2133 // Double-register operations: 2134 case MVT::v8i8: OpcodeIndex = 0; break; 2135 case MVT::v4f16: 2136 case MVT::v4bf16: 2137 case MVT::v4i16: OpcodeIndex = 1; break; 2138 case MVT::v2f32: 2139 case MVT::v2i32: OpcodeIndex = 2; break; 2140 case MVT::v1i64: OpcodeIndex = 3; break; 2141 // Quad-register operations: 2142 case MVT::v16i8: OpcodeIndex = 0; break; 2143 case MVT::v8f16: 2144 case MVT::v8bf16: 2145 case MVT::v8i16: OpcodeIndex = 1; break; 2146 case MVT::v4f32: 2147 case MVT::v4i32: OpcodeIndex = 2; break; 2148 case MVT::v2f64: 2149 case MVT::v2i64: OpcodeIndex = 3; break; 2150 } 2151 2152 EVT ResTy; 2153 if (NumVecs == 1) 2154 ResTy = VT; 2155 else { 2156 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2157 if (!is64BitVector) 2158 ResTyElts *= 2; 2159 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2160 } 2161 std::vector<EVT> ResTys; 2162 ResTys.push_back(ResTy); 2163 if (isUpdating) 2164 ResTys.push_back(MVT::i32); 2165 ResTys.push_back(MVT::Other); 2166 2167 SDValue Pred = getAL(CurDAG, dl); 2168 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2169 SDNode *VLd; 2170 SmallVector<SDValue, 7> Ops; 2171 2172 // Double registers and VLD1/VLD2 quad registers are directly supported. 2173 if (is64BitVector || NumVecs <= 2) { 2174 unsigned Opc = (is64BitVector ? 
DOpcodes[OpcodeIndex] : 2175 QOpcodes0[OpcodeIndex]); 2176 Ops.push_back(MemAddr); 2177 Ops.push_back(Align); 2178 if (isUpdating) { 2179 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2180 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2181 if (!IsImmUpdate) { 2182 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 2183 // check for the opcode rather than the number of vector elements. 2184 if (isVLDfixed(Opc)) 2185 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2186 Ops.push_back(Inc); 2187 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in 2188 // the operands if not such an opcode. 2189 } else if (!isVLDfixed(Opc)) 2190 Ops.push_back(Reg0); 2191 } 2192 Ops.push_back(Pred); 2193 Ops.push_back(Reg0); 2194 Ops.push_back(Chain); 2195 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2196 2197 } else { 2198 // Otherwise, quad registers are loaded with two separate instructions, 2199 // where one loads the even registers and the other loads the odd registers. 2200 EVT AddrTy = MemAddr.getValueType(); 2201 2202 // Load the even subregs. This is always an updating load, so that it 2203 // provides the address to the second load for the odd subregs. 2204 SDValue ImplDef = 2205 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2206 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 2207 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2208 ResTy, AddrTy, MVT::Other, OpsA); 2209 Chain = SDValue(VLdA, 2); 2210 2211 // Load the odd subregs. 
2212 Ops.push_back(SDValue(VLdA, 1)); 2213 Ops.push_back(Align); 2214 if (isUpdating) { 2215 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2216 assert(isa<ConstantSDNode>(Inc.getNode()) && 2217 "only constant post-increment update allowed for VLD3/4"); 2218 (void)Inc; 2219 Ops.push_back(Reg0); 2220 } 2221 Ops.push_back(SDValue(VLdA, 0)); 2222 Ops.push_back(Pred); 2223 Ops.push_back(Reg0); 2224 Ops.push_back(Chain); 2225 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 2226 } 2227 2228 // Transfer memoperands. 2229 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2230 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp}); 2231 2232 if (NumVecs == 1) { 2233 ReplaceNode(N, VLd); 2234 return; 2235 } 2236 2237 // Extract out the subregisters. 2238 SDValue SuperReg = SDValue(VLd, 0); 2239 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2240 ARM::qsub_3 == ARM::qsub_0 + 3, 2241 "Unexpected subreg numbering"); 2242 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0); 2243 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2244 ReplaceUses(SDValue(N, Vec), 2245 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2246 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 2247 if (isUpdating) 2248 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 2249 CurDAG->RemoveDeadNode(N); 2250 } 2251 2252 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 2253 const uint16_t *DOpcodes, 2254 const uint16_t *QOpcodes0, 2255 const uint16_t *QOpcodes1) { 2256 assert(Subtarget->hasNEON()); 2257 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 2258 SDLoc dl(N); 2259 2260 SDValue MemAddr, Align; 2261 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2262 // nodes are not intrinsics. 2263 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2264 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 
2 : 1) 2265 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2266 return; 2267 2268 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2269 2270 SDValue Chain = N->getOperand(0); 2271 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2272 bool is64BitVector = VT.is64BitVector(); 2273 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2274 2275 unsigned OpcodeIndex; 2276 switch (VT.getSimpleVT().SimpleTy) { 2277 default: llvm_unreachable("unhandled vst type"); 2278 // Double-register operations: 2279 case MVT::v8i8: OpcodeIndex = 0; break; 2280 case MVT::v4f16: 2281 case MVT::v4bf16: 2282 case MVT::v4i16: OpcodeIndex = 1; break; 2283 case MVT::v2f32: 2284 case MVT::v2i32: OpcodeIndex = 2; break; 2285 case MVT::v1i64: OpcodeIndex = 3; break; 2286 // Quad-register operations: 2287 case MVT::v16i8: OpcodeIndex = 0; break; 2288 case MVT::v8f16: 2289 case MVT::v8bf16: 2290 case MVT::v8i16: OpcodeIndex = 1; break; 2291 case MVT::v4f32: 2292 case MVT::v4i32: OpcodeIndex = 2; break; 2293 case MVT::v2f64: 2294 case MVT::v2i64: OpcodeIndex = 3; break; 2295 } 2296 2297 std::vector<EVT> ResTys; 2298 if (isUpdating) 2299 ResTys.push_back(MVT::i32); 2300 ResTys.push_back(MVT::Other); 2301 2302 SDValue Pred = getAL(CurDAG, dl); 2303 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2304 SmallVector<SDValue, 7> Ops; 2305 2306 // Double registers and VST1/VST2 quad registers are directly supported. 2307 if (is64BitVector || NumVecs <= 2) { 2308 SDValue SrcReg; 2309 if (NumVecs == 1) { 2310 SrcReg = N->getOperand(Vec0Idx); 2311 } else if (is64BitVector) { 2312 // Form a REG_SEQUENCE to force register allocation. 2313 SDValue V0 = N->getOperand(Vec0Idx + 0); 2314 SDValue V1 = N->getOperand(Vec0Idx + 1); 2315 if (NumVecs == 2) 2316 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2317 else { 2318 SDValue V2 = N->getOperand(Vec0Idx + 2); 2319 // If it's a vst3, form a quad D-register and leave the last part as 2320 // an undef. 
2321 SDValue V3 = (NumVecs == 3) 2322 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2323 : N->getOperand(Vec0Idx + 3); 2324 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2325 } 2326 } else { 2327 // Form a QQ register. 2328 SDValue Q0 = N->getOperand(Vec0Idx); 2329 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2330 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2331 } 2332 2333 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2334 QOpcodes0[OpcodeIndex]); 2335 Ops.push_back(MemAddr); 2336 Ops.push_back(Align); 2337 if (isUpdating) { 2338 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2339 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2340 if (!IsImmUpdate) { 2341 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so 2342 // check for the opcode rather than the number of vector elements. 2343 if (isVSTfixed(Opc)) 2344 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2345 Ops.push_back(Inc); 2346 } 2347 // VST1/VST2 fixed increment does not need Reg0 so only include it in 2348 // the operands if not such an opcode. 2349 else if (!isVSTfixed(Opc)) 2350 Ops.push_back(Reg0); 2351 } 2352 Ops.push_back(SrcReg); 2353 Ops.push_back(Pred); 2354 Ops.push_back(Reg0); 2355 Ops.push_back(Chain); 2356 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2357 2358 // Transfer memoperands. 2359 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp}); 2360 2361 ReplaceNode(N, VSt); 2362 return; 2363 } 2364 2365 // Otherwise, quad registers are stored with two separate instructions, 2366 // where one stores the even registers and the other stores the odd registers. 2367 2368 // Form the QQQQ REG_SEQUENCE. 2369 SDValue V0 = N->getOperand(Vec0Idx + 0); 2370 SDValue V1 = N->getOperand(Vec0Idx + 1); 2371 SDValue V2 = N->getOperand(Vec0Idx + 2); 2372 SDValue V3 = (NumVecs == 3) 2373 ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2374 : N->getOperand(Vec0Idx + 3); 2375 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2376 2377 // Store the even D registers. This is always an updating store, so that it 2378 // provides the address to the second store for the odd subregs. 2379 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2380 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2381 MemAddr.getValueType(), 2382 MVT::Other, OpsA); 2383 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp}); 2384 Chain = SDValue(VStA, 1); 2385 2386 // Store the odd D registers. 2387 Ops.push_back(SDValue(VStA, 0)); 2388 Ops.push_back(Align); 2389 if (isUpdating) { 2390 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2391 assert(isa<ConstantSDNode>(Inc.getNode()) && 2392 "only constant post-increment update allowed for VST3/4"); 2393 (void)Inc; 2394 Ops.push_back(Reg0); 2395 } 2396 Ops.push_back(RegSeq); 2397 Ops.push_back(Pred); 2398 Ops.push_back(Reg0); 2399 Ops.push_back(Chain); 2400 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2401 Ops); 2402 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp}); 2403 ReplaceNode(N, VStB); 2404 } 2405 2406 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2407 unsigned NumVecs, 2408 const uint16_t *DOpcodes, 2409 const uint16_t *QOpcodes) { 2410 assert(Subtarget->hasNEON()); 2411 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2412 SDLoc dl(N); 2413 2414 SDValue MemAddr, Align; 2415 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2416 // nodes are not intrinsics. 2417 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2418 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 
2 : 1) 2419 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2420 return; 2421 2422 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2423 2424 SDValue Chain = N->getOperand(0); 2425 unsigned Lane = 2426 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); 2427 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2428 bool is64BitVector = VT.is64BitVector(); 2429 2430 unsigned Alignment = 0; 2431 if (NumVecs != 3) { 2432 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2433 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2434 if (Alignment > NumBytes) 2435 Alignment = NumBytes; 2436 if (Alignment < 8 && Alignment < NumBytes) 2437 Alignment = 0; 2438 // Alignment must be a power of two; make sure of that. 2439 Alignment = (Alignment & -Alignment); 2440 if (Alignment == 1) 2441 Alignment = 0; 2442 } 2443 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2444 2445 unsigned OpcodeIndex; 2446 switch (VT.getSimpleVT().SimpleTy) { 2447 default: llvm_unreachable("unhandled vld/vst lane type"); 2448 // Double-register operations: 2449 case MVT::v8i8: OpcodeIndex = 0; break; 2450 case MVT::v4f16: 2451 case MVT::v4bf16: 2452 case MVT::v4i16: OpcodeIndex = 1; break; 2453 case MVT::v2f32: 2454 case MVT::v2i32: OpcodeIndex = 2; break; 2455 // Quad-register operations: 2456 case MVT::v8f16: 2457 case MVT::v8bf16: 2458 case MVT::v8i16: OpcodeIndex = 0; break; 2459 case MVT::v4f32: 2460 case MVT::v4i32: OpcodeIndex = 1; break; 2461 } 2462 2463 std::vector<EVT> ResTys; 2464 if (IsLoad) { 2465 unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; 2466 if (!is64BitVector) 2467 ResTyElts *= 2; 2468 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2469 MVT::i64, ResTyElts)); 2470 } 2471 if (isUpdating) 2472 ResTys.push_back(MVT::i32); 2473 ResTys.push_back(MVT::Other); 2474 2475 SDValue Pred = getAL(CurDAG, dl); 2476 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2477 2478 SmallVector<SDValue, 8> Ops; 2479 Ops.push_back(MemAddr); 2480 Ops.push_back(Align); 2481 if (isUpdating) { 2482 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2483 bool IsImmUpdate = 2484 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2485 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 2486 } 2487 2488 SDValue SuperReg; 2489 SDValue V0 = N->getOperand(Vec0Idx + 0); 2490 SDValue V1 = N->getOperand(Vec0Idx + 1); 2491 if (NumVecs == 2) { 2492 if (is64BitVector) 2493 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2494 else 2495 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2496 } else { 2497 SDValue V2 = N->getOperand(Vec0Idx + 2); 2498 SDValue V3 = (NumVecs == 3) 2499 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2500 : N->getOperand(Vec0Idx + 3); 2501 if (is64BitVector) 2502 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2503 else 2504 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2505 } 2506 Ops.push_back(SuperReg); 2507 Ops.push_back(getI32Imm(Lane, dl)); 2508 Ops.push_back(Pred); 2509 Ops.push_back(Reg0); 2510 Ops.push_back(Chain); 2511 2512 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2513 QOpcodes[OpcodeIndex]); 2514 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2515 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp}); 2516 if (!IsLoad) { 2517 ReplaceNode(N, VLdLn); 2518 return; 2519 } 2520 2521 // Extract the subregisters. 
2522 SuperReg = SDValue(VLdLn, 0); 2523 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2524 ARM::qsub_3 == ARM::qsub_0 + 3, 2525 "Unexpected subreg numbering"); 2526 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2527 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2528 ReplaceUses(SDValue(N, Vec), 2529 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2530 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2531 if (isUpdating) 2532 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2533 CurDAG->RemoveDeadNode(N); 2534 } 2535 2536 template <typename SDValueVector> 2537 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2538 SDValue PredicateMask) { 2539 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2540 Ops.push_back(PredicateMask); 2541 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2542 } 2543 2544 template <typename SDValueVector> 2545 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2546 SDValue PredicateMask, 2547 SDValue Inactive) { 2548 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2549 Ops.push_back(PredicateMask); 2550 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2551 Ops.push_back(Inactive); 2552 } 2553 2554 template <typename SDValueVector> 2555 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) { 2556 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2557 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2558 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2559 } 2560 2561 template <typename SDValueVector> 2562 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2563 EVT InactiveTy) { 2564 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2565 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2566 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2567 Ops.push_back(SDValue( 2568 
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0)); 2569 } 2570 2571 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, 2572 bool Predicated) { 2573 SDLoc Loc(N); 2574 SmallVector<SDValue, 8> Ops; 2575 2576 uint16_t Opcode; 2577 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) { 2578 case 32: 2579 Opcode = Opcodes[0]; 2580 break; 2581 case 64: 2582 Opcode = Opcodes[1]; 2583 break; 2584 default: 2585 llvm_unreachable("bad vector element size in SelectMVE_WB"); 2586 } 2587 2588 Ops.push_back(N->getOperand(2)); // vector of base addresses 2589 2590 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2591 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset 2592 2593 if (Predicated) 2594 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2595 else 2596 AddEmptyMVEPredicateToOps(Ops, Loc); 2597 2598 Ops.push_back(N->getOperand(0)); // chain 2599 2600 SmallVector<EVT, 8> VTs; 2601 VTs.push_back(N->getValueType(1)); 2602 VTs.push_back(N->getValueType(0)); 2603 VTs.push_back(N->getValueType(2)); 2604 2605 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops); 2606 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 2607 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 2608 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 2609 transferMemOperands(N, New); 2610 CurDAG->RemoveDeadNode(N); 2611 } 2612 2613 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, 2614 bool Immediate, 2615 bool HasSaturationOperand) { 2616 SDLoc Loc(N); 2617 SmallVector<SDValue, 8> Ops; 2618 2619 // Two 32-bit halves of the value to be shifted 2620 Ops.push_back(N->getOperand(1)); 2621 Ops.push_back(N->getOperand(2)); 2622 2623 // The shift count 2624 if (Immediate) { 2625 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2626 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2627 } else { 2628 Ops.push_back(N->getOperand(3)); 2629 } 2630 2631 // The 
immediate saturation operand, if any 2632 if (HasSaturationOperand) { 2633 int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(); 2634 int SatBit = (SatOp == 64 ? 0 : 1); 2635 Ops.push_back(getI32Imm(SatBit, Loc)); 2636 } 2637 2638 // MVE scalar shifts are IT-predicable, so include the standard 2639 // predicate arguments. 2640 Ops.push_back(getAL(CurDAG, Loc)); 2641 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2642 2643 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2644 } 2645 2646 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 2647 uint16_t OpcodeWithNoCarry, 2648 bool Add, bool Predicated) { 2649 SDLoc Loc(N); 2650 SmallVector<SDValue, 8> Ops; 2651 uint16_t Opcode; 2652 2653 unsigned FirstInputOp = Predicated ? 2 : 1; 2654 2655 // Two input vectors and the input carry flag 2656 Ops.push_back(N->getOperand(FirstInputOp)); 2657 Ops.push_back(N->getOperand(FirstInputOp + 1)); 2658 SDValue CarryIn = N->getOperand(FirstInputOp + 2); 2659 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn); 2660 uint32_t CarryMask = 1 << 29; 2661 uint32_t CarryExpected = Add ? 
0 : CarryMask; 2662 if (CarryInConstant && 2663 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) { 2664 Opcode = OpcodeWithNoCarry; 2665 } else { 2666 Ops.push_back(CarryIn); 2667 Opcode = OpcodeWithCarry; 2668 } 2669 2670 if (Predicated) 2671 AddMVEPredicateToOps(Ops, Loc, 2672 N->getOperand(FirstInputOp + 3), // predicate 2673 N->getOperand(FirstInputOp - 1)); // inactive 2674 else 2675 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2676 2677 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2678 } 2679 2680 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) { 2681 SDLoc Loc(N); 2682 SmallVector<SDValue, 8> Ops; 2683 2684 // One vector input, followed by a 32-bit word of bits to shift in 2685 // and then an immediate shift count 2686 Ops.push_back(N->getOperand(1)); 2687 Ops.push_back(N->getOperand(2)); 2688 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2689 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2690 2691 if (Predicated) 2692 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2693 else 2694 AddEmptyMVEPredicateToOps(Ops, Loc); 2695 2696 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops)); 2697 } 2698 2699 static bool SDValueToConstBool(SDValue SDVal) { 2700 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant"); 2701 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal); 2702 uint64_t Value = SDValConstant->getZExtValue(); 2703 assert((Value == 0 || Value == 1) && "expected value 0 or 1"); 2704 return Value; 2705 } 2706 2707 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 2708 const uint16_t *OpcodesS, 2709 const uint16_t *OpcodesU, 2710 size_t Stride, size_t TySize) { 2711 assert(TySize < Stride && "Invalid TySize"); 2712 bool IsUnsigned = SDValueToConstBool(N->getOperand(1)); 2713 bool IsSub = SDValueToConstBool(N->getOperand(2)); 2714 bool IsExchange = 
SDValueToConstBool(N->getOperand(3)); 2715 if (IsUnsigned) { 2716 assert(!IsSub && 2717 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist"); 2718 assert(!IsExchange && 2719 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist"); 2720 } 2721 2722 auto OpIsZero = [N](size_t OpNo) { 2723 return isNullConstant(N->getOperand(OpNo)); 2724 }; 2725 2726 // If the input accumulator value is not zero, select an instruction with 2727 // accumulator, otherwise select an instruction without accumulator 2728 bool IsAccum = !(OpIsZero(4) && OpIsZero(5)); 2729 2730 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS; 2731 if (IsSub) 2732 Opcodes += 4 * Stride; 2733 if (IsExchange) 2734 Opcodes += 2 * Stride; 2735 if (IsAccum) 2736 Opcodes += Stride; 2737 uint16_t Opcode = Opcodes[TySize]; 2738 2739 SDLoc Loc(N); 2740 SmallVector<SDValue, 8> Ops; 2741 // Push the accumulator operands, if they are used 2742 if (IsAccum) { 2743 Ops.push_back(N->getOperand(4)); 2744 Ops.push_back(N->getOperand(5)); 2745 } 2746 // Push the two vector operands 2747 Ops.push_back(N->getOperand(6)); 2748 Ops.push_back(N->getOperand(7)); 2749 2750 if (Predicated) 2751 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8)); 2752 else 2753 AddEmptyMVEPredicateToOps(Ops, Loc); 2754 2755 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2756 } 2757 2758 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated, 2759 const uint16_t *OpcodesS, 2760 const uint16_t *OpcodesU) { 2761 EVT VecTy = N->getOperand(6).getValueType(); 2762 size_t SizeIndex; 2763 switch (VecTy.getVectorElementType().getSizeInBits()) { 2764 case 16: 2765 SizeIndex = 0; 2766 break; 2767 case 32: 2768 SizeIndex = 1; 2769 break; 2770 default: 2771 llvm_unreachable("bad vector element size"); 2772 } 2773 2774 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex); 2775 } 2776 2777 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, 2778 const uint16_t *OpcodesS, 2779 
const uint16_t *OpcodesU) { 2780 assert( 2781 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() == 2782 32 && 2783 "bad vector element size"); 2784 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0); 2785 } 2786 2787 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs, 2788 const uint16_t *const *Opcodes, 2789 bool HasWriteback) { 2790 EVT VT = N->getValueType(0); 2791 SDLoc Loc(N); 2792 2793 const uint16_t *OurOpcodes; 2794 switch (VT.getVectorElementType().getSizeInBits()) { 2795 case 8: 2796 OurOpcodes = Opcodes[0]; 2797 break; 2798 case 16: 2799 OurOpcodes = Opcodes[1]; 2800 break; 2801 case 32: 2802 OurOpcodes = Opcodes[2]; 2803 break; 2804 default: 2805 llvm_unreachable("bad vector element size in SelectMVE_VLD"); 2806 } 2807 2808 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2); 2809 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other}; 2810 unsigned PtrOperand = HasWriteback ? 1 : 2; 2811 2812 auto Data = SDValue( 2813 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0); 2814 SDValue Chain = N->getOperand(0); 2815 // Add a MVE_VLDn instruction for each Vec, except the last 2816 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) { 2817 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2818 auto LoadInst = 2819 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops); 2820 Data = SDValue(LoadInst, 0); 2821 Chain = SDValue(LoadInst, 1); 2822 transferMemOperands(N, LoadInst); 2823 } 2824 // The last may need a writeback on it 2825 if (HasWriteback) 2826 ResultTys = {DataTy, MVT::i32, MVT::Other}; 2827 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2828 auto LoadInst = 2829 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops); 2830 transferMemOperands(N, LoadInst); 2831 2832 unsigned i; 2833 for (i = 0; i < NumVecs; i++) 2834 ReplaceUses(SDValue(N, i), 2835 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, 2836 
SDValue(LoadInst, 0))); 2837 if (HasWriteback) 2838 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1)); 2839 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1)); 2840 CurDAG->RemoveDeadNode(N); 2841 } 2842 2843 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, 2844 bool Wrapping, bool Predicated) { 2845 EVT VT = N->getValueType(0); 2846 SDLoc Loc(N); 2847 2848 uint16_t Opcode; 2849 switch (VT.getScalarSizeInBits()) { 2850 case 8: 2851 Opcode = Opcodes[0]; 2852 break; 2853 case 16: 2854 Opcode = Opcodes[1]; 2855 break; 2856 case 32: 2857 Opcode = Opcodes[2]; 2858 break; 2859 default: 2860 llvm_unreachable("bad vector element size in SelectMVE_VxDUP"); 2861 } 2862 2863 SmallVector<SDValue, 8> Ops; 2864 unsigned OpIdx = 1; 2865 2866 SDValue Inactive; 2867 if (Predicated) 2868 Inactive = N->getOperand(OpIdx++); 2869 2870 Ops.push_back(N->getOperand(OpIdx++)); // base 2871 if (Wrapping) 2872 Ops.push_back(N->getOperand(OpIdx++)); // limit 2873 2874 SDValue ImmOp = N->getOperand(OpIdx++); // step 2875 int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue(); 2876 Ops.push_back(getI32Imm(ImmValue, Loc)); 2877 2878 if (Predicated) 2879 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive); 2880 else 2881 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2882 2883 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2884 } 2885 2886 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode, 2887 size_t NumExtraOps, bool HasAccum) { 2888 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian(); 2889 SDLoc Loc(N); 2890 SmallVector<SDValue, 8> Ops; 2891 2892 unsigned OpIdx = 1; 2893 2894 // Convert and append the immediate operand designating the coprocessor. 
2895 SDValue ImmCorpoc = N->getOperand(OpIdx++); 2896 uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue(); 2897 Ops.push_back(getI32Imm(ImmCoprocVal, Loc)); 2898 2899 // For accumulating variants copy the low and high order parts of the 2900 // accumulator into a register pair and add it to the operand vector. 2901 if (HasAccum) { 2902 SDValue AccLo = N->getOperand(OpIdx++); 2903 SDValue AccHi = N->getOperand(OpIdx++); 2904 if (IsBigEndian) 2905 std::swap(AccLo, AccHi); 2906 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0)); 2907 } 2908 2909 // Copy extra operands as-is. 2910 for (size_t I = 0; I < NumExtraOps; I++) 2911 Ops.push_back(N->getOperand(OpIdx++)); 2912 2913 // Convert and append the immediate operand 2914 SDValue Imm = N->getOperand(OpIdx); 2915 uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue(); 2916 Ops.push_back(getI32Imm(ImmVal, Loc)); 2917 2918 // Accumulating variants are IT-predicable, add predicate operands. 2919 if (HasAccum) { 2920 SDValue Pred = getAL(CurDAG, Loc); 2921 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2922 Ops.push_back(Pred); 2923 Ops.push_back(PredReg); 2924 } 2925 2926 // Create the CDE intruction 2927 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops); 2928 SDValue ResultPair = SDValue(InstrNode, 0); 2929 2930 // The original intrinsic had two outputs, and the output of the dual-register 2931 // CDE instruction is a register pair. We need to extract the two subregisters 2932 // and replace all uses of the original outputs with the extracted 2933 // subregisters. 
2934 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1}; 2935 if (IsBigEndian) 2936 std::swap(SubRegs[0], SubRegs[1]); 2937 2938 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) { 2939 if (SDValue(N, ResIdx).use_empty()) 2940 continue; 2941 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc, 2942 MVT::i32, ResultPair); 2943 ReplaceUses(SDValue(N, ResIdx), SubReg); 2944 } 2945 2946 CurDAG->RemoveDeadNode(N); 2947 } 2948 2949 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, 2950 bool isUpdating, unsigned NumVecs, 2951 const uint16_t *DOpcodes, 2952 const uint16_t *QOpcodes0, 2953 const uint16_t *QOpcodes1) { 2954 assert(Subtarget->hasNEON()); 2955 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2956 SDLoc dl(N); 2957 2958 SDValue MemAddr, Align; 2959 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2960 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2961 return; 2962 2963 SDValue Chain = N->getOperand(0); 2964 EVT VT = N->getValueType(0); 2965 bool is64BitVector = VT.is64BitVector(); 2966 2967 unsigned Alignment = 0; 2968 if (NumVecs != 3) { 2969 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2970 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2971 if (Alignment > NumBytes) 2972 Alignment = NumBytes; 2973 if (Alignment < 8 && Alignment < NumBytes) 2974 Alignment = 0; 2975 // Alignment must be a power of two; make sure of that. 
2976 Alignment = (Alignment & -Alignment); 2977 if (Alignment == 1) 2978 Alignment = 0; 2979 } 2980 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2981 2982 unsigned OpcodeIndex; 2983 switch (VT.getSimpleVT().SimpleTy) { 2984 default: llvm_unreachable("unhandled vld-dup type"); 2985 case MVT::v8i8: 2986 case MVT::v16i8: OpcodeIndex = 0; break; 2987 case MVT::v4i16: 2988 case MVT::v8i16: 2989 case MVT::v4f16: 2990 case MVT::v8f16: 2991 case MVT::v4bf16: 2992 case MVT::v8bf16: 2993 OpcodeIndex = 1; break; 2994 case MVT::v2f32: 2995 case MVT::v2i32: 2996 case MVT::v4f32: 2997 case MVT::v4i32: OpcodeIndex = 2; break; 2998 case MVT::v1f64: 2999 case MVT::v1i64: OpcodeIndex = 3; break; 3000 } 3001 3002 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 3003 if (!is64BitVector) 3004 ResTyElts *= 2; 3005 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 3006 3007 std::vector<EVT> ResTys; 3008 ResTys.push_back(ResTy); 3009 if (isUpdating) 3010 ResTys.push_back(MVT::i32); 3011 ResTys.push_back(MVT::Other); 3012 3013 SDValue Pred = getAL(CurDAG, dl); 3014 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3015 3016 SmallVector<SDValue, 6> Ops; 3017 Ops.push_back(MemAddr); 3018 Ops.push_back(Align); 3019 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] 3020 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex] 3021 : QOpcodes1[OpcodeIndex]; 3022 if (isUpdating) { 3023 SDValue Inc = N->getOperand(2); 3024 bool IsImmUpdate = 3025 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 3026 if (IsImmUpdate) { 3027 if (!isVLDfixed(Opc)) 3028 Ops.push_back(Reg0); 3029 } else { 3030 if (isVLDfixed(Opc)) 3031 Opc = getVLDSTRegisterUpdateOpcode(Opc); 3032 Ops.push_back(Inc); 3033 } 3034 } 3035 if (is64BitVector || NumVecs == 1) { 3036 // Double registers and VLD1 quad registers are directly supported. 
3037 } else if (NumVecs == 2) { 3038 const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain}; 3039 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy, 3040 MVT::Other, OpsA); 3041 Chain = SDValue(VLdA, 1); 3042 } else { 3043 SDValue ImplDef = SDValue( 3044 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 3045 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain}; 3046 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy, 3047 MVT::Other, OpsA); 3048 Ops.push_back(SDValue(VLdA, 0)); 3049 Chain = SDValue(VLdA, 1); 3050 } 3051 3052 Ops.push_back(Pred); 3053 Ops.push_back(Reg0); 3054 Ops.push_back(Chain); 3055 3056 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 3057 3058 // Transfer memoperands. 3059 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3060 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp}); 3061 3062 // Extract the subregisters. 3063 if (NumVecs == 1) { 3064 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0)); 3065 } else { 3066 SDValue SuperReg = SDValue(VLdDup, 0); 3067 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); 3068 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 3069 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) { 3070 ReplaceUses(SDValue(N, Vec), 3071 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 3072 } 3073 } 3074 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 3075 if (isUpdating) 3076 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 3077 CurDAG->RemoveDeadNode(N); 3078 } 3079 3080 bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) { 3081 if (!Subtarget->hasMVEIntegerOps()) 3082 return false; 3083 3084 SDLoc dl(N); 3085 3086 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and 3087 // extracts of v8f16 and v8i16 vectors. 
Check that we have two adjacent 3088 // inserts of the correct type: 3089 SDValue Ins1 = SDValue(N, 0); 3090 SDValue Ins2 = N->getOperand(0); 3091 EVT VT = Ins1.getValueType(); 3092 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() || 3093 !isa<ConstantSDNode>(Ins1.getOperand(2)) || 3094 !isa<ConstantSDNode>(Ins2.getOperand(2)) || 3095 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT)) 3096 return false; 3097 3098 unsigned Lane1 = Ins1.getConstantOperandVal(2); 3099 unsigned Lane2 = Ins2.getConstantOperandVal(2); 3100 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1) 3101 return false; 3102 3103 // If the inserted values will be able to use T/B already, leave it to the 3104 // existing tablegen patterns. For example VCVTT/VCVTB. 3105 SDValue Val1 = Ins1.getOperand(1); 3106 SDValue Val2 = Ins2.getOperand(1); 3107 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND) 3108 return false; 3109 3110 // Check if the inserted values are both extracts. 3111 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 3112 Val1.getOpcode() == ARMISD::VGETLANEu) && 3113 (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 3114 Val2.getOpcode() == ARMISD::VGETLANEu) && 3115 isa<ConstantSDNode>(Val1.getOperand(1)) && 3116 isa<ConstantSDNode>(Val2.getOperand(1)) && 3117 (Val1.getOperand(0).getValueType() == MVT::v8f16 || 3118 Val1.getOperand(0).getValueType() == MVT::v8i16) && 3119 (Val2.getOperand(0).getValueType() == MVT::v8f16 || 3120 Val2.getOperand(0).getValueType() == MVT::v8i16)) { 3121 unsigned ExtractLane1 = Val1.getConstantOperandVal(1); 3122 unsigned ExtractLane2 = Val2.getConstantOperandVal(1); 3123 3124 // If the two extracted lanes are from the same place and adjacent, this 3125 // simplifies into a f32 lane move. 
3126 if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 && 3127 ExtractLane1 == ExtractLane2 + 1) { 3128 SDValue NewExt = CurDAG->getTargetExtractSubreg( 3129 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0)); 3130 SDValue NewIns = CurDAG->getTargetInsertSubreg( 3131 ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0), 3132 NewExt); 3133 ReplaceUses(Ins1, NewIns); 3134 return true; 3135 } 3136 3137 // Else v8i16 pattern of an extract and an insert, with a optional vmovx for 3138 // extracting odd lanes. 3139 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) { 3140 SDValue Inp1 = CurDAG->getTargetExtractSubreg( 3141 ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0)); 3142 SDValue Inp2 = CurDAG->getTargetExtractSubreg( 3143 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0)); 3144 if (ExtractLane1 % 2 != 0) 3145 Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0); 3146 if (ExtractLane2 % 2 != 0) 3147 Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0); 3148 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1); 3149 SDValue NewIns = 3150 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, 3151 Ins2.getOperand(0), SDValue(VINS, 0)); 3152 ReplaceUses(Ins1, NewIns); 3153 return true; 3154 } 3155 } 3156 3157 // The inserted values are not extracted - if they are f16 then insert them 3158 // directly using a VINS. 
3159 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) { 3160 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1); 3161 SDValue NewIns = 3162 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, 3163 Ins2.getOperand(0), SDValue(VINS, 0)); 3164 ReplaceUses(Ins1, NewIns); 3165 return true; 3166 } 3167 3168 return false; 3169 } 3170 3171 bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N, 3172 SDNode *FMul, 3173 bool IsUnsigned, 3174 bool FixedToFloat) { 3175 auto Type = N->getValueType(0); 3176 unsigned ScalarBits = Type.getScalarSizeInBits(); 3177 if (ScalarBits > 32) 3178 return false; 3179 3180 SDNodeFlags FMulFlags = FMul->getFlags(); 3181 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3182 // allowed in 16 bit unsigned floats 3183 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned) 3184 return false; 3185 3186 SDValue ImmNode = FMul->getOperand(1); 3187 SDValue VecVal = FMul->getOperand(0); 3188 if (VecVal->getOpcode() == ISD::UINT_TO_FP || 3189 VecVal->getOpcode() == ISD::SINT_TO_FP) 3190 VecVal = VecVal->getOperand(0); 3191 3192 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits) 3193 return false; 3194 3195 if (ImmNode.getOpcode() == ISD::BITCAST) { 3196 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3197 return false; 3198 ImmNode = ImmNode.getOperand(0); 3199 } 3200 3201 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3202 return false; 3203 3204 APFloat ImmAPF(0.0f); 3205 switch (ImmNode.getOpcode()) { 3206 case ARMISD::VMOVIMM: 3207 case ARMISD::VDUP: { 3208 if (!isa<ConstantSDNode>(ImmNode.getOperand(0))) 3209 return false; 3210 unsigned Imm = ImmNode.getConstantOperandVal(0); 3211 if (ImmNode.getOpcode() == ARMISD::VMOVIMM) 3212 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits); 3213 ImmAPF = 3214 APFloat(ScalarBits == 32 ? 
APFloat::IEEEsingle() : APFloat::IEEEhalf(), 3215 APInt(ScalarBits, Imm)); 3216 break; 3217 } 3218 case ARMISD::VMOVFPIMM: { 3219 ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0))); 3220 break; 3221 } 3222 default: 3223 return false; 3224 } 3225 3226 // Where n is the number of fractional bits, multiplying by 2^n will convert 3227 // from float to fixed and multiplying by 2^-n will convert from fixed to 3228 // float. Taking log2 of the factor (after taking the inverse in the case of 3229 // float to fixed) will give n. 3230 APFloat ToConvert = ImmAPF; 3231 if (FixedToFloat) { 3232 if (!ImmAPF.getExactInverse(&ToConvert)) 3233 return false; 3234 } 3235 APSInt Converted(64, false); 3236 bool IsExact; 3237 ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, 3238 &IsExact); 3239 if (!IsExact || !Converted.isPowerOf2()) 3240 return false; 3241 3242 unsigned FracBits = Converted.logBase2(); 3243 if (FracBits > ScalarBits) 3244 return false; 3245 3246 SmallVector<SDValue, 3> Ops{ 3247 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)}; 3248 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type); 3249 3250 unsigned int Opcode; 3251 switch (ScalarBits) { 3252 case 16: 3253 if (FixedToFloat) 3254 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; 3255 else 3256 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3257 break; 3258 case 32: 3259 if (FixedToFloat) 3260 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; 3261 else 3262 Opcode = IsUnsigned ? 
ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3263 break; 3264 default: 3265 llvm_unreachable("unexpected number of scalar bits"); 3266 break; 3267 } 3268 3269 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops)); 3270 return true; 3271 } 3272 3273 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) { 3274 // Transform a floating-point to fixed-point conversion to a VCVT 3275 if (!Subtarget->hasMVEFloatOps()) 3276 return false; 3277 EVT Type = N->getValueType(0); 3278 if (!Type.isVector()) 3279 return false; 3280 unsigned int ScalarBits = Type.getScalarSizeInBits(); 3281 3282 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT || 3283 N->getOpcode() == ISD::FP_TO_UINT_SAT; 3284 SDNode *Node = N->getOperand(0).getNode(); 3285 3286 // floating-point to fixed-point with one fractional bit gets turned into an 3287 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y)) 3288 if (Node->getOpcode() == ISD::FADD) { 3289 if (Node->getOperand(0) != Node->getOperand(1)) 3290 return false; 3291 SDNodeFlags Flags = Node->getFlags(); 3292 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3293 // allowed in 16 bit unsigned floats 3294 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned) 3295 return false; 3296 3297 unsigned Opcode; 3298 switch (ScalarBits) { 3299 case 16: 3300 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3301 break; 3302 case 32: 3303 Opcode = IsUnsigned ? 
ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3304 break; 3305 } 3306 SmallVector<SDValue, 3> Ops{Node->getOperand(0), 3307 CurDAG->getConstant(1, dl, MVT::i32)}; 3308 AddEmptyMVEPredicateToOps(Ops, dl, Type); 3309 3310 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops)); 3311 return true; 3312 } 3313 3314 if (Node->getOpcode() != ISD::FMUL) 3315 return false; 3316 3317 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false); 3318 } 3319 3320 bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { 3321 // Transform a fixed-point to floating-point conversion to a VCVT 3322 if (!Subtarget->hasMVEFloatOps()) 3323 return false; 3324 auto Type = N->getValueType(0); 3325 if (!Type.isVector()) 3326 return false; 3327 3328 auto LHS = N->getOperand(0); 3329 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP) 3330 return false; 3331 3332 return transformFixedFloatingPointConversion( 3333 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true); 3334 } 3335 3336 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 3337 if (!Subtarget->hasV6T2Ops()) 3338 return false; 3339 3340 unsigned Opc = isSigned 3341 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 3342 : (Subtarget->isThumb() ? 
ARM::t2UBFX : ARM::UBFX); 3343 SDLoc dl(N); 3344 3345 // For unsigned extracts, check for a shift right and mask 3346 unsigned And_imm = 0; 3347 if (N->getOpcode() == ISD::AND) { 3348 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 3349 3350 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 3351 if (And_imm & (And_imm + 1)) 3352 return false; 3353 3354 unsigned Srl_imm = 0; 3355 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 3356 Srl_imm)) { 3357 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3358 3359 // Mask off the unnecessary bits of the AND immediate; normally 3360 // DAGCombine will do this, but that might not happen if 3361 // targetShrinkDemandedConstant chooses a different immediate. 3362 And_imm &= -1U >> Srl_imm; 3363 3364 // Note: The width operand is encoded as width-1. 3365 unsigned Width = llvm::countr_one(And_imm) - 1; 3366 unsigned LSB = Srl_imm; 3367 3368 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3369 3370 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 3371 // It's cheaper to use a right shift to extract the top bits. 3372 if (Subtarget->isThumb()) { 3373 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 3374 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3375 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3376 getAL(CurDAG, dl), Reg0, Reg0 }; 3377 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3378 return true; 3379 } 3380 3381 // ARM models shift instructions as MOVsi with shifter operand. 
3382 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 3383 SDValue ShOpc = 3384 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 3385 MVT::i32); 3386 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 3387 getAL(CurDAG, dl), Reg0, Reg0 }; 3388 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 3389 return true; 3390 } 3391 3392 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3393 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3394 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3395 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3396 getAL(CurDAG, dl), Reg0 }; 3397 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3398 return true; 3399 } 3400 } 3401 return false; 3402 } 3403 3404 // Otherwise, we're looking for a shift of a shift 3405 unsigned Shl_imm = 0; 3406 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 3407 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 3408 unsigned Srl_imm = 0; 3409 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 3410 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3411 // Note: The width operand is encoded as width-1. 
3412 unsigned Width = 32 - Srl_imm - 1; 3413 int LSB = Srl_imm - Shl_imm; 3414 if (LSB < 0) 3415 return false; 3416 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3417 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3418 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3419 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3420 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3421 getAL(CurDAG, dl), Reg0 }; 3422 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3423 return true; 3424 } 3425 } 3426 3427 // Or we are looking for a shift of an and, with a mask operand 3428 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 3429 isShiftedMask_32(And_imm)) { 3430 unsigned Srl_imm = 0; 3431 unsigned LSB = llvm::countr_zero(And_imm); 3432 // Shift must be the same as the ands lsb 3433 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 3434 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3435 unsigned MSB = llvm::Log2_32(And_imm); 3436 // Note: The width operand is encoded as width-1. 
3437 unsigned Width = MSB - LSB; 3438 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3439 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3440 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3441 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), 3442 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3443 getAL(CurDAG, dl), Reg0 }; 3444 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3445 return true; 3446 } 3447 } 3448 3449 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 3450 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 3451 unsigned LSB = 0; 3452 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 3453 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 3454 return false; 3455 3456 if (LSB + Width > 32) 3457 return false; 3458 3459 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3460 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx"); 3461 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3462 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3463 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 3464 getAL(CurDAG, dl), Reg0 }; 3465 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3466 return true; 3467 } 3468 3469 return false; 3470 } 3471 3472 /// Target-specific DAG combining for ISD::SUB. 3473 /// Target-independent combining lowers SELECT_CC nodes of the form 3474 /// select_cc setg[ge] X, 0, X, -X 3475 /// select_cc setgt X, -1, X, -X 3476 /// select_cc setl[te] X, 0, -X, X 3477 /// select_cc setlt X, 1, -X, X 3478 /// which represent Integer ABS into: 3479 /// Y = sra (X, size(X)-1); sub (xor (X, Y), Y) 3480 /// ARM instruction selection detects the latter and matches it to 3481 /// ARM::ABS or ARM::t2ABS machine node. 
3482 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ 3483 SDValue SUBSrc0 = N->getOperand(0); 3484 SDValue SUBSrc1 = N->getOperand(1); 3485 EVT VT = N->getValueType(0); 3486 3487 if (Subtarget->isThumb1Only()) 3488 return false; 3489 3490 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA) 3491 return false; 3492 3493 SDValue XORSrc0 = SUBSrc0.getOperand(0); 3494 SDValue XORSrc1 = SUBSrc0.getOperand(1); 3495 SDValue SRASrc0 = SUBSrc1.getOperand(0); 3496 SDValue SRASrc1 = SUBSrc1.getOperand(1); 3497 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 3498 EVT XType = SRASrc0.getValueType(); 3499 unsigned Size = XType.getSizeInBits() - 1; 3500 3501 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() && 3502 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) { 3503 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 3504 CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0); 3505 return true; 3506 } 3507 3508 return false; 3509 } 3510 3511 /// We've got special pseudo-instructions for these 3512 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3513 unsigned Opcode; 3514 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3515 if (MemTy == MVT::i8) 3516 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8; 3517 else if (MemTy == MVT::i16) 3518 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16; 3519 else if (MemTy == MVT::i32) 3520 Opcode = Subtarget->isThumb() ? 
ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32; 3521 else 3522 llvm_unreachable("Unknown AtomicCmpSwap type"); 3523 3524 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3525 N->getOperand(0)}; 3526 SDNode *CmpSwap = CurDAG->getMachineNode( 3527 Opcode, SDLoc(N), 3528 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 3529 3530 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3531 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3532 3533 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3534 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3535 CurDAG->RemoveDeadNode(N); 3536 } 3537 3538 static std::optional<std::pair<unsigned, unsigned>> 3539 getContiguousRangeOfSetBits(const APInt &A) { 3540 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1; 3541 unsigned LastOne = A.countr_zero(); 3542 if (A.popcount() != (FirstOne - LastOne + 1)) 3543 return std::nullopt; 3544 return std::make_pair(FirstOne, LastOne); 3545 } 3546 3547 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { 3548 assert(N->getOpcode() == ARMISD::CMPZ); 3549 SwitchEQNEToPLMI = false; 3550 3551 if (!Subtarget->isThumb()) 3552 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and 3553 // LSR don't exist as standalone instructions - they need the barrel shifter. 
3554 return; 3555 3556 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 3557 SDValue And = N->getOperand(0); 3558 if (!And->hasOneUse()) 3559 return; 3560 3561 SDValue Zero = N->getOperand(1); 3562 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isZero() || 3563 And->getOpcode() != ISD::AND) 3564 return; 3565 SDValue X = And.getOperand(0); 3566 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 3567 3568 if (!C) 3569 return; 3570 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 3571 if (!Range) 3572 return; 3573 3574 // There are several ways to lower this: 3575 SDNode *NewN; 3576 SDLoc dl(N); 3577 3578 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 3579 if (Subtarget->isThumb2()) { 3580 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri; 3581 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3582 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3583 CurDAG->getRegister(0, MVT::i32) }; 3584 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3585 } else { 3586 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 3587 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3588 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3589 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3590 } 3591 }; 3592 3593 if (Range->second == 0) { 3594 // 1. Mask includes the LSB -> Simply shift the top N bits off 3595 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3596 ReplaceNode(And.getNode(), NewN); 3597 } else if (Range->first == 31) { 3598 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 3599 NewN = EmitShift(ARM::tLSRri, X, Range->second); 3600 ReplaceNode(And.getNode(), NewN); 3601 } else if (Range->first == Range->second) { 3602 // 3. Only one bit is set. We can shift this into the sign bit and use a 3603 // PL/MI comparison. 
3604 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3605 ReplaceNode(And.getNode(), NewN); 3606 3607 SwitchEQNEToPLMI = true; 3608 } else if (!Subtarget->hasV6T2Ops()) { 3609 // 4. Do a double shift to clear bottom and top bits, but only in 3610 // thumb-1 mode as in thumb-2 we can use UBFX. 3611 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3612 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 3613 Range->second + (31 - Range->first)); 3614 ReplaceNode(And.getNode(), NewN); 3615 } 3616 } 3617 3618 static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3], 3619 unsigned Opc128[3]) { 3620 assert((VT.is64BitVector() || VT.is128BitVector()) && 3621 "Unexpected vector shuffle length"); 3622 switch (VT.getScalarSizeInBits()) { 3623 default: 3624 llvm_unreachable("Unexpected vector shuffle element size"); 3625 case 8: 3626 return VT.is64BitVector() ? Opc64[0] : Opc128[0]; 3627 case 16: 3628 return VT.is64BitVector() ? Opc64[1] : Opc128[1]; 3629 case 32: 3630 return VT.is64BitVector() ? Opc64[2] : Opc128[2]; 3631 } 3632 } 3633 3634 void ARMDAGToDAGISel::Select(SDNode *N) { 3635 SDLoc dl(N); 3636 3637 if (N->isMachineOpcode()) { 3638 N->setNodeId(-1); 3639 return; // Already selected. 3640 } 3641 3642 switch (N->getOpcode()) { 3643 default: break; 3644 case ISD::STORE: { 3645 // For Thumb1, match an sp-relative store in C++. This is a little 3646 // unfortunate, but I don't think I can make the chain check work 3647 // otherwise. (The chain of the store has to be the same as the chain 3648 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3649 // a direct reference to "SP".) 3650 // 3651 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3652 // a different addressing mode from other four-byte stores. 3653 // 3654 // This pattern usually comes up with call arguments. 
3655 StoreSDNode *ST = cast<StoreSDNode>(N); 3656 SDValue Ptr = ST->getBasePtr(); 3657 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3658 int RHSC = 0; 3659 if (Ptr.getOpcode() == ISD::ADD && 3660 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3661 Ptr = Ptr.getOperand(0); 3662 3663 if (Ptr.getOpcode() == ISD::CopyFromReg && 3664 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3665 Ptr.getOperand(0) == ST->getChain()) { 3666 SDValue Ops[] = {ST->getValue(), 3667 CurDAG->getRegister(ARM::SP, MVT::i32), 3668 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3669 getAL(CurDAG, dl), 3670 CurDAG->getRegister(0, MVT::i32), 3671 ST->getChain()}; 3672 MachineSDNode *ResNode = 3673 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3674 MachineMemOperand *MemOp = ST->getMemOperand(); 3675 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3676 ReplaceNode(N, ResNode); 3677 return; 3678 } 3679 } 3680 break; 3681 } 3682 case ISD::WRITE_REGISTER: 3683 if (tryWriteRegister(N)) 3684 return; 3685 break; 3686 case ISD::READ_REGISTER: 3687 if (tryReadRegister(N)) 3688 return; 3689 break; 3690 case ISD::INLINEASM: 3691 case ISD::INLINEASM_BR: 3692 if (tryInlineAsm(N)) 3693 return; 3694 break; 3695 case ISD::SUB: 3696 // Select special operations if SUB node forms integer ABS pattern 3697 if (tryABSOp(N)) 3698 return; 3699 // Other cases are autogenerated. 
3700 break; 3701 case ISD::Constant: { 3702 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 3703 // If we can't materialize the constant we need to use a literal pool 3704 if (ConstantMaterializationCost(Val, Subtarget) > 2 && 3705 !Subtarget->genExecuteOnly()) { 3706 SDValue CPIdx = CurDAG->getTargetConstantPool( 3707 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3708 TLI->getPointerTy(CurDAG->getDataLayout())); 3709 3710 SDNode *ResNode; 3711 if (Subtarget->isThumb()) { 3712 SDValue Ops[] = { 3713 CPIdx, 3714 getAL(CurDAG, dl), 3715 CurDAG->getRegister(0, MVT::i32), 3716 CurDAG->getEntryNode() 3717 }; 3718 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3719 Ops); 3720 } else { 3721 SDValue Ops[] = { 3722 CPIdx, 3723 CurDAG->getTargetConstant(0, dl, MVT::i32), 3724 getAL(CurDAG, dl), 3725 CurDAG->getRegister(0, MVT::i32), 3726 CurDAG->getEntryNode() 3727 }; 3728 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3729 Ops); 3730 } 3731 // Annotate the Node with memory operand information so that MachineInstr 3732 // queries work properly. This e.g. gives the register allocation the 3733 // required information for rematerialization. 3734 MachineFunction& MF = CurDAG->getMachineFunction(); 3735 MachineMemOperand *MemOp = 3736 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3737 MachineMemOperand::MOLoad, 4, Align(4)); 3738 3739 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3740 3741 ReplaceNode(N, ResNode); 3742 return; 3743 } 3744 3745 // Other cases are autogenerated. 3746 break; 3747 } 3748 case ISD::FrameIndex: { 3749 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
3750 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3751 SDValue TFI = CurDAG->getTargetFrameIndex( 3752 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3753 if (Subtarget->isThumb1Only()) { 3754 // Set the alignment of the frame object to 4, to avoid having to generate 3755 // more than one ADD 3756 MachineFrameInfo &MFI = MF->getFrameInfo(); 3757 if (MFI.getObjectAlign(FI) < Align(4)) 3758 MFI.setObjectAlignment(FI, Align(4)); 3759 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3760 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3761 return; 3762 } else { 3763 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3764 ARM::t2ADDri : ARM::ADDri); 3765 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3766 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3767 CurDAG->getRegister(0, MVT::i32) }; 3768 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3769 return; 3770 } 3771 } 3772 case ISD::INSERT_VECTOR_ELT: { 3773 if (tryInsertVectorElt(N)) 3774 return; 3775 break; 3776 } 3777 case ISD::SRL: 3778 if (tryV6T2BitfieldExtractOp(N, false)) 3779 return; 3780 break; 3781 case ISD::SIGN_EXTEND_INREG: 3782 case ISD::SRA: 3783 if (tryV6T2BitfieldExtractOp(N, true)) 3784 return; 3785 break; 3786 case ISD::FP_TO_UINT: 3787 case ISD::FP_TO_SINT: 3788 case ISD::FP_TO_UINT_SAT: 3789 case ISD::FP_TO_SINT_SAT: 3790 if (tryFP_TO_INT(N, dl)) 3791 return; 3792 break; 3793 case ISD::FMUL: 3794 if (tryFMULFixed(N, dl)) 3795 return; 3796 break; 3797 case ISD::MUL: 3798 if (Subtarget->isThumb1Only()) 3799 break; 3800 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3801 unsigned RHSV = C->getZExtValue(); 3802 if (!RHSV) break; 3803 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
3804 unsigned ShImm = Log2_32(RHSV-1); 3805 if (ShImm >= 32) 3806 break; 3807 SDValue V = N->getOperand(0); 3808 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3809 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3810 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3811 if (Subtarget->isThumb()) { 3812 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3813 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3814 return; 3815 } else { 3816 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3817 Reg0 }; 3818 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3819 return; 3820 } 3821 } 3822 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3823 unsigned ShImm = Log2_32(RHSV+1); 3824 if (ShImm >= 32) 3825 break; 3826 SDValue V = N->getOperand(0); 3827 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3828 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3829 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3830 if (Subtarget->isThumb()) { 3831 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3832 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3833 return; 3834 } else { 3835 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3836 Reg0 }; 3837 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3838 return; 3839 } 3840 } 3841 } 3842 break; 3843 case ISD::AND: { 3844 // Check for unsigned bitfield extract 3845 if (tryV6T2BitfieldExtractOp(N, false)) 3846 return; 3847 3848 // If an immediate is used in an AND node, it is possible that the immediate 3849 // can be more optimally materialized when negated. If this is the case we 3850 // can negate the immediate and use a BIC instead. 3851 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3852 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3853 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3854 3855 // In Thumb2 mode, an AND can take a 12-bit immediate. 
If this 3856 // immediate can be negated and fit in the immediate operand of 3857 // a t2BIC, don't do any manual transform here as this can be 3858 // handled by the generic ISel machinery. 3859 bool PreferImmediateEncoding = 3860 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3861 if (!PreferImmediateEncoding && 3862 ConstantMaterializationCost(Imm, Subtarget) > 3863 ConstantMaterializationCost(~Imm, Subtarget)) { 3864 // The current immediate costs more to materialize than a negated 3865 // immediate, so negate the immediate and use a BIC. 3866 SDValue NewImm = 3867 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 3868 // If the new constant didn't exist before, reposition it in the topological 3869 // ordering so it is just before N. Otherwise, don't touch its location. 3870 if (NewImm->getNodeId() == -1) 3871 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3872 3873 if (!Subtarget->hasThumb2()) { 3874 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3875 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3876 CurDAG->getRegister(0, MVT::i32)}; 3877 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3878 return; 3879 } else { 3880 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3881 CurDAG->getRegister(0, MVT::i32), 3882 CurDAG->getRegister(0, MVT::i32)}; 3883 ReplaceNode(N, 3884 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3885 return; 3886 } 3887 } 3888 } 3889 3890 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3891 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 3892 // are entirely contributed by c2 and lower 16-bits are entirely contributed 3893 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 3894 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 3895 EVT VT = N->getValueType(0); 3896 if (VT != MVT::i32) 3897 break; 3898 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3899 ? 
ARM::t2MOVTi16 3900 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 3901 if (!Opc) 3902 break; 3903 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3904 N1C = dyn_cast<ConstantSDNode>(N1); 3905 if (!N1C) 3906 break; 3907 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3908 SDValue N2 = N0.getOperand(1); 3909 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3910 if (!N2C) 3911 break; 3912 unsigned N1CVal = N1C->getZExtValue(); 3913 unsigned N2CVal = N2C->getZExtValue(); 3914 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3915 (N1CVal & 0xffffU) == 0xffffU && 3916 (N2CVal & 0xffffU) == 0x0U) { 3917 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3918 dl, MVT::i32); 3919 SDValue Ops[] = { N0.getOperand(0), Imm16, 3920 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3921 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3922 return; 3923 } 3924 } 3925 3926 break; 3927 } 3928 case ARMISD::UMAAL: { 3929 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3930 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3931 N->getOperand(2), N->getOperand(3), 3932 getAL(CurDAG, dl), 3933 CurDAG->getRegister(0, MVT::i32) }; 3934 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3935 return; 3936 } 3937 case ARMISD::UMLAL:{ 3938 if (Subtarget->isThumb()) { 3939 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3940 N->getOperand(3), getAL(CurDAG, dl), 3941 CurDAG->getRegister(0, MVT::i32)}; 3942 ReplaceNode( 3943 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3944 return; 3945 }else{ 3946 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3947 N->getOperand(3), getAL(CurDAG, dl), 3948 CurDAG->getRegister(0, MVT::i32), 3949 CurDAG->getRegister(0, MVT::i32) }; 3950 ReplaceNode(N, CurDAG->getMachineNode( 3951 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 3952 MVT::i32, MVT::i32, Ops)); 3953 return; 3954 } 3955 } 3956 case ARMISD::SMLAL:{ 3957 if (Subtarget->isThumb()) { 3958 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3959 N->getOperand(3), getAL(CurDAG, dl), 3960 CurDAG->getRegister(0, MVT::i32)}; 3961 ReplaceNode( 3962 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3963 return; 3964 }else{ 3965 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3966 N->getOperand(3), getAL(CurDAG, dl), 3967 CurDAG->getRegister(0, MVT::i32), 3968 CurDAG->getRegister(0, MVT::i32) }; 3969 ReplaceNode(N, CurDAG->getMachineNode( 3970 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3971 MVT::i32, MVT::i32, Ops)); 3972 return; 3973 } 3974 } 3975 case ARMISD::SUBE: { 3976 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3977 break; 3978 // Look for a pattern to match SMMLS 3979 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3980 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3981 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3982 !SDValue(N, 1).use_empty()) 3983 break; 3984 3985 if (Subtarget->isThumb()) 3986 assert(Subtarget->hasThumb2() && 3987 "This pattern should not be generated for Thumb"); 3988 3989 SDValue SmulLoHi = N->getOperand(1); 3990 SDValue Subc = N->getOperand(2); 3991 SDValue Zero = Subc.getOperand(0); 3992 3993 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) || 3994 N->getOperand(1) != SmulLoHi.getValue(1) || 3995 N->getOperand(2) != Subc.getValue(1)) 3996 break; 3997 3998 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 3999 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 4000 N->getOperand(0), getAL(CurDAG, dl), 4001 CurDAG->getRegister(0, MVT::i32) }; 4002 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 4003 return; 4004 } 4005 case ISD::LOAD: { 4006 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 4007 return; 4008 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 4009 if (tryT2IndexedLoad(N)) 4010 return; 4011 } else if (Subtarget->isThumb()) { 4012 if (tryT1IndexedLoad(N)) 4013 return; 4014 } else if (tryARMIndexedLoad(N)) 4015 return; 4016 // Other cases are autogenerated. 4017 break; 4018 } 4019 case ISD::MLOAD: 4020 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 4021 return; 4022 // Other cases are autogenerated. 4023 break; 4024 case ARMISD::WLSSETUP: { 4025 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, 4026 N->getOperand(0)); 4027 ReplaceUses(N, New); 4028 CurDAG->RemoveDeadNode(N); 4029 return; 4030 } 4031 case ARMISD::WLS: { 4032 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, 4033 N->getOperand(1), N->getOperand(2), 4034 N->getOperand(0)); 4035 ReplaceUses(N, New); 4036 CurDAG->RemoveDeadNode(N); 4037 return; 4038 } 4039 case ARMISD::LE: { 4040 SDValue Ops[] = { N->getOperand(1), 4041 N->getOperand(2), 4042 N->getOperand(0) }; 4043 unsigned Opc = ARM::t2LoopEnd; 4044 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 4045 ReplaceUses(N, New); 4046 CurDAG->RemoveDeadNode(N); 4047 return; 4048 } 4049 case ARMISD::LDRD: { 4050 if (Subtarget->isThumb2()) 4051 break; // TableGen handles isel in this case. 
4052 SDValue Base, RegOffset, ImmOffset; 4053 const SDValue &Chain = N->getOperand(0); 4054 const SDValue &Addr = N->getOperand(1); 4055 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4056 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4057 // The register-offset variant of LDRD mandates that the register 4058 // allocated to RegOffset is not reused in any of the remaining operands. 4059 // This restriction is currently not enforced. Therefore emitting this 4060 // variant is explicitly avoided. 4061 Base = Addr; 4062 RegOffset = CurDAG->getRegister(0, MVT::i32); 4063 } 4064 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 4065 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 4066 {MVT::Untyped, MVT::Other}, Ops); 4067 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4068 SDValue(New, 0)); 4069 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4070 SDValue(New, 0)); 4071 transferMemOperands(N, New); 4072 ReplaceUses(SDValue(N, 0), Lo); 4073 ReplaceUses(SDValue(N, 1), Hi); 4074 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 4075 CurDAG->RemoveDeadNode(N); 4076 return; 4077 } 4078 case ARMISD::STRD: { 4079 if (Subtarget->isThumb2()) 4080 break; // TableGen handles isel in this case. 4081 SDValue Base, RegOffset, ImmOffset; 4082 const SDValue &Chain = N->getOperand(0); 4083 const SDValue &Addr = N->getOperand(3); 4084 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4085 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4086 // The register-offset variant of STRD mandates that the register 4087 // allocated to RegOffset is not reused in any of the remaining operands. 4088 // This restriction is currently not enforced. Therefore emitting this 4089 // variant is explicitly avoided. 
4090 Base = Addr; 4091 RegOffset = CurDAG->getRegister(0, MVT::i32); 4092 } 4093 SDNode *RegPair = 4094 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 4095 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 4096 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 4097 transferMemOperands(N, New); 4098 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 4099 CurDAG->RemoveDeadNode(N); 4100 return; 4101 } 4102 case ARMISD::LOOP_DEC: { 4103 SDValue Ops[] = { N->getOperand(1), 4104 N->getOperand(2), 4105 N->getOperand(0) }; 4106 SDNode *Dec = 4107 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4108 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 4109 ReplaceUses(N, Dec); 4110 CurDAG->RemoveDeadNode(N); 4111 return; 4112 } 4113 case ARMISD::BRCOND: { 4114 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4115 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4116 // Pattern complexity = 6 cost = 1 size = 0 4117 4118 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4119 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 4120 // Pattern complexity = 6 cost = 1 size = 0 4121 4122 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4123 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4124 // Pattern complexity = 6 cost = 1 size = 0 4125 4126 unsigned Opc = Subtarget->isThumb() ? 4127 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 4128 SDValue Chain = N->getOperand(0); 4129 SDValue N1 = N->getOperand(1); 4130 SDValue N2 = N->getOperand(2); 4131 SDValue N3 = N->getOperand(3); 4132 SDValue InGlue = N->getOperand(4); 4133 assert(N1.getOpcode() == ISD::BasicBlock); 4134 assert(N2.getOpcode() == ISD::Constant); 4135 assert(N3.getOpcode() == ISD::Register); 4136 4137 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 4138 4139 if (InGlue.getOpcode() == ARMISD::CMPZ) { 4140 if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 4141 SDValue Int = InGlue.getOperand(0); 4142 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 4143 4144 // Handle low-overhead loops. 4145 if (ID == Intrinsic::loop_decrement_reg) { 4146 SDValue Elements = Int.getOperand(2); 4147 SDValue Size = CurDAG->getTargetConstant( 4148 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 4149 MVT::i32); 4150 4151 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 4152 SDNode *LoopDec = 4153 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4154 CurDAG->getVTList(MVT::i32, MVT::Other), 4155 Args); 4156 ReplaceUses(Int.getNode(), LoopDec); 4157 4158 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 4159 SDNode *LoopEnd = 4160 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 4161 4162 ReplaceUses(N, LoopEnd); 4163 CurDAG->RemoveDeadNode(N); 4164 CurDAG->RemoveDeadNode(InGlue.getNode()); 4165 CurDAG->RemoveDeadNode(Int.getNode()); 4166 return; 4167 } 4168 } 4169 4170 bool SwitchEQNEToPLMI; 4171 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI); 4172 InGlue = N->getOperand(4); 4173 4174 if (SwitchEQNEToPLMI) { 4175 switch ((ARMCC::CondCodes)CC) { 4176 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4177 case ARMCC::NE: 4178 CC = (unsigned)ARMCC::MI; 4179 break; 4180 case ARMCC::EQ: 4181 CC = (unsigned)ARMCC::PL; 4182 break; 4183 } 4184 } 4185 } 4186 4187 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 4188 
SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue }; 4189 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 4190 MVT::Glue, Ops); 4191 Chain = SDValue(ResNode, 0); 4192 if (N->getNumValues() == 2) { 4193 InGlue = SDValue(ResNode, 1); 4194 ReplaceUses(SDValue(N, 1), InGlue); 4195 } 4196 ReplaceUses(SDValue(N, 0), 4197 SDValue(Chain.getNode(), Chain.getResNo())); 4198 CurDAG->RemoveDeadNode(N); 4199 return; 4200 } 4201 4202 case ARMISD::CMPZ: { 4203 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 4204 // This allows us to avoid materializing the expensive negative constant. 4205 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 4206 // for its glue output. 4207 SDValue X = N->getOperand(0); 4208 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 4209 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 4210 int64_t Addend = -C->getSExtValue(); 4211 4212 SDNode *Add = nullptr; 4213 // ADDS can be better than CMN if the immediate fits in a 4214 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 4215 // Outside that range we can just use a CMN which is 32-bit but has a 4216 // 12-bit immediate range. 4217 if (Addend < 1<<8) { 4218 if (Subtarget->isThumb2()) { 4219 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4220 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 4221 CurDAG->getRegister(0, MVT::i32) }; 4222 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 4223 } else { 4224 unsigned Opc = (Addend < 1<<3) ? 
ARM::tADDi3 : ARM::tADDi8; 4225 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 4226 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4227 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 4228 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 4229 } 4230 } 4231 if (Add) { 4232 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 4233 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 4234 } 4235 } 4236 // Other cases are autogenerated. 4237 break; 4238 } 4239 4240 case ARMISD::CMOV: { 4241 SDValue InGlue = N->getOperand(4); 4242 4243 if (InGlue.getOpcode() == ARMISD::CMPZ) { 4244 bool SwitchEQNEToPLMI; 4245 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI); 4246 4247 if (SwitchEQNEToPLMI) { 4248 SDValue ARMcc = N->getOperand(2); 4249 ARMCC::CondCodes CC = 4250 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 4251 4252 switch (CC) { 4253 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4254 case ARMCC::NE: 4255 CC = ARMCC::MI; 4256 break; 4257 case ARMCC::EQ: 4258 CC = ARMCC::PL; 4259 break; 4260 } 4261 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 4262 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 4263 N->getOperand(3), N->getOperand(4)}; 4264 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 4265 } 4266 4267 } 4268 // Other cases are autogenerated. 4269 break; 4270 } 4271 case ARMISD::VZIP: { 4272 EVT VT = N->getValueType(0); 4273 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
4274 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32}; 4275 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32}; 4276 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4277 SDValue Pred = getAL(CurDAG, dl); 4278 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4279 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4280 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4281 return; 4282 } 4283 case ARMISD::VUZP: { 4284 EVT VT = N->getValueType(0); 4285 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4286 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32}; 4287 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32}; 4288 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4289 SDValue Pred = getAL(CurDAG, dl); 4290 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4291 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4292 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4293 return; 4294 } 4295 case ARMISD::VTRN: { 4296 EVT VT = N->getValueType(0); 4297 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32}; 4298 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32}; 4299 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4300 SDValue Pred = getAL(CurDAG, dl); 4301 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4302 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4303 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4304 return; 4305 } 4306 case ARMISD::BUILD_VECTOR: { 4307 EVT VecVT = N->getValueType(0); 4308 EVT EltVT = VecVT.getVectorElementType(); 4309 unsigned NumElts = VecVT.getVectorNumElements(); 4310 if (EltVT == MVT::f64) { 4311 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 4312 ReplaceNode( 4313 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4314 return; 4315 } 4316 assert(EltVT == MVT::f32 && "unexpected type for 
BUILD_VECTOR"); 4317 if (NumElts == 2) { 4318 ReplaceNode( 4319 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4320 return; 4321 } 4322 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 4323 ReplaceNode(N, 4324 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 4325 N->getOperand(2), N->getOperand(3))); 4326 return; 4327 } 4328 4329 case ARMISD::VLD1DUP: { 4330 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4331 ARM::VLD1DUPd32 }; 4332 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4333 ARM::VLD1DUPq32 }; 4334 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4335 return; 4336 } 4337 4338 case ARMISD::VLD2DUP: { 4339 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4340 ARM::VLD2DUPd32 }; 4341 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4342 return; 4343 } 4344 4345 case ARMISD::VLD3DUP: { 4346 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4347 ARM::VLD3DUPd16Pseudo, 4348 ARM::VLD3DUPd32Pseudo }; 4349 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4350 return; 4351 } 4352 4353 case ARMISD::VLD4DUP: { 4354 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4355 ARM::VLD4DUPd16Pseudo, 4356 ARM::VLD4DUPd32Pseudo }; 4357 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4358 return; 4359 } 4360 4361 case ARMISD::VLD1DUP_UPD: { 4362 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4363 ARM::VLD1DUPd16wb_fixed, 4364 ARM::VLD1DUPd32wb_fixed }; 4365 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4366 ARM::VLD1DUPq16wb_fixed, 4367 ARM::VLD1DUPq32wb_fixed }; 4368 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4369 return; 4370 } 4371 4372 case ARMISD::VLD2DUP_UPD: { 4373 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed, 4374 ARM::VLD2DUPd16wb_fixed, 4375 ARM::VLD2DUPd32wb_fixed, 4376 ARM::VLD1q64wb_fixed }; 4377 static 
const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4378 ARM::VLD2DUPq16EvenPseudo, 4379 ARM::VLD2DUPq32EvenPseudo }; 4380 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed, 4381 ARM::VLD2DUPq16OddPseudoWB_fixed, 4382 ARM::VLD2DUPq32OddPseudoWB_fixed }; 4383 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1); 4384 return; 4385 } 4386 4387 case ARMISD::VLD3DUP_UPD: { 4388 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4389 ARM::VLD3DUPd16Pseudo_UPD, 4390 ARM::VLD3DUPd32Pseudo_UPD, 4391 ARM::VLD1d64TPseudoWB_fixed }; 4392 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4393 ARM::VLD3DUPq16EvenPseudo, 4394 ARM::VLD3DUPq32EvenPseudo }; 4395 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD, 4396 ARM::VLD3DUPq16OddPseudo_UPD, 4397 ARM::VLD3DUPq32OddPseudo_UPD }; 4398 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4399 return; 4400 } 4401 4402 case ARMISD::VLD4DUP_UPD: { 4403 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4404 ARM::VLD4DUPd16Pseudo_UPD, 4405 ARM::VLD4DUPd32Pseudo_UPD, 4406 ARM::VLD1d64QPseudoWB_fixed }; 4407 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4408 ARM::VLD4DUPq16EvenPseudo, 4409 ARM::VLD4DUPq32EvenPseudo }; 4410 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD, 4411 ARM::VLD4DUPq16OddPseudo_UPD, 4412 ARM::VLD4DUPq32OddPseudo_UPD }; 4413 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4414 return; 4415 } 4416 4417 case ARMISD::VLD1_UPD: { 4418 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4419 ARM::VLD1d16wb_fixed, 4420 ARM::VLD1d32wb_fixed, 4421 ARM::VLD1d64wb_fixed }; 4422 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4423 ARM::VLD1q16wb_fixed, 4424 ARM::VLD1q32wb_fixed, 4425 ARM::VLD1q64wb_fixed }; 4426 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4427 return; 4428 } 4429 4430 case 
ARMISD::VLD2_UPD: { 4431 if (Subtarget->hasNEON()) { 4432 static const uint16_t DOpcodes[] = { 4433 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4434 ARM::VLD1q64wb_fixed}; 4435 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4436 ARM::VLD2q16PseudoWB_fixed, 4437 ARM::VLD2q32PseudoWB_fixed}; 4438 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4439 } else { 4440 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4441 ARM::MVE_VLD21_8_wb}; 4442 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4443 ARM::MVE_VLD21_16_wb}; 4444 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4445 ARM::MVE_VLD21_32_wb}; 4446 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4447 SelectMVE_VLD(N, 2, Opcodes, true); 4448 } 4449 return; 4450 } 4451 4452 case ARMISD::VLD3_UPD: { 4453 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4454 ARM::VLD3d16Pseudo_UPD, 4455 ARM::VLD3d32Pseudo_UPD, 4456 ARM::VLD1d64TPseudoWB_fixed}; 4457 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4458 ARM::VLD3q16Pseudo_UPD, 4459 ARM::VLD3q32Pseudo_UPD }; 4460 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4461 ARM::VLD3q16oddPseudo_UPD, 4462 ARM::VLD3q32oddPseudo_UPD }; 4463 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4464 return; 4465 } 4466 4467 case ARMISD::VLD4_UPD: { 4468 if (Subtarget->hasNEON()) { 4469 static const uint16_t DOpcodes[] = { 4470 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4471 ARM::VLD1d64QPseudoWB_fixed}; 4472 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4473 ARM::VLD4q16Pseudo_UPD, 4474 ARM::VLD4q32Pseudo_UPD}; 4475 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4476 ARM::VLD4q16oddPseudo_UPD, 4477 ARM::VLD4q32oddPseudo_UPD}; 4478 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4479 } else { 4480 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4481 ARM::MVE_VLD42_8, 
4482 ARM::MVE_VLD43_8_wb}; 4483 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4484 ARM::MVE_VLD42_16, 4485 ARM::MVE_VLD43_16_wb}; 4486 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4487 ARM::MVE_VLD42_32, 4488 ARM::MVE_VLD43_32_wb}; 4489 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4490 SelectMVE_VLD(N, 4, Opcodes, true); 4491 } 4492 return; 4493 } 4494 4495 case ARMISD::VLD1x2_UPD: { 4496 if (Subtarget->hasNEON()) { 4497 static const uint16_t DOpcodes[] = { 4498 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, 4499 ARM::VLD1q64wb_fixed}; 4500 static const uint16_t QOpcodes[] = { 4501 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4502 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4503 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4504 return; 4505 } 4506 break; 4507 } 4508 4509 case ARMISD::VLD1x3_UPD: { 4510 if (Subtarget->hasNEON()) { 4511 static const uint16_t DOpcodes[] = { 4512 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed, 4513 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed}; 4514 static const uint16_t QOpcodes0[] = { 4515 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD, 4516 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD}; 4517 static const uint16_t QOpcodes1[] = { 4518 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD, 4519 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD}; 4520 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4521 return; 4522 } 4523 break; 4524 } 4525 4526 case ARMISD::VLD1x4_UPD: { 4527 if (Subtarget->hasNEON()) { 4528 static const uint16_t DOpcodes[] = { 4529 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4530 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4531 static const uint16_t QOpcodes0[] = { 4532 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD, 4533 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD}; 4534 static 
const uint16_t QOpcodes1[] = { 4535 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD, 4536 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD}; 4537 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4538 return; 4539 } 4540 break; 4541 } 4542 4543 case ARMISD::VLD2LN_UPD: { 4544 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4545 ARM::VLD2LNd16Pseudo_UPD, 4546 ARM::VLD2LNd32Pseudo_UPD }; 4547 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4548 ARM::VLD2LNq32Pseudo_UPD }; 4549 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4550 return; 4551 } 4552 4553 case ARMISD::VLD3LN_UPD: { 4554 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4555 ARM::VLD3LNd16Pseudo_UPD, 4556 ARM::VLD3LNd32Pseudo_UPD }; 4557 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4558 ARM::VLD3LNq32Pseudo_UPD }; 4559 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4560 return; 4561 } 4562 4563 case ARMISD::VLD4LN_UPD: { 4564 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4565 ARM::VLD4LNd16Pseudo_UPD, 4566 ARM::VLD4LNd32Pseudo_UPD }; 4567 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4568 ARM::VLD4LNq32Pseudo_UPD }; 4569 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4570 return; 4571 } 4572 4573 case ARMISD::VST1_UPD: { 4574 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 4575 ARM::VST1d16wb_fixed, 4576 ARM::VST1d32wb_fixed, 4577 ARM::VST1d64wb_fixed }; 4578 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4579 ARM::VST1q16wb_fixed, 4580 ARM::VST1q32wb_fixed, 4581 ARM::VST1q64wb_fixed }; 4582 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4583 return; 4584 } 4585 4586 case ARMISD::VST2_UPD: { 4587 if (Subtarget->hasNEON()) { 4588 static const uint16_t DOpcodes[] = { 4589 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4590 ARM::VST1q64wb_fixed}; 4591 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4592 
ARM::VST2q16PseudoWB_fixed, 4593 ARM::VST2q32PseudoWB_fixed}; 4594 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4595 return; 4596 } 4597 break; 4598 } 4599 4600 case ARMISD::VST3_UPD: { 4601 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 4602 ARM::VST3d16Pseudo_UPD, 4603 ARM::VST3d32Pseudo_UPD, 4604 ARM::VST1d64TPseudoWB_fixed}; 4605 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4606 ARM::VST3q16Pseudo_UPD, 4607 ARM::VST3q32Pseudo_UPD }; 4608 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4609 ARM::VST3q16oddPseudo_UPD, 4610 ARM::VST3q32oddPseudo_UPD }; 4611 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4612 return; 4613 } 4614 4615 case ARMISD::VST4_UPD: { 4616 if (Subtarget->hasNEON()) { 4617 static const uint16_t DOpcodes[] = { 4618 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4619 ARM::VST1d64QPseudoWB_fixed}; 4620 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4621 ARM::VST4q16Pseudo_UPD, 4622 ARM::VST4q32Pseudo_UPD}; 4623 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4624 ARM::VST4q16oddPseudo_UPD, 4625 ARM::VST4q32oddPseudo_UPD}; 4626 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4627 return; 4628 } 4629 break; 4630 } 4631 4632 case ARMISD::VST1x2_UPD: { 4633 if (Subtarget->hasNEON()) { 4634 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed, 4635 ARM::VST1q16wb_fixed, 4636 ARM::VST1q32wb_fixed, 4637 ARM::VST1q64wb_fixed}; 4638 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4639 ARM::VST1d16QPseudoWB_fixed, 4640 ARM::VST1d32QPseudoWB_fixed, 4641 ARM::VST1d64QPseudoWB_fixed }; 4642 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4643 return; 4644 } 4645 break; 4646 } 4647 4648 case ARMISD::VST1x3_UPD: { 4649 if (Subtarget->hasNEON()) { 4650 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed, 4651 ARM::VST1d16TPseudoWB_fixed, 4652 ARM::VST1d32TPseudoWB_fixed, 4653 ARM::VST1d64TPseudoWB_fixed }; 
4654 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4655 ARM::VST1q16LowTPseudo_UPD, 4656 ARM::VST1q32LowTPseudo_UPD, 4657 ARM::VST1q64LowTPseudo_UPD }; 4658 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD, 4659 ARM::VST1q16HighTPseudo_UPD, 4660 ARM::VST1q32HighTPseudo_UPD, 4661 ARM::VST1q64HighTPseudo_UPD }; 4662 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4663 return; 4664 } 4665 break; 4666 } 4667 4668 case ARMISD::VST1x4_UPD: { 4669 if (Subtarget->hasNEON()) { 4670 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4671 ARM::VST1d16QPseudoWB_fixed, 4672 ARM::VST1d32QPseudoWB_fixed, 4673 ARM::VST1d64QPseudoWB_fixed }; 4674 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4675 ARM::VST1q16LowQPseudo_UPD, 4676 ARM::VST1q32LowQPseudo_UPD, 4677 ARM::VST1q64LowQPseudo_UPD }; 4678 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD, 4679 ARM::VST1q16HighQPseudo_UPD, 4680 ARM::VST1q32HighQPseudo_UPD, 4681 ARM::VST1q64HighQPseudo_UPD }; 4682 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4683 return; 4684 } 4685 break; 4686 } 4687 case ARMISD::VST2LN_UPD: { 4688 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 4689 ARM::VST2LNd16Pseudo_UPD, 4690 ARM::VST2LNd32Pseudo_UPD }; 4691 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 4692 ARM::VST2LNq32Pseudo_UPD }; 4693 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 4694 return; 4695 } 4696 4697 case ARMISD::VST3LN_UPD: { 4698 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 4699 ARM::VST3LNd16Pseudo_UPD, 4700 ARM::VST3LNd32Pseudo_UPD }; 4701 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 4702 ARM::VST3LNq32Pseudo_UPD }; 4703 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 4704 return; 4705 } 4706 4707 case ARMISD::VST4LN_UPD: { 4708 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 4709 ARM::VST4LNd16Pseudo_UPD, 4710 ARM::VST4LNd32Pseudo_UPD }; 
4711 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 4712 ARM::VST4LNq32Pseudo_UPD }; 4713 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 4714 return; 4715 } 4716 4717 case ISD::INTRINSIC_VOID: 4718 case ISD::INTRINSIC_W_CHAIN: { 4719 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 4720 switch (IntNo) { 4721 default: 4722 break; 4723 4724 case Intrinsic::arm_mrrc: 4725 case Intrinsic::arm_mrrc2: { 4726 SDLoc dl(N); 4727 SDValue Chain = N->getOperand(0); 4728 unsigned Opc; 4729 4730 if (Subtarget->isThumb()) 4731 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 4732 else 4733 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 4734 4735 SmallVector<SDValue, 5> Ops; 4736 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 4737 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 4738 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 4739 4740 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 4741 // instruction will always be '1111' but it is possible in assembly language to specify 4742 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 4743 if (Opc != ARM::MRRC2) { 4744 Ops.push_back(getAL(CurDAG, dl)); 4745 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4746 } 4747 4748 Ops.push_back(Chain); 4749 4750 // Writes to two registers. 
4751 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 4752 4753 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 4754 return; 4755 } 4756 case Intrinsic::arm_ldaexd: 4757 case Intrinsic::arm_ldrexd: { 4758 SDLoc dl(N); 4759 SDValue Chain = N->getOperand(0); 4760 SDValue MemAddr = N->getOperand(2); 4761 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 4762 4763 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 4764 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 4765 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 4766 4767 // arm_ldrexd returns a i64 value in {i32, i32} 4768 std::vector<EVT> ResTys; 4769 if (isThumb) { 4770 ResTys.push_back(MVT::i32); 4771 ResTys.push_back(MVT::i32); 4772 } else 4773 ResTys.push_back(MVT::Untyped); 4774 ResTys.push_back(MVT::Other); 4775 4776 // Place arguments in the right order. 4777 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 4778 CurDAG->getRegister(0, MVT::i32), Chain}; 4779 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4780 // Transfer memoperands. 4781 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4782 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4783 4784 // Remap uses. 4785 SDValue OutChain = isThumb ? 
SDValue(Ld, 2) : SDValue(Ld, 1); 4786 if (!SDValue(N, 0).use_empty()) { 4787 SDValue Result; 4788 if (isThumb) 4789 Result = SDValue(Ld, 0); 4790 else { 4791 SDValue SubRegIdx = 4792 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 4793 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4794 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4795 Result = SDValue(ResNode,0); 4796 } 4797 ReplaceUses(SDValue(N, 0), Result); 4798 } 4799 if (!SDValue(N, 1).use_empty()) { 4800 SDValue Result; 4801 if (isThumb) 4802 Result = SDValue(Ld, 1); 4803 else { 4804 SDValue SubRegIdx = 4805 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 4806 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4807 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4808 Result = SDValue(ResNode,0); 4809 } 4810 ReplaceUses(SDValue(N, 1), Result); 4811 } 4812 ReplaceUses(SDValue(N, 2), OutChain); 4813 CurDAG->RemoveDeadNode(N); 4814 return; 4815 } 4816 case Intrinsic::arm_stlexd: 4817 case Intrinsic::arm_strexd: { 4818 SDLoc dl(N); 4819 SDValue Chain = N->getOperand(0); 4820 SDValue Val0 = N->getOperand(2); 4821 SDValue Val1 = N->getOperand(3); 4822 SDValue MemAddr = N->getOperand(4); 4823 4824 // Store exclusive double return a i32 value which is the return status 4825 // of the issued store. 4826 const EVT ResTys[] = {MVT::i32, MVT::Other}; 4827 4828 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 4829 // Place arguments in the right order. 4830 SmallVector<SDValue, 7> Ops; 4831 if (isThumb) { 4832 Ops.push_back(Val0); 4833 Ops.push_back(Val1); 4834 } else 4835 // arm_strexd uses GPRPair. 4836 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 4837 Ops.push_back(MemAddr); 4838 Ops.push_back(getAL(CurDAG, dl)); 4839 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4840 Ops.push_back(Chain); 4841 4842 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 4843 unsigned NewOpc = isThumb ? (IsRelease ? 
ARM::t2STLEXD : ARM::t2STREXD) 4844 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 4845 4846 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4847 // Transfer memoperands. 4848 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4849 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4850 4851 ReplaceNode(N, St); 4852 return; 4853 } 4854 4855 case Intrinsic::arm_neon_vld1: { 4856 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 4857 ARM::VLD1d32, ARM::VLD1d64 }; 4858 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4859 ARM::VLD1q32, ARM::VLD1q64}; 4860 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 4861 return; 4862 } 4863 4864 case Intrinsic::arm_neon_vld1x2: { 4865 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4866 ARM::VLD1q32, ARM::VLD1q64 }; 4867 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 4868 ARM::VLD1d16QPseudo, 4869 ARM::VLD1d32QPseudo, 4870 ARM::VLD1d64QPseudo }; 4871 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4872 return; 4873 } 4874 4875 case Intrinsic::arm_neon_vld1x3: { 4876 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 4877 ARM::VLD1d16TPseudo, 4878 ARM::VLD1d32TPseudo, 4879 ARM::VLD1d64TPseudo }; 4880 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD, 4881 ARM::VLD1q16LowTPseudo_UPD, 4882 ARM::VLD1q32LowTPseudo_UPD, 4883 ARM::VLD1q64LowTPseudo_UPD }; 4884 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 4885 ARM::VLD1q16HighTPseudo, 4886 ARM::VLD1q32HighTPseudo, 4887 ARM::VLD1q64HighTPseudo }; 4888 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4889 return; 4890 } 4891 4892 case Intrinsic::arm_neon_vld1x4: { 4893 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 4894 ARM::VLD1d16QPseudo, 4895 ARM::VLD1d32QPseudo, 4896 ARM::VLD1d64QPseudo }; 4897 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 4898 ARM::VLD1q16LowQPseudo_UPD, 4899 ARM::VLD1q32LowQPseudo_UPD, 4900 
ARM::VLD1q64LowQPseudo_UPD }; 4901 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 4902 ARM::VLD1q16HighQPseudo, 4903 ARM::VLD1q32HighQPseudo, 4904 ARM::VLD1q64HighQPseudo }; 4905 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4906 return; 4907 } 4908 4909 case Intrinsic::arm_neon_vld2: { 4910 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 4911 ARM::VLD2d32, ARM::VLD1q64 }; 4912 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 4913 ARM::VLD2q32Pseudo }; 4914 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4915 return; 4916 } 4917 4918 case Intrinsic::arm_neon_vld3: { 4919 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 4920 ARM::VLD3d16Pseudo, 4921 ARM::VLD3d32Pseudo, 4922 ARM::VLD1d64TPseudo }; 4923 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4924 ARM::VLD3q16Pseudo_UPD, 4925 ARM::VLD3q32Pseudo_UPD }; 4926 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 4927 ARM::VLD3q16oddPseudo, 4928 ARM::VLD3q32oddPseudo }; 4929 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4930 return; 4931 } 4932 4933 case Intrinsic::arm_neon_vld4: { 4934 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 4935 ARM::VLD4d16Pseudo, 4936 ARM::VLD4d32Pseudo, 4937 ARM::VLD1d64QPseudo }; 4938 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 4939 ARM::VLD4q16Pseudo_UPD, 4940 ARM::VLD4q32Pseudo_UPD }; 4941 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 4942 ARM::VLD4q16oddPseudo, 4943 ARM::VLD4q32oddPseudo }; 4944 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4945 return; 4946 } 4947 4948 case Intrinsic::arm_neon_vld2dup: { 4949 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4950 ARM::VLD2DUPd32, ARM::VLD1q64 }; 4951 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4952 ARM::VLD2DUPq16EvenPseudo, 4953 ARM::VLD2DUPq32EvenPseudo }; 4954 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 4955 
ARM::VLD2DUPq16OddPseudo, 4956 ARM::VLD2DUPq32OddPseudo }; 4957 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 4958 DOpcodes, QOpcodes0, QOpcodes1); 4959 return; 4960 } 4961 4962 case Intrinsic::arm_neon_vld3dup: { 4963 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 4964 ARM::VLD3DUPd16Pseudo, 4965 ARM::VLD3DUPd32Pseudo, 4966 ARM::VLD1d64TPseudo }; 4967 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4968 ARM::VLD3DUPq16EvenPseudo, 4969 ARM::VLD3DUPq32EvenPseudo }; 4970 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 4971 ARM::VLD3DUPq16OddPseudo, 4972 ARM::VLD3DUPq32OddPseudo }; 4973 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 4974 DOpcodes, QOpcodes0, QOpcodes1); 4975 return; 4976 } 4977 4978 case Intrinsic::arm_neon_vld4dup: { 4979 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 4980 ARM::VLD4DUPd16Pseudo, 4981 ARM::VLD4DUPd32Pseudo, 4982 ARM::VLD1d64QPseudo }; 4983 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4984 ARM::VLD4DUPq16EvenPseudo, 4985 ARM::VLD4DUPq32EvenPseudo }; 4986 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 4987 ARM::VLD4DUPq16OddPseudo, 4988 ARM::VLD4DUPq32OddPseudo }; 4989 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 4990 DOpcodes, QOpcodes0, QOpcodes1); 4991 return; 4992 } 4993 4994 case Intrinsic::arm_neon_vld2lane: { 4995 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 4996 ARM::VLD2LNd16Pseudo, 4997 ARM::VLD2LNd32Pseudo }; 4998 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 4999 ARM::VLD2LNq32Pseudo }; 5000 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 5001 return; 5002 } 5003 5004 case Intrinsic::arm_neon_vld3lane: { 5005 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 5006 ARM::VLD3LNd16Pseudo, 5007 ARM::VLD3LNd32Pseudo }; 5008 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 5009 ARM::VLD3LNq32Pseudo }; 5010 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 5011 return; 
5012 } 5013 5014 case Intrinsic::arm_neon_vld4lane: { 5015 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 5016 ARM::VLD4LNd16Pseudo, 5017 ARM::VLD4LNd32Pseudo }; 5018 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 5019 ARM::VLD4LNq32Pseudo }; 5020 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 5021 return; 5022 } 5023 5024 case Intrinsic::arm_neon_vst1: { 5025 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 5026 ARM::VST1d32, ARM::VST1d64 }; 5027 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 5028 ARM::VST1q32, ARM::VST1q64 }; 5029 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 5030 return; 5031 } 5032 5033 case Intrinsic::arm_neon_vst1x2: { 5034 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 5035 ARM::VST1q32, ARM::VST1q64 }; 5036 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 5037 ARM::VST1d16QPseudo, 5038 ARM::VST1d32QPseudo, 5039 ARM::VST1d64QPseudo }; 5040 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 5041 return; 5042 } 5043 5044 case Intrinsic::arm_neon_vst1x3: { 5045 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 5046 ARM::VST1d16TPseudo, 5047 ARM::VST1d32TPseudo, 5048 ARM::VST1d64TPseudo }; 5049 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 5050 ARM::VST1q16LowTPseudo_UPD, 5051 ARM::VST1q32LowTPseudo_UPD, 5052 ARM::VST1q64LowTPseudo_UPD }; 5053 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 5054 ARM::VST1q16HighTPseudo, 5055 ARM::VST1q32HighTPseudo, 5056 ARM::VST1q64HighTPseudo }; 5057 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 5058 return; 5059 } 5060 5061 case Intrinsic::arm_neon_vst1x4: { 5062 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 5063 ARM::VST1d16QPseudo, 5064 ARM::VST1d32QPseudo, 5065 ARM::VST1d64QPseudo }; 5066 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 5067 ARM::VST1q16LowQPseudo_UPD, 5068 ARM::VST1q32LowQPseudo_UPD, 5069 ARM::VST1q64LowQPseudo_UPD 
}; 5070 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 5071 ARM::VST1q16HighQPseudo, 5072 ARM::VST1q32HighQPseudo, 5073 ARM::VST1q64HighQPseudo }; 5074 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 5075 return; 5076 } 5077 5078 case Intrinsic::arm_neon_vst2: { 5079 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 5080 ARM::VST2d32, ARM::VST1q64 }; 5081 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 5082 ARM::VST2q32Pseudo }; 5083 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 5084 return; 5085 } 5086 5087 case Intrinsic::arm_neon_vst3: { 5088 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 5089 ARM::VST3d16Pseudo, 5090 ARM::VST3d32Pseudo, 5091 ARM::VST1d64TPseudo }; 5092 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 5093 ARM::VST3q16Pseudo_UPD, 5094 ARM::VST3q32Pseudo_UPD }; 5095 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 5096 ARM::VST3q16oddPseudo, 5097 ARM::VST3q32oddPseudo }; 5098 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 5099 return; 5100 } 5101 5102 case Intrinsic::arm_neon_vst4: { 5103 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 5104 ARM::VST4d16Pseudo, 5105 ARM::VST4d32Pseudo, 5106 ARM::VST1d64QPseudo }; 5107 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 5108 ARM::VST4q16Pseudo_UPD, 5109 ARM::VST4q32Pseudo_UPD }; 5110 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 5111 ARM::VST4q16oddPseudo, 5112 ARM::VST4q32oddPseudo }; 5113 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 5114 return; 5115 } 5116 5117 case Intrinsic::arm_neon_vst2lane: { 5118 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 5119 ARM::VST2LNd16Pseudo, 5120 ARM::VST2LNd32Pseudo }; 5121 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 5122 ARM::VST2LNq32Pseudo }; 5123 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 5124 return; 5125 } 5126 5127 case Intrinsic::arm_neon_vst3lane: { 5128 static 
const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 5129 ARM::VST3LNd16Pseudo, 5130 ARM::VST3LNd32Pseudo }; 5131 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 5132 ARM::VST3LNq32Pseudo }; 5133 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 5134 return; 5135 } 5136 5137 case Intrinsic::arm_neon_vst4lane: { 5138 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 5139 ARM::VST4LNd16Pseudo, 5140 ARM::VST4LNd32Pseudo }; 5141 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 5142 ARM::VST4LNq32Pseudo }; 5143 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 5144 return; 5145 } 5146 5147 case Intrinsic::arm_mve_vldr_gather_base_wb: 5148 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: { 5149 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre, 5150 ARM::MVE_VLDRDU64_qi_pre}; 5151 SelectMVE_WB(N, Opcodes, 5152 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated); 5153 return; 5154 } 5155 5156 case Intrinsic::arm_mve_vld2q: { 5157 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8}; 5158 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 5159 ARM::MVE_VLD21_16}; 5160 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 5161 ARM::MVE_VLD21_32}; 5162 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 5163 SelectMVE_VLD(N, 2, Opcodes, false); 5164 return; 5165 } 5166 5167 case Intrinsic::arm_mve_vld4q: { 5168 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 5169 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8}; 5170 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 5171 ARM::MVE_VLD42_16, 5172 ARM::MVE_VLD43_16}; 5173 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 5174 ARM::MVE_VLD42_32, 5175 ARM::MVE_VLD43_32}; 5176 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 5177 SelectMVE_VLD(N, 4, Opcodes, false); 5178 return; 5179 } 5180 } 5181 break; 5182 } 5183 5184 case 
ISD::INTRINSIC_WO_CHAIN: { 5185 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 5186 switch (IntNo) { 5187 default: 5188 break; 5189 5190 // Scalar f32 -> bf16 5191 case Intrinsic::arm_neon_vcvtbfp2bf: { 5192 SDLoc dl(N); 5193 const SDValue &Src = N->getOperand(1); 5194 llvm::EVT DestTy = N->getValueType(0); 5195 SDValue Pred = getAL(CurDAG, dl); 5196 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 5197 SDValue Ops[] = { Src, Src, Pred, Reg0 }; 5198 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops); 5199 return; 5200 } 5201 5202 // Vector v4f32 -> v4bf16 5203 case Intrinsic::arm_neon_vcvtfp2bf: { 5204 SDLoc dl(N); 5205 const SDValue &Src = N->getOperand(1); 5206 SDValue Pred = getAL(CurDAG, dl); 5207 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 5208 SDValue Ops[] = { Src, Pred, Reg0 }; 5209 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops); 5210 return; 5211 } 5212 5213 case Intrinsic::arm_mve_urshrl: 5214 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false); 5215 return; 5216 case Intrinsic::arm_mve_uqshll: 5217 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false); 5218 return; 5219 case Intrinsic::arm_mve_srshrl: 5220 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false); 5221 return; 5222 case Intrinsic::arm_mve_sqshll: 5223 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false); 5224 return; 5225 case Intrinsic::arm_mve_uqrshll: 5226 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true); 5227 return; 5228 case Intrinsic::arm_mve_sqrshrl: 5229 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true); 5230 return; 5231 5232 case Intrinsic::arm_mve_vadc: 5233 case Intrinsic::arm_mve_vadc_predicated: 5234 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true, 5235 IntNo == Intrinsic::arm_mve_vadc_predicated); 5236 return; 5237 case Intrinsic::arm_mve_vsbc: 5238 case Intrinsic::arm_mve_vsbc_predicated: 5239 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true, 5240 IntNo == Intrinsic::arm_mve_vsbc_predicated); 5241 return; 
5242 case Intrinsic::arm_mve_vshlc: 5243 case Intrinsic::arm_mve_vshlc_predicated: 5244 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated); 5245 return; 5246 5247 case Intrinsic::arm_mve_vmlldava: 5248 case Intrinsic::arm_mve_vmlldava_predicated: { 5249 static const uint16_t OpcodesU[] = { 5250 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32, 5251 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32, 5252 }; 5253 static const uint16_t OpcodesS[] = { 5254 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32, 5255 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32, 5256 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32, 5257 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32, 5258 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32, 5259 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32, 5260 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32, 5261 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32, 5262 }; 5263 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated, 5264 OpcodesS, OpcodesU); 5265 return; 5266 } 5267 5268 case Intrinsic::arm_mve_vrmlldavha: 5269 case Intrinsic::arm_mve_vrmlldavha_predicated: { 5270 static const uint16_t OpcodesU[] = { 5271 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32, 5272 }; 5273 static const uint16_t OpcodesS[] = { 5274 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32, 5275 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32, 5276 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32, 5277 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32, 5278 }; 5279 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated, 5280 OpcodesS, OpcodesU); 5281 return; 5282 } 5283 5284 case Intrinsic::arm_mve_vidup: 5285 case Intrinsic::arm_mve_vidup_predicated: { 5286 static const uint16_t Opcodes[] = { 5287 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32, 5288 }; 5289 SelectMVE_VxDUP(N, Opcodes, false, 5290 IntNo == Intrinsic::arm_mve_vidup_predicated); 5291 return; 5292 } 5293 5294 case Intrinsic::arm_mve_vddup: 5295 case Intrinsic::arm_mve_vddup_predicated: { 5296 
static const uint16_t Opcodes[] = { 5297 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32, 5298 }; 5299 SelectMVE_VxDUP(N, Opcodes, false, 5300 IntNo == Intrinsic::arm_mve_vddup_predicated); 5301 return; 5302 } 5303 5304 case Intrinsic::arm_mve_viwdup: 5305 case Intrinsic::arm_mve_viwdup_predicated: { 5306 static const uint16_t Opcodes[] = { 5307 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32, 5308 }; 5309 SelectMVE_VxDUP(N, Opcodes, true, 5310 IntNo == Intrinsic::arm_mve_viwdup_predicated); 5311 return; 5312 } 5313 5314 case Intrinsic::arm_mve_vdwdup: 5315 case Intrinsic::arm_mve_vdwdup_predicated: { 5316 static const uint16_t Opcodes[] = { 5317 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32, 5318 }; 5319 SelectMVE_VxDUP(N, Opcodes, true, 5320 IntNo == Intrinsic::arm_mve_vdwdup_predicated); 5321 return; 5322 } 5323 5324 case Intrinsic::arm_cde_cx1d: 5325 case Intrinsic::arm_cde_cx1da: 5326 case Intrinsic::arm_cde_cx2d: 5327 case Intrinsic::arm_cde_cx2da: 5328 case Intrinsic::arm_cde_cx3d: 5329 case Intrinsic::arm_cde_cx3da: { 5330 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da || 5331 IntNo == Intrinsic::arm_cde_cx2da || 5332 IntNo == Intrinsic::arm_cde_cx3da; 5333 size_t NumExtraOps; 5334 uint16_t Opcode; 5335 switch (IntNo) { 5336 case Intrinsic::arm_cde_cx1d: 5337 case Intrinsic::arm_cde_cx1da: 5338 NumExtraOps = 0; 5339 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D; 5340 break; 5341 case Intrinsic::arm_cde_cx2d: 5342 case Intrinsic::arm_cde_cx2da: 5343 NumExtraOps = 1; 5344 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D; 5345 break; 5346 case Intrinsic::arm_cde_cx3d: 5347 case Intrinsic::arm_cde_cx3da: 5348 NumExtraOps = 2; 5349 Opcode = HasAccum ? 
ARM::CDE_CX3DA : ARM::CDE_CX3D; 5350 break; 5351 default: 5352 llvm_unreachable("Unexpected opcode"); 5353 } 5354 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum); 5355 return; 5356 } 5357 } 5358 break; 5359 } 5360 5361 case ISD::ATOMIC_CMP_SWAP: 5362 SelectCMP_SWAP(N); 5363 return; 5364 } 5365 5366 SelectCode(N); 5367 } 5368 5369 // Inspect a register string of the form 5370 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 5371 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 5372 // and obtain the integer operands from them, adding these operands to the 5373 // provided vector. 5374 static void getIntOperandsFromRegisterString(StringRef RegString, 5375 SelectionDAG *CurDAG, 5376 const SDLoc &DL, 5377 std::vector<SDValue> &Ops) { 5378 SmallVector<StringRef, 5> Fields; 5379 RegString.split(Fields, ':'); 5380 5381 if (Fields.size() > 1) { 5382 bool AllIntFields = true; 5383 5384 for (StringRef Field : Fields) { 5385 // Need to trim out leading 'cp' characters and get the integer field. 5386 unsigned IntField; 5387 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 5388 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 5389 } 5390 5391 assert(AllIntFields && 5392 "Unexpected non-integer value in special register string."); 5393 (void)AllIntFields; 5394 } 5395 } 5396 5397 // Maps a Banked Register string to its mask value. The mask value returned is 5398 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 5399 // mask operand, which expresses which register is to be used, e.g. r8, and in 5400 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 5401 // was invalid. 
5402 static inline int getBankedRegisterMask(StringRef RegString) { 5403 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower()); 5404 if (!TheReg) 5405 return -1; 5406 return TheReg->Encoding; 5407 } 5408 5409 // The flags here are common to those allowed for apsr in the A class cores and 5410 // those allowed for the special registers in the M class cores. Returns a 5411 // value representing which flags were present, -1 if invalid. 5412 static inline int getMClassFlagsMask(StringRef Flags) { 5413 return StringSwitch<int>(Flags) 5414 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is 5415 // correct when flags are not permitted 5416 .Case("g", 0x1) 5417 .Case("nzcvq", 0x2) 5418 .Case("nzcvqg", 0x3) 5419 .Default(-1); 5420 } 5421 5422 // Maps MClass special registers string to its value for use in the 5423 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand. 5424 // Returns -1 to signify that the string was invalid. 5425 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) { 5426 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg); 5427 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits(); 5428 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits)) 5429 return -1; 5430 return (int)(TheReg->Encoding & 0xFFF); // SYSm value 5431 } 5432 5433 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { 5434 // The mask operand contains the special register (R Bit) in bit 4, whether 5435 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and 5436 // bits 3-0 contains the fields to be accessed in the special register, set by 5437 // the flags provided with the register. 5438 int Mask = 0; 5439 if (Reg == "apsr") { 5440 // The flags permitted for apsr are the same flags that are allowed in 5441 // M class registers. We get the flag value and then shift the flags into 5442 // the correct place to combine with the mask. 
5443 Mask = getMClassFlagsMask(Flags); 5444 if (Mask == -1) 5445 return -1; 5446 return Mask << 2; 5447 } 5448 5449 if (Reg != "cpsr" && Reg != "spsr") { 5450 return -1; 5451 } 5452 5453 // This is the same as if the flags were "fc" 5454 if (Flags.empty() || Flags == "all") 5455 return Mask | 0x9; 5456 5457 // Inspect the supplied flags string and set the bits in the mask for 5458 // the relevant and valid flags allowed for cpsr and spsr. 5459 for (char Flag : Flags) { 5460 int FlagVal; 5461 switch (Flag) { 5462 case 'c': 5463 FlagVal = 0x1; 5464 break; 5465 case 'x': 5466 FlagVal = 0x2; 5467 break; 5468 case 's': 5469 FlagVal = 0x4; 5470 break; 5471 case 'f': 5472 FlagVal = 0x8; 5473 break; 5474 default: 5475 FlagVal = 0; 5476 } 5477 5478 // This avoids allowing strings where the same flag bit appears twice. 5479 if (!FlagVal || (Mask & FlagVal)) 5480 return -1; 5481 Mask |= FlagVal; 5482 } 5483 5484 // If the register is spsr then we need to set the R bit. 5485 if (Reg == "spsr") 5486 Mask |= 0x10; 5487 5488 return Mask; 5489 } 5490 5491 // Lower the read_register intrinsic to ARM specific DAG nodes 5492 // using the supplied metadata string to select the instruction node to use 5493 // and the registers/masks to construct as operands for the node. 5494 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ 5495 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1)); 5496 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0)); 5497 bool IsThumb2 = Subtarget->isThumb2(); 5498 SDLoc DL(N); 5499 5500 std::vector<SDValue> Ops; 5501 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5502 5503 if (!Ops.empty()) { 5504 // If the special register string was constructed of fields (as defined 5505 // in the ACLE) then need to lower to MRC node (32 bit) or 5506 // MRRC node(64 bit), we can make the distinction based on the number of 5507 // operands we have. 
5508 unsigned Opcode; 5509 SmallVector<EVT, 3> ResTypes; 5510 if (Ops.size() == 5){ 5511 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; 5512 ResTypes.append({ MVT::i32, MVT::Other }); 5513 } else { 5514 assert(Ops.size() == 3 && 5515 "Invalid number of fields in special register string."); 5516 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; 5517 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); 5518 } 5519 5520 Ops.push_back(getAL(CurDAG, DL)); 5521 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5522 Ops.push_back(N->getOperand(0)); 5523 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); 5524 return true; 5525 } 5526 5527 std::string SpecialReg = RegString->getString().lower(); 5528 5529 int BankedReg = getBankedRegisterMask(SpecialReg); 5530 if (BankedReg != -1) { 5531 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), 5532 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5533 N->getOperand(0) }; 5534 ReplaceNode( 5535 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, 5536 DL, MVT::i32, MVT::Other, Ops)); 5537 return true; 5538 } 5539 5540 // The VFP registers are read by creating SelectionDAG nodes with opcodes 5541 // corresponding to the register that is being read from. So we switch on the 5542 // string to find which opcode we need to use. 5543 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5544 .Case("fpscr", ARM::VMRS) 5545 .Case("fpexc", ARM::VMRS_FPEXC) 5546 .Case("fpsid", ARM::VMRS_FPSID) 5547 .Case("mvfr0", ARM::VMRS_MVFR0) 5548 .Case("mvfr1", ARM::VMRS_MVFR1) 5549 .Case("mvfr2", ARM::VMRS_MVFR2) 5550 .Case("fpinst", ARM::VMRS_FPINST) 5551 .Case("fpinst2", ARM::VMRS_FPINST2) 5552 .Default(0); 5553 5554 // If an opcode was found then we can lower the read to a VFP instruction. 
5555 if (Opcode) { 5556 if (!Subtarget->hasVFP2Base()) 5557 return false; 5558 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base()) 5559 return false; 5560 5561 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5562 N->getOperand(0) }; 5563 ReplaceNode(N, 5564 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); 5565 return true; 5566 } 5567 5568 // If the target is M Class then need to validate that the register string 5569 // is an acceptable value, so check that a mask can be constructed from the 5570 // string. 5571 if (Subtarget->isMClass()) { 5572 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5573 if (SYSmValue == -1) 5574 return false; 5575 5576 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5577 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5578 N->getOperand(0) }; 5579 ReplaceNode( 5580 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); 5581 return true; 5582 } 5583 5584 // Here we know the target is not M Class so we need to check if it is one 5585 // of the remaining possible values which are apsr, cpsr or spsr. 5586 if (SpecialReg == "apsr" || SpecialReg == "cpsr") { 5587 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5588 N->getOperand(0) }; 5589 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, 5590 DL, MVT::i32, MVT::Other, Ops)); 5591 return true; 5592 } 5593 5594 if (SpecialReg == "spsr") { 5595 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5596 N->getOperand(0) }; 5597 ReplaceNode( 5598 N, CurDAG->getMachineNode(IsThumb2 ? 
ARM::t2MRSsys_AR : ARM::MRSsys, DL, 5599 MVT::i32, MVT::Other, Ops)); 5600 return true; 5601 } 5602 5603 return false; 5604 } 5605 5606 // Lower the write_register intrinsic to ARM specific DAG nodes 5607 // using the supplied metadata string to select the instruction node to use 5608 // and the registers/masks to use in the nodes 5609 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ 5610 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1)); 5611 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0)); 5612 bool IsThumb2 = Subtarget->isThumb2(); 5613 SDLoc DL(N); 5614 5615 std::vector<SDValue> Ops; 5616 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5617 5618 if (!Ops.empty()) { 5619 // If the special register string was constructed of fields (as defined 5620 // in the ACLE) then need to lower to MCR node (32 bit) or 5621 // MCRR node(64 bit), we can make the distinction based on the number of 5622 // operands we have. 5623 unsigned Opcode; 5624 if (Ops.size() == 5) { 5625 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; 5626 Ops.insert(Ops.begin()+2, N->getOperand(2)); 5627 } else { 5628 assert(Ops.size() == 3 && 5629 "Invalid number of fields in special register string."); 5630 Opcode = IsThumb2 ? 
ARM::t2MCRR : ARM::MCRR; 5631 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; 5632 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); 5633 } 5634 5635 Ops.push_back(getAL(CurDAG, DL)); 5636 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5637 Ops.push_back(N->getOperand(0)); 5638 5639 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5640 return true; 5641 } 5642 5643 std::string SpecialReg = RegString->getString().lower(); 5644 int BankedReg = getBankedRegisterMask(SpecialReg); 5645 if (BankedReg != -1) { 5646 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), 5647 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5648 N->getOperand(0) }; 5649 ReplaceNode( 5650 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, 5651 DL, MVT::Other, Ops)); 5652 return true; 5653 } 5654 5655 // The VFP registers are written to by creating SelectionDAG nodes with 5656 // opcodes corresponding to the register that is being written. So we switch 5657 // on the string to find which opcode we need to use. 5658 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5659 .Case("fpscr", ARM::VMSR) 5660 .Case("fpexc", ARM::VMSR_FPEXC) 5661 .Case("fpsid", ARM::VMSR_FPSID) 5662 .Case("fpinst", ARM::VMSR_FPINST) 5663 .Case("fpinst2", ARM::VMSR_FPINST2) 5664 .Default(0); 5665 5666 if (Opcode) { 5667 if (!Subtarget->hasVFP2Base()) 5668 return false; 5669 Ops = { N->getOperand(2), getAL(CurDAG, DL), 5670 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5671 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5672 return true; 5673 } 5674 5675 std::pair<StringRef, StringRef> Fields; 5676 Fields = StringRef(SpecialReg).rsplit('_'); 5677 std::string Reg = Fields.first.str(); 5678 StringRef Flags = Fields.second; 5679 5680 // If the target was M Class then need to validate the special register value 5681 // and retrieve the mask for use in the instruction node. 
5682 if (Subtarget->isMClass()) { 5683 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5684 if (SYSmValue == -1) 5685 return false; 5686 5687 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5688 N->getOperand(2), getAL(CurDAG, DL), 5689 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5690 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); 5691 return true; 5692 } 5693 5694 // We then check to see if a valid mask can be constructed for one of the 5695 // register string values permitted for the A and R class cores. These values 5696 // are apsr, spsr and cpsr; these are also valid on older cores. 5697 int Mask = getARClassRegisterMask(Reg, Flags); 5698 if (Mask != -1) { 5699 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), 5700 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5701 N->getOperand(0) }; 5702 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, 5703 DL, MVT::Other, Ops)); 5704 return true; 5705 } 5706 5707 return false; 5708 } 5709 5710 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ 5711 std::vector<SDValue> AsmNodeOperands; 5712 unsigned Flag, Kind; 5713 bool Changed = false; 5714 unsigned NumOps = N->getNumOperands(); 5715 5716 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 5717 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 5718 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 5719 // respectively. Since there is no constraint to explicitly specify a 5720 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 5721 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 5722 // them into a GPRPair. 5723 5724 SDLoc dl(N); 5725 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue(); 5726 5727 SmallVector<bool, 8> OpChanged; 5728 // Glue node will be appended late. 
5729 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 5730 SDValue op = N->getOperand(i); 5731 AsmNodeOperands.push_back(op); 5732 5733 if (i < InlineAsm::Op_FirstOperand) 5734 continue; 5735 5736 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 5737 Flag = C->getZExtValue(); 5738 Kind = InlineAsm::getKind(Flag); 5739 } 5740 else 5741 continue; 5742 5743 // Immediate operands to inline asm in the SelectionDAG are modeled with 5744 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and 5745 // the second is a constant with the value of the immediate. If we get here 5746 // and we have a Kind_Imm, skip the next operand, and continue. 5747 if (Kind == InlineAsm::Kind_Imm) { 5748 SDValue op = N->getOperand(++i); 5749 AsmNodeOperands.push_back(op); 5750 continue; 5751 } 5752 5753 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); 5754 if (NumRegs) 5755 OpChanged.push_back(false); 5756 5757 unsigned DefIdx = 0; 5758 bool IsTiedToChangedOp = false; 5759 // If it's a use that is tied with a previous def, it has no 5760 // reg class constraint. 5761 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) 5762 IsTiedToChangedOp = OpChanged[DefIdx]; 5763 5764 // Memory operands to inline asm in the SelectionDAG are modeled with two 5765 // operands: a constant of value InlineAsm::Kind_Mem followed by the input 5766 // operand. If we get here and we have a Kind_Mem, skip the next operand (so 5767 // it doesn't get misinterpreted), and continue. We do this here because 5768 // it's important to update the OpChanged array correctly before moving on. 
5769 if (Kind == InlineAsm::Kind_Mem) { 5770 SDValue op = N->getOperand(++i); 5771 AsmNodeOperands.push_back(op); 5772 continue; 5773 } 5774 5775 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 5776 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 5777 continue; 5778 5779 unsigned RC; 5780 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 5781 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 5782 || NumRegs != 2) 5783 continue; 5784 5785 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 5786 SDValue V0 = N->getOperand(i+1); 5787 SDValue V1 = N->getOperand(i+2); 5788 Register Reg0 = cast<RegisterSDNode>(V0)->getReg(); 5789 Register Reg1 = cast<RegisterSDNode>(V1)->getReg(); 5790 SDValue PairedReg; 5791 MachineRegisterInfo &MRI = MF->getRegInfo(); 5792 5793 if (Kind == InlineAsm::Kind_RegDef || 5794 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 5795 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 5796 // the original GPRs. 5797 5798 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5799 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5800 SDValue Chain = SDValue(N,0); 5801 5802 SDNode *GU = N->getGluedUser(); 5803 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 5804 Chain.getValue(1)); 5805 5806 // Extract values from a GPRPair reg and copy to the original GPR reg. 5807 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 5808 RegCopy); 5809 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 5810 RegCopy); 5811 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 5812 RegCopy.getValue(1)); 5813 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 5814 5815 // Update the original glue user. 
5816 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 5817 Ops.push_back(T1.getValue(1)); 5818 CurDAG->UpdateNodeOperands(GU, Ops); 5819 } 5820 else { 5821 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 5822 // GPRPair and then pass the GPRPair to the inline asm. 5823 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 5824 5825 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 5826 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 5827 Chain.getValue(1)); 5828 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 5829 T0.getValue(1)); 5830 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 5831 5832 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 5833 // i32 VRs of inline asm with it. 5834 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5835 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5836 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 5837 5838 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 5839 Glue = Chain.getValue(1); 5840 } 5841 5842 Changed = true; 5843 5844 if(PairedReg.getNode()) { 5845 OpChanged[OpChanged.size() -1 ] = true; 5846 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 5847 if (IsTiedToChangedOp) 5848 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 5849 else 5850 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 5851 // Replace the current flag. 5852 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 5853 Flag, dl, MVT::i32); 5854 // Add the new register node and skip the original two GPRs. 5855 AsmNodeOperands.push_back(PairedReg); 5856 // Skip the next two GPRs. 
5857 i += 2; 5858 } 5859 } 5860 5861 if (Glue.getNode()) 5862 AsmNodeOperands.push_back(Glue); 5863 if (!Changed) 5864 return false; 5865 5866 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N), 5867 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 5868 New->setNodeId(-1); 5869 ReplaceNode(N, New.getNode()); 5870 return true; 5871 } 5872 5873 5874 bool ARMDAGToDAGISel:: 5875 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 5876 std::vector<SDValue> &OutOps) { 5877 switch(ConstraintID) { 5878 default: 5879 llvm_unreachable("Unexpected asm memory constraint"); 5880 case InlineAsm::Constraint_m: 5881 case InlineAsm::Constraint_o: 5882 case InlineAsm::Constraint_Q: 5883 case InlineAsm::Constraint_Um: 5884 case InlineAsm::Constraint_Un: 5885 case InlineAsm::Constraint_Uq: 5886 case InlineAsm::Constraint_Us: 5887 case InlineAsm::Constraint_Ut: 5888 case InlineAsm::Constraint_Uv: 5889 case InlineAsm::Constraint_Uy: 5890 // Require the address to be in a register. That is safe for all ARM 5891 // variants and it is hard to do anything much smarter without knowing 5892 // how the operand is used. 5893 OutOps.push_back(Op); 5894 return false; 5895 } 5896 return true; 5897 } 5898 5899 /// createARMISelDag - This pass converts a legalized DAG into a 5900 /// ARM-specific DAG, ready for instruction scheduling. 5901 /// 5902 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 5903 CodeGenOpt::Level OptLevel) { 5904 return new ARMDAGToDAGISel(TM, OptLevel); 5905 } 5906