//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "arm-isel"
#define PASS_NAME "ARM Instruction Selection"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  static char ID;

  ARMDAGToDAGISel() = delete;

  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISel(ID, tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true, as some complex patterns (like those that call
  /// canExtractShiftFromMul) can modify the DAG in place.
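  /// (For instance, SelectImmShifterOperand and SelectLdStSOReg below rewrite
  /// a multiply-by-constant operand via replaceDAGValue while matching.)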
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template <int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  ///   2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  ///   1 for long rounding variants, vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array, which contains
  /// multiple opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands,
  /// arm_mve_vmlldava_[predicated].
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands,
  /// int_arm_mve_vrmlldavha[_predicated].
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                           const uint16_t *OpcodesS, const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D, CX1DA,
  /// CX2D, CX2DA, CX3D, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///                     the accumulator and the immediate operand, i.e. 0
  ///                     for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
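  /// For example, (and (srl x, 7), 255) extracts 8 bits starting at bit 7,
  /// which on v6T2 can be a single "ubfx rd, rn, #7, #8" instead of a shift
  /// plus a mask.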
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but
  /// only if it simplifies the materialization of the constant. Returns true
  /// if it is, and assigns to PowerOfTwo the power of two that should be
  /// extracted out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

char ARMDAGToDAGISel::ID = 0;

INITIALIZE_PASS(ARMDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)

/// isInt32Immediate - This method tests to see if the node is a 32-bit
/// constant operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = N->getAsZExtVal();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode with an immediate integer right operand. If so, Imm will receive the
// 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
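/// For example, with \p Scale = 4 and the range [0, 256), the constant 1020
/// scales down to 255 and is accepted, while 1022 is rejected because it is
/// not a multiple of 4.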
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bit-field
    // extraction node (UBFX).

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. on
      // Swift, a left shift of 1 or 2 as a shifter operand is free, but
      // others are not. For example:
      //   ubfx  r3, r1, #16, #8
      //   ldr.w r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w r9, #1020
      //   and.w r2, r9, r1, lsr #14
      //   ldr   r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOptLevel::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant.
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of.
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better.
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
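  // For example, (mul x, 1020) can become (mul x, 255) wrapped in an
  // "lsl #2" shifter operand, assuming 255 is cheaper to materialize than
  // 1020 on the target (canExtractShiftFromMul checks exactly that).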
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}

bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
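      // The zero offset is used here; the real SP/FP-relative displacement is
      // only known once frame indices are eliminated after register
      // allocation.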
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = -RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12.
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub
                                                     : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
      ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't
    // fold it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
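  // e.g. for (add (shl r1, 2), r0) the base becomes r0 and the offset r1,
  // giving the [r0, r1, lsl #2] form.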
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset,
                                                  SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
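    // The immediate is carried in the addrmode3 operand itself; the
    // offset-register slot is left as the zero register.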
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base,
                                        SDValue &Offset, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
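    // e.g. a NEON intrinsic on a 16-byte-aligned pointer records 16 here,
    // even if the instruction can only encode a smaller alignment.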
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    if (!isNullConstant(N))
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
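  // e.g. with Scale == 4 (word-sized loads/stores), offsets 0, 4, ..., 124
  // fit the 5-bit immediate field; anything else falls back to the
  // register-offset form.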
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
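  // Only the negative range [-255, -1] is accepted here; non-negative
  // offsets are left for the imm12 form selected above.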
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
        ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
        : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
        ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
        : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                    MVT::i32);
    return true;
  }
  return false;
}

template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
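  // The offset field of ldrex/strex holds the byte offset divided by 4, so
  // only word-aligned offsets up to 1020 can be folded; anything else keeps
  // the whole address in the base register.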
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC / 4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;
  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
        ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
        : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
            ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                           MVT::i32, MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                           MVT::i32, MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load
  // to look however, so we use a pseudo here and switch it for a tLDMIA_UPD
  // after ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[] = { Base, getAL(CurDAG, SDLoc(N)),
                    CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ?
            ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[] = { Base, Offset, getAL(CurDAG, SDLoc(N)),
                      CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a
  // vldrb.8 as opposed to a vldrw.32). This can allow extra addressing modes
  // or alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ?
                 ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
                   PredReg,
                   CurDAG->getRegister(0, MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive S registers.
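/// Each of these helpers emits a REG_SEQUENCE tying its inputs to consecutive
/// subregister indices of one super-register (here ssub_0..ssub_3 of a
/// QPR_VFP2 register), which is how ISel expresses the adjacency constraint
/// to the register allocator.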
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
/// of a NEON VLD or VST instruction. The supported values depend on the
/// number of registers being loaded.
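/// For example, a four-register access can carry an alignment hint of up to
/// 32 bytes, a two-register access up to 16, and any guarantee below 8 bytes
/// is encoded as 0 (no alignment hint).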
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
                                       unsigned NumVecs, bool is64BitVector) {
  unsigned NumRegs = NumVecs;
  if (!is64BitVector && NumVecs < 3)
    NumRegs *= 2;

  unsigned Alignment = Align->getAsZExtVal();
  if (Alignment >= 32 && NumRegs == 4)
    Alignment = 32;
  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
    Alignment = 16;
  else if (Alignment >= 8)
    Alignment = 8;
  else
    Alignment = 0;

  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
}

static bool isVLDfixed(unsigned Opc) {
  switch (Opc) {
  default: return false;
  case ARM::VLD1d8wb_fixed : return true;
  case ARM::VLD1d16wb_fixed : return true;
  case ARM::VLD1d64Qwb_fixed : return true;
  case ARM::VLD1d32wb_fixed : return true;
  case ARM::VLD1d64wb_fixed : return true;
  case ARM::VLD1d8TPseudoWB_fixed : return true;
  case ARM::VLD1d16TPseudoWB_fixed : return true;
  case ARM::VLD1d32TPseudoWB_fixed : return true;
  case ARM::VLD1d64TPseudoWB_fixed : return true;
  case ARM::VLD1d8QPseudoWB_fixed : return true;
  case ARM::VLD1d16QPseudoWB_fixed : return true;
  case ARM::VLD1d32QPseudoWB_fixed : return true;
  case ARM::VLD1d64QPseudoWB_fixed : return true;
  case ARM::VLD1q8wb_fixed : return true;
  case ARM::VLD1q16wb_fixed : return true;
  case ARM::VLD1q32wb_fixed : return true;
  case ARM::VLD1q64wb_fixed : return true;
  case ARM::VLD1DUPd8wb_fixed : return true;
  case ARM::VLD1DUPd16wb_fixed : return true;
  case ARM::VLD1DUPd32wb_fixed : return true;
  case ARM::VLD1DUPq8wb_fixed : return true;
  case ARM::VLD1DUPq16wb_fixed : return true;
  case ARM::VLD1DUPq32wb_fixed : return true;
  case ARM::VLD2d8wb_fixed : return true;
  case ARM::VLD2d16wb_fixed : return true;
  case ARM::VLD2d32wb_fixed : return true;
  case ARM::VLD2q8PseudoWB_fixed : return true;
  case ARM::VLD2q16PseudoWB_fixed : return true;
  case ARM::VLD2q32PseudoWB_fixed : return true;
  case ARM::VLD2DUPd8wb_fixed : return true;
  case ARM::VLD2DUPd16wb_fixed : return true;
  case ARM::VLD2DUPd32wb_fixed : return true;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
  }
}

static bool isVSTfixed(unsigned Opc) {
  switch (Opc) {
  default: return false;
  case ARM::VST1d8wb_fixed : return true;
  case ARM::VST1d16wb_fixed : return true;
  case ARM::VST1d32wb_fixed : return true;
  case ARM::VST1d64wb_fixed : return true;
  case ARM::VST1q8wb_fixed : return true;
  case ARM::VST1q16wb_fixed : return true;
  case ARM::VST1q32wb_fixed : return true;
  case ARM::VST1q64wb_fixed : return true;
  case ARM::VST1d8TPseudoWB_fixed : return true;
  case ARM::VST1d16TPseudoWB_fixed : return true;
  case ARM::VST1d32TPseudoWB_fixed : return true;
  case ARM::VST1d64TPseudoWB_fixed : return true;
  case ARM::VST1d8QPseudoWB_fixed : return true;
  case ARM::VST1d16QPseudoWB_fixed : return true;
  case ARM::VST1d32QPseudoWB_fixed : return true;
  case ARM::VST1d64QPseudoWB_fixed : return true;
  case ARM::VST2d8wb_fixed : return true;
  case ARM::VST2d16wb_fixed : return true;
  case ARM::VST2d32wb_fixed : return true;
  case ARM::VST2q8PseudoWB_fixed : return true;
  case ARM::VST2q16PseudoWB_fixed : return true;
  case ARM::VST2q32PseudoWB_fixed : return true;
  }
}
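
// Post-indexed NEON VLD/VST writeback comes in two forms: "_fixed", where the
// base register is advanced by the size of the access, and "_register", where
// an explicit register operand supplies the stride.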

// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc)) &&
         "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed:
    return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form
/// can be used.
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  auto C = dyn_cast<ConstantSDNode>(Inc);
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}

void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
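    // There is no 3-register class, so a vld3 result is modeled with the
    // 4-register super-register type and the extra slot is left unused.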
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex]
                                  : QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
        // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
        // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd
    // registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy),
                0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ?
                       ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
            ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl,
                                             VT), 0)
            : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ?
                        DOpcodes[OpcodeIndex] : QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd
  // registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}

void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
        ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT),
                  0)
        : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex]
                                : QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
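  // A lane load produces one wide tuple value; each of the intrinsic's
  // NumVecs results is then rewritten as a dsub_N/qsub_N extract of it.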
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask,
                                           SDValue Inactive) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
  Ops.push_back(Inactive);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                                EVT InactiveTy) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
  Ops.push_back(SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}

void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  uint16_t Opcode;
  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(N->getOperand(2)); // vector of base addresses

  int32_t ImmValue = N->getConstantOperandVal(3);
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(N->getOperand(0)); // chain

  SmallVector<EVT, 8> VTs;
  VTs.push_back(N->getValueType(1));
  VTs.push_back(N->getValueType(0));
  VTs.push_back(N->getValueType(2));

  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  transferMemOperands(N, New);
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
                                          bool Immediate,
                                          bool HasSaturationOperand) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

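  // Operand order for the long-shift instructions: the two GPR halves of the
  // 64-bit value, the shift count (immediate or register), an optional
  // saturation-bit selector, and finally the standard predicate operands.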
  // Two 32-bit halves of the value to be shifted
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));

  // The shift count
  if (Immediate) {
    int32_t ImmValue = N->getConstantOperandVal(3);
    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
  } else {
    Ops.push_back(N->getOperand(3));
  }

  // The immediate saturation operand, if any
  if (HasSaturationOperand) {
    int32_t SatOp = N->getConstantOperandVal(4);
    int SatBit = (SatOp == 64 ? 0 : 1);
    Ops.push_back(getI32Imm(SatBit, Loc));
  }

  // MVE scalar shifts are IT-predicable, so include the standard
  // predicate arguments.
  Ops.push_back(getAL(CurDAG, Loc));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  uint16_t Opcode;

  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(N->getOperand(FirstInputOp));
  Ops.push_back(N->getOperand(FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
  uint32_t CarryMask = 1 << 29;
  uint32_t CarryExpected = Add ? 0 : CarryMask;
  if (CarryInConstant &&
      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
    Opcode = OpcodeWithNoCarry;
  } else {
    Ops.push_back(CarryIn);
    Opcode = OpcodeWithCarry;
  }

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc,
                         N->getOperand(FirstInputOp + 3),  // predicate
                         N->getOperand(FirstInputOp - 1)); // inactive
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // One vector input, followed by a 32-bit word of bits to shift in
  // and then an immediate shift count
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));
  int32_t ImmValue = N->getConstantOperandVal(3);
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
}

static bool SDValueToConstBool(SDValue SDVal) {
  assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
  ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
  uint64_t Value = SDValConstant->getZExtValue();
  assert((Value == 0 || Value == 1) && "expected value 0 or 1");
  return Value;
}

void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
  bool IsSub = SDValueToConstBool(N->getOperand(2));
  bool IsExchange = SDValueToConstBool(N->getOperand(3));
  if (IsUnsigned) {
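    // Only the signed forms have subtracting or exchanging variants, so
    // these combinations should never reach instruction selection.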
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  auto OpIsZero = [N](size_t OpNo) {
    return isNullConstant(N->getOperand(OpNo));
  };

  // If the input accumulator value is not zero, select an instruction with
  // accumulator, otherwise select an instruction without accumulator.
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(N->getOperand(4));
    Ops.push_back(N->getOperand(5));
  }
  // Push the two vector operands
  Ops.push_back(N->getOperand(6));
  Ops.push_back(N->getOperand(7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
                                        const uint16_t *OpcodesS,
                                        const uint16_t *OpcodesU) {
  EVT VecTy = N->getOperand(6).getValueType();
  size_t SizeIndex;
  switch (VecTy.getVectorElementType().getSizeInBits()) {
  case 16:
    SizeIndex = 0;
    break;
  case 32:
    SizeIndex = 1;
    break;
  default:
    llvm_unreachable("bad vector element size");
  }

  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
}

void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                                          const uint16_t *OpcodesS,
                                          const uint16_t *OpcodesU) {
  assert(
      N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
          32 &&
      "bad vector element size");
  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
}

void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
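  // MVE VLD2/VLD4 are selected as a sequence of NumVecs "stage" instructions
  // (e.g. MVE_VLD20_8 followed by MVE_VLD21_8): each stage reads the same
  // base pointer and fills in part of the tuple register, and only the last
  // stage may also write the incremented pointer back.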
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add a MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
  transferMemOperands(N, LoadInst);

  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  unsigned OpIdx = 1;

  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(OpIdx++);

  Ops.push_back(N->getOperand(OpIdx++)); // base
  if (Wrapping)
    Ops.push_back(N->getOperand(OpIdx++)); // limit

  SDValue ImmOp = N->getOperand(OpIdx++); // step
  int ImmValue = ImmOp->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmValue, Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCoproc = N->getOperand(OpIdx++);
  uint32_t ImmCoprocVal = ImmCoproc->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));

  // For accumulating variants copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(OpIdx++);
    SDValue AccHi = N->getOperand(OpIdx++);
    if (IsBigEndian)
      std::swap(AccLo, AccHi);
    Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
  }

  // Copy extra operands as-is.
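  // These are the remaining register inputs of the intrinsic (for example
  // the source operands of a cx2d/cx3d); they need no conversion.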
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(N->getOperand(OpIdx++));

  // Convert and append the immediate operand.
  SDValue Imm = N->getOperand(OpIdx);
  uint32_t ImmVal = Imm->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmVal, Loc));

  // Accumulating variants are IT-predicable, add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, Loc);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    Ops.push_back(Pred);
    Ops.push_back(PredReg);
  }

  // Create the CDE instruction.
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the
  // dual-register CDE instruction is a register pair. We need to extract the
  // two subregisters and replace all uses of the original outputs with the
  // extracted subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  if (IsBigEndian)
    std::swap(SubRegs[0], SubRegs[1]);

  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
                                                    MVT::i32, ResultPair);
    ReplaceUses(SDValue(N, ResIdx), SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
    OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

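  // A vld*dup replicates one element into every lane, but the machine node
  // still defines all NumVecs registers, so the result is typed as a wide
  // i64 tuple (with a 3-vector dup again rounded up to 4 registers).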
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else if (NumVecs == 2) {
    const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Chain = SDValue(VLdA, 1);
  } else {
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Ops.push_back(SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7,
                  "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx + Vec, dl, VT,
                                                 SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}

bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower inserts
  // and extracts of v8f16 and v8i16 vectors.
  // Check that we have two adjacent inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val1.getOperand(1)) &&
      isa<ConstantSDNode>(Val2.getOperand(1)) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into a f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0), NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Otherwise match a v8i16 pattern of an extract and an insert, with an
    // optional VMOVX for extracting odd lanes.
    if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1),
                       0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2),
                       0);
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2,
                                            Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl,
                                        MVT::v4f32, Ins2.getOperand(0),
                                        SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted; if they are f16, insert them
  // directly using a VINS.
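  // VINS writes the top half of a 32-bit S register while preserving the
  // bottom half, so a single VINS can pair two f16 values into one f32 lane.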
  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
  if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
    SDNode *VINS =
        CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
    SDValue NewIns =
        CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                      Ins2.getOperand(0), SDValue(VINS, 0));
    ReplaceUses(Ins1, NewIns);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
                                                            SDNode *FMul,
                                                            bool IsUnsigned,
                                                            bool FixedToFloat) {
  auto Type = N->getValueType(0);
  unsigned ScalarBits = Type.getScalarSizeInBits();
  if (ScalarBits > 32)
    return false;

  SDNodeFlags FMulFlags = FMul->getFlags();
  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
  // allowed in 16-bit unsigned floats.
  if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
    return false;

  SDValue ImmNode = FMul->getOperand(1);
  SDValue VecVal = FMul->getOperand(0);
  if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
      VecVal->getOpcode() == ISD::SINT_TO_FP)
    VecVal = VecVal->getOperand(0);

  if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  if (ImmNode.getOpcode() == ISD::BITCAST) {
    if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
      return false;
    ImmNode = ImmNode.getOperand(0);
  }

  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  APFloat ImmAPF(0.0f);
  switch (ImmNode.getOpcode()) {
  case ARMISD::VMOVIMM:
  case ARMISD::VDUP: {
    if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
      return false;
    unsigned Imm = ImmNode.getConstantOperandVal(0);
    if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
      Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
    ImmAPF =
        APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
                APInt(ScalarBits, Imm));
    break;
  }
  case ARMISD::VMOVFPIMM: {
    ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0)));
    break;
  }
  default:
    return false;
  }

  // Where n is the number of fractional bits, multiplying by 2^n will convert
  // from float to fixed and multiplying by 2^-n will convert from fixed to
  // float. Taking log2 of the factor (after taking the inverse in the case of
  // float to fixed) will give n.
  APFloat ToConvert = ImmAPF;
  if (FixedToFloat) {
    if (!ImmAPF.getExactInverse(&ToConvert))
      return false;
  }
  APSInt Converted(64, false);
  bool IsExact;
  ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,
                             &IsExact);
  if (!IsExact || !Converted.isPowerOf2())
    return false;

  unsigned FracBits = Converted.logBase2();
  if (FracBits > ScalarBits)
    return false;

  SmallVector<SDValue, 3> Ops{
      VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
  AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);

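  // Worked example of the FracBits computation above: an fmul of a v4f32 by
  // a splat of 256.0 in a float->fixed conversion gives ToConvert = 256.0,
  // which converts exactly to the integer 256 = 2^8, so FracBits = 8 and the
  // pair lowers to an MVE_VCVT..._fix with fixed-point operand #8.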
  unsigned int Opcode;
  switch (ScalarBits) {
  case 16:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
    break;
  case 32:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
    break;
  default:
    llvm_unreachable("unexpected number of scalar bits");
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
  return true;
}

bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
  // Transform a floating-point to fixed-point conversion to a VCVT
  if (!Subtarget->hasMVEFloatOps())
    return false;
  EVT Type = N->getValueType(0);
  if (!Type.isVector())
    return false;
  unsigned int ScalarBits = Type.getScalarSizeInBits();

  bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
                    N->getOpcode() == ISD::FP_TO_UINT_SAT;
  SDNode *Node = N->getOperand(0).getNode();

  // A floating-point to fixed-point conversion with one fractional bit gets
  // turned into an FP_TO_[U|S]INT(FADD (x, x)) rather than an
  // FP_TO_[U|S]INT(FMUL (x, y)).
  if (Node->getOpcode() == ISD::FADD) {
    if (Node->getOperand(0) != Node->getOperand(1))
      return false;
    SDNodeFlags Flags = Node->getFlags();
    // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
    // allowed in 16-bit unsigned floats.
    if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
      return false;

    unsigned Opcode;
    switch (ScalarBits) {
    case 16:
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
      break;
    case 32:
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
      break;
    default:
      // Guard against unexpected element sizes; without this the switch
      // could fall through with Opcode uninitialized.
      llvm_unreachable("unexpected number of scalar bits");
    }
    SmallVector<SDValue, 3> Ops{Node->getOperand(0),
                                CurDAG->getConstant(1, dl, MVT::i32)};
    AddEmptyMVEPredicateToOps(Ops, dl, Type);

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
    return true;
  }

  if (Node->getOpcode() != ISD::FMUL)
    return false;

  return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
}

bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
  // Transform a fixed-point to floating-point conversion to a VCVT
  if (!Subtarget->hasMVEFloatOps())
    return false;
  auto Type = N->getValueType(0);
  if (!Type.isVector())
    return false;

  auto LHS = N->getOperand(0);
  if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
    return false;

  return transformFixedFloatingPointConversion(
      N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
}

bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
                     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
                     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
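        // e.g. for (and (srl x, 16), 0x1ffff) only 16 bits remain after the
        // shift, so masking with -1U >> 16 = 0xffff trims the AND immediate
        // to 0xffff before the width is computed.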
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = llvm::countr_one(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
              CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB),
                                        dl, MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = llvm::countr_zero(And_imm);
    // Shift must be the same as the AND's LSB
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = llvm::Log2_32(And_imm);
      // Note: The width operand is encoded as width-1.
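      // e.g. (srl (and x, 0x0ff0), 4): LSB = 4, MSB = 11, so the UBFX below
      // gets lsb #4 and width operand 7 (widths are encoded as width-1, i.e.
      // 8 bits are extracted).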
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}

/// Target-specific DAG combining for ISD::SUB.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X,  0,  X, -X
/// select_cc setgt    X, -1,  X, -X
/// select_cc setl[te] X,  0, -X,  X
/// select_cc setlt    X,  1, -X,  X
/// which represent Integer ABS into:
/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N) {
  SDValue SUBSrc0 = N->getOperand(0);
  SDValue SUBSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  if (Subtarget->isThumb1Only())
    return false;

  if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue XORSrc0 = SUBSrc0.getOperand(0);
  SDValue XORSrc1 = SUBSrc0.getOperand(1);
  SDValue SRASrc0 = SUBSrc1.getOperand(0);
  SDValue SRASrc1 = SUBSrc1.getOperand(1);
  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  unsigned Size = XType.getSizeInBits() - 1;

  if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
      SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
    return true;
  }

  return false;
}

/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

static std::optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
  unsigned LastOne = A.countr_zero();
  if (A.popcount() != (FirstOne - LastOne + 1))
    return std::nullopt;
  return std::make_pair(FirstOne, LastOne);
}

void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL
    // and LSR don't exist as standalone instructions - they need the barrel
    // shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode * {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    // 1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    // 2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    // 3. Only one bit is set. We can shift this into the sign bit and use a
    // PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    // 4. Do a double shift to clear the bottom and top bits, but only in
    // Thumb-1 mode, as in Thumb-2 we can use UBFX.
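    // e.g. C = 0x00ffff00 gives Range = {23, 8}: LSLS by 8 (31 - 23) shifts
    // the mask's top bit into bit 31, then LSRS by 16 (8 + 31 - 23) drops
    // the low zeros; the flags of the final shift give the same EQ/NE answer
    // as the original AND against zero.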
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}

static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
                                       unsigned Opc128[3]) {
  assert((VT.is64BitVector() || VT.is128BitVector()) &&
         "Unexpected vector shuffle length");
  switch (VT.getScalarSizeInBits()) {
  default:
    llvm_unreachable("Unexpected vector shuffle element size");
  case 8:
    return VT.is64BitVector() ? Opc64[0] : Opc128[0];
  case 16:
    return VT.is64BitVector() ? Opc64[1] : Opc128[1];
  case 32:
    return VT.is64BitVector() ? Opc64[2] : Opc128[2];
  }
}

void ARMDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  default: break;
  case ISD::STORE: {
    // For Thumb1, match an sp-relative store in C++. This is a little
    // unfortunate, but I don't think I can make the chain check work
    // otherwise. (The chain of the store has to be the same as the chain
    // of the CopyFromReg, or else we can't replace the CopyFromReg with
    // a direct reference to "SP".)
    //
    // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
    // a different addressing mode from other four-byte stores.
    //
    // This pattern usually comes up with call arguments.
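    // e.g. an outgoing-argument store "str r0, [sp, #8]" is matched here as
    // tSTRspi with offset operand 2, since tSTRspi scales its immediate by 4.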
    StoreSDNode *ST = cast<StoreSDNode>(N);
    SDValue Ptr = ST->getBasePtr();
    if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
      int RHSC = 0;
      if (Ptr.getOpcode() == ISD::ADD &&
          isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256,
                                  RHSC))
        Ptr = Ptr.getOperand(0);

      if (Ptr.getOpcode() == ISD::CopyFromReg &&
          cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
          Ptr.getOperand(0) == ST->getChain()) {
        SDValue Ops[] = {ST->getValue(),
                         CurDAG->getRegister(ARM::SP, MVT::i32),
                         CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
                         getAL(CurDAG, dl),
                         CurDAG->getRegister(0, MVT::i32),
                         ST->getChain()};
        MachineSDNode *ResNode =
            CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
        MachineMemOperand *MemOp = ST->getMemOperand();
        CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
        ReplaceNode(N, ResNode);
        return;
      }
    }
    break;
  }
  case ISD::WRITE_REGISTER:
    if (tryWriteRegister(N))
      return;
    break;
  case ISD::READ_REGISTER:
    if (tryReadRegister(N))
      return;
    break;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    if (tryInlineAsm(N))
      return;
    break;
  case ISD::SUB:
    // Select special operations if SUB node forms integer ABS pattern
    if (tryABSOp(N))
      return;
    // Other cases are autogenerated.
    break;
  case ISD::Constant: {
    unsigned Val = N->getAsZExtVal();
    // If we can't materialize the constant we need to use a literal pool
    if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
        !Subtarget->genExecuteOnly()) {
      SDValue CPIdx = CurDAG->getTargetConstantPool(
          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
          TLI->getPointerTy(CurDAG->getDataLayout()));

      SDNode *ResNode;
      if (Subtarget->isThumb()) {
        SDValue Ops[] = {
          CPIdx,
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32,
                                         MVT::Other, Ops);
      } else {
        SDValue Ops[] = {
          CPIdx,
          CurDAG->getTargetConstant(0, dl, MVT::i32),
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
                                         Ops);
      }
      // Annotate the Node with memory operand information so that MachineInstr
      // queries work properly. This e.g. gives the register allocation the
      // required information for rematerialization.
      MachineFunction &MF = CurDAG->getMachineFunction();
      MachineMemOperand *MemOp =
          MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                                  MachineMemOperand::MOLoad, 4, Align(4));

      CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});

      ReplaceNode(N, ResNode);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::FrameIndex: {
    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    if (Subtarget->isThumb1Only()) {
      // Set the alignment of the frame object to 4, to avoid having to
      // generate more than one ADD.
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (MFI.getObjectAlign(FI) < Align(4))
        MFI.setObjectAlignment(FI, Align(4));
      CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
                           CurDAG->getTargetConstant(0, dl, MVT::i32));
      return;
    } else {
      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
                      ARM::t2ADDri : ARM::ADDri);
      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return;
    }
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (tryInsertVectorElt(N))
      return;
    break;
  }
  case ISD::SRL:
    if (tryV6T2BitfieldExtractOp(N, false))
      return;
    break;
  case ISD::SIGN_EXTEND_INREG:
  case ISD::SRA:
    if (tryV6T2BitfieldExtractOp(N, true))
      return;
    break;
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::FP_TO_SINT_SAT:
    if (tryFP_TO_INT(N, dl))
      return;
    break;
  case ISD::FMUL:
    if (tryFMULFixed(N, dl))
      return;
    break;
  case ISD::MUL:
    if (Subtarget->isThumb1Only())
      break;
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      unsigned RHSV = C->getZExtValue();
      if (!RHSV) break;
      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
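        // e.g. mul x, 9 (9 = 2^3 + 1) becomes x + (x << 3), i.e. the
        // t2ADDrs/ADDrsi with an lsl #3 shifter operand built below.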
        unsigned ShImm = Log2_32(RHSV-1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
          return;
        }
      }
      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
        unsigned ShImm = Log2_32(RHSV+1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
          return;
        }
      }
    }
    break;
  case ISD::AND: {
    // Check for unsigned bitfield extract
    if (tryV6T2BitfieldExtractOp(N, false))
      return;

    // If an immediate is used in an AND node, it is possible that the
    // immediate can be more optimally materialized when negated. If this is
    // the case we can negate the immediate and use a BIC instead.
    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
      uint32_t Imm = (uint32_t) N1C->getZExtValue();

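      // Illustrative example: on Thumb1, Imm = 0xffffff00 takes extra
      // instructions to build, while ~Imm = 0xff is a single MOVS, so the
      // cheaper route is to materialize 0xff and emit a BIC.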
      // In Thumb2 mode, an AND can take a 12-bit immediate. If this
      // immediate can be negated and fit in the immediate operand of
      // a t2BIC, don't do any manual transform here as this can be
      // handled by the generic ISel machinery.
      bool PreferImmediateEncoding =
          Subtarget->hasThumb2() &&
          (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
      if (!PreferImmediateEncoding &&
          ConstantMaterializationCost(Imm, Subtarget) >
              ConstantMaterializationCost(~Imm, Subtarget)) {
        // The current immediate costs more to materialize than a negated
        // immediate, so negate the immediate and use a BIC.
        SDValue NewImm =
            CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
        // If the new constant didn't exist before, reposition it in the
        // topological ordering so it is just before N. Otherwise, don't
        // touch its location.
        if (NewImm->getNodeId() == -1)
          CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());

        if (!Subtarget->hasThumb2()) {
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
                           N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
          return;
        } else {
          SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N,
                      CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
          return;
        }
      }
    }

    // (and (or x, c2), c1), where the top 16 bits of c1 and c2 match, the
    // lower 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0.
    // That is, the top 16 bits are entirely contributed by c2 and the lower
    // 16 bits are entirely contributed by x. That's equal to
    // (or (and x, 0xffff), (and c2, 0xffff0000)).
    // Select it to: movt x, ((c2 & 0xffff0000) >> 16)
    EVT VT = N->getValueType(0);
    if (VT != MVT::i32)
      break;
    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
                       ? ARM::t2MOVTi16
                       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
    if (!Opc)
      break;
    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
    N1C = dyn_cast<ConstantSDNode>(N1);
    if (!N1C)
      break;
    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
      SDValue N2 = N0.getOperand(1);
      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
      if (!N2C)
        break;
      unsigned N1CVal = N1C->getZExtValue();
      unsigned N2CVal = N2C->getZExtValue();
      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
          (N1CVal & 0xffffU) == 0xffffU &&
          (N2CVal & 0xffffU) == 0x0U) {
        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
                                                  dl, MVT::i32);
        SDValue Ops[] = { N0.getOperand(0), Imm16,
                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
        ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
        return;
      }
    }

    break;
  }
  case ARMISD::UMAAL: {
    unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      N->getOperand(2), N->getOperand(3),
                      getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
    return;
  }
  case ARMISD::UMLAL: {
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32,
                                    Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5,
                         dl, MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SMLAL: {
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32,
                                    Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5,
                         dl, MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SUBE: {
    if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
      break;
    // Look for a pattern to match SMMLS
    //   (sube a, (smul_lohi a, b), (subc 0, (smul_lohi a, b)))
    // where the sube reads the high half of the multiply and the subc reads
    // the low half.
    if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
        N->getOperand(2).getOpcode() != ARMISD::SUBC ||
        !SDValue(N, 1).use_empty())
      break;

    if (Subtarget->isThumb())
      assert(Subtarget->hasThumb2() &&
             "This pattern should not be generated for Thumb");

    SDValue SmulLoHi = N->getOperand(1);
    SDValue Subc = N->getOperand(2);
    SDValue Zero = Subc.getOperand(0);

    if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
        N->getOperand(1) != SmulLoHi.getValue(1) ||
        N->getOperand(2) != Subc.getValue(1))
      break;

    unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
    SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
                      N->getOperand(0), getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
    return;
  }
  case ISD::LOAD: {
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
      if (tryT2IndexedLoad(N))
        return;
    } else if (Subtarget->isThumb()) {
      if (tryT1IndexedLoad(N))
        return;
    } else if (tryARMIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  }
  case ISD::MLOAD:
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  case ARMISD::WLSSETUP: {
    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
                                         N->getOperand(0));
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::WLS: {
    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
                                         N->getOperand(1), N->getOperand(2),
                                         N->getOperand(0));
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LE: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    unsigned Opc = ARM::t2LoopEnd;
    SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LDRD: {
    if (Subtarget->isThumb2())
      break; // TableGen handles isel in this case.
    SDValue Base, RegOffset, ImmOffset;
    const SDValue &Chain = N->getOperand(0);
    const SDValue &Addr = N->getOperand(1);
    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
    if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
      // The register-offset variant of LDRD mandates that the register
      // allocated to RegOffset is not reused in any of the remaining operands.
      // This restriction is currently not enforced. Therefore emitting this
      // variant is explicitly avoided.
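      // (Falling back to the full address as the base with a zero offset is
      // always valid; the address computation is simply left as a separate
      // add.)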
      Base = Addr;
      RegOffset = CurDAG->getRegister(0, MVT::i32);
    }
    SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
    SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
                                         {MVT::Untyped, MVT::Other}, Ops);
    SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                SDValue(New, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                SDValue(New, 0));
    transferMemOperands(N, New);
    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), SDValue(New, 1));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::STRD: {
    if (Subtarget->isThumb2())
      break; // TableGen handles isel in this case.
    SDValue Base, RegOffset, ImmOffset;
    const SDValue &Chain = N->getOperand(0);
    const SDValue &Addr = N->getOperand(3);
    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
    if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
      // The register-offset variant of STRD mandates that the register
      // allocated to RegOffset is not reused in any of the remaining operands.
      // This restriction is currently not enforced. Therefore emitting this
      // variant is explicitly avoided.
      Base = Addr;
      RegOffset = CurDAG->getRegister(0, MVT::i32);
    }
    SDNode *RegPair =
        createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
    SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
    SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceUses(SDValue(N, 0), SDValue(New, 0));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LOOP_DEC: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    SDNode *Dec =
        CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                               CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
    ReplaceUses(N, Dec);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::BRCOND: {
    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    unsigned Opc = Subtarget->isThumb() ?
      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
    SDValue Chain = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue N2 = N->getOperand(2);
    SDValue N3 = N->getOperand(3);
    SDValue InGlue = N->getOperand(4);
    assert(N1.getOpcode() == ISD::BasicBlock);
    assert(N2.getOpcode() == ISD::Constant);
    assert(N3.getOpcode() == ISD::Register);

    unsigned CC = (unsigned)N2->getAsZExtVal();

    if (InGlue.getOpcode() == ARMISD::CMPZ) {
      if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
        SDValue Int = InGlue.getOperand(0);
        uint64_t ID = Int->getConstantOperandVal(1);

        // Handle low-overhead loops.
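        // A sketch of the IR shape being matched (names hypothetical):
        //   %rem = call i32 @llvm.loop.decrement.reg.i32(i32 %elts, i32 4)
        //   %cmp = icmp ne i32 %rem, 0
        //   br i1 %cmp, label %loop, label %exit
        // The intrinsic and the branch are selected together into
        // t2LoopDec + t2LoopEnd below.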
        if (ID == Intrinsic::loop_decrement_reg) {
          SDValue Elements = Int.getOperand(2);
          SDValue Size = CurDAG->getTargetConstant(
              Int.getConstantOperandVal(3), dl, MVT::i32);

          SDValue Args[] = { Elements, Size, Int.getOperand(0) };
          SDNode *LoopDec =
              CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                                     CurDAG->getVTList(MVT::i32, MVT::Other),
                                     Args);
          ReplaceUses(Int.getNode(), LoopDec);

          SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
          SDNode *LoopEnd =
              CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);

          ReplaceUses(N, LoopEnd);
          CurDAG->RemoveDeadNode(N);
          CurDAG->RemoveDeadNode(InGlue.getNode());
          CurDAG->RemoveDeadNode(Int.getNode());
          return;
        }
      }

      bool SwitchEQNEToPLMI;
      SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
      InGlue = N->getOperand(4);

      if (SwitchEQNEToPLMI) {
        switch ((ARMCC::CondCodes)CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = (unsigned)ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = (unsigned)ARMCC::PL;
          break;
        }
      }
    }

    SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
    SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
                                             MVT::Glue, Ops);
    Chain = SDValue(ResNode, 0);
    if (N->getNumValues() == 2) {
      InGlue = SDValue(ResNode, 1);
      ReplaceUses(SDValue(N, 1), InGlue);
    }
    ReplaceUses(SDValue(N, 0),
                SDValue(Chain.getNode(), Chain.getResNo()));
    CurDAG->RemoveDeadNode(N);
    return;
  }

  case ARMISD::CMPZ: {
    // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
    // This allows us to avoid materializing the expensive negative constant.
    // The CMPZ #0 is useless and will be peepholed away but we need to keep it
    // for its glue output.
    SDValue X = N->getOperand(0);
    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
    if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
      int64_t Addend = -C->getSExtValue();

      SDNode *Add = nullptr;
      // ADDS can be better than CMN if the immediate fits in a
      // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for
      // tADDi3. Outside that range we can just use a CMN which is 32-bit but
      // has a 12-bit immediate range.
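      // e.g. (CMPZ r0, #-42) becomes (tADDi8 r1, r0, #42) + (CMPZ r1, #0),
      // avoiding a separate materialization of -42.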
      if (Addend < 1<<8) {
        if (Subtarget->isThumb2()) {
          SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                            getAL(CurDAG, dl),
                            CurDAG->getRegister(0, MVT::i32),
                            CurDAG->getRegister(0, MVT::i32) };
          Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
        } else {
          unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
                           CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                           getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32)};
          Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
        }
      }
      if (Add) {
        SDValue Ops2[] = {SDValue(Add, 0),
                          CurDAG->getConstant(0, dl, MVT::i32)};
        CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue),
                            Ops2);
      }
    }
    // Other cases are autogenerated.
    break;
  }

  case ARMISD::CMOV: {
    SDValue InGlue = N->getOperand(4);

    if (InGlue.getOpcode() == ARMISD::CMPZ) {
      bool SwitchEQNEToPLMI;
      SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);

      if (SwitchEQNEToPLMI) {
        SDValue ARMcc = N->getOperand(2);
        ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();

        switch (CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = ARMCC::PL;
          break;
        }
        SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
        SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
                         N->getOperand(3), N->getOperand(4)};
        CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
      }

    }
    // Other cases are autogenerated.
    break;
  }
  case ARMISD::VZIP: {
    EVT VT = N->getValueType(0);
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VUZP: {
    EVT VT = N->getValueType(0);
    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VTRN: {
    EVT VT = N->getValueType(0);
    unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::BUILD_VECTOR: {
    EVT VecVT = N->getValueType(0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      ReplaceNode(
          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(N,
                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
                                    N->getOperand(2), N->getOperand(3)));
    return;
  }

  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
    return;
  }

  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                         ARM::VLD2DUPd16wb_fixed,
                                         ARM::VLD2DUPd32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                          ARM::VLD2DUPq16EvenPseudo,
                                          ARM::VLD2DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
                                          ARM::VLD2DUPq16OddPseudoWB_fixed,
                                          ARM::VLD2DUPq32OddPseudoWB_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }

  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                         ARM::VLD3DUPd16Pseudo_UPD,
                                         ARM::VLD3DUPd32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                          ARM::VLD3DUPq16EvenPseudo,
                                          ARM::VLD3DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
                                          ARM::VLD3DUPq16OddPseudo_UPD,
                                          ARM::VLD3DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }

  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                         ARM::VLD4DUPd16Pseudo_UPD,
                                         ARM::VLD4DUPd32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                          ARM::VLD4DUPq16EvenPseudo,
                                          ARM::VLD4DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
                                          ARM::VLD4DUPq16OddPseudo_UPD,
                                          ARM::VLD4DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }

  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

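  // A note on the naming in the tables above and below: the "wb_fixed" forms
  // write back base + the natural transfer size; as with the
  // isPerfectIncrement check in SelectVLDDup above, the selection routines
  // swap in the register-writeback counterparts (via
  // getVLDSTRegisterUpdateOpcode) when the increment is not that constant.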
  case ARMISD::VLD2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
                                          ARM::VLD2q16PseudoWB_fixed,
                                          ARM::VLD2q32PseudoWB_fixed};
      SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    } else {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
                                          ARM::MVE_VLD21_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, true);
    }
    return;
  }

  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
          ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
                                           ARM::VLD4q16Pseudo_UPD,
                                           ARM::VLD4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
                                           ARM::VLD4q16oddPseudo_UPD,
                                           ARM::VLD4q32oddPseudo_UPD};
      SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    } else {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8,
                                          ARM::MVE_VLD43_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16,
                                           ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32,
                                           ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, true);
    }
    return;
  }

  case ARMISD::VLD1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  case ARMISD::VLD1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
          ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
          ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
          ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
      SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VLD1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
          ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
          ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
      SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
          ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
                                          ARM::VST2q16PseudoWB_fixed,
                                          ARM::VST2q32PseudoWB_fixed};
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
          ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
                                           ARM::VST4q16Pseudo_UPD,
                                           ARM::VST4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
                                           ARM::VST4q16oddPseudo_UPD,
                                           ARM::VST4q32oddPseudo_UPD};
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
                                           ARM::VST1q16wb_fixed,
                                           ARM::VST1q32wb_fixed,
                                           ARM::VST1q64wb_fixed };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
                                           ARM::VST1d16TPseudoWB_fixed,
                                           ARM::VST1d32TPseudoWB_fixed,
                                           ARM::VST1d64TPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
                                            ARM::VST1q16HighTPseudo_UPD,
                                            ARM::VST1q32HighTPseudo_UPD,
                                            ARM::VST1q64HighTPseudo_UPD };
      SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
                                            ARM::VST1q16HighQPseudo_UPD,
                                            ARM::VST1q32HighQPseudo_UPD,
                                            ARM::VST1q64HighQPseudo_UPD };
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;

    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
ARM::MRRC : ARM::MRRC2); 4729 4730 SmallVector<SDValue, 5> Ops; 4731 Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */ 4732 Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */ 4733 Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */ 4734 4735 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 4736 // instruction will always be '1111' but it is possible in assembly language to specify 4737 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 4738 if (Opc != ARM::MRRC2) { 4739 Ops.push_back(getAL(CurDAG, dl)); 4740 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4741 } 4742 4743 Ops.push_back(Chain); 4744 4745 // Writes to two registers. 4746 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 4747 4748 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 4749 return; 4750 } 4751 case Intrinsic::arm_ldaexd: 4752 case Intrinsic::arm_ldrexd: { 4753 SDLoc dl(N); 4754 SDValue Chain = N->getOperand(0); 4755 SDValue MemAddr = N->getOperand(2); 4756 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 4757 4758 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 4759 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 4760 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 4761 4762 // arm_ldrexd returns a i64 value in {i32, i32} 4763 std::vector<EVT> ResTys; 4764 if (isThumb) { 4765 ResTys.push_back(MVT::i32); 4766 ResTys.push_back(MVT::i32); 4767 } else 4768 ResTys.push_back(MVT::Untyped); 4769 ResTys.push_back(MVT::Other); 4770 4771 // Place arguments in the right order. 4772 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 4773 CurDAG->getRegister(0, MVT::i32), Chain}; 4774 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4775 // Transfer memoperands. 4776 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4777 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4778 4779 // Remap uses. 4780 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 4781 if (!SDValue(N, 0).use_empty()) { 4782 SDValue Result; 4783 if (isThumb) 4784 Result = SDValue(Ld, 0); 4785 else { 4786 SDValue SubRegIdx = 4787 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 4788 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4789 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4790 Result = SDValue(ResNode,0); 4791 } 4792 ReplaceUses(SDValue(N, 0), Result); 4793 } 4794 if (!SDValue(N, 1).use_empty()) { 4795 SDValue Result; 4796 if (isThumb) 4797 Result = SDValue(Ld, 1); 4798 else { 4799 SDValue SubRegIdx = 4800 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 4801 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4802 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4803 Result = SDValue(ResNode,0); 4804 } 4805 ReplaceUses(SDValue(N, 1), Result); 4806 } 4807 ReplaceUses(SDValue(N, 2), OutChain); 4808 CurDAG->RemoveDeadNode(N); 4809 return; 4810 } 4811 case Intrinsic::arm_stlexd: 4812 case Intrinsic::arm_strexd: { 4813 SDLoc dl(N); 4814 SDValue Chain = N->getOperand(0); 4815 SDValue Val0 = N->getOperand(2); 4816 SDValue Val1 = N->getOperand(3); 4817 SDValue MemAddr = N->getOperand(4); 4818 4819 // Store exclusive double return a i32 value which is the return status 4820 // of the issued store. 4821 const EVT ResTys[] = {MVT::i32, MVT::Other}; 4822 4823 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 4824 // Place arguments in the right order. 
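
    // For orientation: IR that reaches the two exclusive-pair cases above
    // typically looks like this (illustrative shapes, not taken from this
    // file):
    //   %pair   = call { i32, i32 } @llvm.arm.ldrexd(ptr %addr)
    //   %status = call i32 @llvm.arm.strexd(i32 %lo, i32 %hi, ptr %addr)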

    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo,
                                           ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo,
                                           ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_mve_vldr_gather_base_wb:
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
      static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
                                         ARM::MVE_VLDRDU64_qi_pre};
      SelectMVE_WB(N, Opcodes,
                   IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
      return;
    }

    case Intrinsic::arm_mve_vld2q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, false);
      return;
    }

    case Intrinsic::arm_mve_vld4q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16,
                                           ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32,
                                           ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, false);
      return;
    }
    }
    break;
  }

  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;

    // Scalar f32 -> bf16
    case Intrinsic::arm_neon_vcvtbfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      llvm::EVT DestTy = N->getValueType(0);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
      return;
    }

    // Vector v4f32 -> v4bf16
    case Intrinsic::arm_neon_vcvtfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
      return;
    }

    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;

    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;
    case Intrinsic::arm_mve_vsbc:
    case Intrinsic::arm_mve_vsbc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
                        IntNo == Intrinsic::arm_mve_vsbc_predicated);
      return;
    case Intrinsic::arm_mve_vshlc:
    case Intrinsic::arm_mve_vshlc_predicated:
      SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
      return;

    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vidup:
    case Intrinsic::arm_mve_vidup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vidup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vddup:
    case Intrinsic::arm_mve_vddup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vddup_predicated);
      return;
    }

    case Intrinsic::arm_mve_viwdup:
    case Intrinsic::arm_mve_viwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_viwdup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vdwdup:
    case Intrinsic::arm_mve_vdwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_vdwdup_predicated);
      return;
    }

    case Intrinsic::arm_cde_cx1d:
    case Intrinsic::arm_cde_cx1da:
    case Intrinsic::arm_cde_cx2d:
    case Intrinsic::arm_cde_cx2da:
    case Intrinsic::arm_cde_cx3d:
    case Intrinsic::arm_cde_cx3da: {
      bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
                      IntNo == Intrinsic::arm_cde_cx2da ||
                      IntNo == Intrinsic::arm_cde_cx3da;
      size_t NumExtraOps;
      uint16_t Opcode;
      switch (IntNo) {
      case Intrinsic::arm_cde_cx1d:
      case Intrinsic::arm_cde_cx1da:
        NumExtraOps = 0;
        Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
        break;
      case Intrinsic::arm_cde_cx2d:
      case Intrinsic::arm_cde_cx2da:
        NumExtraOps = 1;
        Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
        break;
      case Intrinsic::arm_cde_cx3d:
      case Intrinsic::arm_cde_cx3da:
        NumExtraOps = 2;
        Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
        break;
      default:
        llvm_unreachable("Unexpected opcode");
      }
      SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64bit), inspect the fields of the string,
// and obtain the integer operands from them, adding these operands to the
// provided vector.
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
    (void)AllIntFields;
  }
}
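
// For illustration: an ACLE-style string such as "cp15:0:c13:c0:3" splits
// into five fields and yields the operands {15, 0, 13, 0, 3}, while a 64-bit
// form such as "cp15:1:c15" yields the three operands {15, 1, 15}.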

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
// mask operand, which expresses which register is to be used, e.g. r8, and in
// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
// was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}
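
// For example, getMClassFlagsMask("") and getMClassFlagsMask("nzcvq") both
// return 0x2, getMClassFlagsMask("g") returns 0x1, and any unrecognized
// flags string returns -1.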

// Maps an MClass special register string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contain the fields to be accessed in the special register, set
  // by the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc".
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}
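
// For example, getARClassRegisterMask("spsr", "fc") combines the R bit (0x10)
// with the 'f' (0x8) and 'c' (0x1) field bits to give 0x19, while an unknown
// register name or a repeated flag yields -1.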

// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then we need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class, so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}
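
// An illustrative use that reaches tryReadRegister (hypothetical IR, shown
// only to make the lowering concrete):
//   %v = call i32 @llvm.read_register.i32(metadata !0)
//   !0 = !{!"cp15:0:c13:c0:3"}
// A five-field string like this lowers to an MRC (or t2MRC) machine node.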

// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target is M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}
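
// An illustrative use that reaches tryWriteRegister (hypothetical IR):
//   call void @llvm.write_register.i32(metadata !0, i32 %v)
//   !0 = !{!"fpscr"}
// A VFP register name such as "fpscr" selects the corresponding VMSR opcode.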

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  InlineAsm::Flag Flag;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred to by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();

  SmallVector<bool, 8> OpChanged;
  // The glue node will be appended later.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
      Flag = InlineAsm::Flag(C->getZExtValue());
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind::Imm, skip the next operand, and continue.
    if (Flag.isImmKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    const unsigned NumRegs = Flag.getNumOperandRegisters();
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind::Mem followed by the input
    // operand. If we get here and we have a Kind::Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here
    // because it's important to update the OpChanged array correctly before
    // moving on.
    if (Flag.isMemKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
        !Flag.isRegDefEarlyClobberKind())
      continue;

    unsigned RC;
    const bool HasRC = Flag.hasRegClassConstraint(RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum */);
      if (IsTiedToChangedOp)
        Flag.setMatchingOp(DefIdx);
      else
        Flag.setRegClass(ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
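
// An assumed source-level example of inline asm that the GPRPair rewrite
// above is meant to support: in ARM mode, with a 64-bit variable V,
//   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(V) : "r"(Ptr));
// lets %0 and %H0 name the even/odd registers of the pair.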

bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
  case InlineAsm::ConstraintCode::Um:
  case InlineAsm::ConstraintCode::Un:
  case InlineAsm::ConstraintCode::Uq:
  case InlineAsm::ConstraintCode::Us:
  case InlineAsm::ConstraintCode::Ut:
  case InlineAsm::ConstraintCode::Uv:
  case InlineAsm::ConstraintCode::Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOptLevel OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}