//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

// Hidden command-line switch (default: false). When set, the shifter-operand
// matchers SelectImmShifterOperand / SelectRegShifterOperand return false
// immediately.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  /// Not set by the constructor; it is assigned at the start of every
  /// runOnMachineFunction call.
  const ARMSubtarget *Subtarget;

public:
  /// Forwards the target machine and optimization level to SelectionDAGISel.
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  /// Refreshes Subtarget from \p MF, runs the generic selector and always
  /// returns true (the base-class result is discarded).
  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  /// Same as SelectRegShifterOperand with CheckProfitability = false.
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  /// Same as SelectImmShifterOperand with CheckProfitability = false.
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  /// Always succeeds. \p N is converted with cast<ConstantSDNode>, so it must
  /// be a constant node. \p Pred receives an i32 target constant holding N's
  /// zero-extended value and \p Reg receives the CPSR register.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                 SDValue &OffImm);
  template<unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  /// True when ARM_AM::getSOImmVal accepts \p Imm (i.e. does not return -1).
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  /// True when ARM_AM::getSOImmVal accepts the bitwise complement of \p Imm.
  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  /// True when ARM_AM::getT2SOImmVal accepts \p Imm (i.e. does not return -1).
  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  /// True when ARM_AM::getT2SOImmVal accepts the bitwise complement of \p Imm.
  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
/// Imm is left untouched when the test fails.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
// Operand 1 is only inspected when the opcode matches.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the constant divided by
/// \p Scale. Note that it is also overwritten on failure whenever \p Node is
/// a constant (with the raw or the divided value, depending on which check
/// failed).
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  // The zero-extended value is narrowed to int before the checks.
  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

/// Pre-selection rewrite of the DAG. Does nothing unless the subtarget has
/// V6T2 ops. Otherwise it walks every node and rewrites qualifying ADD nodes
/// as described in the loop body so that the mask/shift can later be matched
/// as a bit-field extract plus a shifted add operand.
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  // NOTE(review): named isThumb2 but tests isThumb(); this point is only
  // reached when hasV6T2Ops() is true - confirm that makes the two equivalent.
  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Advance the iterator first to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1+tz), (c2>>tz)), tz)) where tz is the
    // number of trailing zeros of c2. The left shift would be folded as a
    // shifter operand of 'add' and the 'and' and 'srl' would become a bits
    // extraction node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // Make N1 the AND operand, swapping the ADD operands if necessary.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    // And_imm stays 0 when neither operand matched (or the mask itself is 0).
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After dropping the trailing zeros the mask must be of the form 2^k - 1.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  // Exactly one use: decide based on what that single user is.
  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

/// Decide whether folding \p Shift into a shifter operand is worthwhile.
/// Always true unless the subtarget is A9-like or Swift; on those, true when
/// the shift has a single use or is one of the shifts treated as free below.
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free (and, on Swift only, R << 1 as well).
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

/// Estimate how many instructions are needed to put \p Val in a register:
/// 1 or 2 for the forms recognised below, otherwise 2 when the subtarget uses
/// MOVT and 3 for a literal pool load.
unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  if (Subtarget->isThumb()) {
    if (Val <= 255) return 1;                               // MOV
    if (Subtarget->hasV6T2Ops() &&
        (Val <= 0xffff ||                                   // MOVW
         ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOV
         ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
      return 1;
    if (Val <= 510) return 2;                               // MOV + ADDi8
    if (~Val <= 255) return 2;                              // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
  }
  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
  return 3; // Literal pool load
}

/// See the declaration for the contract. Note that \p PowerOfTwo is written
/// as soon as the constant checks pass, and \p NewMulConst is created before
/// the cost comparison, so both may be modified even when false is returned.
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 (no larger than 2^MaxShift) that MulConstVal
  // is a multiple of; give up if the constant is odd.
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
  return NewCost < OldCost;
}

/// Repositions M's node relative to N's node in CurDAG, then replaces all
/// uses of N with M.
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

/// Match \p N as "register shifted by an immediate". On success \p BaseReg is
/// the value being shifted and \p Opc the encoded shift kind and amount.
/// Returns false when shifter operands are disabled, when N is not a shift,
/// or when the shift amount is not a constant.
/// NOTE: \p CheckProfitability is not consulted anywhere in this body.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      // Track N through a handle: replacing the constant below mutates the
      // DAG, and the value is re-read from the handle afterwards.
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  // BaseReg is assigned before the constant check, so it may be modified even
  // when this function returns false.
  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  // Only the low five bits of the shift amount are kept.
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

/// Match \p N as "register shifted by a register". On success \p BaseReg is
/// the value being shifted, \p ShReg the shift-amount value and \p Opc the
/// encoded shift kind (with an immediate field of 0). Returns false when
/// shifter operands are disabled, when N is not a shift, when the shift
/// amount is a constant, or when \p CheckProfitability is set and
/// isShifterOpProfitable rejects the shift.
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  // Never reassigned below: the immediate amount is always 0 in this form.
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  // Constant shift amounts are not handled here.
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
// Out is set to N unconditionally; the result is whether N and the parent's
// operand 1 have no common bits set.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}


/// Match \p N as base register plus a signed 12-bit immediate. Always
/// returns true: when no offset can be folded, \p Base is the whole address
/// and \p OffImm is 0.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Look through an ARMISD::Wrapper unless it wraps a global address,
    // external symbol or TLS global address.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}



/// Match \p N as base register +/- an (optionally shifted) offset register,
/// encoding the add/sub direction, shift amount and shift kind into \p Opc
/// with ARM_AM::getAM2Opc. Returns false for addresses that are not
/// ADD/SUB/base-with-constant-offset, and for ADD/OR with a constant in the
/// 12-bit range, which are left to the imm12 form.
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          // Both base and offset are the multiplicand X.
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        // Not profitable: keep the whole shift node as the offset register.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // The shifted value becomes the offset; the other operand the base.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      // Track Offset through a handle while the constant is replaced.
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

/// Match the register offset \p N of an indexed load/store \p Op. \p Op is
/// treated as a LoadSDNode when its opcode is ISD::LOAD and as a StoreSDNode
/// otherwise (via cast<>). Returns false when \p N is a constant in
/// [0, 0x1000); otherwise \p Offset is the (possibly unshifted) register and
/// \p Opc encodes direction, shift amount and shift kind.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // Increment modes add the offset; every other mode subtracts it.
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

/// Match a constant offset \p N in [0, 0x1000) of an indexed load/store
/// \p Op. On success \p Offset is register 0 and \p Opc is the offset as a
/// plain signed target constant (negated for non-increment modes). Returns
/// false for anything else.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


/// Match a constant offset \p N in [0, 0x1000) of an indexed load/store
/// \p Op. On success \p Offset is register 0 and \p Opc is the
/// ARM_AM::getAM2Opc encoding of the direction and the offset. Returns false
/// for anything else.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

/// Accepts any address: \p Base is \p N and the result is always true.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

/// Match \p N as base +/- 8-bit immediate or base +/- register, encoding the
/// direction and immediate into \p Opc with ARM_AM::getAM3Opc. Always
/// returns true; \p Offset is register 0 when an immediate (or no offset) is
/// used.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C  is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // Negative offsets are encoded as a subtraction of the magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant out of range: use base + register.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

/// Match the offset \p N of an indexed load/store \p Op. A constant in
/// [0, 256) is folded into \p Opc with \p Offset set to register 0; anything
/// else is used as a register offset. Always returns true.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

/// Shared implementation of SelectAddrMode5 / SelectAddrMode5FP16. Matches
/// \p N as base plus a scaled 8-bit immediate; the constant must be a
/// multiple of 2 (\p FP16) or 4 (otherwise). \p Offset receives the encoded
/// direction and scaled immediate. Always returns true; when nothing can be
/// folded, \p Base is the whole address and the encoded offset is 0.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    // This path uses getAM5Opc regardless of FP16.
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // Negative offsets are encoded as a subtraction of the magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Constant not a multiple of Scale or out of range: use N as the base.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

/// IsAddressingMode5 with FP16 = false.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

/// IsAddressingMode5 with FP16 = true.
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
      MemN->getOpcode() == ARMISD::VLD1_UPD) &&
      MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
989 unsigned MMOAlign = MemN->getAlignment(); 990 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; 991 if (MMOAlign >= MemSize && MemSize > 1) 992 Alignment = MemSize; 993 } else { 994 // All other uses of addrmode6 are for intrinsics. For now just record 995 // the raw alignment value; it will be refined later based on the legal 996 // alignment operands for the intrinsic. 997 Alignment = MemN->getAlignment(); 998 } 999 1000 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); 1001 return true; 1002 } 1003 1004 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, 1005 SDValue &Offset) { 1006 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op); 1007 ISD::MemIndexedMode AM = LdSt->getAddressingMode(); 1008 if (AM != ISD::POST_INC) 1009 return false; 1010 Offset = N; 1011 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) { 1012 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) 1013 Offset = CurDAG->getRegister(0, MVT::i32); 1014 } 1015 return true; 1016 } 1017 1018 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, 1019 SDValue &Offset, SDValue &Label) { 1020 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { 1021 Offset = N.getOperand(0); 1022 SDValue N1 = N.getOperand(1); 1023 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), 1024 SDLoc(N), MVT::i32); 1025 return true; 1026 } 1027 1028 return false; 1029 } 1030 1031 1032 //===----------------------------------------------------------------------===// 1033 // Thumb Addressing Modes 1034 //===----------------------------------------------------------------------===// 1035 1036 static bool shouldUseZeroOffsetLdSt(SDValue N) { 1037 // Negative numbers are difficult to materialise in thumb1. If we are 1038 // selecting the add of a negative, instead try to select ri with a zero 1039 // offset, so create the add node directly which will become a sub. 
1040 if (N.getOpcode() != ISD::ADD) 1041 return false; 1042 1043 // Look for an imm which is not legal for ld/st, but is legal for sub. 1044 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) 1045 return C->getSExtValue() < 0 && C->getSExtValue() >= -255; 1046 1047 return false; 1048 } 1049 1050 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, 1051 SDValue &Offset) { 1052 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { 1053 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); 1054 if (!NC || !NC->isNullValue()) 1055 return false; 1056 1057 Base = Offset = N; 1058 return true; 1059 } 1060 1061 Base = N.getOperand(0); 1062 Offset = N.getOperand(1); 1063 return true; 1064 } 1065 1066 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base, 1067 SDValue &Offset) { 1068 if (shouldUseZeroOffsetLdSt(N)) 1069 return false; // Select ri instead 1070 return SelectThumbAddrModeRRSext(N, Base, Offset); 1071 } 1072 1073 bool 1074 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1075 SDValue &Base, SDValue &OffImm) { 1076 if (shouldUseZeroOffsetLdSt(N)) { 1077 Base = N; 1078 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1079 return true; 1080 } 1081 1082 if (!CurDAG->isBaseWithConstantOffset(N)) { 1083 if (N.getOpcode() == ISD::ADD) { 1084 return false; // We want to select register offset instead 1085 } else if (N.getOpcode() == ARMISD::Wrapper && 1086 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1087 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1088 N.getOperand(0).getOpcode() != ISD::TargetConstantPool && 1089 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1090 Base = N.getOperand(0); 1091 } else { 1092 Base = N; 1093 } 1094 1095 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1096 return true; 1097 } 1098 1099 // If the RHS is + imm5 * scale, fold into addr mode. 
1100 int RHSC; 1101 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1102 Base = N.getOperand(0); 1103 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1104 return true; 1105 } 1106 1107 // Offset is too large, so use register offset instead. 1108 return false; 1109 } 1110 1111 bool 1112 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1113 SDValue &OffImm) { 1114 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1115 } 1116 1117 bool 1118 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1119 SDValue &OffImm) { 1120 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1121 } 1122 1123 bool 1124 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1125 SDValue &OffImm) { 1126 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1127 } 1128 1129 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1130 SDValue &Base, SDValue &OffImm) { 1131 if (N.getOpcode() == ISD::FrameIndex) { 1132 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1133 // Only multiples of 4 are allowed for the offset, so the frame object 1134 // alignment must be at least 4. 1135 MachineFrameInfo &MFI = MF->getFrameInfo(); 1136 if (MFI.getObjectAlignment(FI) < 4) 1137 MFI.setObjectAlignment(FI, 4); 1138 Base = CurDAG->getTargetFrameIndex( 1139 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1140 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1141 return true; 1142 } 1143 1144 if (!CurDAG->isBaseWithConstantOffset(N)) 1145 return false; 1146 1147 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { 1148 // If the RHS is + imm8 * scale, fold into addr mode. 1149 int RHSC; 1150 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1151 Base = N.getOperand(0); 1152 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1153 // Make sure the offset is inside the object, or we might fail to 1154 // allocate an emergency spill slot. 
(An out-of-range access is UB, but 1155 // it could show up anyway.) 1156 MachineFrameInfo &MFI = MF->getFrameInfo(); 1157 if (RHSC * 4 < MFI.getObjectSize(FI)) { 1158 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1159 // indexed by the LHS must be 4-byte aligned. 1160 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4) 1161 MFI.setObjectAlignment(FI, 4); 1162 if (MFI.getObjectAlignment(FI) >= 4) { 1163 Base = CurDAG->getTargetFrameIndex( 1164 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1165 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1166 return true; 1167 } 1168 } 1169 } 1170 } 1171 1172 return false; 1173 } 1174 1175 1176 //===----------------------------------------------------------------------===// 1177 // Thumb 2 Addressing Modes 1178 //===----------------------------------------------------------------------===// 1179 1180 1181 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1182 SDValue &Base, SDValue &OffImm) { 1183 // Match simple R + imm12 operands. 1184 1185 // Base only. 1186 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1187 !CurDAG->isBaseWithConstantOffset(N)) { 1188 if (N.getOpcode() == ISD::FrameIndex) { 1189 // Match frame index. 1190 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1191 Base = CurDAG->getTargetFrameIndex( 1192 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1193 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1194 return true; 1195 } 1196 1197 if (N.getOpcode() == ARMISD::Wrapper && 1198 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1199 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1200 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1201 Base = N.getOperand(0); 1202 if (Base.getOpcode() == ISD::TargetConstantPool) 1203 return false; // We want to select t2LDRpci instead. 
1204 } else 1205 Base = N; 1206 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1207 return true; 1208 } 1209 1210 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1211 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1212 // Let t2LDRi8 handle (R - imm8). 1213 return false; 1214 1215 int RHSC = (int)RHS->getZExtValue(); 1216 if (N.getOpcode() == ISD::SUB) 1217 RHSC = -RHSC; 1218 1219 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1220 Base = N.getOperand(0); 1221 if (Base.getOpcode() == ISD::FrameIndex) { 1222 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1223 Base = CurDAG->getTargetFrameIndex( 1224 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1225 } 1226 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1227 return true; 1228 } 1229 } 1230 1231 // Base only. 1232 Base = N; 1233 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1234 return true; 1235 } 1236 1237 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1238 SDValue &Base, SDValue &OffImm) { 1239 // Match simple R - imm8 operands. 
1240 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1241 !CurDAG->isBaseWithConstantOffset(N)) 1242 return false; 1243 1244 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1245 int RHSC = (int)RHS->getSExtValue(); 1246 if (N.getOpcode() == ISD::SUB) 1247 RHSC = -RHSC; 1248 1249 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1250 Base = N.getOperand(0); 1251 if (Base.getOpcode() == ISD::FrameIndex) { 1252 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1253 Base = CurDAG->getTargetFrameIndex( 1254 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1255 } 1256 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1257 return true; 1258 } 1259 } 1260 1261 return false; 1262 } 1263 1264 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1265 SDValue &OffImm){ 1266 unsigned Opcode = Op->getOpcode(); 1267 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1268 ? cast<LoadSDNode>(Op)->getAddressingMode() 1269 : cast<StoreSDNode>(Op)->getAddressingMode(); 1270 int RHSC; 1271 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1272 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1273 ? 
CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) 1274 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1275 return true; 1276 } 1277 1278 return false; 1279 } 1280 1281 template<unsigned Shift> 1282 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, 1283 SDValue &Base, SDValue &OffImm) { 1284 if (N.getOpcode() == ISD::SUB || 1285 CurDAG->isBaseWithConstantOffset(N)) { 1286 if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1287 int RHSC = (int)RHS->getZExtValue(); 1288 if (N.getOpcode() == ISD::SUB) 1289 RHSC = -RHSC; 1290 1291 if (isShiftedInt<7, Shift>(RHSC)) { 1292 Base = N.getOperand(0); 1293 if (Base.getOpcode() == ISD::FrameIndex) { 1294 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1295 Base = CurDAG->getTargetFrameIndex( 1296 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1297 } 1298 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1299 return true; 1300 } 1301 } 1302 } 1303 1304 // Base only. 1305 Base = N; 1306 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1307 return true; 1308 } 1309 1310 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1311 SDValue &Base, 1312 SDValue &OffReg, SDValue &ShImm) { 1313 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1314 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1315 return false; 1316 1317 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 1318 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1319 int RHSC = (int)RHS->getZExtValue(); 1320 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1321 return false; 1322 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1323 return false; 1324 } 1325 1326 // Look for (R + R) or (R + (R << [1,2,3])). 1327 unsigned ShAmt = 0; 1328 Base = N.getOperand(0); 1329 OffReg = N.getOperand(1); 1330 1331 // Swap if it is ((R << c) + R). 
1332 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1333 if (ShOpcVal != ARM_AM::lsl) { 1334 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1335 if (ShOpcVal == ARM_AM::lsl) 1336 std::swap(Base, OffReg); 1337 } 1338 1339 if (ShOpcVal == ARM_AM::lsl) { 1340 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1341 // it. 1342 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1343 ShAmt = Sh->getZExtValue(); 1344 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1345 OffReg = OffReg.getOperand(0); 1346 else { 1347 ShAmt = 0; 1348 } 1349 } 1350 } 1351 1352 // If OffReg is a multiply-by-constant and it's profitable to extract a shift 1353 // and use it in a shifted operand do so. 1354 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { 1355 unsigned PowerOfTwo = 0; 1356 SDValue NewMulConst; 1357 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { 1358 HandleSDNode Handle(OffReg); 1359 replaceDAGValue(OffReg.getOperand(1), NewMulConst); 1360 OffReg = Handle.getValue(); 1361 ShAmt = PowerOfTwo; 1362 } 1363 } 1364 1365 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1366 1367 return true; 1368 } 1369 1370 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1371 SDValue &OffImm) { 1372 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1373 // instructions. 
1374 Base = N; 1375 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1376 1377 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1378 return true; 1379 1380 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1381 if (!RHS) 1382 return true; 1383 1384 uint32_t RHSC = (int)RHS->getZExtValue(); 1385 if (RHSC > 1020 || RHSC % 4 != 0) 1386 return true; 1387 1388 Base = N.getOperand(0); 1389 if (Base.getOpcode() == ISD::FrameIndex) { 1390 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1391 Base = CurDAG->getTargetFrameIndex( 1392 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1393 } 1394 1395 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1396 return true; 1397 } 1398 1399 //===--------------------------------------------------------------------===// 1400 1401 /// getAL - Returns a ARMCC::AL immediate node. 1402 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { 1403 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); 1404 } 1405 1406 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 1407 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1408 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 1409 } 1410 1411 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { 1412 LoadSDNode *LD = cast<LoadSDNode>(N); 1413 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1414 if (AM == ISD::UNINDEXED) 1415 return false; 1416 1417 EVT LoadedVT = LD->getMemoryVT(); 1418 SDValue Offset, AMOpc; 1419 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1420 unsigned Opcode = 0; 1421 bool Match = false; 1422 if (LoadedVT == MVT::i32 && isPre && 1423 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1424 Opcode = ARM::LDR_PRE_IMM; 1425 Match = true; 1426 } else if (LoadedVT == MVT::i32 && !isPre && 1427 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1428 Opcode = ARM::LDR_POST_IMM; 1429 Match = true; 1430 } 
else if (LoadedVT == MVT::i32 && 1431 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1432 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; 1433 Match = true; 1434 1435 } else if (LoadedVT == MVT::i16 && 1436 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1437 Match = true; 1438 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) 1439 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) 1440 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); 1441 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { 1442 if (LD->getExtensionType() == ISD::SEXTLOAD) { 1443 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1444 Match = true; 1445 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; 1446 } 1447 } else { 1448 if (isPre && 1449 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1450 Match = true; 1451 Opcode = ARM::LDRB_PRE_IMM; 1452 } else if (!isPre && 1453 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1454 Match = true; 1455 Opcode = ARM::LDRB_POST_IMM; 1456 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1457 Match = true; 1458 Opcode = isPre ? 
ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; 1459 } 1460 } 1461 } 1462 1463 if (Match) { 1464 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { 1465 SDValue Chain = LD->getChain(); 1466 SDValue Base = LD->getBasePtr(); 1467 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), 1468 CurDAG->getRegister(0, MVT::i32), Chain }; 1469 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1470 MVT::Other, Ops); 1471 transferMemOperands(N, New); 1472 ReplaceNode(N, New); 1473 return true; 1474 } else { 1475 SDValue Chain = LD->getChain(); 1476 SDValue Base = LD->getBasePtr(); 1477 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), 1478 CurDAG->getRegister(0, MVT::i32), Chain }; 1479 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1480 MVT::Other, Ops); 1481 transferMemOperands(N, New); 1482 ReplaceNode(N, New); 1483 return true; 1484 } 1485 } 1486 1487 return false; 1488 } 1489 1490 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { 1491 LoadSDNode *LD = cast<LoadSDNode>(N); 1492 EVT LoadedVT = LD->getMemoryVT(); 1493 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1494 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD || 1495 LoadedVT.getSimpleVT().SimpleTy != MVT::i32) 1496 return false; 1497 1498 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset()); 1499 if (!COffs || COffs->getZExtValue() != 4) 1500 return false; 1501 1502 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}. 1503 // The encoding of LDM is not how the rest of ISel expects a post-inc load to 1504 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after 1505 // ISel. 
1506 SDValue Chain = LD->getChain(); 1507 SDValue Base = LD->getBasePtr(); 1508 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)), 1509 CurDAG->getRegister(0, MVT::i32), Chain }; 1510 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, 1511 MVT::i32, MVT::Other, Ops); 1512 transferMemOperands(N, New); 1513 ReplaceNode(N, New); 1514 return true; 1515 } 1516 1517 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { 1518 LoadSDNode *LD = cast<LoadSDNode>(N); 1519 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1520 if (AM == ISD::UNINDEXED) 1521 return false; 1522 1523 EVT LoadedVT = LD->getMemoryVT(); 1524 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1525 SDValue Offset; 1526 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1527 unsigned Opcode = 0; 1528 bool Match = false; 1529 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { 1530 switch (LoadedVT.getSimpleVT().SimpleTy) { 1531 case MVT::i32: 1532 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; 1533 break; 1534 case MVT::i16: 1535 if (isSExtLd) 1536 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; 1537 else 1538 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; 1539 break; 1540 case MVT::i8: 1541 case MVT::i1: 1542 if (isSExtLd) 1543 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; 1544 else 1545 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; 1546 break; 1547 default: 1548 return false; 1549 } 1550 Match = true; 1551 } 1552 1553 if (Match) { 1554 SDValue Chain = LD->getChain(); 1555 SDValue Base = LD->getBasePtr(); 1556 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)), 1557 CurDAG->getRegister(0, MVT::i32), Chain }; 1558 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1559 MVT::Other, Ops); 1560 transferMemOperands(N, New); 1561 ReplaceNode(N, New); 1562 return true; 1563 } 1564 1565 return false; 1566 } 1567 1568 /// Form a GPRPair pseudo register from a pair of GPR regs. 
1569 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { 1570 SDLoc dl(V0.getNode()); 1571 SDValue RegClass = 1572 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); 1573 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 1574 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 1575 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1576 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1577 } 1578 1579 /// Form a D register from a pair of S registers. 1580 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1581 SDLoc dl(V0.getNode()); 1582 SDValue RegClass = 1583 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32); 1584 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1585 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1586 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1587 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1588 } 1589 1590 /// Form a quad register from a pair of D registers. 1591 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1592 SDLoc dl(V0.getNode()); 1593 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1594 MVT::i32); 1595 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1596 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1597 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1598 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1599 } 1600 1601 /// Form 4 consecutive D registers from a pair of Q registers. 
1602 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1603 SDLoc dl(V0.getNode()); 1604 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1605 MVT::i32); 1606 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1607 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1608 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1609 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1610 } 1611 1612 /// Form 4 consecutive S registers. 1613 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1614 SDValue V2, SDValue V3) { 1615 SDLoc dl(V0.getNode()); 1616 SDValue RegClass = 1617 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1618 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1619 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1620 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1621 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1622 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1623 V2, SubReg2, V3, SubReg3 }; 1624 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1625 } 1626 1627 /// Form 4 consecutive D registers. 
1628 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1629 SDValue V2, SDValue V3) { 1630 SDLoc dl(V0.getNode()); 1631 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1632 MVT::i32); 1633 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1634 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1635 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1636 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1637 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1638 V2, SubReg2, V3, SubReg3 }; 1639 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1640 } 1641 1642 /// Form 4 consecutive Q registers. 1643 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1644 SDValue V2, SDValue V3) { 1645 SDLoc dl(V0.getNode()); 1646 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1647 MVT::i32); 1648 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1649 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1650 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1651 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1652 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1653 V2, SubReg2, V3, SubReg3 }; 1654 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1655 } 1656 1657 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1658 /// of a NEON VLD or VST instruction. The supported values depend on the 1659 /// number of registers being loaded. 
1660 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1661 unsigned NumVecs, bool is64BitVector) { 1662 unsigned NumRegs = NumVecs; 1663 if (!is64BitVector && NumVecs < 3) 1664 NumRegs *= 2; 1665 1666 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1667 if (Alignment >= 32 && NumRegs == 4) 1668 Alignment = 32; 1669 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1670 Alignment = 16; 1671 else if (Alignment >= 8) 1672 Alignment = 8; 1673 else 1674 Alignment = 0; 1675 1676 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1677 } 1678 1679 static bool isVLDfixed(unsigned Opc) 1680 { 1681 switch (Opc) { 1682 default: return false; 1683 case ARM::VLD1d8wb_fixed : return true; 1684 case ARM::VLD1d16wb_fixed : return true; 1685 case ARM::VLD1d64Qwb_fixed : return true; 1686 case ARM::VLD1d32wb_fixed : return true; 1687 case ARM::VLD1d64wb_fixed : return true; 1688 case ARM::VLD1d64TPseudoWB_fixed : return true; 1689 case ARM::VLD1d64QPseudoWB_fixed : return true; 1690 case ARM::VLD1q8wb_fixed : return true; 1691 case ARM::VLD1q16wb_fixed : return true; 1692 case ARM::VLD1q32wb_fixed : return true; 1693 case ARM::VLD1q64wb_fixed : return true; 1694 case ARM::VLD1DUPd8wb_fixed : return true; 1695 case ARM::VLD1DUPd16wb_fixed : return true; 1696 case ARM::VLD1DUPd32wb_fixed : return true; 1697 case ARM::VLD1DUPq8wb_fixed : return true; 1698 case ARM::VLD1DUPq16wb_fixed : return true; 1699 case ARM::VLD1DUPq32wb_fixed : return true; 1700 case ARM::VLD2d8wb_fixed : return true; 1701 case ARM::VLD2d16wb_fixed : return true; 1702 case ARM::VLD2d32wb_fixed : return true; 1703 case ARM::VLD2q8PseudoWB_fixed : return true; 1704 case ARM::VLD2q16PseudoWB_fixed : return true; 1705 case ARM::VLD2q32PseudoWB_fixed : return true; 1706 case ARM::VLD2DUPd8wb_fixed : return true; 1707 case ARM::VLD2DUPd16wb_fixed : return true; 1708 case ARM::VLD2DUPd32wb_fixed : return true; 1709 } 1710 } 1711 1712 static bool 
isVSTfixed(unsigned Opc) 1713 { 1714 switch (Opc) { 1715 default: return false; 1716 case ARM::VST1d8wb_fixed : return true; 1717 case ARM::VST1d16wb_fixed : return true; 1718 case ARM::VST1d32wb_fixed : return true; 1719 case ARM::VST1d64wb_fixed : return true; 1720 case ARM::VST1q8wb_fixed : return true; 1721 case ARM::VST1q16wb_fixed : return true; 1722 case ARM::VST1q32wb_fixed : return true; 1723 case ARM::VST1q64wb_fixed : return true; 1724 case ARM::VST1d64TPseudoWB_fixed : return true; 1725 case ARM::VST1d64QPseudoWB_fixed : return true; 1726 case ARM::VST2d8wb_fixed : return true; 1727 case ARM::VST2d16wb_fixed : return true; 1728 case ARM::VST2d32wb_fixed : return true; 1729 case ARM::VST2q8PseudoWB_fixed : return true; 1730 case ARM::VST2q16PseudoWB_fixed : return true; 1731 case ARM::VST2q32PseudoWB_fixed : return true; 1732 } 1733 } 1734 1735 // Get the register stride update opcode of a VLD/VST instruction that 1736 // is otherwise equivalent to the given fixed stride updating instruction. 
1737 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 1738 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 1739 && "Incorrect fixed stride updating instruction."); 1740 switch (Opc) { 1741 default: break; 1742 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 1743 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 1744 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 1745 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 1746 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 1747 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 1748 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 1749 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 1750 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 1751 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 1752 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 1753 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 1754 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; 1755 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; 1756 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; 1757 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; 1758 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; 1759 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; 1760 1761 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 1762 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 1763 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 1764 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 1765 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 1766 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 1767 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 1768 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 1769 case ARM::VST1d64TPseudoWB_fixed: return 
ARM::VST1d64TPseudoWB_register; 1770 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 1771 1772 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 1773 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 1774 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 1775 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 1776 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 1777 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 1778 1779 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 1780 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 1781 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 1782 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 1783 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 1784 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 1785 1786 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 1787 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 1788 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 1789 } 1790 return Opc; // If not one we handle, return it unchanged. 1791 } 1792 1793 /// Returns true if the given increment is a Constant known to be equal to the 1794 /// access size performed by a NEON load/store. This means the "[rN]!" form can 1795 /// be used. 
1796 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { 1797 auto C = dyn_cast<ConstantSDNode>(Inc); 1798 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; 1799 } 1800 1801 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 1802 const uint16_t *DOpcodes, 1803 const uint16_t *QOpcodes0, 1804 const uint16_t *QOpcodes1) { 1805 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 1806 SDLoc dl(N); 1807 1808 SDValue MemAddr, Align; 1809 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 1810 // nodes are not intrinsics. 1811 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 1812 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 1813 return; 1814 1815 SDValue Chain = N->getOperand(0); 1816 EVT VT = N->getValueType(0); 1817 bool is64BitVector = VT.is64BitVector(); 1818 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 1819 1820 unsigned OpcodeIndex; 1821 switch (VT.getSimpleVT().SimpleTy) { 1822 default: llvm_unreachable("unhandled vld type"); 1823 // Double-register operations: 1824 case MVT::v8i8: OpcodeIndex = 0; break; 1825 case MVT::v4f16: 1826 case MVT::v4i16: OpcodeIndex = 1; break; 1827 case MVT::v2f32: 1828 case MVT::v2i32: OpcodeIndex = 2; break; 1829 case MVT::v1i64: OpcodeIndex = 3; break; 1830 // Quad-register operations: 1831 case MVT::v16i8: OpcodeIndex = 0; break; 1832 case MVT::v8f16: 1833 case MVT::v8i16: OpcodeIndex = 1; break; 1834 case MVT::v4f32: 1835 case MVT::v4i32: OpcodeIndex = 2; break; 1836 case MVT::v2f64: 1837 case MVT::v2i64: OpcodeIndex = 3; break; 1838 } 1839 1840 EVT ResTy; 1841 if (NumVecs == 1) 1842 ResTy = VT; 1843 else { 1844 unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; 1845 if (!is64BitVector) 1846 ResTyElts *= 2; 1847 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 1848 } 1849 std::vector<EVT> ResTys; 1850 ResTys.push_back(ResTy); 1851 if (isUpdating) 1852 ResTys.push_back(MVT::i32); 1853 ResTys.push_back(MVT::Other); 1854 1855 SDValue Pred = getAL(CurDAG, dl); 1856 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 1857 SDNode *VLd; 1858 SmallVector<SDValue, 7> Ops; 1859 1860 // Double registers and VLD1/VLD2 quad registers are directly supported. 1861 if (is64BitVector || NumVecs <= 2) { 1862 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 1863 QOpcodes0[OpcodeIndex]); 1864 Ops.push_back(MemAddr); 1865 Ops.push_back(Align); 1866 if (isUpdating) { 1867 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1868 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 1869 if (!IsImmUpdate) { 1870 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 1871 // check for the opcode rather than the number of vector elements. 1872 if (isVLDfixed(Opc)) 1873 Opc = getVLDSTRegisterUpdateOpcode(Opc); 1874 Ops.push_back(Inc); 1875 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in 1876 // the operands if not such an opcode. 1877 } else if (!isVLDfixed(Opc)) 1878 Ops.push_back(Reg0); 1879 } 1880 Ops.push_back(Pred); 1881 Ops.push_back(Reg0); 1882 Ops.push_back(Chain); 1883 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1884 1885 } else { 1886 // Otherwise, quad registers are loaded with two separate instructions, 1887 // where one loads the even registers and the other loads the odd registers. 1888 EVT AddrTy = MemAddr.getValueType(); 1889 1890 // Load the even subregs. This is always an updating load, so that it 1891 // provides the address to the second load for the odd subregs. 
1892 SDValue ImplDef = 1893 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 1894 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 1895 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 1896 ResTy, AddrTy, MVT::Other, OpsA); 1897 Chain = SDValue(VLdA, 2); 1898 1899 // Load the odd subregs. 1900 Ops.push_back(SDValue(VLdA, 1)); 1901 Ops.push_back(Align); 1902 if (isUpdating) { 1903 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1904 assert(isa<ConstantSDNode>(Inc.getNode()) && 1905 "only constant post-increment update allowed for VLD3/4"); 1906 (void)Inc; 1907 Ops.push_back(Reg0); 1908 } 1909 Ops.push_back(SDValue(VLdA, 0)); 1910 Ops.push_back(Pred); 1911 Ops.push_back(Reg0); 1912 Ops.push_back(Chain); 1913 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 1914 } 1915 1916 // Transfer memoperands. 1917 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1918 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp}); 1919 1920 if (NumVecs == 1) { 1921 ReplaceNode(N, VLd); 1922 return; 1923 } 1924 1925 // Extract out the subregisters. 1926 SDValue SuperReg = SDValue(VLd, 0); 1927 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 1928 ARM::qsub_3 == ARM::qsub_0 + 3, 1929 "Unexpected subreg numbering"); 1930 unsigned Sub0 = (is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0); 1931 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 1932 ReplaceUses(SDValue(N, Vec), 1933 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 1934 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 1935 if (isUpdating) 1936 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 1937 CurDAG->RemoveDeadNode(N); 1938 } 1939 1940 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 1941 const uint16_t *DOpcodes, 1942 const uint16_t *QOpcodes0, 1943 const uint16_t *QOpcodes1) { 1944 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 1945 SDLoc dl(N); 1946 1947 SDValue MemAddr, Align; 1948 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 1949 // nodes are not intrinsics. 1950 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 1951 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 1952 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 1953 return; 1954 1955 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1956 1957 SDValue Chain = N->getOperand(0); 1958 EVT VT = N->getOperand(Vec0Idx).getValueType(); 1959 bool is64BitVector = VT.is64BitVector(); 1960 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 1961 1962 unsigned OpcodeIndex; 1963 switch (VT.getSimpleVT().SimpleTy) { 1964 default: llvm_unreachable("unhandled vst type"); 1965 // Double-register operations: 1966 case MVT::v8i8: OpcodeIndex = 0; break; 1967 case MVT::v4f16: 1968 case MVT::v4i16: OpcodeIndex = 1; break; 1969 case MVT::v2f32: 1970 case MVT::v2i32: OpcodeIndex = 2; break; 1971 case MVT::v1i64: OpcodeIndex = 3; break; 1972 // Quad-register operations: 1973 case MVT::v16i8: OpcodeIndex = 0; break; 1974 case MVT::v8f16: 1975 case MVT::v8i16: OpcodeIndex = 1; break; 1976 case MVT::v4f32: 1977 case MVT::v4i32: OpcodeIndex = 2; break; 1978 case MVT::v2f64: 1979 case MVT::v2i64: OpcodeIndex = 3; break; 1980 } 1981 1982 std::vector<EVT> ResTys; 1983 if 
(isUpdating) 1984 ResTys.push_back(MVT::i32); 1985 ResTys.push_back(MVT::Other); 1986 1987 SDValue Pred = getAL(CurDAG, dl); 1988 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 1989 SmallVector<SDValue, 7> Ops; 1990 1991 // Double registers and VST1/VST2 quad registers are directly supported. 1992 if (is64BitVector || NumVecs <= 2) { 1993 SDValue SrcReg; 1994 if (NumVecs == 1) { 1995 SrcReg = N->getOperand(Vec0Idx); 1996 } else if (is64BitVector) { 1997 // Form a REG_SEQUENCE to force register allocation. 1998 SDValue V0 = N->getOperand(Vec0Idx + 0); 1999 SDValue V1 = N->getOperand(Vec0Idx + 1); 2000 if (NumVecs == 2) 2001 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2002 else { 2003 SDValue V2 = N->getOperand(Vec0Idx + 2); 2004 // If it's a vst3, form a quad D-register and leave the last part as 2005 // an undef. 2006 SDValue V3 = (NumVecs == 3) 2007 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2008 : N->getOperand(Vec0Idx + 3); 2009 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2010 } 2011 } else { 2012 // Form a QQ register. 2013 SDValue Q0 = N->getOperand(Vec0Idx); 2014 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2015 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2016 } 2017 2018 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2019 QOpcodes0[OpcodeIndex]); 2020 Ops.push_back(MemAddr); 2021 Ops.push_back(Align); 2022 if (isUpdating) { 2023 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2024 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2025 if (!IsImmUpdate) { 2026 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so 2027 // check for the opcode rather than the number of vector elements. 2028 if (isVSTfixed(Opc)) 2029 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2030 Ops.push_back(Inc); 2031 } 2032 // VST1/VST2 fixed increment does not need Reg0 so only include it in 2033 // the operands if not such an opcode. 
2034 else if (!isVSTfixed(Opc)) 2035 Ops.push_back(Reg0); 2036 } 2037 Ops.push_back(SrcReg); 2038 Ops.push_back(Pred); 2039 Ops.push_back(Reg0); 2040 Ops.push_back(Chain); 2041 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2042 2043 // Transfer memoperands. 2044 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp}); 2045 2046 ReplaceNode(N, VSt); 2047 return; 2048 } 2049 2050 // Otherwise, quad registers are stored with two separate instructions, 2051 // where one stores the even registers and the other stores the odd registers. 2052 2053 // Form the QQQQ REG_SEQUENCE. 2054 SDValue V0 = N->getOperand(Vec0Idx + 0); 2055 SDValue V1 = N->getOperand(Vec0Idx + 1); 2056 SDValue V2 = N->getOperand(Vec0Idx + 2); 2057 SDValue V3 = (NumVecs == 3) 2058 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2059 : N->getOperand(Vec0Idx + 3); 2060 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2061 2062 // Store the even D registers. This is always an updating store, so that it 2063 // provides the address to the second store for the odd subregs. 2064 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2065 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2066 MemAddr.getValueType(), 2067 MVT::Other, OpsA); 2068 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp}); 2069 Chain = SDValue(VStA, 1); 2070 2071 // Store the odd D registers. 
2072 Ops.push_back(SDValue(VStA, 0)); 2073 Ops.push_back(Align); 2074 if (isUpdating) { 2075 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2076 assert(isa<ConstantSDNode>(Inc.getNode()) && 2077 "only constant post-increment update allowed for VST3/4"); 2078 (void)Inc; 2079 Ops.push_back(Reg0); 2080 } 2081 Ops.push_back(RegSeq); 2082 Ops.push_back(Pred); 2083 Ops.push_back(Reg0); 2084 Ops.push_back(Chain); 2085 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2086 Ops); 2087 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp}); 2088 ReplaceNode(N, VStB); 2089 } 2090 2091 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2092 unsigned NumVecs, 2093 const uint16_t *DOpcodes, 2094 const uint16_t *QOpcodes) { 2095 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2096 SDLoc dl(N); 2097 2098 SDValue MemAddr, Align; 2099 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2100 // nodes are not intrinsics. 2101 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2102 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2103 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2104 return; 2105 2106 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2107 2108 SDValue Chain = N->getOperand(0); 2109 unsigned Lane = 2110 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); 2111 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2112 bool is64BitVector = VT.is64BitVector(); 2113 2114 unsigned Alignment = 0; 2115 if (NumVecs != 3) { 2116 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2117 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2118 if (Alignment > NumBytes) 2119 Alignment = NumBytes; 2120 if (Alignment < 8 && Alignment < NumBytes) 2121 Alignment = 0; 2122 // Alignment must be a power of two; make sure of that. 
2123 Alignment = (Alignment & -Alignment); 2124 if (Alignment == 1) 2125 Alignment = 0; 2126 } 2127 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2128 2129 unsigned OpcodeIndex; 2130 switch (VT.getSimpleVT().SimpleTy) { 2131 default: llvm_unreachable("unhandled vld/vst lane type"); 2132 // Double-register operations: 2133 case MVT::v8i8: OpcodeIndex = 0; break; 2134 case MVT::v4f16: 2135 case MVT::v4i16: OpcodeIndex = 1; break; 2136 case MVT::v2f32: 2137 case MVT::v2i32: OpcodeIndex = 2; break; 2138 // Quad-register operations: 2139 case MVT::v8f16: 2140 case MVT::v8i16: OpcodeIndex = 0; break; 2141 case MVT::v4f32: 2142 case MVT::v4i32: OpcodeIndex = 1; break; 2143 } 2144 2145 std::vector<EVT> ResTys; 2146 if (IsLoad) { 2147 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2148 if (!is64BitVector) 2149 ResTyElts *= 2; 2150 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2151 MVT::i64, ResTyElts)); 2152 } 2153 if (isUpdating) 2154 ResTys.push_back(MVT::i32); 2155 ResTys.push_back(MVT::Other); 2156 2157 SDValue Pred = getAL(CurDAG, dl); 2158 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2159 2160 SmallVector<SDValue, 8> Ops; 2161 Ops.push_back(MemAddr); 2162 Ops.push_back(Align); 2163 if (isUpdating) { 2164 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2165 bool IsImmUpdate = 2166 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2167 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 2168 } 2169 2170 SDValue SuperReg; 2171 SDValue V0 = N->getOperand(Vec0Idx + 0); 2172 SDValue V1 = N->getOperand(Vec0Idx + 1); 2173 if (NumVecs == 2) { 2174 if (is64BitVector) 2175 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2176 else 2177 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2178 } else { 2179 SDValue V2 = N->getOperand(Vec0Idx + 2); 2180 SDValue V3 = (NumVecs == 3) 2181 ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2182 : N->getOperand(Vec0Idx + 3); 2183 if (is64BitVector) 2184 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2185 else 2186 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2187 } 2188 Ops.push_back(SuperReg); 2189 Ops.push_back(getI32Imm(Lane, dl)); 2190 Ops.push_back(Pred); 2191 Ops.push_back(Reg0); 2192 Ops.push_back(Chain); 2193 2194 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2195 QOpcodes[OpcodeIndex]); 2196 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2197 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp}); 2198 if (!IsLoad) { 2199 ReplaceNode(N, VLdLn); 2200 return; 2201 } 2202 2203 // Extract the subregisters. 2204 SuperReg = SDValue(VLdLn, 0); 2205 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2206 ARM::qsub_3 == ARM::qsub_0 + 3, 2207 "Unexpected subreg numbering"); 2208 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2209 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2210 ReplaceUses(SDValue(N, Vec), 2211 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2212 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2213 if (isUpdating) 2214 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2215 CurDAG->RemoveDeadNode(N); 2216 } 2217 2218 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, 2219 bool isUpdating, unsigned NumVecs, 2220 const uint16_t *DOpcodes, 2221 const uint16_t *QOpcodes0, 2222 const uint16_t *QOpcodes1) { 2223 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2224 SDLoc dl(N); 2225 2226 SDValue MemAddr, Align; 2227 unsigned AddrOpIdx = IsIntrinsic ? 
2 : 1; 2228 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2229 return; 2230 2231 SDValue Chain = N->getOperand(0); 2232 EVT VT = N->getValueType(0); 2233 bool is64BitVector = VT.is64BitVector(); 2234 2235 unsigned Alignment = 0; 2236 if (NumVecs != 3) { 2237 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2238 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2239 if (Alignment > NumBytes) 2240 Alignment = NumBytes; 2241 if (Alignment < 8 && Alignment < NumBytes) 2242 Alignment = 0; 2243 // Alignment must be a power of two; make sure of that. 2244 Alignment = (Alignment & -Alignment); 2245 if (Alignment == 1) 2246 Alignment = 0; 2247 } 2248 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2249 2250 unsigned OpcodeIndex; 2251 switch (VT.getSimpleVT().SimpleTy) { 2252 default: llvm_unreachable("unhandled vld-dup type"); 2253 case MVT::v8i8: 2254 case MVT::v16i8: OpcodeIndex = 0; break; 2255 case MVT::v4i16: 2256 case MVT::v8i16: 2257 case MVT::v4f16: 2258 case MVT::v8f16: 2259 OpcodeIndex = 1; break; 2260 case MVT::v2f32: 2261 case MVT::v2i32: 2262 case MVT::v4f32: 2263 case MVT::v4i32: OpcodeIndex = 2; break; 2264 case MVT::v1f64: 2265 case MVT::v1i64: OpcodeIndex = 3; break; 2266 } 2267 2268 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2269 if (!is64BitVector) 2270 ResTyElts *= 2; 2271 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2272 2273 std::vector<EVT> ResTys; 2274 ResTys.push_back(ResTy); 2275 if (isUpdating) 2276 ResTys.push_back(MVT::i32); 2277 ResTys.push_back(MVT::Other); 2278 2279 SDValue Pred = getAL(CurDAG, dl); 2280 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2281 2282 SDNode *VLdDup; 2283 if (is64BitVector || NumVecs == 1) { 2284 SmallVector<SDValue, 6> Ops; 2285 Ops.push_back(MemAddr); 2286 Ops.push_back(Align); 2287 unsigned Opc = is64BitVector ? 
DOpcodes[OpcodeIndex] : 2288 QOpcodes0[OpcodeIndex]; 2289 if (isUpdating) { 2290 // fixed-stride update instructions don't have an explicit writeback 2291 // operand. It's implicit in the opcode itself. 2292 SDValue Inc = N->getOperand(2); 2293 bool IsImmUpdate = 2294 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2295 if (NumVecs <= 2 && !IsImmUpdate) 2296 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2297 if (!IsImmUpdate) 2298 Ops.push_back(Inc); 2299 // FIXME: VLD3 and VLD4 haven't been updated to that form yet. 2300 else if (NumVecs > 2) 2301 Ops.push_back(Reg0); 2302 } 2303 Ops.push_back(Pred); 2304 Ops.push_back(Reg0); 2305 Ops.push_back(Chain); 2306 VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2307 } else if (NumVecs == 2) { 2308 const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain }; 2309 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], 2310 dl, ResTys, OpsA); 2311 2312 Chain = SDValue(VLdA, 1); 2313 const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain }; 2314 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB); 2315 } else { 2316 SDValue ImplDef = 2317 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2318 const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain }; 2319 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], 2320 dl, ResTys, OpsA); 2321 2322 SDValue SuperReg = SDValue(VLdA, 0); 2323 Chain = SDValue(VLdA, 1); 2324 const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain }; 2325 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB); 2326 } 2327 2328 // Transfer memoperands. 2329 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2330 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp}); 2331 2332 // Extract the subregisters. 
2333 if (NumVecs == 1) { 2334 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0)); 2335 } else { 2336 SDValue SuperReg = SDValue(VLdDup, 0); 2337 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); 2338 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2339 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) { 2340 ReplaceUses(SDValue(N, Vec), 2341 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 2342 } 2343 } 2344 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 2345 if (isUpdating) 2346 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 2347 CurDAG->RemoveDeadNode(N); 2348 } 2349 2350 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 2351 if (!Subtarget->hasV6T2Ops()) 2352 return false; 2353 2354 unsigned Opc = isSigned 2355 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 2356 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 2357 SDLoc dl(N); 2358 2359 // For unsigned extracts, check for a shift right and mask 2360 unsigned And_imm = 0; 2361 if (N->getOpcode() == ISD::AND) { 2362 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 2363 2364 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 2365 if (And_imm & (And_imm + 1)) 2366 return false; 2367 2368 unsigned Srl_imm = 0; 2369 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 2370 Srl_imm)) { 2371 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2372 2373 // Mask off the unnecessary bits of the AND immediate; normally 2374 // DAGCombine will do this, but that might not happen if 2375 // targetShrinkDemandedConstant chooses a different immediate. 2376 And_imm &= -1U >> Srl_imm; 2377 2378 // Note: The width operand is encoded as width-1. 
2379 unsigned Width = countTrailingOnes(And_imm) - 1; 2380 unsigned LSB = Srl_imm; 2381 2382 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2383 2384 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 2385 // It's cheaper to use a right shift to extract the top bits. 2386 if (Subtarget->isThumb()) { 2387 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 2388 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2389 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2390 getAL(CurDAG, dl), Reg0, Reg0 }; 2391 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2392 return true; 2393 } 2394 2395 // ARM models shift instructions as MOVsi with shifter operand. 2396 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 2397 SDValue ShOpc = 2398 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 2399 MVT::i32); 2400 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 2401 getAL(CurDAG, dl), Reg0, Reg0 }; 2402 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 2403 return true; 2404 } 2405 2406 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 2407 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2408 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2409 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2410 getAL(CurDAG, dl), Reg0 }; 2411 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2412 return true; 2413 } 2414 } 2415 return false; 2416 } 2417 2418 // Otherwise, we're looking for a shift of a shift 2419 unsigned Shl_imm = 0; 2420 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 2421 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 2422 unsigned Srl_imm = 0; 2423 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 2424 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2425 // Note: The width operand is encoded as width-1. 
2426 unsigned Width = 32 - Srl_imm - 1; 2427 int LSB = Srl_imm - Shl_imm; 2428 if (LSB < 0) 2429 return false; 2430 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2431 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 2432 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2433 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2434 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2435 getAL(CurDAG, dl), Reg0 }; 2436 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2437 return true; 2438 } 2439 } 2440 2441 // Or we are looking for a shift of an and, with a mask operand 2442 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 2443 isShiftedMask_32(And_imm)) { 2444 unsigned Srl_imm = 0; 2445 unsigned LSB = countTrailingZeros(And_imm); 2446 // Shift must be the same as the ands lsb 2447 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 2448 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2449 unsigned MSB = 31 - countLeadingZeros(And_imm); 2450 // Note: The width operand is encoded as width-1. 
2451 unsigned Width = MSB - LSB; 2452 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2453 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 2454 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2455 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), 2456 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2457 getAL(CurDAG, dl), Reg0 }; 2458 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2459 return true; 2460 } 2461 } 2462 2463 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 2464 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 2465 unsigned LSB = 0; 2466 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 2467 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 2468 return false; 2469 2470 if (LSB + Width > 32) 2471 return false; 2472 2473 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2474 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx"); 2475 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2476 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2477 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 2478 getAL(CurDAG, dl), Reg0 }; 2479 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2480 return true; 2481 } 2482 2483 return false; 2484 } 2485 2486 /// Target-specific DAG combining for ISD::XOR. 2487 /// Target-independent combining lowers SELECT_CC nodes of the form 2488 /// select_cc setg[ge] X, 0, X, -X 2489 /// select_cc setgt X, -1, X, -X 2490 /// select_cc setl[te] X, 0, -X, X 2491 /// select_cc setlt X, 1, -X, X 2492 /// which represent Integer ABS into: 2493 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y) 2494 /// ARM instruction selection detects the latter and matches it to 2495 /// ARM::ABS or ARM::t2ABS machine node. 
2496 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ 2497 SDValue XORSrc0 = N->getOperand(0); 2498 SDValue XORSrc1 = N->getOperand(1); 2499 EVT VT = N->getValueType(0); 2500 2501 if (Subtarget->isThumb1Only()) 2502 return false; 2503 2504 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) 2505 return false; 2506 2507 SDValue ADDSrc0 = XORSrc0.getOperand(0); 2508 SDValue ADDSrc1 = XORSrc0.getOperand(1); 2509 SDValue SRASrc0 = XORSrc1.getOperand(0); 2510 SDValue SRASrc1 = XORSrc1.getOperand(1); 2511 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 2512 EVT XType = SRASrc0.getValueType(); 2513 unsigned Size = XType.getSizeInBits() - 1; 2514 2515 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && 2516 XType.isInteger() && SRAConstant != nullptr && 2517 Size == SRAConstant->getZExtValue()) { 2518 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 2519 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); 2520 return true; 2521 } 2522 2523 return false; 2524 } 2525 2526 /// We've got special pseudo-instructions for these 2527 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 2528 unsigned Opcode; 2529 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 2530 if (MemTy == MVT::i8) 2531 Opcode = ARM::CMP_SWAP_8; 2532 else if (MemTy == MVT::i16) 2533 Opcode = ARM::CMP_SWAP_16; 2534 else if (MemTy == MVT::i32) 2535 Opcode = ARM::CMP_SWAP_32; 2536 else 2537 llvm_unreachable("Unknown AtomicCmpSwap type"); 2538 2539 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 2540 N->getOperand(0)}; 2541 SDNode *CmpSwap = CurDAG->getMachineNode( 2542 Opcode, SDLoc(N), 2543 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 2544 2545 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 2546 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 2547 2548 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 2549 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 2550 CurDAG->RemoveDeadNode(N); 2551 } 2552 2553 static 
Optional<std::pair<unsigned, unsigned>> 2554 getContiguousRangeOfSetBits(const APInt &A) { 2555 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1; 2556 unsigned LastOne = A.countTrailingZeros(); 2557 if (A.countPopulation() != (FirstOne - LastOne + 1)) 2558 return Optional<std::pair<unsigned,unsigned>>(); 2559 return std::make_pair(FirstOne, LastOne); 2560 } 2561 2562 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { 2563 assert(N->getOpcode() == ARMISD::CMPZ); 2564 SwitchEQNEToPLMI = false; 2565 2566 if (!Subtarget->isThumb()) 2567 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and 2568 // LSR don't exist as standalone instructions - they need the barrel shifter. 2569 return; 2570 2571 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 2572 SDValue And = N->getOperand(0); 2573 if (!And->hasOneUse()) 2574 return; 2575 2576 SDValue Zero = N->getOperand(1); 2577 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() || 2578 And->getOpcode() != ISD::AND) 2579 return; 2580 SDValue X = And.getOperand(0); 2581 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 2582 2583 if (!C) 2584 return; 2585 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 2586 if (!Range) 2587 return; 2588 2589 // There are several ways to lower this: 2590 SDNode *NewN; 2591 SDLoc dl(N); 2592 2593 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 2594 if (Subtarget->isThumb2()) { 2595 Opc = (Opc == ARM::tLSLri) ? 
ARM::t2LSLri : ARM::t2LSRri; 2596 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 2597 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2598 CurDAG->getRegister(0, MVT::i32) }; 2599 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 2600 } else { 2601 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 2602 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 2603 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 2604 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 2605 } 2606 }; 2607 2608 if (Range->second == 0) { 2609 // 1. Mask includes the LSB -> Simply shift the top N bits off 2610 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 2611 ReplaceNode(And.getNode(), NewN); 2612 } else if (Range->first == 31) { 2613 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 2614 NewN = EmitShift(ARM::tLSRri, X, Range->second); 2615 ReplaceNode(And.getNode(), NewN); 2616 } else if (Range->first == Range->second) { 2617 // 3. Only one bit is set. We can shift this into the sign bit and use a 2618 // PL/MI comparison. 2619 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 2620 ReplaceNode(And.getNode(), NewN); 2621 2622 SwitchEQNEToPLMI = true; 2623 } else if (!Subtarget->hasV6T2Ops()) { 2624 // 4. Do a double shift to clear bottom and top bits, but only in 2625 // thumb-1 mode as in thumb-2 we can use UBFX. 2626 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 2627 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 2628 Range->second + (31 - Range->first)); 2629 ReplaceNode(And.getNode(), NewN); 2630 } 2631 2632 } 2633 2634 void ARMDAGToDAGISel::Select(SDNode *N) { 2635 SDLoc dl(N); 2636 2637 if (N->isMachineOpcode()) { 2638 N->setNodeId(-1); 2639 return; // Already selected. 2640 } 2641 2642 switch (N->getOpcode()) { 2643 default: break; 2644 case ISD::STORE: { 2645 // For Thumb1, match an sp-relative store in C++. 
This is a little 2646 // unfortunate, but I don't think I can make the chain check work 2647 // otherwise. (The chain of the store has to be the same as the chain 2648 // of the CopyFromReg, or else we can't replace the CopyFromReg with 2649 // a direct reference to "SP".) 2650 // 2651 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 2652 // a different addressing mode from other four-byte stores. 2653 // 2654 // This pattern usually comes up with call arguments. 2655 StoreSDNode *ST = cast<StoreSDNode>(N); 2656 SDValue Ptr = ST->getBasePtr(); 2657 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 2658 int RHSC = 0; 2659 if (Ptr.getOpcode() == ISD::ADD && 2660 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 2661 Ptr = Ptr.getOperand(0); 2662 2663 if (Ptr.getOpcode() == ISD::CopyFromReg && 2664 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 2665 Ptr.getOperand(0) == ST->getChain()) { 2666 SDValue Ops[] = {ST->getValue(), 2667 CurDAG->getRegister(ARM::SP, MVT::i32), 2668 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 2669 getAL(CurDAG, dl), 2670 CurDAG->getRegister(0, MVT::i32), 2671 ST->getChain()}; 2672 MachineSDNode *ResNode = 2673 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 2674 MachineMemOperand *MemOp = ST->getMemOperand(); 2675 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 2676 ReplaceNode(N, ResNode); 2677 return; 2678 } 2679 } 2680 break; 2681 } 2682 case ISD::WRITE_REGISTER: 2683 if (tryWriteRegister(N)) 2684 return; 2685 break; 2686 case ISD::READ_REGISTER: 2687 if (tryReadRegister(N)) 2688 return; 2689 break; 2690 case ISD::INLINEASM: 2691 case ISD::INLINEASM_BR: 2692 if (tryInlineAsm(N)) 2693 return; 2694 break; 2695 case ISD::XOR: 2696 // Select special operations if XOR node forms integer ABS pattern 2697 if (tryABSOp(N)) 2698 return; 2699 // Other cases are autogenerated. 
2700 break; 2701 case ISD::Constant: { 2702 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 2703 // If we can't materialize the constant we need to use a literal pool 2704 if (ConstantMaterializationCost(Val) > 2) { 2705 SDValue CPIdx = CurDAG->getTargetConstantPool( 2706 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 2707 TLI->getPointerTy(CurDAG->getDataLayout())); 2708 2709 SDNode *ResNode; 2710 if (Subtarget->isThumb()) { 2711 SDValue Ops[] = { 2712 CPIdx, 2713 getAL(CurDAG, dl), 2714 CurDAG->getRegister(0, MVT::i32), 2715 CurDAG->getEntryNode() 2716 }; 2717 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 2718 Ops); 2719 } else { 2720 SDValue Ops[] = { 2721 CPIdx, 2722 CurDAG->getTargetConstant(0, dl, MVT::i32), 2723 getAL(CurDAG, dl), 2724 CurDAG->getRegister(0, MVT::i32), 2725 CurDAG->getEntryNode() 2726 }; 2727 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 2728 Ops); 2729 } 2730 // Annotate the Node with memory operand information so that MachineInstr 2731 // queries work properly. This e.g. gives the register allocation the 2732 // required information for rematerialization. 2733 MachineFunction& MF = CurDAG->getMachineFunction(); 2734 MachineMemOperand *MemOp = 2735 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 2736 MachineMemOperand::MOLoad, 4, 4); 2737 2738 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 2739 2740 ReplaceNode(N, ResNode); 2741 return; 2742 } 2743 2744 // Other cases are autogenerated. 2745 break; 2746 } 2747 case ISD::FrameIndex: { 2748 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
2749 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 2750 SDValue TFI = CurDAG->getTargetFrameIndex( 2751 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 2752 if (Subtarget->isThumb1Only()) { 2753 // Set the alignment of the frame object to 4, to avoid having to generate 2754 // more than one ADD 2755 MachineFrameInfo &MFI = MF->getFrameInfo(); 2756 if (MFI.getObjectAlignment(FI) < 4) 2757 MFI.setObjectAlignment(FI, 4); 2758 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 2759 CurDAG->getTargetConstant(0, dl, MVT::i32)); 2760 return; 2761 } else { 2762 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 2763 ARM::t2ADDri : ARM::ADDri); 2764 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 2765 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2766 CurDAG->getRegister(0, MVT::i32) }; 2767 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2768 return; 2769 } 2770 } 2771 case ISD::SRL: 2772 if (tryV6T2BitfieldExtractOp(N, false)) 2773 return; 2774 break; 2775 case ISD::SIGN_EXTEND_INREG: 2776 case ISD::SRA: 2777 if (tryV6T2BitfieldExtractOp(N, true)) 2778 return; 2779 break; 2780 case ISD::MUL: 2781 if (Subtarget->isThumb1Only()) 2782 break; 2783 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 2784 unsigned RHSV = C->getZExtValue(); 2785 if (!RHSV) break; 2786 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
2787 unsigned ShImm = Log2_32(RHSV-1); 2788 if (ShImm >= 32) 2789 break; 2790 SDValue V = N->getOperand(0); 2791 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2792 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2793 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2794 if (Subtarget->isThumb()) { 2795 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2796 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 2797 return; 2798 } else { 2799 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2800 Reg0 }; 2801 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 2802 return; 2803 } 2804 } 2805 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 2806 unsigned ShImm = Log2_32(RHSV+1); 2807 if (ShImm >= 32) 2808 break; 2809 SDValue V = N->getOperand(0); 2810 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2811 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2812 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2813 if (Subtarget->isThumb()) { 2814 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2815 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 2816 return; 2817 } else { 2818 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2819 Reg0 }; 2820 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 2821 return; 2822 } 2823 } 2824 } 2825 break; 2826 case ISD::AND: { 2827 // Check for unsigned bitfield extract 2828 if (tryV6T2BitfieldExtractOp(N, false)) 2829 return; 2830 2831 // If an immediate is used in an AND node, it is possible that the immediate 2832 // can be more optimally materialized when negated. If this is the case we 2833 // can negate the immediate and use a BIC instead. 2834 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2835 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 2836 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 2837 2838 // In Thumb2 mode, an AND can take a 12-bit immediate. 
If this 2839 // immediate can be negated and fit in the immediate operand of 2840 // a t2BIC, don't do any manual transform here as this can be 2841 // handled by the generic ISel machinery. 2842 bool PreferImmediateEncoding = 2843 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 2844 if (!PreferImmediateEncoding && 2845 ConstantMaterializationCost(Imm) > 2846 ConstantMaterializationCost(~Imm)) { 2847 // The current immediate costs more to materialize than a negated 2848 // immediate, so negate the immediate and use a BIC. 2849 SDValue NewImm = 2850 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 2851 // If the new constant didn't exist before, reposition it in the topological 2852 // ordering so it is just before N. Otherwise, don't touch its location. 2853 if (NewImm->getNodeId() == -1) 2854 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 2855 2856 if (!Subtarget->hasThumb2()) { 2857 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 2858 N->getOperand(0), NewImm, getAL(CurDAG, dl), 2859 CurDAG->getRegister(0, MVT::i32)}; 2860 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 2861 return; 2862 } else { 2863 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 2864 CurDAG->getRegister(0, MVT::i32), 2865 CurDAG->getRegister(0, MVT::i32)}; 2866 ReplaceNode(N, 2867 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 2868 return; 2869 } 2870 } 2871 } 2872 2873 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 2874 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 2875 // are entirely contributed by c2 and lower 16-bits are entirely contributed 2876 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 2877 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 2878 EVT VT = N->getValueType(0); 2879 if (VT != MVT::i32) 2880 break; 2881 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 2882 ? 
ARM::t2MOVTi16 2883 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 2884 if (!Opc) 2885 break; 2886 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2887 N1C = dyn_cast<ConstantSDNode>(N1); 2888 if (!N1C) 2889 break; 2890 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 2891 SDValue N2 = N0.getOperand(1); 2892 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 2893 if (!N2C) 2894 break; 2895 unsigned N1CVal = N1C->getZExtValue(); 2896 unsigned N2CVal = N2C->getZExtValue(); 2897 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 2898 (N1CVal & 0xffffU) == 0xffffU && 2899 (N2CVal & 0xffffU) == 0x0U) { 2900 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 2901 dl, MVT::i32); 2902 SDValue Ops[] = { N0.getOperand(0), Imm16, 2903 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2904 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 2905 return; 2906 } 2907 } 2908 2909 break; 2910 } 2911 case ARMISD::UMAAL: { 2912 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 2913 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2914 N->getOperand(2), N->getOperand(3), 2915 getAL(CurDAG, dl), 2916 CurDAG->getRegister(0, MVT::i32) }; 2917 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 2918 return; 2919 } 2920 case ARMISD::UMLAL:{ 2921 if (Subtarget->isThumb()) { 2922 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2923 N->getOperand(3), getAL(CurDAG, dl), 2924 CurDAG->getRegister(0, MVT::i32)}; 2925 ReplaceNode( 2926 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 2927 return; 2928 }else{ 2929 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2930 N->getOperand(3), getAL(CurDAG, dl), 2931 CurDAG->getRegister(0, MVT::i32), 2932 CurDAG->getRegister(0, MVT::i32) }; 2933 ReplaceNode(N, CurDAG->getMachineNode( 2934 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 2935 MVT::i32, MVT::i32, Ops)); 2936 return; 2937 } 2938 } 2939 case ARMISD::SMLAL:{ 2940 if (Subtarget->isThumb()) { 2941 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2942 N->getOperand(3), getAL(CurDAG, dl), 2943 CurDAG->getRegister(0, MVT::i32)}; 2944 ReplaceNode( 2945 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 2946 return; 2947 }else{ 2948 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2949 N->getOperand(3), getAL(CurDAG, dl), 2950 CurDAG->getRegister(0, MVT::i32), 2951 CurDAG->getRegister(0, MVT::i32) }; 2952 ReplaceNode(N, CurDAG->getMachineNode( 2953 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 2954 MVT::i32, MVT::i32, Ops)); 2955 return; 2956 } 2957 } 2958 case ARMISD::SUBE: { 2959 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 2960 break; 2961 // Look for a pattern to match SMMLS 2962 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 2963 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 2964 N->getOperand(2).getOpcode() != ARMISD::SUBC || 2965 !SDValue(N, 1).use_empty()) 2966 break; 2967 2968 if (Subtarget->isThumb()) 2969 assert(Subtarget->hasThumb2() && 2970 "This pattern should not be generated for Thumb"); 2971 2972 SDValue SmulLoHi = N->getOperand(1); 2973 SDValue Subc = N->getOperand(2); 2974 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 2975 2976 if (!Zero || Zero->getZExtValue() != 0 || 2977 Subc.getOperand(1) != SmulLoHi.getValue(0) || 2978 N->getOperand(1) != SmulLoHi.getValue(1) || 2979 N->getOperand(2) != Subc.getValue(1)) 2980 break; 2981 2982 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 2983 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 2984 N->getOperand(0), getAL(CurDAG, dl), 2985 CurDAG->getRegister(0, MVT::i32) }; 2986 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 2987 return; 2988 } 2989 case ISD::LOAD: { 2990 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 2991 if (tryT2IndexedLoad(N)) 2992 return; 2993 } else if (Subtarget->isThumb()) { 2994 if (tryT1IndexedLoad(N)) 2995 return; 2996 } else if (tryARMIndexedLoad(N)) 2997 return; 2998 // Other cases are autogenerated. 2999 break; 3000 } 3001 case ARMISD::WLS: { 3002 SDValue Ops[] = { N->getOperand(1), // Loop count 3003 N->getOperand(2), // Exit target 3004 N->getOperand(0) }; 3005 SDNode *LoopStart = 3006 CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, Ops); 3007 ReplaceUses(N, LoopStart); 3008 CurDAG->RemoveDeadNode(N); 3009 return; 3010 } 3011 case ARMISD::BRCOND: { 3012 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3013 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3014 // Pattern complexity = 6 cost = 1 size = 0 3015 3016 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3017 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 3018 // Pattern complexity = 6 cost = 1 size = 0 3019 3020 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3021 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3022 // Pattern complexity = 6 cost = 1 size = 0 3023 3024 unsigned Opc = Subtarget->isThumb() ? 3025 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 3026 SDValue Chain = N->getOperand(0); 3027 SDValue N1 = N->getOperand(1); 3028 SDValue N2 = N->getOperand(2); 3029 SDValue N3 = N->getOperand(3); 3030 SDValue InFlag = N->getOperand(4); 3031 assert(N1.getOpcode() == ISD::BasicBlock); 3032 assert(N2.getOpcode() == ISD::Constant); 3033 assert(N3.getOpcode() == ISD::Register); 3034 3035 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 3036 3037 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3038 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 3039 SDValue Int = InFlag.getOperand(0); 3040 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 3041 3042 // Handle low-overhead loops. 3043 if (ID == Intrinsic::loop_decrement_reg) { 3044 SDValue Elements = Int.getOperand(2); 3045 SDValue Size = CurDAG->getTargetConstant( 3046 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 3047 MVT::i32); 3048 3049 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 3050 SDNode *LoopDec = 3051 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3052 CurDAG->getVTList(MVT::i32, MVT::Other), 3053 Args); 3054 ReplaceUses(Int.getNode(), LoopDec); 3055 3056 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 3057 SDNode *LoopEnd = 3058 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 3059 3060 ReplaceUses(N, LoopEnd); 3061 CurDAG->RemoveDeadNode(N); 3062 CurDAG->RemoveDeadNode(InFlag.getNode()); 3063 CurDAG->RemoveDeadNode(Int.getNode()); 3064 return; 3065 } 3066 } 3067 3068 bool SwitchEQNEToPLMI; 3069 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3070 InFlag = N->getOperand(4); 3071 3072 if (SwitchEQNEToPLMI) { 3073 switch ((ARMCC::CondCodes)CC) { 3074 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3075 case ARMCC::NE: 3076 CC = (unsigned)ARMCC::MI; 3077 break; 3078 case ARMCC::EQ: 3079 CC = (unsigned)ARMCC::PL; 3080 break; 3081 } 3082 } 3083 } 3084 3085 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 3086 
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 3087 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 3088 MVT::Glue, Ops); 3089 Chain = SDValue(ResNode, 0); 3090 if (N->getNumValues() == 2) { 3091 InFlag = SDValue(ResNode, 1); 3092 ReplaceUses(SDValue(N, 1), InFlag); 3093 } 3094 ReplaceUses(SDValue(N, 0), 3095 SDValue(Chain.getNode(), Chain.getResNo())); 3096 CurDAG->RemoveDeadNode(N); 3097 return; 3098 } 3099 3100 case ARMISD::CMPZ: { 3101 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 3102 // This allows us to avoid materializing the expensive negative constant. 3103 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 3104 // for its glue output. 3105 SDValue X = N->getOperand(0); 3106 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 3107 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 3108 int64_t Addend = -C->getSExtValue(); 3109 3110 SDNode *Add = nullptr; 3111 // ADDS can be better than CMN if the immediate fits in a 3112 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 3113 // Outside that range we can just use a CMN which is 32-bit but has a 3114 // 12-bit immediate range. 3115 if (Addend < 1<<8) { 3116 if (Subtarget->isThumb2()) { 3117 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3118 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3119 CurDAG->getRegister(0, MVT::i32) }; 3120 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 3121 } else { 3122 unsigned Opc = (Addend < 1<<3) ? 
ARM::tADDi3 : ARM::tADDi8; 3123 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 3124 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3125 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3126 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3127 } 3128 } 3129 if (Add) { 3130 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 3131 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 3132 } 3133 } 3134 // Other cases are autogenerated. 3135 break; 3136 } 3137 3138 case ARMISD::CMOV: { 3139 SDValue InFlag = N->getOperand(4); 3140 3141 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3142 bool SwitchEQNEToPLMI; 3143 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3144 3145 if (SwitchEQNEToPLMI) { 3146 SDValue ARMcc = N->getOperand(2); 3147 ARMCC::CondCodes CC = 3148 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 3149 3150 switch (CC) { 3151 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3152 case ARMCC::NE: 3153 CC = ARMCC::MI; 3154 break; 3155 case ARMCC::EQ: 3156 CC = ARMCC::PL; 3157 break; 3158 } 3159 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 3160 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 3161 N->getOperand(3), N->getOperand(4)}; 3162 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 3163 } 3164 3165 } 3166 // Other cases are autogenerated. 3167 break; 3168 } 3169 3170 case ARMISD::VZIP: { 3171 unsigned Opc = 0; 3172 EVT VT = N->getValueType(0); 3173 switch (VT.getSimpleVT().SimpleTy) { 3174 default: return; 3175 case MVT::v8i8: Opc = ARM::VZIPd8; break; 3176 case MVT::v4f16: 3177 case MVT::v4i16: Opc = ARM::VZIPd16; break; 3178 case MVT::v2f32: 3179 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
3180 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3181 case MVT::v16i8: Opc = ARM::VZIPq8; break; 3182 case MVT::v8f16: 3183 case MVT::v8i16: Opc = ARM::VZIPq16; break; 3184 case MVT::v4f32: 3185 case MVT::v4i32: Opc = ARM::VZIPq32; break; 3186 } 3187 SDValue Pred = getAL(CurDAG, dl); 3188 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3189 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3190 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3191 return; 3192 } 3193 case ARMISD::VUZP: { 3194 unsigned Opc = 0; 3195 EVT VT = N->getValueType(0); 3196 switch (VT.getSimpleVT().SimpleTy) { 3197 default: return; 3198 case MVT::v8i8: Opc = ARM::VUZPd8; break; 3199 case MVT::v4f16: 3200 case MVT::v4i16: Opc = ARM::VUZPd16; break; 3201 case MVT::v2f32: 3202 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 3203 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3204 case MVT::v16i8: Opc = ARM::VUZPq8; break; 3205 case MVT::v8f16: 3206 case MVT::v8i16: Opc = ARM::VUZPq16; break; 3207 case MVT::v4f32: 3208 case MVT::v4i32: Opc = ARM::VUZPq32; break; 3209 } 3210 SDValue Pred = getAL(CurDAG, dl); 3211 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3212 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3213 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3214 return; 3215 } 3216 case ARMISD::VTRN: { 3217 unsigned Opc = 0; 3218 EVT VT = N->getValueType(0); 3219 switch (VT.getSimpleVT().SimpleTy) { 3220 default: return; 3221 case MVT::v8i8: Opc = ARM::VTRNd8; break; 3222 case MVT::v4f16: 3223 case MVT::v4i16: Opc = ARM::VTRNd16; break; 3224 case MVT::v2f32: 3225 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3226 case MVT::v16i8: Opc = ARM::VTRNq8; break; 3227 case MVT::v8f16: 3228 case MVT::v8i16: Opc = ARM::VTRNq16; break; 3229 case MVT::v4f32: 3230 case MVT::v4i32: Opc = ARM::VTRNq32; break; 3231 } 3232 SDValue Pred = getAL(CurDAG, dl); 3233 SDValue PredReg = CurDAG->getRegister(0, 
MVT::i32); 3234 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3235 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3236 return; 3237 } 3238 case ARMISD::BUILD_VECTOR: { 3239 EVT VecVT = N->getValueType(0); 3240 EVT EltVT = VecVT.getVectorElementType(); 3241 unsigned NumElts = VecVT.getVectorNumElements(); 3242 if (EltVT == MVT::f64) { 3243 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 3244 ReplaceNode( 3245 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3246 return; 3247 } 3248 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 3249 if (NumElts == 2) { 3250 ReplaceNode( 3251 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3252 return; 3253 } 3254 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 3255 ReplaceNode(N, 3256 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 3257 N->getOperand(2), N->getOperand(3))); 3258 return; 3259 } 3260 3261 case ARMISD::VLD1DUP: { 3262 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 3263 ARM::VLD1DUPd32 }; 3264 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 3265 ARM::VLD1DUPq32 }; 3266 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 3267 return; 3268 } 3269 3270 case ARMISD::VLD2DUP: { 3271 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 3272 ARM::VLD2DUPd32 }; 3273 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 3274 return; 3275 } 3276 3277 case ARMISD::VLD3DUP: { 3278 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 3279 ARM::VLD3DUPd16Pseudo, 3280 ARM::VLD3DUPd32Pseudo }; 3281 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 3282 return; 3283 } 3284 3285 case ARMISD::VLD4DUP: { 3286 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 3287 ARM::VLD4DUPd16Pseudo, 3288 ARM::VLD4DUPd32Pseudo }; 3289 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 3290 return; 
3291 } 3292 3293 case ARMISD::VLD1DUP_UPD: { 3294 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 3295 ARM::VLD1DUPd16wb_fixed, 3296 ARM::VLD1DUPd32wb_fixed }; 3297 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 3298 ARM::VLD1DUPq16wb_fixed, 3299 ARM::VLD1DUPq32wb_fixed }; 3300 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 3301 return; 3302 } 3303 3304 case ARMISD::VLD2DUP_UPD: { 3305 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 3306 ARM::VLD2DUPd16wb_fixed, 3307 ARM::VLD2DUPd32wb_fixed }; 3308 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes); 3309 return; 3310 } 3311 3312 case ARMISD::VLD3DUP_UPD: { 3313 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 3314 ARM::VLD3DUPd16Pseudo_UPD, 3315 ARM::VLD3DUPd32Pseudo_UPD }; 3316 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes); 3317 return; 3318 } 3319 3320 case ARMISD::VLD4DUP_UPD: { 3321 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 3322 ARM::VLD4DUPd16Pseudo_UPD, 3323 ARM::VLD4DUPd32Pseudo_UPD }; 3324 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes); 3325 return; 3326 } 3327 3328 case ARMISD::VLD1_UPD: { 3329 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 3330 ARM::VLD1d16wb_fixed, 3331 ARM::VLD1d32wb_fixed, 3332 ARM::VLD1d64wb_fixed }; 3333 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 3334 ARM::VLD1q16wb_fixed, 3335 ARM::VLD1q32wb_fixed, 3336 ARM::VLD1q64wb_fixed }; 3337 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 3338 return; 3339 } 3340 3341 case ARMISD::VLD2_UPD: { 3342 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed, 3343 ARM::VLD2d16wb_fixed, 3344 ARM::VLD2d32wb_fixed, 3345 ARM::VLD1q64wb_fixed}; 3346 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, 3347 ARM::VLD2q16PseudoWB_fixed, 3348 ARM::VLD2q32PseudoWB_fixed }; 3349 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 3350 return; 3351 } 3352 3353 case ARMISD::VLD3_UPD: { 
3354 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 3355 ARM::VLD3d16Pseudo_UPD, 3356 ARM::VLD3d32Pseudo_UPD, 3357 ARM::VLD1d64TPseudoWB_fixed}; 3358 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3359 ARM::VLD3q16Pseudo_UPD, 3360 ARM::VLD3q32Pseudo_UPD }; 3361 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 3362 ARM::VLD3q16oddPseudo_UPD, 3363 ARM::VLD3q32oddPseudo_UPD }; 3364 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3365 return; 3366 } 3367 3368 case ARMISD::VLD4_UPD: { 3369 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, 3370 ARM::VLD4d16Pseudo_UPD, 3371 ARM::VLD4d32Pseudo_UPD, 3372 ARM::VLD1d64QPseudoWB_fixed}; 3373 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3374 ARM::VLD4q16Pseudo_UPD, 3375 ARM::VLD4q32Pseudo_UPD }; 3376 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, 3377 ARM::VLD4q16oddPseudo_UPD, 3378 ARM::VLD4q32oddPseudo_UPD }; 3379 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3380 return; 3381 } 3382 3383 case ARMISD::VLD2LN_UPD: { 3384 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 3385 ARM::VLD2LNd16Pseudo_UPD, 3386 ARM::VLD2LNd32Pseudo_UPD }; 3387 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 3388 ARM::VLD2LNq32Pseudo_UPD }; 3389 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 3390 return; 3391 } 3392 3393 case ARMISD::VLD3LN_UPD: { 3394 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 3395 ARM::VLD3LNd16Pseudo_UPD, 3396 ARM::VLD3LNd32Pseudo_UPD }; 3397 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 3398 ARM::VLD3LNq32Pseudo_UPD }; 3399 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 3400 return; 3401 } 3402 3403 case ARMISD::VLD4LN_UPD: { 3404 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 3405 ARM::VLD4LNd16Pseudo_UPD, 3406 ARM::VLD4LNd32Pseudo_UPD }; 3407 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 3408 ARM::VLD4LNq32Pseudo_UPD }; 
3409 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 3410 return; 3411 } 3412 3413 case ARMISD::VST1_UPD: { 3414 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 3415 ARM::VST1d16wb_fixed, 3416 ARM::VST1d32wb_fixed, 3417 ARM::VST1d64wb_fixed }; 3418 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 3419 ARM::VST1q16wb_fixed, 3420 ARM::VST1q32wb_fixed, 3421 ARM::VST1q64wb_fixed }; 3422 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 3423 return; 3424 } 3425 3426 case ARMISD::VST2_UPD: { 3427 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed, 3428 ARM::VST2d16wb_fixed, 3429 ARM::VST2d32wb_fixed, 3430 ARM::VST1q64wb_fixed}; 3431 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, 3432 ARM::VST2q16PseudoWB_fixed, 3433 ARM::VST2q32PseudoWB_fixed }; 3434 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 3435 return; 3436 } 3437 3438 case ARMISD::VST3_UPD: { 3439 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 3440 ARM::VST3d16Pseudo_UPD, 3441 ARM::VST3d32Pseudo_UPD, 3442 ARM::VST1d64TPseudoWB_fixed}; 3443 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3444 ARM::VST3q16Pseudo_UPD, 3445 ARM::VST3q32Pseudo_UPD }; 3446 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 3447 ARM::VST3q16oddPseudo_UPD, 3448 ARM::VST3q32oddPseudo_UPD }; 3449 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3450 return; 3451 } 3452 3453 case ARMISD::VST4_UPD: { 3454 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD, 3455 ARM::VST4d16Pseudo_UPD, 3456 ARM::VST4d32Pseudo_UPD, 3457 ARM::VST1d64QPseudoWB_fixed}; 3458 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3459 ARM::VST4q16Pseudo_UPD, 3460 ARM::VST4q32Pseudo_UPD }; 3461 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, 3462 ARM::VST4q16oddPseudo_UPD, 3463 ARM::VST4q32oddPseudo_UPD }; 3464 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3465 return; 3466 } 3467 3468 case ARMISD::VST2LN_UPD: { 3469 static 
const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 3470 ARM::VST2LNd16Pseudo_UPD, 3471 ARM::VST2LNd32Pseudo_UPD }; 3472 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 3473 ARM::VST2LNq32Pseudo_UPD }; 3474 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 3475 return; 3476 } 3477 3478 case ARMISD::VST3LN_UPD: { 3479 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 3480 ARM::VST3LNd16Pseudo_UPD, 3481 ARM::VST3LNd32Pseudo_UPD }; 3482 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 3483 ARM::VST3LNq32Pseudo_UPD }; 3484 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 3485 return; 3486 } 3487 3488 case ARMISD::VST4LN_UPD: { 3489 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 3490 ARM::VST4LNd16Pseudo_UPD, 3491 ARM::VST4LNd32Pseudo_UPD }; 3492 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 3493 ARM::VST4LNq32Pseudo_UPD }; 3494 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 3495 return; 3496 } 3497 3498 case ISD::INTRINSIC_VOID: 3499 case ISD::INTRINSIC_W_CHAIN: { 3500 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 3501 switch (IntNo) { 3502 default: 3503 break; 3504 3505 case Intrinsic::arm_mrrc: 3506 case Intrinsic::arm_mrrc2: { 3507 SDLoc dl(N); 3508 SDValue Chain = N->getOperand(0); 3509 unsigned Opc; 3510 3511 if (Subtarget->isThumb()) 3512 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 3513 else 3514 Opc = (IntNo == Intrinsic::arm_mrrc ? 
ARM::MRRC : ARM::MRRC2); 3515 3516 SmallVector<SDValue, 5> Ops; 3517 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 3518 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 3519 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 3520 3521 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 3522 // instruction will always be '1111' but it is possible in assembly language to specify 3523 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 3524 if (Opc != ARM::MRRC2) { 3525 Ops.push_back(getAL(CurDAG, dl)); 3526 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3527 } 3528 3529 Ops.push_back(Chain); 3530 3531 // Writes to two registers. 3532 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 3533 3534 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 3535 return; 3536 } 3537 case Intrinsic::arm_ldaexd: 3538 case Intrinsic::arm_ldrexd: { 3539 SDLoc dl(N); 3540 SDValue Chain = N->getOperand(0); 3541 SDValue MemAddr = N->getOperand(2); 3542 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 3543 3544 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 3545 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 3546 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 3547 3548 // arm_ldrexd returns a i64 value in {i32, i32} 3549 std::vector<EVT> ResTys; 3550 if (isThumb) { 3551 ResTys.push_back(MVT::i32); 3552 ResTys.push_back(MVT::i32); 3553 } else 3554 ResTys.push_back(MVT::Untyped); 3555 ResTys.push_back(MVT::Other); 3556 3557 // Place arguments in the right order. 3558 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 3559 CurDAG->getRegister(0, MVT::i32), Chain}; 3560 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3561 // Transfer memoperands. 
3562 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3563 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 3564 3565 // Remap uses. 3566 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 3567 if (!SDValue(N, 0).use_empty()) { 3568 SDValue Result; 3569 if (isThumb) 3570 Result = SDValue(Ld, 0); 3571 else { 3572 SDValue SubRegIdx = 3573 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 3574 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3575 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3576 Result = SDValue(ResNode,0); 3577 } 3578 ReplaceUses(SDValue(N, 0), Result); 3579 } 3580 if (!SDValue(N, 1).use_empty()) { 3581 SDValue Result; 3582 if (isThumb) 3583 Result = SDValue(Ld, 1); 3584 else { 3585 SDValue SubRegIdx = 3586 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 3587 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3588 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3589 Result = SDValue(ResNode,0); 3590 } 3591 ReplaceUses(SDValue(N, 1), Result); 3592 } 3593 ReplaceUses(SDValue(N, 2), OutChain); 3594 CurDAG->RemoveDeadNode(N); 3595 return; 3596 } 3597 case Intrinsic::arm_stlexd: 3598 case Intrinsic::arm_strexd: { 3599 SDLoc dl(N); 3600 SDValue Chain = N->getOperand(0); 3601 SDValue Val0 = N->getOperand(2); 3602 SDValue Val1 = N->getOperand(3); 3603 SDValue MemAddr = N->getOperand(4); 3604 3605 // Store exclusive double return a i32 value which is the return status 3606 // of the issued store. 3607 const EVT ResTys[] = {MVT::i32, MVT::Other}; 3608 3609 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 3610 // Place arguments in the right order. 3611 SmallVector<SDValue, 7> Ops; 3612 if (isThumb) { 3613 Ops.push_back(Val0); 3614 Ops.push_back(Val1); 3615 } else 3616 // arm_strexd uses GPRPair. 
3617 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 3618 Ops.push_back(MemAddr); 3619 Ops.push_back(getAL(CurDAG, dl)); 3620 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3621 Ops.push_back(Chain); 3622 3623 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 3624 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 3625 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 3626 3627 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3628 // Transfer memoperands. 3629 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3630 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 3631 3632 ReplaceNode(N, St); 3633 return; 3634 } 3635 3636 case Intrinsic::arm_neon_vld1: { 3637 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 3638 ARM::VLD1d32, ARM::VLD1d64 }; 3639 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 3640 ARM::VLD1q32, ARM::VLD1q64}; 3641 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 3642 return; 3643 } 3644 3645 case Intrinsic::arm_neon_vld1x2: { 3646 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 3647 ARM::VLD1q32, ARM::VLD1q64 }; 3648 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 3649 ARM::VLD1d16QPseudo, 3650 ARM::VLD1d32QPseudo, 3651 ARM::VLD1d64QPseudo }; 3652 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 3653 return; 3654 } 3655 3656 case Intrinsic::arm_neon_vld1x3: { 3657 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 3658 ARM::VLD1d16TPseudo, 3659 ARM::VLD1d32TPseudo, 3660 ARM::VLD1d64TPseudo }; 3661 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD, 3662 ARM::VLD1q16LowTPseudo_UPD, 3663 ARM::VLD1q32LowTPseudo_UPD, 3664 ARM::VLD1q64LowTPseudo_UPD }; 3665 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 3666 ARM::VLD1q16HighTPseudo, 3667 ARM::VLD1q32HighTPseudo, 3668 ARM::VLD1q64HighTPseudo }; 3669 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3670 return; 3671 } 
3672 3673 case Intrinsic::arm_neon_vld1x4: { 3674 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 3675 ARM::VLD1d16QPseudo, 3676 ARM::VLD1d32QPseudo, 3677 ARM::VLD1d64QPseudo }; 3678 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 3679 ARM::VLD1q16LowQPseudo_UPD, 3680 ARM::VLD1q32LowQPseudo_UPD, 3681 ARM::VLD1q64LowQPseudo_UPD }; 3682 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 3683 ARM::VLD1q16HighQPseudo, 3684 ARM::VLD1q32HighQPseudo, 3685 ARM::VLD1q64HighQPseudo }; 3686 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3687 return; 3688 } 3689 3690 case Intrinsic::arm_neon_vld2: { 3691 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 3692 ARM::VLD2d32, ARM::VLD1q64 }; 3693 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 3694 ARM::VLD2q32Pseudo }; 3695 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 3696 return; 3697 } 3698 3699 case Intrinsic::arm_neon_vld3: { 3700 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 3701 ARM::VLD3d16Pseudo, 3702 ARM::VLD3d32Pseudo, 3703 ARM::VLD1d64TPseudo }; 3704 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3705 ARM::VLD3q16Pseudo_UPD, 3706 ARM::VLD3q32Pseudo_UPD }; 3707 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 3708 ARM::VLD3q16oddPseudo, 3709 ARM::VLD3q32oddPseudo }; 3710 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3711 return; 3712 } 3713 3714 case Intrinsic::arm_neon_vld4: { 3715 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 3716 ARM::VLD4d16Pseudo, 3717 ARM::VLD4d32Pseudo, 3718 ARM::VLD1d64QPseudo }; 3719 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3720 ARM::VLD4q16Pseudo_UPD, 3721 ARM::VLD4q32Pseudo_UPD }; 3722 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 3723 ARM::VLD4q16oddPseudo, 3724 ARM::VLD4q32oddPseudo }; 3725 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3726 return; 3727 } 3728 3729 case Intrinsic::arm_neon_vld2dup: 
{ 3730 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 3731 ARM::VLD2DUPd32, ARM::VLD1q64 }; 3732 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 3733 ARM::VLD2DUPq16EvenPseudo, 3734 ARM::VLD2DUPq32EvenPseudo }; 3735 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 3736 ARM::VLD2DUPq16OddPseudo, 3737 ARM::VLD2DUPq32OddPseudo }; 3738 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 3739 DOpcodes, QOpcodes0, QOpcodes1); 3740 return; 3741 } 3742 3743 case Intrinsic::arm_neon_vld3dup: { 3744 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 3745 ARM::VLD3DUPd16Pseudo, 3746 ARM::VLD3DUPd32Pseudo, 3747 ARM::VLD1d64TPseudo }; 3748 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 3749 ARM::VLD3DUPq16EvenPseudo, 3750 ARM::VLD3DUPq32EvenPseudo }; 3751 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 3752 ARM::VLD3DUPq16OddPseudo, 3753 ARM::VLD3DUPq32OddPseudo }; 3754 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 3755 DOpcodes, QOpcodes0, QOpcodes1); 3756 return; 3757 } 3758 3759 case Intrinsic::arm_neon_vld4dup: { 3760 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 3761 ARM::VLD4DUPd16Pseudo, 3762 ARM::VLD4DUPd32Pseudo, 3763 ARM::VLD1d64QPseudo }; 3764 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 3765 ARM::VLD4DUPq16EvenPseudo, 3766 ARM::VLD4DUPq32EvenPseudo }; 3767 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 3768 ARM::VLD4DUPq16OddPseudo, 3769 ARM::VLD4DUPq32OddPseudo }; 3770 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 3771 DOpcodes, QOpcodes0, QOpcodes1); 3772 return; 3773 } 3774 3775 case Intrinsic::arm_neon_vld2lane: { 3776 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 3777 ARM::VLD2LNd16Pseudo, 3778 ARM::VLD2LNd32Pseudo }; 3779 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 3780 ARM::VLD2LNq32Pseudo }; 3781 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 3782 return; 3783 } 3784 3785 
case Intrinsic::arm_neon_vld3lane: { 3786 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 3787 ARM::VLD3LNd16Pseudo, 3788 ARM::VLD3LNd32Pseudo }; 3789 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 3790 ARM::VLD3LNq32Pseudo }; 3791 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 3792 return; 3793 } 3794 3795 case Intrinsic::arm_neon_vld4lane: { 3796 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 3797 ARM::VLD4LNd16Pseudo, 3798 ARM::VLD4LNd32Pseudo }; 3799 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 3800 ARM::VLD4LNq32Pseudo }; 3801 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 3802 return; 3803 } 3804 3805 case Intrinsic::arm_neon_vst1: { 3806 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 3807 ARM::VST1d32, ARM::VST1d64 }; 3808 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 3809 ARM::VST1q32, ARM::VST1q64 }; 3810 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 3811 return; 3812 } 3813 3814 case Intrinsic::arm_neon_vst1x2: { 3815 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 3816 ARM::VST1q32, ARM::VST1q64 }; 3817 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 3818 ARM::VST1d16QPseudo, 3819 ARM::VST1d32QPseudo, 3820 ARM::VST1d64QPseudo }; 3821 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 3822 return; 3823 } 3824 3825 case Intrinsic::arm_neon_vst1x3: { 3826 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 3827 ARM::VST1d16TPseudo, 3828 ARM::VST1d32TPseudo, 3829 ARM::VST1d64TPseudo }; 3830 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 3831 ARM::VST1q16LowTPseudo_UPD, 3832 ARM::VST1q32LowTPseudo_UPD, 3833 ARM::VST1q64LowTPseudo_UPD }; 3834 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 3835 ARM::VST1q16HighTPseudo, 3836 ARM::VST1q32HighTPseudo, 3837 ARM::VST1q64HighTPseudo }; 3838 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3839 return; 3840 } 3841 3842 case 
Intrinsic::arm_neon_vst1x4: { 3843 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 3844 ARM::VST1d16QPseudo, 3845 ARM::VST1d32QPseudo, 3846 ARM::VST1d64QPseudo }; 3847 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 3848 ARM::VST1q16LowQPseudo_UPD, 3849 ARM::VST1q32LowQPseudo_UPD, 3850 ARM::VST1q64LowQPseudo_UPD }; 3851 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 3852 ARM::VST1q16HighQPseudo, 3853 ARM::VST1q32HighQPseudo, 3854 ARM::VST1q64HighQPseudo }; 3855 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3856 return; 3857 } 3858 3859 case Intrinsic::arm_neon_vst2: { 3860 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 3861 ARM::VST2d32, ARM::VST1q64 }; 3862 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 3863 ARM::VST2q32Pseudo }; 3864 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 3865 return; 3866 } 3867 3868 case Intrinsic::arm_neon_vst3: { 3869 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 3870 ARM::VST3d16Pseudo, 3871 ARM::VST3d32Pseudo, 3872 ARM::VST1d64TPseudo }; 3873 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3874 ARM::VST3q16Pseudo_UPD, 3875 ARM::VST3q32Pseudo_UPD }; 3876 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 3877 ARM::VST3q16oddPseudo, 3878 ARM::VST3q32oddPseudo }; 3879 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3880 return; 3881 } 3882 3883 case Intrinsic::arm_neon_vst4: { 3884 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 3885 ARM::VST4d16Pseudo, 3886 ARM::VST4d32Pseudo, 3887 ARM::VST1d64QPseudo }; 3888 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3889 ARM::VST4q16Pseudo_UPD, 3890 ARM::VST4q32Pseudo_UPD }; 3891 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 3892 ARM::VST4q16oddPseudo, 3893 ARM::VST4q32oddPseudo }; 3894 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3895 return; 3896 } 3897 3898 case Intrinsic::arm_neon_vst2lane: { 3899 static 
const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 3900 ARM::VST2LNd16Pseudo, 3901 ARM::VST2LNd32Pseudo }; 3902 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 3903 ARM::VST2LNq32Pseudo }; 3904 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 3905 return; 3906 } 3907 3908 case Intrinsic::arm_neon_vst3lane: { 3909 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 3910 ARM::VST3LNd16Pseudo, 3911 ARM::VST3LNd32Pseudo }; 3912 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 3913 ARM::VST3LNq32Pseudo }; 3914 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 3915 return; 3916 } 3917 3918 case Intrinsic::arm_neon_vst4lane: { 3919 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 3920 ARM::VST4LNd16Pseudo, 3921 ARM::VST4LNd32Pseudo }; 3922 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 3923 ARM::VST4LNq32Pseudo }; 3924 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 3925 return; 3926 } 3927 } 3928 break; 3929 } 3930 3931 case ISD::ATOMIC_CMP_SWAP: 3932 SelectCMP_SWAP(N); 3933 return; 3934 } 3935 3936 SelectCode(N); 3937 } 3938 3939 // Inspect a register string of the form 3940 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 3941 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 3942 // and obtain the integer operands from them, adding these operands to the 3943 // provided vector. 3944 static void getIntOperandsFromRegisterString(StringRef RegString, 3945 SelectionDAG *CurDAG, 3946 const SDLoc &DL, 3947 std::vector<SDValue> &Ops) { 3948 SmallVector<StringRef, 5> Fields; 3949 RegString.split(Fields, ':'); 3950 3951 if (Fields.size() > 1) { 3952 bool AllIntFields = true; 3953 3954 for (StringRef Field : Fields) { 3955 // Need to trim out leading 'cp' characters and get the integer field. 
3956 unsigned IntField; 3957 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 3958 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 3959 } 3960 3961 assert(AllIntFields && 3962 "Unexpected non-integer value in special register string."); 3963 } 3964 } 3965 3966 // Maps a Banked Register string to its mask value. The mask value returned is 3967 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 3968 // mask operand, which expresses which register is to be used, e.g. r8, and in 3969 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 3970 // was invalid. 3971 static inline int getBankedRegisterMask(StringRef RegString) { 3972 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower()); 3973 if (!TheReg) 3974 return -1; 3975 return TheReg->Encoding; 3976 } 3977 3978 // The flags here are common to those allowed for apsr in the A class cores and 3979 // those allowed for the special registers in the M class cores. Returns a 3980 // value representing which flags were present, -1 if invalid. 3981 static inline int getMClassFlagsMask(StringRef Flags) { 3982 return StringSwitch<int>(Flags) 3983 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is 3984 // correct when flags are not permitted 3985 .Case("g", 0x1) 3986 .Case("nzcvq", 0x2) 3987 .Case("nzcvqg", 0x3) 3988 .Default(-1); 3989 } 3990 3991 // Maps MClass special registers string to its value for use in the 3992 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand. 3993 // Returns -1 to signify that the string was invalid. 
3994 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) { 3995 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg); 3996 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits(); 3997 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits)) 3998 return -1; 3999 return (int)(TheReg->Encoding & 0xFFF); // SYSm value 4000 } 4001 4002 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { 4003 // The mask operand contains the special register (R Bit) in bit 4, whether 4004 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and 4005 // bits 3-0 contains the fields to be accessed in the special register, set by 4006 // the flags provided with the register. 4007 int Mask = 0; 4008 if (Reg == "apsr") { 4009 // The flags permitted for apsr are the same flags that are allowed in 4010 // M class registers. We get the flag value and then shift the flags into 4011 // the correct place to combine with the mask. 4012 Mask = getMClassFlagsMask(Flags); 4013 if (Mask == -1) 4014 return -1; 4015 return Mask << 2; 4016 } 4017 4018 if (Reg != "cpsr" && Reg != "spsr") { 4019 return -1; 4020 } 4021 4022 // This is the same as if the flags were "fc" 4023 if (Flags.empty() || Flags == "all") 4024 return Mask | 0x9; 4025 4026 // Inspect the supplied flags string and set the bits in the mask for 4027 // the relevant and valid flags allowed for cpsr and spsr. 4028 for (char Flag : Flags) { 4029 int FlagVal; 4030 switch (Flag) { 4031 case 'c': 4032 FlagVal = 0x1; 4033 break; 4034 case 'x': 4035 FlagVal = 0x2; 4036 break; 4037 case 's': 4038 FlagVal = 0x4; 4039 break; 4040 case 'f': 4041 FlagVal = 0x8; 4042 break; 4043 default: 4044 FlagVal = 0; 4045 } 4046 4047 // This avoids allowing strings where the same flag bit appears twice. 4048 if (!FlagVal || (Mask & FlagVal)) 4049 return -1; 4050 Mask |= FlagVal; 4051 } 4052 4053 // If the register is spsr then we need to set the R bit. 
4054 if (Reg == "spsr") 4055 Mask |= 0x10; 4056 4057 return Mask; 4058 } 4059 4060 // Lower the read_register intrinsic to ARM specific DAG nodes 4061 // using the supplied metadata string to select the instruction node to use 4062 // and the registers/masks to construct as operands for the node. 4063 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ 4064 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 4065 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 4066 bool IsThumb2 = Subtarget->isThumb2(); 4067 SDLoc DL(N); 4068 4069 std::vector<SDValue> Ops; 4070 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 4071 4072 if (!Ops.empty()) { 4073 // If the special register string was constructed of fields (as defined 4074 // in the ACLE) then need to lower to MRC node (32 bit) or 4075 // MRRC node(64 bit), we can make the distinction based on the number of 4076 // operands we have. 4077 unsigned Opcode; 4078 SmallVector<EVT, 3> ResTypes; 4079 if (Ops.size() == 5){ 4080 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; 4081 ResTypes.append({ MVT::i32, MVT::Other }); 4082 } else { 4083 assert(Ops.size() == 3 && 4084 "Invalid number of fields in special register string."); 4085 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; 4086 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); 4087 } 4088 4089 Ops.push_back(getAL(CurDAG, DL)); 4090 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4091 Ops.push_back(N->getOperand(0)); 4092 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); 4093 return true; 4094 } 4095 4096 std::string SpecialReg = RegString->getString().lower(); 4097 4098 int BankedReg = getBankedRegisterMask(SpecialReg); 4099 if (BankedReg != -1) { 4100 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), 4101 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4102 N->getOperand(0) }; 4103 ReplaceNode( 4104 N, CurDAG->getMachineNode(IsThumb2 ? 
ARM::t2MRSbanked : ARM::MRSbanked, 4105 DL, MVT::i32, MVT::Other, Ops)); 4106 return true; 4107 } 4108 4109 // The VFP registers are read by creating SelectionDAG nodes with opcodes 4110 // corresponding to the register that is being read from. So we switch on the 4111 // string to find which opcode we need to use. 4112 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 4113 .Case("fpscr", ARM::VMRS) 4114 .Case("fpexc", ARM::VMRS_FPEXC) 4115 .Case("fpsid", ARM::VMRS_FPSID) 4116 .Case("mvfr0", ARM::VMRS_MVFR0) 4117 .Case("mvfr1", ARM::VMRS_MVFR1) 4118 .Case("mvfr2", ARM::VMRS_MVFR2) 4119 .Case("fpinst", ARM::VMRS_FPINST) 4120 .Case("fpinst2", ARM::VMRS_FPINST2) 4121 .Default(0); 4122 4123 // If an opcode was found then we can lower the read to a VFP instruction. 4124 if (Opcode) { 4125 if (!Subtarget->hasVFP2Base()) 4126 return false; 4127 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base()) 4128 return false; 4129 4130 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4131 N->getOperand(0) }; 4132 ReplaceNode(N, 4133 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); 4134 return true; 4135 } 4136 4137 // If the target is M Class then need to validate that the register string 4138 // is an acceptable value, so check that a mask can be constructed from the 4139 // string. 4140 if (Subtarget->isMClass()) { 4141 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 4142 if (SYSmValue == -1) 4143 return false; 4144 4145 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 4146 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4147 N->getOperand(0) }; 4148 ReplaceNode( 4149 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); 4150 return true; 4151 } 4152 4153 // Here we know the target is not M Class so we need to check if it is one 4154 // of the remaining possible values which are apsr, cpsr or spsr. 
4155 if (SpecialReg == "apsr" || SpecialReg == "cpsr") { 4156 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4157 N->getOperand(0) }; 4158 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, 4159 DL, MVT::i32, MVT::Other, Ops)); 4160 return true; 4161 } 4162 4163 if (SpecialReg == "spsr") { 4164 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4165 N->getOperand(0) }; 4166 ReplaceNode( 4167 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL, 4168 MVT::i32, MVT::Other, Ops)); 4169 return true; 4170 } 4171 4172 return false; 4173 } 4174 4175 // Lower the write_register intrinsic to ARM specific DAG nodes 4176 // using the supplied metadata string to select the instruction node to use 4177 // and the registers/masks to use in the nodes 4178 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ 4179 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 4180 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 4181 bool IsThumb2 = Subtarget->isThumb2(); 4182 SDLoc DL(N); 4183 4184 std::vector<SDValue> Ops; 4185 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 4186 4187 if (!Ops.empty()) { 4188 // If the special register string was constructed of fields (as defined 4189 // in the ACLE) then need to lower to MCR node (32 bit) or 4190 // MCRR node(64 bit), we can make the distinction based on the number of 4191 // operands we have. 4192 unsigned Opcode; 4193 if (Ops.size() == 5) { 4194 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; 4195 Ops.insert(Ops.begin()+2, N->getOperand(2)); 4196 } else { 4197 assert(Ops.size() == 3 && 4198 "Invalid number of fields in special register string."); 4199 Opcode = IsThumb2 ? 
ARM::t2MCRR : ARM::MCRR; 4200 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; 4201 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); 4202 } 4203 4204 Ops.push_back(getAL(CurDAG, DL)); 4205 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4206 Ops.push_back(N->getOperand(0)); 4207 4208 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 4209 return true; 4210 } 4211 4212 std::string SpecialReg = RegString->getString().lower(); 4213 int BankedReg = getBankedRegisterMask(SpecialReg); 4214 if (BankedReg != -1) { 4215 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), 4216 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4217 N->getOperand(0) }; 4218 ReplaceNode( 4219 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, 4220 DL, MVT::Other, Ops)); 4221 return true; 4222 } 4223 4224 // The VFP registers are written to by creating SelectionDAG nodes with 4225 // opcodes corresponding to the register that is being written. So we switch 4226 // on the string to find which opcode we need to use. 4227 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 4228 .Case("fpscr", ARM::VMSR) 4229 .Case("fpexc", ARM::VMSR_FPEXC) 4230 .Case("fpsid", ARM::VMSR_FPSID) 4231 .Case("fpinst", ARM::VMSR_FPINST) 4232 .Case("fpinst2", ARM::VMSR_FPINST2) 4233 .Default(0); 4234 4235 if (Opcode) { 4236 if (!Subtarget->hasVFP2Base()) 4237 return false; 4238 Ops = { N->getOperand(2), getAL(CurDAG, DL), 4239 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 4240 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 4241 return true; 4242 } 4243 4244 std::pair<StringRef, StringRef> Fields; 4245 Fields = StringRef(SpecialReg).rsplit('_'); 4246 std::string Reg = Fields.first.str(); 4247 StringRef Flags = Fields.second; 4248 4249 // If the target was M Class then need to validate the special register value 4250 // and retrieve the mask for use in the instruction node. 
4251 if (Subtarget->isMClass()) { 4252 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 4253 if (SYSmValue == -1) 4254 return false; 4255 4256 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 4257 N->getOperand(2), getAL(CurDAG, DL), 4258 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 4259 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); 4260 return true; 4261 } 4262 4263 // We then check to see if a valid mask can be constructed for one of the 4264 // register string values permitted for the A and R class cores. These values 4265 // are apsr, spsr and cpsr; these are also valid on older cores. 4266 int Mask = getARClassRegisterMask(Reg, Flags); 4267 if (Mask != -1) { 4268 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), 4269 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4270 N->getOperand(0) }; 4271 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, 4272 DL, MVT::Other, Ops)); 4273 return true; 4274 } 4275 4276 return false; 4277 } 4278 4279 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ 4280 std::vector<SDValue> AsmNodeOperands; 4281 unsigned Flag, Kind; 4282 bool Changed = false; 4283 unsigned NumOps = N->getNumOperands(); 4284 4285 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 4286 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 4287 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 4288 // respectively. Since there is no constraint to explicitly specify a 4289 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 4290 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 4291 // them into a GPRPair. 4292 4293 SDLoc dl(N); 4294 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) 4295 : SDValue(nullptr,0); 4296 4297 SmallVector<bool, 8> OpChanged; 4298 // Glue node will be appended late. 
4299 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 4300 SDValue op = N->getOperand(i); 4301 AsmNodeOperands.push_back(op); 4302 4303 if (i < InlineAsm::Op_FirstOperand) 4304 continue; 4305 4306 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 4307 Flag = C->getZExtValue(); 4308 Kind = InlineAsm::getKind(Flag); 4309 } 4310 else 4311 continue; 4312 4313 // Immediate operands to inline asm in the SelectionDAG are modeled with 4314 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and 4315 // the second is a constant with the value of the immediate. If we get here 4316 // and we have a Kind_Imm, skip the next operand, and continue. 4317 if (Kind == InlineAsm::Kind_Imm) { 4318 SDValue op = N->getOperand(++i); 4319 AsmNodeOperands.push_back(op); 4320 continue; 4321 } 4322 4323 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); 4324 if (NumRegs) 4325 OpChanged.push_back(false); 4326 4327 unsigned DefIdx = 0; 4328 bool IsTiedToChangedOp = false; 4329 // If it's a use that is tied with a previous def, it has no 4330 // reg class constraint. 4331 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) 4332 IsTiedToChangedOp = OpChanged[DefIdx]; 4333 4334 // Memory operands to inline asm in the SelectionDAG are modeled with two 4335 // operands: a constant of value InlineAsm::Kind_Mem followed by the input 4336 // operand. If we get here and we have a Kind_Mem, skip the next operand (so 4337 // it doesn't get misinterpreted), and continue. We do this here because 4338 // it's important to update the OpChanged array correctly before moving on. 
4339 if (Kind == InlineAsm::Kind_Mem) { 4340 SDValue op = N->getOperand(++i); 4341 AsmNodeOperands.push_back(op); 4342 continue; 4343 } 4344 4345 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 4346 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 4347 continue; 4348 4349 unsigned RC; 4350 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 4351 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 4352 || NumRegs != 2) 4353 continue; 4354 4355 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 4356 SDValue V0 = N->getOperand(i+1); 4357 SDValue V1 = N->getOperand(i+2); 4358 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); 4359 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); 4360 SDValue PairedReg; 4361 MachineRegisterInfo &MRI = MF->getRegInfo(); 4362 4363 if (Kind == InlineAsm::Kind_RegDef || 4364 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 4365 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 4366 // the original GPRs. 4367 4368 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 4369 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 4370 SDValue Chain = SDValue(N,0); 4371 4372 SDNode *GU = N->getGluedUser(); 4373 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 4374 Chain.getValue(1)); 4375 4376 // Extract values from a GPRPair reg and copy to the original GPR reg. 4377 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4378 RegCopy); 4379 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4380 RegCopy); 4381 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 4382 RegCopy.getValue(1)); 4383 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 4384 4385 // Update the original glue user. 
4386 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 4387 Ops.push_back(T1.getValue(1)); 4388 CurDAG->UpdateNodeOperands(GU, Ops); 4389 } 4390 else { 4391 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 4392 // GPRPair and then pass the GPRPair to the inline asm. 4393 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 4394 4395 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 4396 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 4397 Chain.getValue(1)); 4398 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 4399 T0.getValue(1)); 4400 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 4401 4402 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 4403 // i32 VRs of inline asm with it. 4404 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 4405 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 4406 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 4407 4408 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 4409 Glue = Chain.getValue(1); 4410 } 4411 4412 Changed = true; 4413 4414 if(PairedReg.getNode()) { 4415 OpChanged[OpChanged.size() -1 ] = true; 4416 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 4417 if (IsTiedToChangedOp) 4418 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 4419 else 4420 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 4421 // Replace the current flag. 4422 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 4423 Flag, dl, MVT::i32); 4424 // Add the new register node and skip the original two GPRs. 4425 AsmNodeOperands.push_back(PairedReg); 4426 // Skip the next two GPRs. 
4427 i += 2; 4428 } 4429 } 4430 4431 if (Glue.getNode()) 4432 AsmNodeOperands.push_back(Glue); 4433 if (!Changed) 4434 return false; 4435 4436 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N), 4437 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 4438 New->setNodeId(-1); 4439 ReplaceNode(N, New.getNode()); 4440 return true; 4441 } 4442 4443 4444 bool ARMDAGToDAGISel:: 4445 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 4446 std::vector<SDValue> &OutOps) { 4447 switch(ConstraintID) { 4448 default: 4449 llvm_unreachable("Unexpected asm memory constraint"); 4450 case InlineAsm::Constraint_i: 4451 // FIXME: It seems strange that 'i' is needed here since it's supposed to 4452 // be an immediate and not a memory constraint. 4453 LLVM_FALLTHROUGH; 4454 case InlineAsm::Constraint_m: 4455 case InlineAsm::Constraint_o: 4456 case InlineAsm::Constraint_Q: 4457 case InlineAsm::Constraint_Um: 4458 case InlineAsm::Constraint_Un: 4459 case InlineAsm::Constraint_Uq: 4460 case InlineAsm::Constraint_Us: 4461 case InlineAsm::Constraint_Ut: 4462 case InlineAsm::Constraint_Uv: 4463 case InlineAsm::Constraint_Uy: 4464 // Require the address to be in a register. That is safe for all ARM 4465 // variants and it is hard to do anything much smarter without knowing 4466 // how the operand is used. 4467 OutOps.push_back(Op); 4468 return false; 4469 } 4470 return true; 4471 } 4472 4473 /// createARMISelDag - This pass converts a legalized DAG into a 4474 /// ARM-specific DAG, ready for instruction scheduling. 4475 /// 4476 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 4477 CodeGenOpt::Level OptLevel) { 4478 return new ARMDAGToDAGISel(TM, OptLevel); 4479 } 4480