1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the Base ARM implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "ARMBaseInstrInfo.h"
14 #include "ARMBaseRegisterInfo.h"
15 #include "ARMConstantPoolValue.h"
16 #include "ARMFeatures.h"
17 #include "ARMHazardRecognizer.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "MCTargetDesc/ARMBaseInfo.h"
22 #include "MVETailPredUtils.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/CodeGen/DFAPacketizer.h"
28 #include "llvm/CodeGen/LiveVariables.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineConstantPool.h"
31 #include "llvm/CodeGen/MachineFrameInfo.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineInstr.h"
34 #include "llvm/CodeGen/MachineInstrBuilder.h"
35 #include "llvm/CodeGen/MachineMemOperand.h"
36 #include "llvm/CodeGen/MachineModuleInfo.h"
37 #include "llvm/CodeGen/MachineOperand.h"
38 #include "llvm/CodeGen/MachinePipeliner.h"
39 #include "llvm/CodeGen/MachineRegisterInfo.h"
40 #include "llvm/CodeGen/MachineScheduler.h"
41 #include "llvm/CodeGen/MultiHazardRecognizer.h"
42 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
43 #include "llvm/CodeGen/SelectionDAGNodes.h"
44 #include "llvm/CodeGen/TargetInstrInfo.h"
45 #include "llvm/CodeGen/TargetRegisterInfo.h"
46 #include "llvm/CodeGen/TargetSchedule.h"
47 #include "llvm/IR/Attributes.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DebugLoc.h"
50 #include "llvm/IR/Function.h"
51 #include "llvm/IR/GlobalValue.h"
52 #include "llvm/IR/Module.h"
53 #include "llvm/MC/MCAsmInfo.h"
54 #include "llvm/MC/MCInstrDesc.h"
55 #include "llvm/MC/MCInstrItineraries.h"
56 #include "llvm/Support/BranchProbability.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CommandLine.h"
59 #include "llvm/Support/Compiler.h"
60 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/ErrorHandling.h"
62 #include "llvm/Support/raw_ostream.h"
63 #include "llvm/Target/TargetMachine.h"
64 #include "llvm/TargetParser/Triple.h"
65 #include <algorithm>
66 #include <cassert>
67 #include <cstdint>
68 #include <iterator>
69 #include <new>
70 #include <utility>
71 #include <vector>
72
73 using namespace llvm;
74
75 #define DEBUG_TYPE "arm-instrinfo"
76
77 #define GET_INSTRINFO_CTOR_DTOR
78 #include "ARMGenInstrInfo.inc"
79
80 static cl::opt<bool>
81 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
82 cl::desc("Enable ARM 2-addr to 3-addr conv"));
83
84 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
85 struct ARM_MLxEntry {
86 uint16_t MLxOpc; // MLA / MLS opcode
87 uint16_t MulOpc; // Expanded multiplication opcode
88 uint16_t AddSubOpc; // Expanded add / sub opcode
89 bool NegAcc; // True if the acc is negated before the add / sub.
90 bool HasLane; // True if instruction has an extra "lane" operand.
91 };
92
93 static const ARM_MLxEntry ARM_MLxTable[] = {
94 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
95 // fp scalar ops
96 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
97 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
98 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
99 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
100 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
101 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
102 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
103 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
104
105 // fp SIMD ops
106 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
107 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
108 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
109 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
110 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
111 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
112 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
113 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
114 };
115
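// Populate the MLx lookup table and record the expanded multiply / add-sub
// opcodes so the FP MLx hazard recognizer can detect back-to-back MLx hazards.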
116 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
117 : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
118 Subtarget(STI) {
119 for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
120 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
121 llvm_unreachable("Duplicated entries?");
122 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
123 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
124 }
125 }
126
127 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
128 // currently defaults to no prepass hazard recognizer.
129 ScheduleHazardRecognizer *
130 ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
131 const ScheduleDAG *DAG) const {
132 if (usePreRAHazardRecognizer()) {
133 const InstrItineraryData *II =
134 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
135 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
136 }
137 return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
138 }
139
140 // Called during:
141 // - pre-RA scheduling
142 // - post-RA scheduling when FeatureUseMISched is set
143 ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
144 const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
145 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
146
147 // We would like to restrict this hazard recognizer to only
148 // post-RA scheduling; we can tell that we're post-RA because we don't
149 // track VRegLiveness.
150 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
151 // banks banked on bit 2. Assume that TCMs are in use.
152 if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
153 MHR->AddHazardRecognizer(
154 std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
155
156 // Not inserting ARMHazardRecognizerFPMLx because that would change
157 // legacy behavior
158
159 auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
160 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
161 return MHR;
162 }
163
164 // Called during post-RA scheduling when FeatureUseMISched is not set
165 ScheduleHazardRecognizer *ARMBaseInstrInfo::
166 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
167 const ScheduleDAG *DAG) const {
168 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
169
170 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
171 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
172
173 auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
174 if (BHR)
175 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
176 return MHR;
177 }
178
179 MachineInstr *
180 ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
181 LiveIntervals *LIS) const {
182 // FIXME: Thumb2 support.
183
184 if (!EnableARM3Addr)
185 return nullptr;
186
187 MachineFunction &MF = *MI.getParent()->getParent();
188 uint64_t TSFlags = MI.getDesc().TSFlags;
189 bool isPre = false;
190 switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
191 default: return nullptr;
192 case ARMII::IndexModePre:
193 isPre = true;
194 break;
195 case ARMII::IndexModePost:
196 break;
197 }
198
199 // Try splitting an indexed load/store to an un-indexed one plus an add/sub
200 // operation.
201 unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
202 if (MemOpc == 0)
203 return nullptr;
204
205 MachineInstr *UpdateMI = nullptr;
206 MachineInstr *MemMI = nullptr;
207 unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
208 const MCInstrDesc &MCID = MI.getDesc();
209 unsigned NumOps = MCID.getNumOperands();
210 bool isLoad = !MI.mayStore();
211 const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
212 const MachineOperand &Base = MI.getOperand(2);
213 const MachineOperand &Offset = MI.getOperand(NumOps - 3);
214 Register WBReg = WB.getReg();
215 Register BaseReg = Base.getReg();
216 Register OffReg = Offset.getReg();
217 unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
218 ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
219 switch (AddrMode) {
220 default: llvm_unreachable("Unknown indexed op!");
221 case ARMII::AddrMode2: {
222 bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
223 unsigned Amt = ARM_AM::getAM2Offset(OffImm);
224 if (OffReg == 0) {
225 if (ARM_AM::getSOImmVal(Amt) == -1)
226 // Can't encode it in a so_imm operand. This transformation will
227 // add more than 1 instruction. Abandon!
228 return nullptr;
229 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
230 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
231 .addReg(BaseReg)
232 .addImm(Amt)
233 .add(predOps(Pred))
234 .add(condCodeOp());
235 } else if (Amt != 0) {
236 ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
237 unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
238 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
239 get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
240 .addReg(BaseReg)
241 .addReg(OffReg)
242 .addReg(0)
243 .addImm(SOOpc)
244 .add(predOps(Pred))
245 .add(condCodeOp());
246 } else
247 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
248 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
249 .addReg(BaseReg)
250 .addReg(OffReg)
251 .add(predOps(Pred))
252 .add(condCodeOp());
253 break;
254 }
255 case ARMII::AddrMode3 : {
256 bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
257 unsigned Amt = ARM_AM::getAM3Offset(OffImm);
258 if (OffReg == 0)
259 // Immediate is 8 bits. It's guaranteed to fit in a so_imm operand.
260 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
261 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
262 .addReg(BaseReg)
263 .addImm(Amt)
264 .add(predOps(Pred))
265 .add(condCodeOp());
266 else
267 UpdateMI = BuildMI(MF, MI.getDebugLoc(),
268 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
269 .addReg(BaseReg)
270 .addReg(OffReg)
271 .add(predOps(Pred))
272 .add(condCodeOp());
273 break;
274 }
275 }
276
277 std::vector<MachineInstr*> NewMIs;
278 if (isPre) {
279 if (isLoad)
280 MemMI =
281 BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
282 .addReg(WBReg)
283 .addImm(0)
284 .addImm(Pred);
285 else
286 MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
287 .addReg(MI.getOperand(1).getReg())
288 .addReg(WBReg)
289 .addReg(0)
290 .addImm(0)
291 .addImm(Pred);
292 NewMIs.push_back(MemMI);
293 NewMIs.push_back(UpdateMI);
294 } else {
295 if (isLoad)
296 MemMI =
297 BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
298 .addReg(BaseReg)
299 .addImm(0)
300 .addImm(Pred);
301 else
302 MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
303 .addReg(MI.getOperand(1).getReg())
304 .addReg(BaseReg)
305 .addReg(0)
306 .addImm(0)
307 .addImm(Pred);
308 if (WB.isDead())
309 UpdateMI->getOperand(0).setIsDead();
310 NewMIs.push_back(UpdateMI);
311 NewMIs.push_back(MemMI);
312 }
313
314 // Transfer LiveVariables states, kill / dead info.
315 if (LV) {
316 for (const MachineOperand &MO : MI.operands()) {
317 if (MO.isReg() && MO.getReg().isVirtual()) {
318 Register Reg = MO.getReg();
319
320 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
321 if (MO.isDef()) {
322 MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
323 if (MO.isDead())
324 LV->addVirtualRegisterDead(Reg, *NewMI);
325 }
326 if (MO.isUse() && MO.isKill()) {
327 for (unsigned j = 0; j < 2; ++j) {
328 // Look at the two new MI's in reverse order.
329 MachineInstr *NewMI = NewMIs[j];
330 if (!NewMI->readsRegister(Reg, /*TRI=*/nullptr))
331 continue;
332 LV->addVirtualRegisterKilled(Reg, *NewMI);
333 if (VI.removeKill(MI))
334 VI.Kills.push_back(NewMI);
335 break;
336 }
337 }
338 }
339 }
340 }
341
342 MachineBasicBlock &MBB = *MI.getParent();
343 MBB.insert(MI, NewMIs[1]);
344 MBB.insert(MI, NewMIs[0]);
345 return NewMIs[0];
346 }
347
348 // Branch analysis.
349 // Cond vector output format:
350 // 0 elements indicates an unconditional branch
351 // 2 elements indicates a conditional branch; the elements are
352 // the condition to check and the CPSR.
353 // 3 elements indicates a hardware loop end; the elements
354 // are the opcode, the operand value to test, and a dummy
355 // operand used to pad out to 3 operands.
356 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
357 MachineBasicBlock *&TBB,
358 MachineBasicBlock *&FBB,
359 SmallVectorImpl<MachineOperand> &Cond,
360 bool AllowModify) const {
361 TBB = nullptr;
362 FBB = nullptr;
363
364 MachineBasicBlock::instr_iterator I = MBB.instr_end();
365 if (I == MBB.instr_begin())
366 return false; // Empty blocks are easy.
367 --I;
368
369 // Walk backwards from the end of the basic block until the branch is
370 // analyzed or we give up.
371 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
372 // Flag to be raised on unanalyzable instructions. This is useful in cases
373 // where we want to clean up at the end of the basic block before we bail
374 // out.
375 bool CantAnalyze = false;
376
377 // Skip over DEBUG values, predicated nonterminators and speculation
378 // barrier terminators.
379 while (I->isDebugInstr() || !I->isTerminator() ||
380 isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
381 I->getOpcode() == ARM::t2DoLoopStartTP){
382 if (I == MBB.instr_begin())
383 return false;
384 --I;
385 }
386
387 if (isIndirectBranchOpcode(I->getOpcode()) ||
388 isJumpTableBranchOpcode(I->getOpcode())) {
389 // Indirect branches and jump tables can't be analyzed, but we still want
390 // to clean up any instructions at the tail of the basic block.
391 CantAnalyze = true;
392 } else if (isUncondBranchOpcode(I->getOpcode())) {
393 TBB = I->getOperand(0).getMBB();
394 } else if (isCondBranchOpcode(I->getOpcode())) {
395 // Bail out if we encounter multiple conditional branches.
396 if (!Cond.empty())
397 return true;
398
399 assert(!FBB && "FBB should have been null.");
400 FBB = TBB;
401 TBB = I->getOperand(0).getMBB();
402 Cond.push_back(I->getOperand(1));
403 Cond.push_back(I->getOperand(2));
404 } else if (I->isReturn()) {
405 // Returns can't be analyzed, but we should run cleanup.
406 CantAnalyze = true;
407 } else if (I->getOpcode() == ARM::t2LoopEnd &&
408 MBB.getParent()
409 ->getSubtarget<ARMSubtarget>()
410 .enableMachinePipeliner()) {
411 if (!Cond.empty())
412 return true;
413 FBB = TBB;
414 TBB = I->getOperand(1).getMBB();
415 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
416 Cond.push_back(I->getOperand(0));
417 Cond.push_back(MachineOperand::CreateImm(0));
418 } else {
419 // We encountered an unrecognized terminator. Bail out immediately.
420 return true;
421 }
422
423 // Cleanup code - to be run for unpredicated unconditional branches and
424 // returns.
425 if (!isPredicated(*I) &&
426 (isUncondBranchOpcode(I->getOpcode()) ||
427 isIndirectBranchOpcode(I->getOpcode()) ||
428 isJumpTableBranchOpcode(I->getOpcode()) ||
429 I->isReturn())) {
430 // Forget any previous conditional branch information - it no longer applies.
431 Cond.clear();
432 FBB = nullptr;
433
434 // If we can modify the function, delete everything below this
435 // unconditional branch.
436 if (AllowModify) {
437 MachineBasicBlock::iterator DI = std::next(I);
438 while (DI != MBB.instr_end()) {
439 MachineInstr &InstToDelete = *DI;
440 ++DI;
441 // Speculation barriers must not be deleted.
442 if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
443 continue;
444 InstToDelete.eraseFromParent();
445 }
446 }
447 }
448
449 if (CantAnalyze) {
450 // We may not be able to analyze the block, but we could still have
451 // an unconditional branch as the last instruction in the block, which
452 // just branches to layout successor. If this is the case, then just
453 // remove it if we're allowed to make modifications.
454 if (AllowModify && !isPredicated(MBB.back()) &&
455 isUncondBranchOpcode(MBB.back().getOpcode()) &&
456 TBB && MBB.isLayoutSuccessor(TBB))
457 removeBranch(MBB);
458 return true;
459 }
460
461 if (I == MBB.instr_begin())
462 return false;
463
464 --I;
465 }
466
467 // We made it past the terminators without bailing out - we must have
468 // analyzed this branch successfully.
469 return false;
470 }
471
472 unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
473 int *BytesRemoved) const {
474 assert(!BytesRemoved && "code size not handled");
475
476 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
477 if (I == MBB.end())
478 return 0;
479
480 if (!isUncondBranchOpcode(I->getOpcode()) &&
481 !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
482 return 0;
483
484 // Remove the branch.
485 I->eraseFromParent();
486
487 I = MBB.end();
488
489 if (I == MBB.begin()) return 1;
490 --I;
491 if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
492 return 1;
493
494 // Remove the branch.
495 I->eraseFromParent();
496 return 2;
497 }
498
499 unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
500 MachineBasicBlock *TBB,
501 MachineBasicBlock *FBB,
502 ArrayRef<MachineOperand> Cond,
503 const DebugLoc &DL,
504 int *BytesAdded) const {
505 assert(!BytesAdded && "code size not handled");
506 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
507 int BOpc = !AFI->isThumbFunction()
508 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
509 int BccOpc = !AFI->isThumbFunction()
510 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
511 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
512
513 // Shouldn't be a fall through.
514 assert(TBB && "insertBranch must not be told to insert a fallthrough");
515 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
516 "ARM branch conditions have two or three components!");
517
518 // For conditional branches, we use addOperand to preserve CPSR flags.
519
520 if (!FBB) {
521 if (Cond.empty()) { // Unconditional branch?
522 if (isThumb)
523 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
524 else
525 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
526 } else if (Cond.size() == 2) {
527 BuildMI(&MBB, DL, get(BccOpc))
528 .addMBB(TBB)
529 .addImm(Cond[0].getImm())
530 .add(Cond[1]);
531 } else
532 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
533 return 1;
534 }
535
536 // Two-way conditional branch.
537 if (Cond.size() == 2)
538 BuildMI(&MBB, DL, get(BccOpc))
539 .addMBB(TBB)
540 .addImm(Cond[0].getImm())
541 .add(Cond[1]);
542 else if (Cond.size() == 3)
543 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
544 if (isThumb)
545 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
546 else
547 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
548 return 2;
549 }
550
551 bool ARMBaseInstrInfo::
552 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
553 if (Cond.size() == 2) {
554 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
555 Cond[0].setImm(ARMCC::getOppositeCondition(CC));
556 return false;
557 }
558 return true;
559 }
560
561 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
562 if (MI.isBundle()) {
563 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
564 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
565 while (++I != E && I->isInsideBundle()) {
566 int PIdx = I->findFirstPredOperandIdx();
567 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
568 return true;
569 }
570 return false;
571 }
572
573 int PIdx = MI.findFirstPredOperandIdx();
574 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
575 }
576
577 std::string ARMBaseInstrInfo::createMIROperandComment(
578 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
579 const TargetRegisterInfo *TRI) const {
580
581 // First, let's see if there is a generic comment for this operand
582 std::string GenericComment =
583 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
584 if (!GenericComment.empty())
585 return GenericComment;
586
587 // If not, check if we have an immediate operand.
588 if (!Op.isImm())
589 return std::string();
590
591 // And print its corresponding condition code if the immediate is a
592 // predicate.
593 int FirstPredOp = MI.findFirstPredOperandIdx();
594 if (FirstPredOp != (int) OpIdx)
595 return std::string();
596
597 std::string CC = "CC::";
598 CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
599 return CC;
600 }
601
602 bool ARMBaseInstrInfo::PredicateInstruction(
603 MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
604 unsigned Opc = MI.getOpcode();
605 if (isUncondBranchOpcode(Opc)) {
606 MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
607 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
608 .addImm(Pred[0].getImm())
609 .addReg(Pred[1].getReg());
610 return true;
611 }
612
613 int PIdx = MI.findFirstPredOperandIdx();
614 if (PIdx != -1) {
615 MachineOperand &PMO = MI.getOperand(PIdx);
616 PMO.setImm(Pred[0].getImm());
617 MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
618
619 // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
620 // IT block. This affects how they are printed.
621 const MCInstrDesc &MCID = MI.getDesc();
622 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
623 assert(MCID.operands()[1].isOptionalDef() &&
624 "CPSR def isn't expected operand");
625 assert((MI.getOperand(1).isDead() ||
626 MI.getOperand(1).getReg() != ARM::CPSR) &&
627 "if conversion tried to stop defining used CPSR");
628 MI.getOperand(1).setReg(ARM::NoRegister);
629 }
630
631 return true;
632 }
633 return false;
634 }
635
636 bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
637 ArrayRef<MachineOperand> Pred2) const {
638 if (Pred1.size() > 2 || Pred2.size() > 2)
639 return false;
640
641 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
642 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
643 if (CC1 == CC2)
644 return true;
645
646 switch (CC1) {
647 default:
648 return false;
649 case ARMCC::AL:
650 return true;
651 case ARMCC::HS:
652 return CC2 == ARMCC::HI;
653 case ARMCC::LS:
654 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
655 case ARMCC::GE:
656 return CC2 == ARMCC::GT;
657 case ARMCC::LE:
658 return CC2 == ARMCC::LT;
659 }
660 }
661
662 bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
663 std::vector<MachineOperand> &Pred,
664 bool SkipDead) const {
665 bool Found = false;
666 for (const MachineOperand &MO : MI.operands()) {
667 bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
668 bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
669 if (ClobbersCPSR || IsCPSR) {
670
671 // Filter out T1 instructions that have a dead CPSR,
672 // allowing IT blocks to be generated containing T1 instructions
673 const MCInstrDesc &MCID = MI.getDesc();
674 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
675 SkipDead)
676 continue;
677
678 Pred.push_back(MO);
679 Found = true;
680 }
681 }
682
683 return Found;
684 }
685
686 bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
687 for (const auto &MO : MI.operands())
688 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
689 return true;
690 return false;
691 }
692
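// Whether MI may legally be placed inside an IT block. Thumb-1 flag-setting
// arithmetic is only eligible when its CPSR def is dead.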
693 static bool isEligibleForITBlock(const MachineInstr *MI) {
694 switch (MI->getOpcode()) {
695 default: return true;
696 case ARM::tADC: // ADC (register) T1
697 case ARM::tADDi3: // ADD (immediate) T1
698 case ARM::tADDi8: // ADD (immediate) T2
699 case ARM::tADDrr: // ADD (register) T1
700 case ARM::tAND: // AND (register) T1
701 case ARM::tASRri: // ASR (immediate) T1
702 case ARM::tASRrr: // ASR (register) T1
703 case ARM::tBIC: // BIC (register) T1
704 case ARM::tEOR: // EOR (register) T1
705 case ARM::tLSLri: // LSL (immediate) T1
706 case ARM::tLSLrr: // LSL (register) T1
707 case ARM::tLSRri: // LSR (immediate) T1
708 case ARM::tLSRrr: // LSR (register) T1
709 case ARM::tMUL: // MUL T1
710 case ARM::tMVN: // MVN (register) T1
711 case ARM::tORR: // ORR (register) T1
712 case ARM::tROR: // ROR (register) T1
713 case ARM::tRSB: // RSB (immediate) T1
714 case ARM::tSBC: // SBC (register) T1
715 case ARM::tSUBi3: // SUB (immediate) T1
716 case ARM::tSUBi8: // SUB (immediate) T2
717 case ARM::tSUBrr: // SUB (register) T1
718 return !ARMBaseInstrInfo::isCPSRDefined(*MI);
719 }
720 }
721
722 /// isPredicable - Return true if the specified instruction can be predicated.
723 /// By default, this returns true for every instruction with a
724 /// PredicateOperand.
725 bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
726 if (!MI.isPredicable())
727 return false;
728
729 if (MI.isBundle())
730 return false;
731
732 if (!isEligibleForITBlock(&MI))
733 return false;
734
735 const MachineFunction *MF = MI.getParent()->getParent();
736 const ARMFunctionInfo *AFI =
737 MF->getInfo<ARMFunctionInfo>();
738
739 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
740 // In their ARM encoding, they can't be encoded in a conditional form.
741 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
742 return false;
743
744 // Make indirect control flow changes unpredicable when SLS mitigation is
745 // enabled.
746 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
747 if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
748 return false;
749 if (ST.hardenSlsBlr() && isIndirectCall(MI))
750 return false;
751
752 if (AFI->isThumb2Function()) {
753 if (getSubtarget().restrictIT())
754 return isV8EligibleForIT(&MI);
755 }
756
757 return true;
758 }
759
760 namespace llvm {
761
762 template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
763 for (const MachineOperand &MO : MI->operands()) {
764 if (!MO.isReg() || MO.isUndef() || MO.isUse())
765 continue;
766 if (MO.getReg() != ARM::CPSR)
767 continue;
768 if (!MO.isDead())
769 return false;
770 }
771 // all definitions of CPSR are dead
772 return true;
773 }
774
775 } // end namespace llvm
776
777 /// GetInstSize - Return the size of the specified MachineInstr.
778 ///
779 unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
780 const MachineBasicBlock &MBB = *MI.getParent();
781 const MachineFunction *MF = MBB.getParent();
782 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
783
784 const MCInstrDesc &MCID = MI.getDesc();
785
786 switch (MI.getOpcode()) {
787 default:
788 // Return the size specified in the .td file. If there's none, return 0, as we
789 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
790 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
791 // contrast to AArch64 instructions which have a default size of 4 bytes for
792 // example.
793 return MCID.getSize();
794 case TargetOpcode::BUNDLE:
795 return getInstBundleLength(MI);
796 case ARM::CONSTPOOL_ENTRY:
797 case ARM::JUMPTABLE_INSTS:
798 case ARM::JUMPTABLE_ADDRS:
799 case ARM::JUMPTABLE_TBB:
800 case ARM::JUMPTABLE_TBH:
801 // If this machine instr is a constant pool entry, its size is recorded as
802 // operand #2.
803 return MI.getOperand(2).getImm();
804 case ARM::SPACE:
805 return MI.getOperand(1).getImm();
806 case ARM::INLINEASM:
807 case ARM::INLINEASM_BR: {
808 // If this machine instr is an inline asm, measure it.
809 unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
810 if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
811 Size = alignTo(Size, 4);
812 return Size;
813 }
814 }
815 }
816
817 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
818 unsigned Size = 0;
819 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
820 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
821 while (++I != E && I->isInsideBundle()) {
822 assert(!I->isBundle() && "No nested bundle!");
823 Size += getInstSizeInBytes(*I);
824 }
825 return Size;
826 }
827
828 void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
829 MachineBasicBlock::iterator I,
830 unsigned DestReg, bool KillSrc,
831 const ARMSubtarget &Subtarget) const {
832 unsigned Opc = Subtarget.isThumb()
833 ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
834 : ARM::MRS;
835
836 MachineInstrBuilder MIB =
837 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
838
839 // There is only 1 A/R class MRS instruction, and it always refers to
840 // APSR. However, there are lots of other possibilities on M-class cores.
841 if (Subtarget.isMClass())
842 MIB.addImm(0x800);
843
844 MIB.add(predOps(ARMCC::AL))
845 .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
846 }
847
848 void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
849 MachineBasicBlock::iterator I,
850 unsigned SrcReg, bool KillSrc,
851 const ARMSubtarget &Subtarget) const {
852 unsigned Opc = Subtarget.isThumb()
853 ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
854 : ARM::MSR;
855
856 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
857
858 if (Subtarget.isMClass())
859 MIB.addImm(0x800);
860 else
861 MIB.addImm(8);
862
863 MIB.addReg(SrcReg, getKillRegState(KillSrc))
864 .add(predOps(ARMCC::AL))
865 .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
866 }
867
868 void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
869 MIB.addImm(ARMVCC::None);
870 MIB.addReg(0);
871 MIB.addReg(0); // tp_reg
872 }
873
874 void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
875 Register DestReg) {
876 addUnpredicatedMveVpredNOp(MIB);
877 MIB.addReg(DestReg, RegState::Undef);
878 }
879
880 void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
881 MIB.addImm(Cond);
882 MIB.addReg(ARM::VPR, RegState::Implicit);
883 MIB.addReg(0); // tp_reg
884 }
885
886 void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
887 unsigned Cond, unsigned Inactive) {
888 addPredicatedMveVpredNOp(MIB, Cond);
889 MIB.addReg(Inactive);
890 }
891
892 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
893 MachineBasicBlock::iterator I,
894 const DebugLoc &DL, MCRegister DestReg,
895 MCRegister SrcReg, bool KillSrc) const {
896 bool GPRDest = ARM::GPRRegClass.contains(DestReg);
897 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
898
899 if (GPRDest && GPRSrc) {
900 BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
901 .addReg(SrcReg, getKillRegState(KillSrc))
902 .add(predOps(ARMCC::AL))
903 .add(condCodeOp());
904 return;
905 }
906
907 bool SPRDest = ARM::SPRRegClass.contains(DestReg);
908 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
909
910 unsigned Opc = 0;
911 if (SPRDest && SPRSrc)
912 Opc = ARM::VMOVS;
913 else if (GPRDest && SPRSrc)
914 Opc = ARM::VMOVRS;
915 else if (SPRDest && GPRSrc)
916 Opc = ARM::VMOVSR;
917 else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
918 Opc = ARM::VMOVD;
919 else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
920 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
921
922 if (Opc) {
923 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
924 MIB.addReg(SrcReg, getKillRegState(KillSrc));
925 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
926 MIB.addReg(SrcReg, getKillRegState(KillSrc));
927 if (Opc == ARM::MVE_VORR)
928 addUnpredicatedMveVpredROp(MIB, DestReg);
929 else if (Opc != ARM::MQPRCopy)
930 MIB.add(predOps(ARMCC::AL));
931 return;
932 }
933
934 // Handle register classes that require multiple instructions.
935 unsigned BeginIdx = 0;
936 unsigned SubRegs = 0;
937 int Spacing = 1;
938
939 // Use VORRq when possible.
940 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
941 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
942 BeginIdx = ARM::qsub_0;
943 SubRegs = 2;
944 } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
945 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
946 BeginIdx = ARM::qsub_0;
947 SubRegs = 4;
948 // Fall back to VMOVD.
949 } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
950 Opc = ARM::VMOVD;
951 BeginIdx = ARM::dsub_0;
952 SubRegs = 2;
953 } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
954 Opc = ARM::VMOVD;
955 BeginIdx = ARM::dsub_0;
956 SubRegs = 3;
957 } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
958 Opc = ARM::VMOVD;
959 BeginIdx = ARM::dsub_0;
960 SubRegs = 4;
961 } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
962 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
963 BeginIdx = ARM::gsub_0;
964 SubRegs = 2;
965 } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
966 Opc = ARM::VMOVD;
967 BeginIdx = ARM::dsub_0;
968 SubRegs = 2;
969 Spacing = 2;
970 } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
971 Opc = ARM::VMOVD;
972 BeginIdx = ARM::dsub_0;
973 SubRegs = 3;
974 Spacing = 2;
975 } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
976 Opc = ARM::VMOVD;
977 BeginIdx = ARM::dsub_0;
978 SubRegs = 4;
979 Spacing = 2;
980 } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
981 !Subtarget.hasFP64()) {
982 Opc = ARM::VMOVS;
983 BeginIdx = ARM::ssub_0;
984 SubRegs = 2;
985 } else if (SrcReg == ARM::CPSR) {
986 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
987 return;
988 } else if (DestReg == ARM::CPSR) {
989 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
990 return;
991 } else if (DestReg == ARM::VPR) {
992 assert(ARM::GPRRegClass.contains(SrcReg));
993 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
994 .addReg(SrcReg, getKillRegState(KillSrc))
995 .add(predOps(ARMCC::AL));
996 return;
997 } else if (SrcReg == ARM::VPR) {
998 assert(ARM::GPRRegClass.contains(DestReg));
999 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
1000 .addReg(SrcReg, getKillRegState(KillSrc))
1001 .add(predOps(ARMCC::AL));
1002 return;
1003 } else if (DestReg == ARM::FPSCR_NZCV) {
1004 assert(ARM::GPRRegClass.contains(SrcReg));
1005 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
1006 .addReg(SrcReg, getKillRegState(KillSrc))
1007 .add(predOps(ARMCC::AL));
1008 return;
1009 } else if (SrcReg == ARM::FPSCR_NZCV) {
1010 assert(ARM::GPRRegClass.contains(DestReg));
1011 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
1012 .addReg(SrcReg, getKillRegState(KillSrc))
1013 .add(predOps(ARMCC::AL));
1014 return;
1015 }
1016
1017 assert(Opc && "Impossible reg-to-reg copy");
1018
1019 const TargetRegisterInfo *TRI = &getRegisterInfo();
1020 MachineInstrBuilder Mov;
1021
1022 // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
1023 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
1024 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
1025 Spacing = -Spacing;
1026 }
1027 #ifndef NDEBUG
1028 SmallSet<unsigned, 4> DstRegs;
1029 #endif
1030 for (unsigned i = 0; i != SubRegs; ++i) {
1031 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
1032 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
1033 assert(Dst && Src && "Bad sub-register");
1034 #ifndef NDEBUG
1035 assert(!DstRegs.count(Src) && "destructive vector copy");
1036 DstRegs.insert(Dst);
1037 #endif
1038 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
1039 // VORR (NEON or MVE) takes two source operands.
1040 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
1041 Mov.addReg(Src);
1042 }
1043 // MVE VORR takes predicate operands in place of an ordinary condition.
1044 if (Opc == ARM::MVE_VORR)
1045 addUnpredicatedMveVpredROp(Mov, Dst);
1046 else
1047 Mov = Mov.add(predOps(ARMCC::AL));
1048 // MOVr can set CC.
1049 if (Opc == ARM::MOVr)
1050 Mov = Mov.add(condCodeOp());
1051 }
1052 // Add implicit super-register defs and kills to the last instruction.
1053 Mov->addRegisterDefined(DestReg, TRI);
1054 if (KillSrc)
1055 Mov->addRegisterKilled(SrcReg, TRI);
1056 }
1057
1058 std::optional<DestSourcePair>
1059 ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1060 // VMOVRRD is also a copy instruction but it requires
1061 // a special way of handling. It is a more complex copy version
1062 // and therefore we are not considering it. For recognition
1063 // of such instructions, the isExtractSubregLike MI interface function
1064 // could be used.
1065 // VORRq is considered as a move only if two inputs are
1066 // the same register.
1067 if (!MI.isMoveReg() ||
1068 (MI.getOpcode() == ARM::VORRq &&
1069 MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
1070 return std::nullopt;
1071 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1072 }
1073
1074 std::optional<ParamLoadedValue>
1075 ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
1076 Register Reg) const {
1077 if (auto DstSrcPair = isCopyInstrImpl(MI)) {
1078 Register DstReg = DstSrcPair->Destination->getReg();
1079
1080 // TODO: We don't handle cases where the forwarding reg is narrower/wider
1081 // than the copy registers. Consider for example:
1082 //
1083 // s16 = VMOVS s0
1084 // s17 = VMOVS s1
1085 // call @callee(d0)
1086 //
1087 // We'd like to describe the call site value of d0 as d8, but this requires
1088 // gathering and merging the descriptions for the two VMOVS instructions.
1089 //
1090 // We also don't handle the reverse situation, where the forwarding reg is
1091 // narrower than the copy destination:
1092 //
1093 // d8 = VMOVD d0
1094 // call @callee(s1)
1095 //
1096 // We need to produce a fragment description (the call site value of s1 is
1097 // /not/ just d8).
1098 if (DstReg != Reg)
1099 return std::nullopt;
1100 }
1101 return TargetInstrInfo::describeLoadedValue(MI, Reg);
1102 }
1103
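// Helper for building register-list operands: resolves SubIdx immediately for
// physical registers, and attaches it as a sub-register index for virtuals.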
1104 const MachineInstrBuilder &
1105 ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
1106 unsigned SubIdx, unsigned State,
1107 const TargetRegisterInfo *TRI) const {
1108 if (!SubIdx)
1109 return MIB.addReg(Reg, State);
1110
1111 if (Register::isPhysicalRegister(Reg))
1112 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1113 return MIB.addReg(Reg, State, SubIdx);
1114 }
1115
1116 void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
1117 MachineBasicBlock::iterator I,
1118 Register SrcReg, bool isKill, int FI,
1119 const TargetRegisterClass *RC,
1120 const TargetRegisterInfo *TRI,
1121 Register VReg) const {
1122 MachineFunction &MF = *MBB.getParent();
1123 MachineFrameInfo &MFI = MF.getFrameInfo();
1124 Align Alignment = MFI.getObjectAlign(FI);
1125
1126 MachineMemOperand *MMO = MF.getMachineMemOperand(
1127 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
1128 MFI.getObjectSize(FI), Alignment);
1129
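// Pick a store opcode based on the spill size (in bytes) of the register
// class being saved.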
1130 switch (TRI->getSpillSize(*RC)) {
1131 case 2:
1132 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1133 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
1134 .addReg(SrcReg, getKillRegState(isKill))
1135 .addFrameIndex(FI)
1136 .addImm(0)
1137 .addMemOperand(MMO)
1138 .add(predOps(ARMCC::AL));
1139 } else
1140 llvm_unreachable("Unknown reg class!");
1141 break;
1142 case 4:
1143 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1144 BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
1145 .addReg(SrcReg, getKillRegState(isKill))
1146 .addFrameIndex(FI)
1147 .addImm(0)
1148 .addMemOperand(MMO)
1149 .add(predOps(ARMCC::AL));
1150 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1151 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
1152 .addReg(SrcReg, getKillRegState(isKill))
1153 .addFrameIndex(FI)
1154 .addImm(0)
1155 .addMemOperand(MMO)
1156 .add(predOps(ARMCC::AL));
1157 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1158 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
1159 .addReg(SrcReg, getKillRegState(isKill))
1160 .addFrameIndex(FI)
1161 .addImm(0)
1162 .addMemOperand(MMO)
1163 .add(predOps(ARMCC::AL));
1164 } else
1165 llvm_unreachable("Unknown reg class!");
1166 break;
1167 case 8:
1168 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1169 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1170 .addReg(SrcReg, getKillRegState(isKill))
1171 .addFrameIndex(FI)
1172 .addImm(0)
1173 .addMemOperand(MMO)
1174 .add(predOps(ARMCC::AL));
1175 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1176 if (Subtarget.hasV5TEOps()) {
1177 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1178 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1179 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1180 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1181 .add(predOps(ARMCC::AL));
1182 } else {
1183 // Fallback to STM instruction, which has existed since the dawn of
1184 // time.
1185 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1186 .addFrameIndex(FI)
1187 .addMemOperand(MMO)
1188 .add(predOps(ARMCC::AL));
1189 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1190 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1191 }
1192 } else
1193 llvm_unreachable("Unknown reg class!");
1194 break;
1195 case 16:
1196 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1197 // Use aligned spills if the stack can be realigned.
1198 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1199 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1200 .addFrameIndex(FI)
1201 .addImm(16)
1202 .addReg(SrcReg, getKillRegState(isKill))
1203 .addMemOperand(MMO)
1204 .add(predOps(ARMCC::AL));
1205 } else {
1206 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1207 .addReg(SrcReg, getKillRegState(isKill))
1208 .addFrameIndex(FI)
1209 .addMemOperand(MMO)
1210 .add(predOps(ARMCC::AL));
1211 }
1212 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1213 Subtarget.hasMVEIntegerOps()) {
1214 auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1215 MIB.addReg(SrcReg, getKillRegState(isKill))
1216 .addFrameIndex(FI)
1217 .addImm(0)
1218 .addMemOperand(MMO);
1219 addUnpredicatedMveVpredNOp(MIB);
1220 } else
1221 llvm_unreachable("Unknown reg class!");
1222 break;
1223 case 24:
1224 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1225 // Use aligned spills if the stack can be realigned.
1226 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1227 Subtarget.hasNEON()) {
1228 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1229 .addFrameIndex(FI)
1230 .addImm(16)
1231 .addReg(SrcReg, getKillRegState(isKill))
1232 .addMemOperand(MMO)
1233 .add(predOps(ARMCC::AL));
1234 } else {
1235 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1236 get(ARM::VSTMDIA))
1237 .addFrameIndex(FI)
1238 .add(predOps(ARMCC::AL))
1239 .addMemOperand(MMO);
1240 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1241 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1242 AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1243 }
1244 } else
1245 llvm_unreachable("Unknown reg class!");
1246 break;
1247 case 32:
1248 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1249 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1250 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1251 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1252 Subtarget.hasNEON()) {
1253 // FIXME: It's possible to only store part of the QQ register if the
1254 // spilled def has a sub-register index.
1255 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1256 .addFrameIndex(FI)
1257 .addImm(16)
1258 .addReg(SrcReg, getKillRegState(isKill))
1259 .addMemOperand(MMO)
1260 .add(predOps(ARMCC::AL));
1261 } else if (Subtarget.hasMVEIntegerOps()) {
1262 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1263 .addReg(SrcReg, getKillRegState(isKill))
1264 .addFrameIndex(FI)
1265 .addMemOperand(MMO);
1266 } else {
1267 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1268 get(ARM::VSTMDIA))
1269 .addFrameIndex(FI)
1270 .add(predOps(ARMCC::AL))
1271 .addMemOperand(MMO);
1272 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1273 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1274 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1275 AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1276 }
1277 } else
1278 llvm_unreachable("Unknown reg class!");
1279 break;
1280 case 64:
1281 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1282 Subtarget.hasMVEIntegerOps()) {
1283 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1284 .addReg(SrcReg, getKillRegState(isKill))
1285 .addFrameIndex(FI)
1286 .addMemOperand(MMO);
1287 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1288 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1289 .addFrameIndex(FI)
1290 .add(predOps(ARMCC::AL))
1291 .addMemOperand(MMO);
1292 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1293 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1294 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1295 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1296 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1297 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1298 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1299 AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1300 } else
1301 llvm_unreachable("Unknown reg class!");
1302 break;
1303 default:
1304 llvm_unreachable("Unknown reg class!");
1305 }
1306 }
1307
1308 Register ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1309 int &FrameIndex) const {
1310 switch (MI.getOpcode()) {
1311 default: break;
1312 case ARM::STRrs:
1313 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1314 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1315 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1316 MI.getOperand(3).getImm() == 0) {
1317 FrameIndex = MI.getOperand(1).getIndex();
1318 return MI.getOperand(0).getReg();
1319 }
1320 break;
1321 case ARM::STRi12:
1322 case ARM::t2STRi12:
1323 case ARM::tSTRspi:
1324 case ARM::VSTRD:
1325 case ARM::VSTRS:
1326 case ARM::VSTR_P0_off:
1327 case ARM::MVE_VSTRWU32:
1328 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1329 MI.getOperand(2).getImm() == 0) {
1330 FrameIndex = MI.getOperand(1).getIndex();
1331 return MI.getOperand(0).getReg();
1332 }
1333 break;
1334 case ARM::VST1q64:
1335 case ARM::VST1d64TPseudo:
1336 case ARM::VST1d64QPseudo:
1337 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1338 FrameIndex = MI.getOperand(0).getIndex();
1339 return MI.getOperand(2).getReg();
1340 }
1341 break;
1342 case ARM::VSTMQIA:
1343 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1344 FrameIndex = MI.getOperand(1).getIndex();
1345 return MI.getOperand(0).getReg();
1346 }
1347 break;
1348 case ARM::MQQPRStore:
1349 case ARM::MQQQQPRStore:
1350 if (MI.getOperand(1).isFI()) {
1351 FrameIndex = MI.getOperand(1).getIndex();
1352 return MI.getOperand(0).getReg();
1353 }
1354 break;
1355 }
1356
1357 return 0;
1358 }
1359
1360 Register ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1361 int &FrameIndex) const {
1362 SmallVector<const MachineMemOperand *, 1> Accesses;
1363 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1364 Accesses.size() == 1) {
1365 FrameIndex =
1366 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1367 ->getFrameIndex();
1368 return true;
1369 }
1370 return false;
1371 }
1372
1373 void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
1374 MachineBasicBlock::iterator I,
1375 Register DestReg, int FI,
1376 const TargetRegisterClass *RC,
1377 const TargetRegisterInfo *TRI,
1378 Register VReg) const {
1379 DebugLoc DL;
1380 if (I != MBB.end()) DL = I->getDebugLoc();
1381 MachineFunction &MF = *MBB.getParent();
1382 MachineFrameInfo &MFI = MF.getFrameInfo();
1383 const Align Alignment = MFI.getObjectAlign(FI);
1384 MachineMemOperand *MMO = MF.getMachineMemOperand(
1385 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1386 MFI.getObjectSize(FI), Alignment);
1387
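// Mirror of storeRegToStackSlot: pick a load opcode based on the spill size
// (in bytes) of the register class being restored.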
1388 switch (TRI->getSpillSize(*RC)) {
1389 case 2:
1390 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1391 BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1392 .addFrameIndex(FI)
1393 .addImm(0)
1394 .addMemOperand(MMO)
1395 .add(predOps(ARMCC::AL));
1396 } else
1397 llvm_unreachable("Unknown reg class!");
1398 break;
1399 case 4:
1400 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1401 BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1402 .addFrameIndex(FI)
1403 .addImm(0)
1404 .addMemOperand(MMO)
1405 .add(predOps(ARMCC::AL));
1406 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1407 BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1408 .addFrameIndex(FI)
1409 .addImm(0)
1410 .addMemOperand(MMO)
1411 .add(predOps(ARMCC::AL));
1412 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1413 BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1414 .addFrameIndex(FI)
1415 .addImm(0)
1416 .addMemOperand(MMO)
1417 .add(predOps(ARMCC::AL));
1418 } else
1419 llvm_unreachable("Unknown reg class!");
1420 break;
1421 case 8:
1422 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1423 BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1424 .addFrameIndex(FI)
1425 .addImm(0)
1426 .addMemOperand(MMO)
1427 .add(predOps(ARMCC::AL));
1428 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1429 MachineInstrBuilder MIB;
1430
1431 if (Subtarget.hasV5TEOps()) {
1432 MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1433 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1434 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1435 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1436 .add(predOps(ARMCC::AL));
1437 } else {
1438 // Fallback to LDM instruction, which has existed since the dawn of
1439 // time.
1440 MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1441 .addFrameIndex(FI)
1442 .addMemOperand(MMO)
1443 .add(predOps(ARMCC::AL));
1444 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1445 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1446 }
1447
1448 if (DestReg.isPhysical())
1449 MIB.addReg(DestReg, RegState::ImplicitDefine);
1450 } else
1451 llvm_unreachable("Unknown reg class!");
1452 break;
1453 case 16:
1454 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1455 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1456 BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1457 .addFrameIndex(FI)
1458 .addImm(16)
1459 .addMemOperand(MMO)
1460 .add(predOps(ARMCC::AL));
1461 } else {
1462 BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1463 .addFrameIndex(FI)
1464 .addMemOperand(MMO)
1465 .add(predOps(ARMCC::AL));
1466 }
1467 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1468 Subtarget.hasMVEIntegerOps()) {
1469 auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1470 MIB.addFrameIndex(FI)
1471 .addImm(0)
1472 .addMemOperand(MMO);
1473 addUnpredicatedMveVpredNOp(MIB);
1474 } else
1475 llvm_unreachable("Unknown reg class!");
1476 break;
1477 case 24:
1478 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1479 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1480 Subtarget.hasNEON()) {
1481 BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1482 .addFrameIndex(FI)
1483 .addImm(16)
1484 .addMemOperand(MMO)
1485 .add(predOps(ARMCC::AL));
1486 } else {
1487 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1488 .addFrameIndex(FI)
1489 .addMemOperand(MMO)
1490 .add(predOps(ARMCC::AL));
1491 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1492 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1493 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1494 if (DestReg.isPhysical())
1495 MIB.addReg(DestReg, RegState::ImplicitDefine);
1496 }
1497 } else
1498 llvm_unreachable("Unknown reg class!");
1499 break;
1500 case 32:
1501 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1502 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1503 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1504 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1505 Subtarget.hasNEON()) {
1506 BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1507 .addFrameIndex(FI)
1508 .addImm(16)
1509 .addMemOperand(MMO)
1510 .add(predOps(ARMCC::AL));
1511 } else if (Subtarget.hasMVEIntegerOps()) {
1512 BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1513 .addFrameIndex(FI)
1514 .addMemOperand(MMO);
1515 } else {
1516 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1517 .addFrameIndex(FI)
1518 .add(predOps(ARMCC::AL))
1519 .addMemOperand(MMO);
1520 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1521 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1522 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1523 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1524 if (DestReg.isPhysical())
1525 MIB.addReg(DestReg, RegState::ImplicitDefine);
1526 }
1527 } else
1528 llvm_unreachable("Unknown reg class!");
1529 break;
1530 case 64:
1531 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1532 Subtarget.hasMVEIntegerOps()) {
1533 BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1534 .addFrameIndex(FI)
1535 .addMemOperand(MMO);
1536 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1537 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1538 .addFrameIndex(FI)
1539 .add(predOps(ARMCC::AL))
1540 .addMemOperand(MMO);
1541 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1542 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1543 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1544 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1545 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1546 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1547 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1548 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1549 if (DestReg.isPhysical())
1550 MIB.addReg(DestReg, RegState::ImplicitDefine);
1551 } else
1552 llvm_unreachable("Unknown reg class!");
1553 break;
1554 default:
1555 llvm_unreachable("Unknown regclass!");
1556 }
1557 }
1558
1559 Register ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1560 int &FrameIndex) const {
1561 switch (MI.getOpcode()) {
1562 default: break;
1563 case ARM::LDRrs:
1564 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1565 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1566 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1567 MI.getOperand(3).getImm() == 0) {
1568 FrameIndex = MI.getOperand(1).getIndex();
1569 return MI.getOperand(0).getReg();
1570 }
1571 break;
1572 case ARM::LDRi12:
1573 case ARM::t2LDRi12:
1574 case ARM::tLDRspi:
1575 case ARM::VLDRD:
1576 case ARM::VLDRS:
1577 case ARM::VLDR_P0_off:
1578 case ARM::MVE_VLDRWU32:
1579 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1580 MI.getOperand(2).getImm() == 0) {
1581 FrameIndex = MI.getOperand(1).getIndex();
1582 return MI.getOperand(0).getReg();
1583 }
1584 break;
1585 case ARM::VLD1q64:
1586 case ARM::VLD1d8TPseudo:
1587 case ARM::VLD1d16TPseudo:
1588 case ARM::VLD1d32TPseudo:
1589 case ARM::VLD1d64TPseudo:
1590 case ARM::VLD1d8QPseudo:
1591 case ARM::VLD1d16QPseudo:
1592 case ARM::VLD1d32QPseudo:
1593 case ARM::VLD1d64QPseudo:
1594 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1595 FrameIndex = MI.getOperand(1).getIndex();
1596 return MI.getOperand(0).getReg();
1597 }
1598 break;
1599 case ARM::VLDMQIA:
1600 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1601 FrameIndex = MI.getOperand(1).getIndex();
1602 return MI.getOperand(0).getReg();
1603 }
1604 break;
1605 case ARM::MQQPRLoad:
1606 case ARM::MQQQQPRLoad:
1607 if (MI.getOperand(1).isFI()) {
1608 FrameIndex = MI.getOperand(1).getIndex();
1609 return MI.getOperand(0).getReg();
1610 }
1611 break;
1612 }
1613
1614 return 0;
1615 }
1616
1617 Register ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1618 int &FrameIndex) const {
1619 SmallVector<const MachineMemOperand *, 1> Accesses;
1620 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1621 Accesses.size() == 1) {
1622 FrameIndex =
1623 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1624 ->getFrameIndex();
1625 return true;
1626 }
1627 return false;
1628 }
1629
1630 /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1631 /// depending on whether the result is used.
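/// For example (roughly): a Thumb2 MEMCPY whose pointer write-backs are still
/// live becomes a t2LDMIA_UPD from the source into the scratch registers
/// (sorted by encoding), followed by a t2STMIA_UPD of those registers to the
/// destination; if a write-back result is dead, the plain t2LDMIA/t2STMIA
/// form is used instead.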
1632 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1633 bool isThumb1 = Subtarget.isThumb1Only();
1634 bool isThumb2 = Subtarget.isThumb2();
1635 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1636
1637 DebugLoc dl = MI->getDebugLoc();
1638 MachineBasicBlock *BB = MI->getParent();
1639
1640 MachineInstrBuilder LDM, STM;
1641 if (isThumb1 || !MI->getOperand(1).isDead()) {
1642 MachineOperand LDWb(MI->getOperand(1));
1643 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1644 : isThumb1 ? ARM::tLDMIA_UPD
1645 : ARM::LDMIA_UPD))
1646 .add(LDWb);
1647 } else {
1648 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1649 }
1650
1651 if (isThumb1 || !MI->getOperand(0).isDead()) {
1652 MachineOperand STWb(MI->getOperand(0));
1653 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1654 : isThumb1 ? ARM::tSTMIA_UPD
1655 : ARM::STMIA_UPD))
1656 .add(STWb);
1657 } else {
1658 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1659 }
1660
1661 MachineOperand LDBase(MI->getOperand(3));
1662 LDM.add(LDBase).add(predOps(ARMCC::AL));
1663
1664 MachineOperand STBase(MI->getOperand(2));
1665 STM.add(STBase).add(predOps(ARMCC::AL));
1666
1667 // Sort the scratch registers into ascending order.
1668 const TargetRegisterInfo &TRI = getRegisterInfo();
1669 SmallVector<unsigned, 6> ScratchRegs;
1670 for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
1671 ScratchRegs.push_back(MO.getReg());
1672 llvm::sort(ScratchRegs,
1673 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1674 return TRI.getEncodingValue(Reg1) <
1675 TRI.getEncodingValue(Reg2);
1676 });
1677
1678 for (const auto &Reg : ScratchRegs) {
1679 LDM.addReg(Reg, RegState::Define);
1680 STM.addReg(Reg, RegState::Kill);
1681 }
1682
1683 BB->erase(MI);
1684 }
1685
1686 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1687 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1688 expandLoadStackGuard(MI);
1689 MI.getParent()->erase(MI);
1690 return true;
1691 }
1692
1693 if (MI.getOpcode() == ARM::MEMCPY) {
1694 expandMEMCPY(MI);
1695 return true;
1696 }
1697
1698 // This hook gets to expand COPY instructions before they become
1699 // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1700 // widened to VMOVD. We prefer the VMOVD when possible because it may be
1701 // changed into a VORR that can go down the NEON pipeline.
1702 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1703 return false;
1704
1705 // Look for a copy between even S-registers. That is where we keep floats
1706 // when using NEON v2f32 instructions for f32 arithmetic.
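// For example (illustrative): 's0 = COPY s2' can be rewritten as
// 'd0 = VMOVD undef d1, implicit s2', reading the well-defined ssub_0 lane of
// d1 through the implicit use of s2.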
1707 Register DstRegS = MI.getOperand(0).getReg();
1708 Register SrcRegS = MI.getOperand(1).getReg();
1709 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1710 return false;
1711
1712 const TargetRegisterInfo *TRI = &getRegisterInfo();
1713 unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1714 &ARM::DPRRegClass);
1715 unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1716 &ARM::DPRRegClass);
1717 if (!DstRegD || !SrcRegD)
1718 return false;
1719
1720 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1721 // legal if the COPY already defines the full DstRegD, and it isn't a
1722 // sub-register insertion.
1723 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1724 return false;
1725
1726 // A dead copy shouldn't show up here, but reject it just in case.
1727 if (MI.getOperand(0).isDead())
1728 return false;
1729
1730 // All clear, widen the COPY.
1731 LLVM_DEBUG(dbgs() << "widening: " << MI);
1732 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1733
1734 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1735 // or some other super-register.
1736 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
1737 if (ImpDefIdx != -1)
1738 MI.removeOperand(ImpDefIdx);
1739
1740 // Change the opcode and operands.
1741 MI.setDesc(get(ARM::VMOVD));
1742 MI.getOperand(0).setReg(DstRegD);
1743 MI.getOperand(1).setReg(SrcRegD);
1744 MIB.add(predOps(ARMCC::AL));
1745
1746 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1747 // register scavenger and machine verifier, so we need to indicate that we
1748 // are reading an undefined value from SrcRegD, but a proper value from
1749 // SrcRegS.
1750 MI.getOperand(1).setIsUndef();
1751 MIB.addReg(SrcRegS, RegState::Implicit);
1752
1753 // SrcRegD may actually contain an unrelated value in the ssub_1
1754 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1755 if (MI.getOperand(1).isKill()) {
1756 MI.getOperand(1).setIsKill(false);
1757 MI.addRegisterKilled(SrcRegS, TRI, true);
1758 }
1759
1760 LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1761 return true;
1762 }
1763
1764 /// Create a copy of a const pool value. Update CPI to the new index and return
1765 /// the label UID.
1766 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1767 MachineConstantPool *MCP = MF.getConstantPool();
1768 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1769
1770 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1771 assert(MCPE.isMachineConstantPoolEntry() &&
1772 "Expecting a machine constantpool entry!");
1773 ARMConstantPoolValue *ACPV =
1774 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1775
1776 unsigned PCLabelId = AFI->createPICLabelUId();
1777 ARMConstantPoolValue *NewCPV = nullptr;
1778
1779 // FIXME: The below assumes PIC relocation model and that the function
1780 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1781 //        zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR
1782 // instructions, so that's probably OK, but is PIC always correct when
1783 // we get here?
1784 if (ACPV->isGlobalValue())
1785 NewCPV = ARMConstantPoolConstant::Create(
1786 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1787 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1788 else if (ACPV->isExtSymbol())
1789 NewCPV = ARMConstantPoolSymbol::
1790 Create(MF.getFunction().getContext(),
1791 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1792 else if (ACPV->isBlockAddress())
1793 NewCPV = ARMConstantPoolConstant::
1794 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1795 ARMCP::CPBlockAddress, 4);
1796 else if (ACPV->isLSDA())
1797 NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1798 ARMCP::CPLSDA, 4);
1799 else if (ACPV->isMachineBasicBlock())
1800 NewCPV = ARMConstantPoolMBB::
1801 Create(MF.getFunction().getContext(),
1802 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1803 else
1804 llvm_unreachable("Unexpected ARM constantpool value type!!");
1805 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1806 return PCLabelId;
1807 }
1808
1809 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1810 MachineBasicBlock::iterator I,
1811 Register DestReg, unsigned SubIdx,
1812 const MachineInstr &Orig,
1813 const TargetRegisterInfo &TRI) const {
1814 unsigned Opcode = Orig.getOpcode();
1815 switch (Opcode) {
1816 default: {
1817 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1818 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1819 MBB.insert(I, MI);
1820 break;
1821 }
1822 case ARM::tLDRpci_pic:
1823 case ARM::t2LDRpci_pic: {
1824 MachineFunction &MF = *MBB.getParent();
1825 unsigned CPI = Orig.getOperand(1).getIndex();
1826 unsigned PCLabelId = duplicateCPV(MF, CPI);
1827 BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1828 .addConstantPoolIndex(CPI)
1829 .addImm(PCLabelId)
1830 .cloneMemRefs(Orig);
1831 break;
1832 }
1833 }
1834 }
1835
1836 MachineInstr &
1837 ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1838 MachineBasicBlock::iterator InsertBefore,
1839 const MachineInstr &Orig) const {
1840 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1841 MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1842 for (;;) {
1843 switch (I->getOpcode()) {
1844 case ARM::tLDRpci_pic:
1845 case ARM::t2LDRpci_pic: {
1846 MachineFunction &MF = *MBB.getParent();
1847 unsigned CPI = I->getOperand(1).getIndex();
1848 unsigned PCLabelId = duplicateCPV(MF, CPI);
1849 I->getOperand(1).setIndex(CPI);
1850 I->getOperand(2).setImm(PCLabelId);
1851 break;
1852 }
1853 }
1854 if (!I->isBundledWithSucc())
1855 break;
1856 ++I;
1857 }
1858 return Cloned;
1859 }
1860
1861 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1862 const MachineInstr &MI1,
1863 const MachineRegisterInfo *MRI) const {
1864 unsigned Opcode = MI0.getOpcode();
1865 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1866 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1867 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1868 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1869 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1870 Opcode == ARM::t2MOV_ga_pcrel) {
1871 if (MI1.getOpcode() != Opcode)
1872 return false;
1873 if (MI0.getNumOperands() != MI1.getNumOperands())
1874 return false;
1875
1876 const MachineOperand &MO0 = MI0.getOperand(1);
1877 const MachineOperand &MO1 = MI1.getOperand(1);
1878 if (MO0.getOffset() != MO1.getOffset())
1879 return false;
1880
1881 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1882 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1883 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1884 Opcode == ARM::t2MOV_ga_pcrel)
1885 // Ignore the PC labels.
1886 return MO0.getGlobal() == MO1.getGlobal();
1887
1888 const MachineFunction *MF = MI0.getParent()->getParent();
1889 const MachineConstantPool *MCP = MF->getConstantPool();
1890 int CPI0 = MO0.getIndex();
1891 int CPI1 = MO1.getIndex();
1892 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1893 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1894 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1895 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1896 if (isARMCP0 && isARMCP1) {
1897 ARMConstantPoolValue *ACPV0 =
1898 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1899 ARMConstantPoolValue *ACPV1 =
1900 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1901 return ACPV0->hasSameValue(ACPV1);
1902 } else if (!isARMCP0 && !isARMCP1) {
1903 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1904 }
1905 return false;
1906 } else if (Opcode == ARM::PICLDR) {
1907 if (MI1.getOpcode() != Opcode)
1908 return false;
1909 if (MI0.getNumOperands() != MI1.getNumOperands())
1910 return false;
1911
1912 Register Addr0 = MI0.getOperand(1).getReg();
1913 Register Addr1 = MI1.getOperand(1).getReg();
1914 if (Addr0 != Addr1) {
1915 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1916 return false;
1917
1918 // This assumes SSA form.
1919 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1920 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1921 // Check if the loaded value, e.g. a constantpool entry or a global address,
1922 // is the same.
1923 if (!produceSameValue(*Def0, *Def1, MRI))
1924 return false;
1925 }
1926
1927 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1928 // %12 = PICLDR %11, 0, 14, %noreg
1929 const MachineOperand &MO0 = MI0.getOperand(i);
1930 const MachineOperand &MO1 = MI1.getOperand(i);
1931 if (!MO0.isIdenticalTo(MO1))
1932 return false;
1933 }
1934 return true;
1935 }
1936
1937 return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1938 }
1939
1940 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1941 /// determine if two loads are loading from the same base address. It should
1942 /// only return true if the base pointers are the same and the only differences
1943 /// between the two addresses is the offset. It also returns the offsets by
1944 /// reference.
1945 ///
1946 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1947 /// is permanently disabled.
1948 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1949 int64_t &Offset1,
1950 int64_t &Offset2) const {
1951 // Don't worry about Thumb: just ARM and Thumb2.
1952 if (Subtarget.isThumb1Only()) return false;
1953
1954 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1955 return false;
1956
1957 auto IsLoadOpcode = [&](unsigned Opcode) {
1958 switch (Opcode) {
1959 default:
1960 return false;
1961 case ARM::LDRi12:
1962 case ARM::LDRBi12:
1963 case ARM::LDRD:
1964 case ARM::LDRH:
1965 case ARM::LDRSB:
1966 case ARM::LDRSH:
1967 case ARM::VLDRD:
1968 case ARM::VLDRS:
1969 case ARM::t2LDRi8:
1970 case ARM::t2LDRBi8:
1971 case ARM::t2LDRDi8:
1972 case ARM::t2LDRSHi8:
1973 case ARM::t2LDRi12:
1974 case ARM::t2LDRBi12:
1975 case ARM::t2LDRSHi12:
1976 return true;
1977 }
1978 };
1979
1980 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1981 !IsLoadOpcode(Load2->getMachineOpcode()))
1982 return false;
1983
1984 // Check if base addresses and chain operands match.
1985 if (Load1->getOperand(0) != Load2->getOperand(0) ||
1986 Load1->getOperand(4) != Load2->getOperand(4))
1987 return false;
1988
1989 // Index should be Reg0.
1990 if (Load1->getOperand(3) != Load2->getOperand(3))
1991 return false;
1992
1993 // Determine the offsets.
1994 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1995 isa<ConstantSDNode>(Load2->getOperand(1))) {
1996 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1997 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1998 return true;
1999 }
2000
2001 return false;
2002 }
2003
2004 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
2005 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
2006 /// be scheduled together. On some targets if two loads are loading from
2007 /// addresses in the same cache line, it's better if they are scheduled
2008 /// together. This function takes two integers that represent the load offsets
2009 /// from the common base address. It returns true if it decides it's desirable
2010 /// to schedule the two loads together. "NumLoads" is the number of loads that
2011 /// have already been scheduled after Load1.
2012 ///
2013 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
2014 /// is permanently disabled.
2015 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
2016 int64_t Offset1, int64_t Offset2,
2017 unsigned NumLoads) const {
2018 // Don't worry about Thumb: just ARM and Thumb2.
2019 if (Subtarget.isThumb1Only()) return false;
2020
2021 assert(Offset2 > Offset1);
2022
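// Reject loads whose offsets are far apart (on the order of more than 512
// bytes); they are unlikely to be in the same cache line anyway.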
2023 if ((Offset2 - Offset1) / 8 > 64)
2024 return false;
2025
2026 // Check if the machine opcodes are different. If they are, we consider the
2027 // loads not to share a base address, EXCEPT for Thumb2 byte loads where one
2028 // is t2LDRBi8 and the other t2LDRBi12. Those are treated as the same because
2029 // they are simply different encoding forms of the same basic load.
2031 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
2032 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
2033 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
2034 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
2035 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
2036 return false; // FIXME: overly conservative?
2037
2038 // Four loads in a row should be sufficient.
2039 if (NumLoads >= 3)
2040 return false;
2041
2042 return true;
2043 }
2044
2045 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2046 const MachineBasicBlock *MBB,
2047 const MachineFunction &MF) const {
2048 // Debug info is never a scheduling boundary. It's necessary to be explicit
2049 // due to the special treatment of IT instructions below, otherwise a
2050 // dbg_value followed by an IT will result in the IT instruction being
2051 // considered a scheduling hazard, which is wrong. It should be the actual
2052 // instruction preceding the dbg_value instruction(s), just like it is
2053 // when debug info is not present.
2054 if (MI.isDebugInstr())
2055 return false;
2056
2057 // Terminators and labels can't be scheduled around.
2058 if (MI.isTerminator() || MI.isPosition())
2059 return true;
2060
2061 // INLINEASM_BR can jump to another block
2062 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
2063 return true;
2064
2065 if (isSEHInstruction(MI))
2066 return true;
2067
2068 // Treat the start of the IT block as a scheduling boundary, but schedule
2069 // t2IT along with all instructions following it.
2070 // FIXME: This is a big hammer. But the alternative is to add all potential
2071 // true and anti dependencies to IT block instructions as implicit operands
2072 // to the t2IT instruction. The added compile time and complexity does not
2073 // seem worth it.
2074 MachineBasicBlock::const_iterator I = MI;
2075 // Make sure to skip any debug instructions
2076 while (++I != MBB->end() && I->isDebugInstr())
2077 ;
2078 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
2079 return true;
2080
2081 // Don't attempt to schedule around any instruction that defines
2082 // a stack-oriented pointer, as it's unlikely to be profitable. This
2083 // saves compile time, because it doesn't require every single
2084 // stack slot reference to depend on the instruction that does the
2085 // modification.
2086 // Calls don't actually change the stack pointer, even if they have imp-defs.
2087 // No ARM calling conventions change the stack pointer. (X86 calling
2088 // conventions sometimes do).
2089 if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
2090 return true;
2091
2092 return false;
2093 }
2094
2095 bool ARMBaseInstrInfo::
2096 isProfitableToIfCvt(MachineBasicBlock &MBB,
2097 unsigned NumCycles, unsigned ExtraPredCycles,
2098 BranchProbability Probability) const {
2099 if (!NumCycles)
2100 return false;
2101
2102 // If we are optimizing for size, see if the branch in the predecessor can be
2103 // lowered to cbn?z by the constant island lowering pass, and return false if
2104 // so. This results in a shorter instruction sequence.
2105 if (MBB.getParent()->getFunction().hasOptSize()) {
2106 MachineBasicBlock *Pred = *MBB.pred_begin();
2107 if (!Pred->empty()) {
2108 MachineInstr *LastMI = &*Pred->rbegin();
2109 if (LastMI->getOpcode() == ARM::t2Bcc) {
2110 const TargetRegisterInfo *TRI = &getRegisterInfo();
2111 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
2112 if (CmpMI)
2113 return false;
2114 }
2115 }
2116 }
2117 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
2118 MBB, 0, 0, Probability);
2119 }
2120
2121 bool ARMBaseInstrInfo::
2122 isProfitableToIfCvt(MachineBasicBlock &TBB,
2123 unsigned TCycles, unsigned TExtra,
2124 MachineBasicBlock &FBB,
2125 unsigned FCycles, unsigned FExtra,
2126 BranchProbability Probability) const {
2127 if (!TCycles)
2128 return false;
2129
2130 // In Thumb code we often end up trading one branch for an IT block, and if
2131 // we are cloning, the extra instructions can increase code size. Prevent
2132 // blocks with multiple predecessors from being if-converted to avoid this
2133 // cloning.
2134 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
2135 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
2136 return false;
2137 }
2138
2139 // Attempt to estimate the relative costs of predication versus branching.
2140 // Here we scale up each component of UnpredCost to avoid precision issues when
2141 // scaling TCycles/FCycles by Probability.
2142 const unsigned ScalingUpFactor = 1024;
2143
2144 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
2145 unsigned UnpredCost;
2146 if (!Subtarget.hasBranchPredictor()) {
2147 // When we don't have a branch predictor it's always cheaper to not take a
2148 // branch than take it, so we have to take that into account.
2149 unsigned NotTakenBranchCost = 1;
2150 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
2151 unsigned TUnpredCycles, FUnpredCycles;
2152 if (!FCycles) {
2153 // Triangle: TBB is the fallthrough
2154 TUnpredCycles = TCycles + NotTakenBranchCost;
2155 FUnpredCycles = TakenBranchCost;
2156 } else {
2157 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2158 TUnpredCycles = TCycles + TakenBranchCost;
2159 FUnpredCycles = FCycles + NotTakenBranchCost;
2160 // The branch at the end of FBB will disappear when it's predicated, so
2161 // discount it from PredCost.
2162 PredCost -= 1 * ScalingUpFactor;
2163 }
2164 // The total cost is the cost of each path scaled by their probabilities.
2165 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2166 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2167 UnpredCost = TUnpredCost + FUnpredCost;
2168 // When predicating, assume that the first IT can be folded away but later
2169 // ones cost one cycle each.
2170 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2171 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2172 }
2173 } else {
2174 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2175 unsigned FUnpredCost =
2176 Probability.getCompl().scale(FCycles * ScalingUpFactor);
2177 UnpredCost = TUnpredCost + FUnpredCost;
2178 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2179 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2180 }
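// Illustrative example (no branch predictor, triangle shape, 50% probability,
// misprediction penalty 4, TCycles = 2, no extra predication cycles):
// UnpredCost = 0.5*(2+1) + 0.5*4 = 3.5 cycles and PredCost = 2 cycles (both
// scaled by 1024), so if-conversion is deemed profitable.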
2181
2182 return PredCost <= UnpredCost;
2183 }
2184
2185 unsigned
2186 ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2187 unsigned NumInsts) const {
2188 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2189 // ARM has a condition code field in every predicable instruction, using it
2190 // doesn't change code size.
2191 if (!Subtarget.isThumb2())
2192 return 0;
2193
2194 // It's possible that the IT block is restricted to a single instruction.
2195 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
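// For example, predicating 5 instructions with at most 4 per IT block needs
// divideCeil(5, 4) == 2 IT instructions, i.e. 4 extra bytes.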
2196 return divideCeil(NumInsts, MaxInsts) * 2;
2197 }
2198
2199 unsigned
2200 ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2201 // If this branch is likely to be folded into the comparison to form a
2202 // CB(N)Z, then removing it won't reduce code size at all, because that will
2203 // just replace the CB(N)Z with a CMP.
2204 if (MI.getOpcode() == ARM::t2Bcc &&
2205 findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2206 return 0;
2207
2208 unsigned Size = getInstSizeInBytes(MI);
2209
2210 // For Thumb2, all branches are 32-bit instructions during the if conversion
2211 // pass, but may be replaced with 16-bit instructions during size reduction.
2212 // Since the branches considered by if conversion tend to be forward branches
2213 // over small basic blocks, they are very likely to be in range for the
2214 // narrow instructions, so we assume the final code size will be half what it
2215 // currently is.
2216 if (Subtarget.isThumb2())
2217 Size /= 2;
2218
2219 return Size;
2220 }
2221
2222 bool
2223 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2224 MachineBasicBlock &FMBB) const {
2225 // Reduce false anti-dependencies to let the target's out-of-order execution
2226 // engine do its thing.
2227 return Subtarget.isProfitableToUnpredicate();
2228 }
2229
2230 /// getInstrPredicate - If instruction is predicated, returns its predicate
2231 /// condition, otherwise returns AL. It also returns the condition code
2232 /// register by reference.
2233 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2234 Register &PredReg) {
2235 int PIdx = MI.findFirstPredOperandIdx();
2236 if (PIdx == -1) {
2237 PredReg = 0;
2238 return ARMCC::AL;
2239 }
2240
2241 PredReg = MI.getOperand(PIdx+1).getReg();
2242 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2243 }
2244
2245 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2246 if (Opc == ARM::B)
2247 return ARM::Bcc;
2248 if (Opc == ARM::tB)
2249 return ARM::tBcc;
2250 if (Opc == ARM::t2B)
2251 return ARM::t2Bcc;
2252
2253 llvm_unreachable("Unknown unconditional branch opcode!");
2254 }
2255
2256 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2257 bool NewMI,
2258 unsigned OpIdx1,
2259 unsigned OpIdx2) const {
2260 switch (MI.getOpcode()) {
2261 case ARM::MOVCCr:
2262 case ARM::t2MOVCCr: {
2263 // MOVCC can be commuted by inverting the condition.
2264 Register PredReg;
2265 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2266 // MOVCC AL can't be inverted. Shouldn't happen.
2267 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2268 return nullptr;
2269 MachineInstr *CommutedMI =
2270 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2271 if (!CommutedMI)
2272 return nullptr;
2273 // After swapping the MOVCC operands, also invert the condition.
2274 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2275 .setImm(ARMCC::getOppositeCondition(CC));
2276 return CommutedMI;
2277 }
2278 }
2279 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2280 }
2281
2282 /// Identify instructions that can be folded into a MOVCC instruction, and
2283 /// return the defining instruction.
2284 MachineInstr *
2285 ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2286 const TargetInstrInfo *TII) const {
2287 if (!Reg.isVirtual())
2288 return nullptr;
2289 if (!MRI.hasOneNonDBGUse(Reg))
2290 return nullptr;
2291 MachineInstr *MI = MRI.getVRegDef(Reg);
2292 if (!MI)
2293 return nullptr;
2294 // Check if MI can be predicated and folded into the MOVCC.
2295 if (!isPredicable(*MI))
2296 return nullptr;
2297 // Check if MI has any non-dead defs or physreg uses. This also detects
2298 // predicated instructions which will be reading CPSR.
2299 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2300 // Reject frame index operands, PEI can't handle the predicated pseudos.
2301 if (MO.isFI() || MO.isCPI() || MO.isJTI())
2302 return nullptr;
2303 if (!MO.isReg())
2304 continue;
2305 // MI can't have any tied operands, that would conflict with predication.
2306 if (MO.isTied())
2307 return nullptr;
2308 if (MO.getReg().isPhysical())
2309 return nullptr;
2310 if (MO.isDef() && !MO.isDead())
2311 return nullptr;
2312 }
2313 bool DontMoveAcrossStores = true;
2314 if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2315 return nullptr;
2316 return MI;
2317 }
2318
2319 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2320 SmallVectorImpl<MachineOperand> &Cond,
2321 unsigned &TrueOp, unsigned &FalseOp,
2322 bool &Optimizable) const {
2323 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2324 "Unknown select instruction");
2325 // MOVCC operands:
2326 // 0: Def.
2327 // 1: True use.
2328 // 2: False use.
2329 // 3: Condition code.
2330 // 4: CPSR use.
2331 TrueOp = 1;
2332 FalseOp = 2;
2333 Cond.push_back(MI.getOperand(3));
2334 Cond.push_back(MI.getOperand(4));
2335 // We can always fold a def.
2336 Optimizable = true;
2337 return false;
2338 }
2339
2340 MachineInstr *
2341 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2342 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2343 bool PreferFalse) const {
2344 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2345 "Unknown select instruction");
2346 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2347 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2348 bool Invert = !DefMI;
2349 if (!DefMI)
2350 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2351 if (!DefMI)
2352 return nullptr;
2353
2354 // Find new register class to use.
2355 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2356 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2357 Register DestReg = MI.getOperand(0).getReg();
2358 const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2359 const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
2360 if (!MRI.constrainRegClass(DestReg, FalseClass))
2361 return nullptr;
2362 if (!MRI.constrainRegClass(DestReg, TrueClass))
2363 return nullptr;
2364
2365 // Create a new predicated version of DefMI.
2366 // Rfalse is the first use.
2367 MachineInstrBuilder NewMI =
2368 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2369
2370 // Copy all the DefMI operands, excluding its (null) predicate.
2371 const MCInstrDesc &DefDesc = DefMI->getDesc();
2372 for (unsigned i = 1, e = DefDesc.getNumOperands();
2373 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
2374 NewMI.add(DefMI->getOperand(i));
2375
2376 unsigned CondCode = MI.getOperand(3).getImm();
2377 if (Invert)
2378 NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2379 else
2380 NewMI.addImm(CondCode);
2381 NewMI.add(MI.getOperand(4));
2382
2383 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2384 if (NewMI->hasOptionalDef())
2385 NewMI.add(condCodeOp());
2386
2387 // The output register value when the predicate is false is an implicit
2388 // register operand tied to the first def.
2389 // The tie makes the register allocator ensure the FalseReg is allocated the
2390 // same register as operand 0.
2391 FalseReg.setImplicit();
2392 NewMI.add(FalseReg);
2393 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2394
2395 // Update SeenMIs set: register newly created MI and erase removed DefMI.
2396 SeenMIs.insert(NewMI);
2397 SeenMIs.erase(DefMI);
2398
2399 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2400 // DefMI would be invalid when transferred inside the loop. Checking for a
2401 // loop is expensive, but at least remove kill flags if they are in different
2402 // BBs.
2403 if (DefMI->getParent() != MI.getParent())
2404 NewMI->clearKillInfo();
2405
2406 // The caller will erase MI, but not DefMI.
2407 DefMI->eraseFromParent();
2408 return NewMI;
2409 }
2410
2411 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2412 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
2413 /// def operand.
2414 ///
2415 /// This will go away once we can teach tblgen how to set the optional CPSR def
2416 /// operand itself.
2417 struct AddSubFlagsOpcodePair {
2418 uint16_t PseudoOpc;
2419 uint16_t MachineOpc;
2420 };
2421
2422 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2423 {ARM::ADDSri, ARM::ADDri},
2424 {ARM::ADDSrr, ARM::ADDrr},
2425 {ARM::ADDSrsi, ARM::ADDrsi},
2426 {ARM::ADDSrsr, ARM::ADDrsr},
2427
2428 {ARM::SUBSri, ARM::SUBri},
2429 {ARM::SUBSrr, ARM::SUBrr},
2430 {ARM::SUBSrsi, ARM::SUBrsi},
2431 {ARM::SUBSrsr, ARM::SUBrsr},
2432
2433 {ARM::RSBSri, ARM::RSBri},
2434 {ARM::RSBSrsi, ARM::RSBrsi},
2435 {ARM::RSBSrsr, ARM::RSBrsr},
2436
2437 {ARM::tADDSi3, ARM::tADDi3},
2438 {ARM::tADDSi8, ARM::tADDi8},
2439 {ARM::tADDSrr, ARM::tADDrr},
2440 {ARM::tADCS, ARM::tADC},
2441
2442 {ARM::tSUBSi3, ARM::tSUBi3},
2443 {ARM::tSUBSi8, ARM::tSUBi8},
2444 {ARM::tSUBSrr, ARM::tSUBrr},
2445 {ARM::tSBCS, ARM::tSBC},
2446 {ARM::tRSBS, ARM::tRSB},
2447 {ARM::tLSLSri, ARM::tLSLri},
2448
2449 {ARM::t2ADDSri, ARM::t2ADDri},
2450 {ARM::t2ADDSrr, ARM::t2ADDrr},
2451 {ARM::t2ADDSrs, ARM::t2ADDrs},
2452
2453 {ARM::t2SUBSri, ARM::t2SUBri},
2454 {ARM::t2SUBSrr, ARM::t2SUBrr},
2455 {ARM::t2SUBSrs, ARM::t2SUBrs},
2456
2457 {ARM::t2RSBSri, ARM::t2RSBri},
2458 {ARM::t2RSBSrs, ARM::t2RSBrs},
2459 };
2460
2461 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2462 for (const auto &Entry : AddSubFlagsOpcodeMap)
2463 if (OldOpc == Entry.PseudoOpc)
2464 return Entry.MachineOpc;
2465 return 0;
2466 }
2467
2468 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2469 MachineBasicBlock::iterator &MBBI,
2470 const DebugLoc &dl, Register DestReg,
2471 Register BaseReg, int NumBytes,
2472 ARMCC::CondCodes Pred, Register PredReg,
2473 const ARMBaseInstrInfo &TII,
2474 unsigned MIFlags) {
2475 if (NumBytes == 0 && DestReg != BaseReg) {
2476 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2477 .addReg(BaseReg, RegState::Kill)
2478 .add(predOps(Pred, PredReg))
2479 .add(condCodeOp())
2480 .setMIFlags(MIFlags);
2481 return;
2482 }
2483
2484 bool isSub = NumBytes < 0;
2485 if (isSub) NumBytes = -NumBytes;
2486
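// Split the offset into chunks that each fit an ARM rotated-immediate field;
// e.g. (illustrative) NumBytes == 0x1004 is emitted as ADDri #0x4 followed by
// ADDri #0x1000.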
2487 while (NumBytes) {
2488 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2489 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
2490 assert(ThisVal && "Didn't extract field correctly");
2491
2492 // We will handle these bits from offset, clear them.
2493 NumBytes &= ~ThisVal;
2494
2495 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2496
2497 // Build the new ADD / SUB.
2498 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2499 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2500 .addReg(BaseReg, RegState::Kill)
2501 .addImm(ThisVal)
2502 .add(predOps(Pred, PredReg))
2503 .add(condCodeOp())
2504 .setMIFlags(MIFlags);
2505 BaseReg = DestReg;
2506 }
2507 }
2508
2509 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2510 MachineFunction &MF, MachineInstr *MI,
2511 unsigned NumBytes) {
2512 // This optimisation potentially adds lots of load and store
2513 // micro-operations; it's really only a benefit for code size.
2514 if (!Subtarget.hasMinSize())
2515 return false;
2516
2517 // If only one register is pushed/popped, LLVM can use an LDR/STR
2518 // instead. We can't modify those so make sure we're dealing with an
2519 // instruction we understand.
2520 bool IsPop = isPopOpcode(MI->getOpcode());
2521 bool IsPush = isPushOpcode(MI->getOpcode());
2522 if (!IsPush && !IsPop)
2523 return false;
2524
2525 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2526 MI->getOpcode() == ARM::VLDMDIA_UPD;
2527 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2528 MI->getOpcode() == ARM::tPOP ||
2529 MI->getOpcode() == ARM::tPOP_RET;
2530
2531 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2532 MI->getOperand(1).getReg() == ARM::SP)) &&
2533 "trying to fold sp update into non-sp-updating push/pop");
2534
2535 // The VFP push & pop act on D-registers, so we can only correctly fold in an
2536 // adjustment that is a multiple of 8 bytes. Similarly, GPR registers are 4
2537 // bytes each. Don't try if this is violated.
2538 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2539 return false;
2540
2541 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2542 // pred) so the list starts at 4. Thumb1 starts after the predicate.
2543 int RegListIdx = IsT1PushPop ? 2 : 4;
2544
2545 // Calculate the space we'll need in terms of registers.
2546 unsigned RegsNeeded;
2547 const TargetRegisterClass *RegClass;
2548 if (IsVFPPushPop) {
2549 RegsNeeded = NumBytes / 8;
2550 RegClass = &ARM::DPRRegClass;
2551 } else {
2552 RegsNeeded = NumBytes / 4;
2553 RegClass = &ARM::GPRRegClass;
2554 }
2555
2556 // We're going to have to strip all list operands off before
2557 // re-adding them since the order matters, so save the existing ones
2558 // for later.
2559 SmallVector<MachineOperand, 4> RegList;
2560
2561 // We're also going to need the first register transferred by this
2562 // instruction, which won't necessarily be the first register in the list.
2563 unsigned FirstRegEnc = -1;
2564
2565 const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2566 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2567 MachineOperand &MO = MI->getOperand(i);
2568 RegList.push_back(MO);
2569
2570 if (MO.isReg() && !MO.isImplicit() &&
2571 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2572 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2573 }
2574
2575 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2576
2577 // Now try to find enough space in the reglist to allocate NumBytes.
2578 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2579 --CurRegEnc) {
2580 unsigned CurReg = RegClass->getRegister(CurRegEnc);
2581 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2582 continue;
2583 if (!IsPop) {
2584 // Pushing any register is completely harmless; mark the register involved
2585 // as undef since we don't care about its value and must not restore it
2586 // during stack unwinding.
2587 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2588 false, false, true));
2589 --RegsNeeded;
2590 continue;
2591 }
2592
2593 // However, we can only pop an extra register if it's not live. For
2594 // registers live within the function we might clobber a return value
2595 // register; the other way a register can be live here is if it's
2596 // callee-saved.
2597 if (isCalleeSavedRegister(CurReg, CSRegs) ||
2598 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2599 MachineBasicBlock::LQR_Dead) {
2600 // VFP pops don't allow holes in the register list, so any skip is fatal
2601 // for our transformation. GPR pops do, so we should just keep looking.
2602 if (IsVFPPushPop)
2603 return false;
2604 else
2605 continue;
2606 }
2607
2608 // Mark the unimportant registers as <def,dead> in the POP.
2609 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2610 true));
2611 --RegsNeeded;
2612 }
2613
2614 if (RegsNeeded > 0)
2615 return false;
2616
2617 // Finally we know we can profitably perform the optimisation so go
2618 // ahead: strip all existing registers off and add them back again
2619 // in the right order.
2620 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2621 MI->removeOperand(i);
2622
2623 // Add the complete list back in.
2624 MachineInstrBuilder MIB(MF, &*MI);
2625 for (const MachineOperand &MO : llvm::reverse(RegList))
2626 MIB.add(MO);
2627
2628 return true;
2629 }
2630
2631 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2632 Register FrameReg, int &Offset,
2633 const ARMBaseInstrInfo &TII) {
2634 unsigned Opcode = MI.getOpcode();
2635 const MCInstrDesc &Desc = MI.getDesc();
2636 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2637 bool isSub = false;
2638
2639 // Memory operands in inline assembly always use AddrMode2.
2640 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2641 AddrMode = ARMII::AddrMode2;
2642
2643 if (Opcode == ARM::ADDri) {
2644 Offset += MI.getOperand(FrameRegIdx+1).getImm();
2645 if (Offset == 0) {
2646 // Turn it into a move.
2647 MI.setDesc(TII.get(ARM::MOVr));
2648 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2649 MI.removeOperand(FrameRegIdx+1);
2650 Offset = 0;
2651 return true;
2652 } else if (Offset < 0) {
2653 Offset = -Offset;
2654 isSub = true;
2655 MI.setDesc(TII.get(ARM::SUBri));
2656 }
2657
2658 // Common case: small offset, fits into instruction.
2659 if (ARM_AM::getSOImmVal(Offset) != -1) {
2660 // Replace the FrameIndex with sp / fp
2661 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2662 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2663 Offset = 0;
2664 return true;
2665 }
2666
2667 // Otherwise, pull as much of the immediate into this ADDri/SUBri
2668 // as possible.
2669 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2670 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
2671
2672 // We will handle these bits from offset, clear them.
2673 Offset &= ~ThisImmVal;
2674
2675 // Get the properly encoded SOImmVal field.
2676 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2677 "Bit extraction didn't work?");
2678 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2679 } else {
2680 unsigned ImmIdx = 0;
2681 int InstrOffs = 0;
2682 unsigned NumBits = 0;
2683 unsigned Scale = 1;
2684 switch (AddrMode) {
2685 case ARMII::AddrMode_i12:
2686 ImmIdx = FrameRegIdx + 1;
2687 InstrOffs = MI.getOperand(ImmIdx).getImm();
2688 NumBits = 12;
2689 break;
2690 case ARMII::AddrMode2:
2691 ImmIdx = FrameRegIdx+2;
2692 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2693 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2694 InstrOffs *= -1;
2695 NumBits = 12;
2696 break;
2697 case ARMII::AddrMode3:
2698 ImmIdx = FrameRegIdx+2;
2699 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2700 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2701 InstrOffs *= -1;
2702 NumBits = 8;
2703 break;
2704 case ARMII::AddrMode4:
2705 case ARMII::AddrMode6:
2706 // Can't fold any offset even if it's zero.
2707 return false;
2708 case ARMII::AddrMode5:
2709 ImmIdx = FrameRegIdx+1;
2710 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2711 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2712 InstrOffs *= -1;
2713 NumBits = 8;
2714 Scale = 4;
2715 break;
2716 case ARMII::AddrMode5FP16:
2717 ImmIdx = FrameRegIdx+1;
2718 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2719 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2720 InstrOffs *= -1;
2721 NumBits = 8;
2722 Scale = 2;
2723 break;
2724 case ARMII::AddrModeT2_i7:
2725 case ARMII::AddrModeT2_i7s2:
2726 case ARMII::AddrModeT2_i7s4:
2727 ImmIdx = FrameRegIdx+1;
2728 InstrOffs = MI.getOperand(ImmIdx).getImm();
2729 NumBits = 7;
2730 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2731 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2732 break;
2733 default:
2734 llvm_unreachable("Unsupported addressing mode!");
2735 }
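// E.g. (illustrative) AddrMode5, as used by VLDRD/VSTRD, has an 8-bit offset
// field scaled by 4, so offsets of up to 255 * 4 = 1020 bytes can be folded
// into the instruction below.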
2736
2737 Offset += InstrOffs * Scale;
2738 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2739 if (Offset < 0) {
2740 Offset = -Offset;
2741 isSub = true;
2742 }
2743
2744 // Attempt to fold the address computation if the opcode has offset bits.
2745 if (NumBits > 0) {
2746 // Common case: small offset, fits into instruction.
2747 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2748 int ImmedOffset = Offset / Scale;
2749 unsigned Mask = (1 << NumBits) - 1;
2750 if ((unsigned)Offset <= Mask * Scale) {
2751 // Replace the FrameIndex with sp
2752 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2753 // FIXME: When addrmode2 goes away, this will simplify (like the
2754 // T2 version), as the LDR.i12 versions don't need the encoding
2755 // tricks for the offset value.
2756 if (isSub) {
2757 if (AddrMode == ARMII::AddrMode_i12)
2758 ImmedOffset = -ImmedOffset;
2759 else
2760 ImmedOffset |= 1 << NumBits;
2761 }
2762 ImmOp.ChangeToImmediate(ImmedOffset);
2763 Offset = 0;
2764 return true;
2765 }
2766
2767 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2768 ImmedOffset = ImmedOffset & Mask;
2769 if (isSub) {
2770 if (AddrMode == ARMII::AddrMode_i12)
2771 ImmedOffset = -ImmedOffset;
2772 else
2773 ImmedOffset |= 1 << NumBits;
2774 }
2775 ImmOp.ChangeToImmediate(ImmedOffset);
2776 Offset &= ~(Mask*Scale);
2777 }
2778 }
2779
2780 Offset = (isSub) ? -Offset : Offset;
2781 return Offset == 0;
2782 }
2783
2784 /// analyzeCompare - For a comparison instruction, return the source registers
2785 /// in SrcReg and SrcReg2 if it has two register operands, and the value it
2786 /// compares against in CmpValue. Return true if the comparison instruction
2787 /// can be analyzed.
2788 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2789 Register &SrcReg2, int64_t &CmpMask,
2790 int64_t &CmpValue) const {
2791 switch (MI.getOpcode()) {
2792 default: break;
2793 case ARM::CMPri:
2794 case ARM::t2CMPri:
2795 case ARM::tCMPi8:
2796 SrcReg = MI.getOperand(0).getReg();
2797 SrcReg2 = 0;
2798 CmpMask = ~0;
2799 CmpValue = MI.getOperand(1).getImm();
2800 return true;
2801 case ARM::CMPrr:
2802 case ARM::t2CMPrr:
2803 case ARM::tCMPr:
2804 SrcReg = MI.getOperand(0).getReg();
2805 SrcReg2 = MI.getOperand(1).getReg();
2806 CmpMask = ~0;
2807 CmpValue = 0;
2808 return true;
2809 case ARM::TSTri:
2810 case ARM::t2TSTri:
2811 SrcReg = MI.getOperand(0).getReg();
2812 SrcReg2 = 0;
2813 CmpMask = MI.getOperand(1).getImm();
2814 CmpValue = 0;
2815 return true;
2816 }
2817
2818 return false;
2819 }
2820
2821 /// isSuitableForMask - Identify a suitable 'and' instruction that
2822 /// operates on the given source register and applies the same mask
2823 /// as a 'tst' instruction. Provide a limited look-through for copies.
2824 /// When successful, MI will hold the found instruction.
2825 static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2826 int CmpMask, bool CommonUse) {
2827 switch (MI->getOpcode()) {
2828 case ARM::ANDri:
2829 case ARM::t2ANDri:
2830 if (CmpMask != MI->getOperand(2).getImm())
2831 return false;
2832 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2833 return true;
2834 break;
2835 }
2836
2837 return false;
2838 }
2839
2840 /// getCmpToAddCondition - assume the flags are set by CMP(a,b); return the
2841 /// condition code to use if we modify the instructions such that the flags
2842 /// are set by ADD(a,b,X) instead.
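/// The HS/LO swap reflects that the carry produced by an ADD has the opposite
/// sense to the carry (not-borrow) produced by a CMP/SUB for unsigned
/// comparisons; VS and VC map to themselves.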
2843 inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2844 switch (CC) {
2845 default: return ARMCC::AL;
2846 case ARMCC::HS: return ARMCC::LO;
2847 case ARMCC::LO: return ARMCC::HS;
2848 case ARMCC::VS: return ARMCC::VS;
2849 case ARMCC::VC: return ARMCC::VC;
2850 }
2851 }
2852
2853 /// isRedundantFlagInstr - check whether the first instruction, whose only
2854 /// purpose is to update flags, can be made redundant.
2855 /// CMPrr can be made redundant by SUBrr if the operands are the same.
2856 /// CMPri can be made redundant by SUBri if the operands are the same.
2857 /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2858 /// This function can be extended later on.
2859 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2860 Register SrcReg, Register SrcReg2,
2861 int64_t ImmValue,
2862 const MachineInstr *OI,
2863 bool &IsThumb1) {
2864 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2865 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2866 ((OI->getOperand(1).getReg() == SrcReg &&
2867 OI->getOperand(2).getReg() == SrcReg2) ||
2868 (OI->getOperand(1).getReg() == SrcReg2 &&
2869 OI->getOperand(2).getReg() == SrcReg))) {
2870 IsThumb1 = false;
2871 return true;
2872 }
2873
2874 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2875 ((OI->getOperand(2).getReg() == SrcReg &&
2876 OI->getOperand(3).getReg() == SrcReg2) ||
2877 (OI->getOperand(2).getReg() == SrcReg2 &&
2878 OI->getOperand(3).getReg() == SrcReg))) {
2879 IsThumb1 = true;
2880 return true;
2881 }
2882
2883 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2884 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2885 OI->getOperand(1).getReg() == SrcReg &&
2886 OI->getOperand(2).getImm() == ImmValue) {
2887 IsThumb1 = false;
2888 return true;
2889 }
2890
2891 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2892 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2893 OI->getOperand(2).getReg() == SrcReg &&
2894 OI->getOperand(3).getImm() == ImmValue) {
2895 IsThumb1 = true;
2896 return true;
2897 }
2898
2899 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2900 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2901 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2902 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2903 OI->getOperand(0).getReg() == SrcReg &&
2904 OI->getOperand(1).getReg() == SrcReg2) {
2905 IsThumb1 = false;
2906 return true;
2907 }
2908
2909 if (CmpI->getOpcode() == ARM::tCMPr &&
2910 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2911 OI->getOpcode() == ARM::tADDrr) &&
2912 OI->getOperand(0).getReg() == SrcReg &&
2913 OI->getOperand(2).getReg() == SrcReg2) {
2914 IsThumb1 = true;
2915 return true;
2916 }
2917
2918 return false;
2919 }
2920
2921 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2922 switch (MI->getOpcode()) {
2923 default: return false;
2924 case ARM::tLSLri:
2925 case ARM::tLSRri:
2926 case ARM::tLSLrr:
2927 case ARM::tLSRrr:
2928 case ARM::tSUBrr:
2929 case ARM::tADDrr:
2930 case ARM::tADDi3:
2931 case ARM::tADDi8:
2932 case ARM::tSUBi3:
2933 case ARM::tSUBi8:
2934 case ARM::tMUL:
2935 case ARM::tADC:
2936 case ARM::tSBC:
2937 case ARM::tRSB:
2938 case ARM::tAND:
2939 case ARM::tORR:
2940 case ARM::tEOR:
2941 case ARM::tBIC:
2942 case ARM::tMVN:
2943 case ARM::tASRri:
2944 case ARM::tASRrr:
2945 case ARM::tROR:
2946 IsThumb1 = true;
2947 [[fallthrough]];
2948 case ARM::RSBrr:
2949 case ARM::RSBri:
2950 case ARM::RSCrr:
2951 case ARM::RSCri:
2952 case ARM::ADDrr:
2953 case ARM::ADDri:
2954 case ARM::ADCrr:
2955 case ARM::ADCri:
2956 case ARM::SUBrr:
2957 case ARM::SUBri:
2958 case ARM::SBCrr:
2959 case ARM::SBCri:
2960 case ARM::t2RSBri:
2961 case ARM::t2ADDrr:
2962 case ARM::t2ADDri:
2963 case ARM::t2ADCrr:
2964 case ARM::t2ADCri:
2965 case ARM::t2SUBrr:
2966 case ARM::t2SUBri:
2967 case ARM::t2SBCrr:
2968 case ARM::t2SBCri:
2969 case ARM::ANDrr:
2970 case ARM::ANDri:
2971 case ARM::ANDrsr:
2972 case ARM::ANDrsi:
2973 case ARM::t2ANDrr:
2974 case ARM::t2ANDri:
2975 case ARM::t2ANDrs:
2976 case ARM::ORRrr:
2977 case ARM::ORRri:
2978 case ARM::ORRrsr:
2979 case ARM::ORRrsi:
2980 case ARM::t2ORRrr:
2981 case ARM::t2ORRri:
2982 case ARM::t2ORRrs:
2983 case ARM::EORrr:
2984 case ARM::EORri:
2985 case ARM::EORrsr:
2986 case ARM::EORrsi:
2987 case ARM::t2EORrr:
2988 case ARM::t2EORri:
2989 case ARM::t2EORrs:
2990 case ARM::BICri:
2991 case ARM::BICrr:
2992 case ARM::BICrsi:
2993 case ARM::BICrsr:
2994 case ARM::t2BICri:
2995 case ARM::t2BICrr:
2996 case ARM::t2BICrs:
2997 case ARM::t2LSRri:
2998 case ARM::t2LSRrr:
2999 case ARM::t2LSLri:
3000 case ARM::t2LSLrr:
3001 case ARM::MOVsr:
3002 case ARM::MOVsi:
3003 return true;
3004 }
3005 }
3006
3007 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
3008 /// comparison into one that sets the zero bit in the flags register;
3009 /// Remove a redundant Compare instruction if an earlier instruction can set the
3010 /// flags in the same way as Compare.
3011 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
3012 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
3013 /// condition code of instructions which use the flags.
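/// For example (illustrative): in 'r2 = SUBrr r0, r1; CMPrr r0, r1; bge ...'
/// the SUB can be made flag-setting and the CMP removed; when the CMP operands
/// appear swapped, the condition codes of the flag users are adjusted instead.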
3014 bool ARMBaseInstrInfo::optimizeCompareInstr(
3015 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
3016 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
3017 // Get the unique definition of SrcReg.
3018 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
3019 if (!MI) return false;
3020
3021 // Masked compares sometimes use the same register as the corresponding 'and'.
3022 if (CmpMask != ~0) {
3023 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
3024 MI = nullptr;
3025 for (MachineRegisterInfo::use_instr_iterator
3026 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
3027 UI != UE; ++UI) {
3028 if (UI->getParent() != CmpInstr.getParent())
3029 continue;
3030 MachineInstr *PotentialAND = &*UI;
3031 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
3032 isPredicated(*PotentialAND))
3033 continue;
3034 MI = PotentialAND;
3035 break;
3036 }
3037 if (!MI) return false;
3038 }
3039 }
3040
3041 // Get ready to iterate backward from CmpInstr.
3042 MachineBasicBlock::iterator I = CmpInstr, E = MI,
3043 B = CmpInstr.getParent()->begin();
3044
3045 // Early exit if CmpInstr is at the beginning of the BB.
3046 if (I == B) return false;
3047
3048 // There are two possible candidates which can be changed to set CPSR:
3049 // One is MI, the other is a SUB or ADD instruction.
3050 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
3051 // ADDr[ri](r1, r2, X).
3052 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
3053 MachineInstr *SubAdd = nullptr;
3054 if (SrcReg2 != 0)
3055 // MI is not a candidate for CMPrr.
3056 MI = nullptr;
3057 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
3058 // Conservatively refuse to convert an instruction which isn't in the same
3059 // BB as the comparison.
3060 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
3061 // Thus we cannot return here.
3062 if (CmpInstr.getOpcode() == ARM::CMPri ||
3063 CmpInstr.getOpcode() == ARM::t2CMPri ||
3064 CmpInstr.getOpcode() == ARM::tCMPi8)
3065 MI = nullptr;
3066 else
3067 return false;
3068 }
3069
3070 bool IsThumb1 = false;
3071 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
3072 return false;
3073
3074 // We also want to do this peephole for cases like this: if (a*b == 0),
3075 // and optimise away the CMP instruction from the generated code sequence:
3076 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
3077 // resulting from the select instruction, but these MOVS instructions for
3078 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
3079 // However, if we only have MOVS instructions in between the CMP and the
3080 // other instruction (the MULS in this example), then the CPSR is dead so we
3081 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
3082 // reordering and then continue the analysis hoping we can eliminate the
3083 // CMP. This peephole works on the vregs, so is still in SSA form. As a
3084 // consequence, the movs won't redefine/kill the MUL operands which would
3085 // make this reordering illegal.
3086 const TargetRegisterInfo *TRI = &getRegisterInfo();
3087 if (MI && IsThumb1) {
3088 --I;
3089 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
3090 bool CanReorder = true;
3091 for (; I != E; --I) {
3092 if (I->getOpcode() != ARM::tMOVi8) {
3093 CanReorder = false;
3094 break;
3095 }
3096 }
3097 if (CanReorder) {
3098 MI = MI->removeFromParent();
3099 E = CmpInstr;
3100 CmpInstr.getParent()->insert(E, MI);
3101 }
3102 }
3103 I = CmpInstr;
3104 E = MI;
3105 }
3106
3107 // Check that CPSR isn't set between the comparison instruction and the one we
3108 // want to change. At the same time, search for SubAdd.
3109 bool SubAddIsThumb1 = false;
3110 do {
3111 const MachineInstr &Instr = *--I;
3112
3113 // Check whether CmpInstr can be made redundant by the current instruction.
3114 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
3115 SubAddIsThumb1)) {
3116 SubAdd = &*I;
3117 break;
3118 }
3119
3120 // Allow E (which was initially MI) to be SubAdd but do not search before E.
3121 if (I == E)
3122 break;
3123
3124 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
3125 Instr.readsRegister(ARM::CPSR, TRI))
3126 // This instruction modifies or uses CPSR after the one we want to
3127 // change. We can't do this transformation.
3128 return false;
3129
3130 if (I == B) {
3131 // In some cases, we scan the use-list of an instruction for an AND;
3132 // that AND is in the same BB, but may not be scheduled before the
3133 // corresponding TST. In that case, bail out.
3134 //
3135 // FIXME: We could try to reschedule the AND.
3136 return false;
3137 }
3138 } while (true);
3139
3140 // Return false if no candidates exist.
3141 if (!MI && !SubAdd)
3142 return false;
3143
3144 // If we found a SubAdd, use it as it will be closer to the CMP
3145 if (SubAdd) {
3146 MI = SubAdd;
3147 IsThumb1 = SubAddIsThumb1;
3148 }
3149
3150 // We can't use a predicated instruction - it doesn't always write the flags.
3151 if (isPredicated(*MI))
3152 return false;
3153
  // Scan forward for the use of CPSR.
  // When checking against MI: if a use has a condition code that requires
  // checking the V bit or C bit, then this is not safe to do.
3157 // It is safe to remove CmpInstr if CPSR is redefined or killed.
3158 // If we are done with the basic block, we need to check whether CPSR is
3159 // live-out.
3160 SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
3161 OperandsToUpdate;
3162 bool isSafe = false;
3163 I = CmpInstr;
3164 E = CmpInstr.getParent()->end();
3165 while (!isSafe && ++I != E) {
3166 const MachineInstr &Instr = *I;
3167 for (unsigned IO = 0, EO = Instr.getNumOperands();
3168 !isSafe && IO != EO; ++IO) {
3169 const MachineOperand &MO = Instr.getOperand(IO);
3170 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3171 isSafe = true;
3172 break;
3173 }
3174 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3175 continue;
3176 if (MO.isDef()) {
3177 isSafe = true;
3178 break;
3179 }
      // The condition code operand is the operand immediately before CPSR,
      // except for VSELs.
3181 ARMCC::CondCodes CC;
3182 bool IsInstrVSel = true;
3183 switch (Instr.getOpcode()) {
3184 default:
3185 IsInstrVSel = false;
3186 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3187 break;
3188 case ARM::VSELEQD:
3189 case ARM::VSELEQS:
3190 case ARM::VSELEQH:
3191 CC = ARMCC::EQ;
3192 break;
3193 case ARM::VSELGTD:
3194 case ARM::VSELGTS:
3195 case ARM::VSELGTH:
3196 CC = ARMCC::GT;
3197 break;
3198 case ARM::VSELGED:
3199 case ARM::VSELGES:
3200 case ARM::VSELGEH:
3201 CC = ARMCC::GE;
3202 break;
3203 case ARM::VSELVSD:
3204 case ARM::VSELVSS:
3205 case ARM::VSELVSH:
3206 CC = ARMCC::VS;
3207 break;
3208 }
3209
3210 if (SubAdd) {
3211 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3212 // on CMP needs to be updated to be based on SUB.
3213 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3214 // needs to be modified.
3215 // Push the condition code operands to OperandsToUpdate.
3216 // If it is safe to remove CmpInstr, the condition code of these
3217 // operands will be modified.
3218 unsigned Opc = SubAdd->getOpcode();
3219 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3220 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3221 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3222 Opc == ARM::tSUBi8;
3223 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3224 if (!IsSub ||
3225 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3226 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3227 // VSel doesn't support condition code update.
3228 if (IsInstrVSel)
3229 return false;
3230 // Ensure we can swap the condition.
3231 ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3232 if (NewCC == ARMCC::AL)
3233 return false;
3234 OperandsToUpdate.push_back(
3235 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3236 }
3237 } else {
3238 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3239 switch (CC) {
3240 case ARMCC::EQ: // Z
3241 case ARMCC::NE: // Z
3242 case ARMCC::MI: // N
3243 case ARMCC::PL: // N
3244 case ARMCC::AL: // none
3245 // CPSR can be used multiple times, we should continue.
3246 break;
3247 case ARMCC::HS: // C
3248 case ARMCC::LO: // C
3249 case ARMCC::VS: // V
3250 case ARMCC::VC: // V
3251 case ARMCC::HI: // C Z
3252 case ARMCC::LS: // C Z
3253 case ARMCC::GE: // N V
3254 case ARMCC::LT: // N V
3255 case ARMCC::GT: // Z N V
3256 case ARMCC::LE: // Z N V
3257 // The instruction uses the V bit or C bit which is not safe.
3258 return false;
3259 }
3260 }
3261 }
3262 }
3263
3264 // If CPSR is not killed nor re-defined, we should check whether it is
3265 // live-out. If it is live-out, do not optimize.
3266 if (!isSafe) {
3267 MachineBasicBlock *MBB = CmpInstr.getParent();
3268 for (MachineBasicBlock *Succ : MBB->successors())
3269 if (Succ->isLiveIn(ARM::CPSR))
3270 return false;
3271 }
3272
3273 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3274 // set CPSR so this is represented as an explicit output)
3275 if (!IsThumb1) {
3276 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3277 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3278 MI->getOperand(CPSRRegNum).setIsDef(true);
3279 }
3280 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3281 CmpInstr.eraseFromParent();
3282
3283 // Modify the condition code of operands in OperandsToUpdate.
3284 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3285 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3286 for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
3287 OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
3288
3289 MI->clearRegisterDeads(ARM::CPSR);
3290
3291 return true;
3292 }
3293
bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3295 // Do not sink MI if it might be used to optimize a redundant compare.
3296 // We heuristically only look at the instruction immediately following MI to
3297 // avoid potentially searching the entire basic block.
3298 if (isPredicated(MI))
3299 return true;
3300 MachineBasicBlock::const_iterator Next = &MI;
3301 ++Next;
3302 Register SrcReg, SrcReg2;
3303 int64_t CmpMask, CmpValue;
3304 bool IsThumb1;
3305 if (Next != MI.getParent()->end() &&
3306 analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3307 isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3308 return false;
3309 return true;
3310 }
3311
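/// Try to fold the 32-bit constant materialized by DefMI (a MOVi32imm-style
/// pseudo) into its single use. The constant is split into two shifter-operand
/// immediates: a new instruction is emitted for the first half and UseMI is
/// rewritten into the corresponding immediate form for the second half, after
/// which DefMI is erased.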
bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3313 Register Reg,
3314 MachineRegisterInfo *MRI) const {
3315 // Fold large immediates into add, sub, or, xor.
3316 unsigned DefOpc = DefMI.getOpcode();
3317 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
3318 DefOpc != ARM::tMOVi32imm)
3319 return false;
3320 if (!DefMI.getOperand(1).isImm())
3321 // Could be t2MOVi32imm @xx
3322 return false;
3323
3324 if (!MRI->hasOneNonDBGUse(Reg))
3325 return false;
3326
3327 const MCInstrDesc &DefMCID = DefMI.getDesc();
3328 if (DefMCID.hasOptionalDef()) {
3329 unsigned NumOps = DefMCID.getNumOperands();
3330 const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3331 if (MO.getReg() == ARM::CPSR && !MO.isDead())
3332 // If DefMI defines CPSR and it is not dead, it's obviously not safe
3333 // to delete DefMI.
3334 return false;
3335 }
3336
3337 const MCInstrDesc &UseMCID = UseMI.getDesc();
3338 if (UseMCID.hasOptionalDef()) {
3339 unsigned NumOps = UseMCID.getNumOperands();
3340 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3341 // If the instruction sets the flag, do not attempt this optimization
3342 // since it may change the semantics of the code.
3343 return false;
3344 }
3345
3346 unsigned UseOpc = UseMI.getOpcode();
3347 unsigned NewUseOpc = 0;
3348 uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3349 uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3350 bool Commute = false;
3351 switch (UseOpc) {
3352 default: return false;
3353 case ARM::SUBrr:
3354 case ARM::ADDrr:
3355 case ARM::ORRrr:
3356 case ARM::EORrr:
3357 case ARM::t2SUBrr:
3358 case ARM::t2ADDrr:
3359 case ARM::t2ORRrr:
3360 case ARM::t2EORrr: {
3361 Commute = UseMI.getOperand(2).getReg() != Reg;
3362 switch (UseOpc) {
3363 default: break;
3364 case ARM::ADDrr:
3365 case ARM::SUBrr:
3366 if (UseOpc == ARM::SUBrr && Commute)
3367 return false;
3368
3369 // ADD/SUB are special because they're essentially the same operation, so
3370 // we can handle a larger range of immediates.
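      // For example, a constant such as 0x00ff00ff has no single
      // shifter-operand encoding, but it is covered by the pair 0x00ff0000 and
      // 0x000000ff, so an ADDrr of that constant becomes two ADDri
      // instructions.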
3371 if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3372 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3373 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3374 ImmVal = -ImmVal;
3375 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3376 } else
3377 return false;
3378 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3379 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3380 break;
3381 case ARM::ORRrr:
3382 case ARM::EORrr:
3383 if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3384 return false;
3385 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3386 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3387 switch (UseOpc) {
3388 default: break;
3389 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3390 case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3391 }
3392 break;
3393 case ARM::t2ADDrr:
3394 case ARM::t2SUBrr: {
3395 if (UseOpc == ARM::t2SUBrr && Commute)
3396 return false;
3397
3398 // ADD/SUB are special because they're essentially the same operation, so
3399 // we can handle a larger range of immediates.
3400 const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3401 const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3402 const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3403 if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3404 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3405 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3406 ImmVal = -ImmVal;
3407 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3408 } else
3409 return false;
3410 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3411 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3412 break;
3413 }
3414 case ARM::t2ORRrr:
3415 case ARM::t2EORrr:
3416 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3417 return false;
3418 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3419 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3420 switch (UseOpc) {
3421 default: break;
3422 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3423 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3424 }
3425 break;
3426 }
3427 }
3428 }
3429
3430 unsigned OpIdx = Commute ? 2 : 1;
3431 Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3432 bool isKill = UseMI.getOperand(OpIdx).isKill();
3433 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3434 Register NewReg = MRI->createVirtualRegister(TRC);
3435 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3436 NewReg)
3437 .addReg(Reg1, getKillRegState(isKill))
3438 .addImm(SOImmValV1)
3439 .add(predOps(ARMCC::AL))
3440 .add(condCodeOp());
3441 UseMI.setDesc(get(NewUseOpc));
3442 UseMI.getOperand(1).setReg(NewReg);
3443 UseMI.getOperand(1).setIsKill();
3444 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3445 DefMI.eraseFromParent();
  // FIXME: t2ADDrr should be split, as different rules apply when writing to SP,
  // just as t2ADDri was split into [t2ADDri, t2ADDspImm]. The code below would
  // then not be needed, as the input/output register classes would be rgpr or
  // gprSP. For now, we fix the UseMI operand explicitly here:
3451 switch(NewUseOpc){
3452 case ARM::t2ADDspImm:
3453 case ARM::t2SUBspImm:
3454 case ARM::t2ADDri:
3455 case ARM::t2SUBri:
3456 MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3457 }
3458 return true;
3459 }
3460
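// Swift-specific micro-op counts for loads and stores. Addressing modes that
// need an extra shift, a negative offset, or base-register writeback decode
// into more micro-ops than the plain itinerary value for the scheduling class.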
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3462 const MachineInstr &MI) {
3463 switch (MI.getOpcode()) {
3464 default: {
3465 const MCInstrDesc &Desc = MI.getDesc();
3466 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3467 assert(UOps >= 0 && "bad # UOps");
3468 return UOps;
3469 }
3470
3471 case ARM::LDRrs:
3472 case ARM::LDRBrs:
3473 case ARM::STRrs:
3474 case ARM::STRBrs: {
3475 unsigned ShOpVal = MI.getOperand(3).getImm();
3476 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3477 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3478 if (!isSub &&
3479 (ShImm == 0 ||
3480 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3481 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3482 return 1;
3483 return 2;
3484 }
3485
3486 case ARM::LDRH:
3487 case ARM::STRH: {
3488 if (!MI.getOperand(2).getReg())
3489 return 1;
3490
3491 unsigned ShOpVal = MI.getOperand(3).getImm();
3492 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3493 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3494 if (!isSub &&
3495 (ShImm == 0 ||
3496 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3497 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3498 return 1;
3499 return 2;
3500 }
3501
3502 case ARM::LDRSB:
3503 case ARM::LDRSH:
3504 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3505
3506 case ARM::LDRSB_POST:
3507 case ARM::LDRSH_POST: {
3508 Register Rt = MI.getOperand(0).getReg();
3509 Register Rm = MI.getOperand(3).getReg();
3510 return (Rt == Rm) ? 4 : 3;
3511 }
3512
3513 case ARM::LDR_PRE_REG:
3514 case ARM::LDRB_PRE_REG: {
3515 Register Rt = MI.getOperand(0).getReg();
3516 Register Rm = MI.getOperand(3).getReg();
3517 if (Rt == Rm)
3518 return 3;
3519 unsigned ShOpVal = MI.getOperand(4).getImm();
3520 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3521 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3522 if (!isSub &&
3523 (ShImm == 0 ||
3524 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3525 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3526 return 2;
3527 return 3;
3528 }
3529
3530 case ARM::STR_PRE_REG:
3531 case ARM::STRB_PRE_REG: {
3532 unsigned ShOpVal = MI.getOperand(4).getImm();
3533 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3534 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3535 if (!isSub &&
3536 (ShImm == 0 ||
3537 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3538 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3539 return 2;
3540 return 3;
3541 }
3542
3543 case ARM::LDRH_PRE:
3544 case ARM::STRH_PRE: {
3545 Register Rt = MI.getOperand(0).getReg();
3546 Register Rm = MI.getOperand(3).getReg();
3547 if (!Rm)
3548 return 2;
3549 if (Rt == Rm)
3550 return 3;
3551 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3552 }
3553
3554 case ARM::LDR_POST_REG:
3555 case ARM::LDRB_POST_REG:
3556 case ARM::LDRH_POST: {
3557 Register Rt = MI.getOperand(0).getReg();
3558 Register Rm = MI.getOperand(3).getReg();
3559 return (Rt == Rm) ? 3 : 2;
3560 }
3561
3562 case ARM::LDR_PRE_IMM:
3563 case ARM::LDRB_PRE_IMM:
3564 case ARM::LDR_POST_IMM:
3565 case ARM::LDRB_POST_IMM:
3566 case ARM::STRB_POST_IMM:
3567 case ARM::STRB_POST_REG:
3568 case ARM::STRB_PRE_IMM:
3569 case ARM::STRH_POST:
3570 case ARM::STR_POST_IMM:
3571 case ARM::STR_POST_REG:
3572 case ARM::STR_PRE_IMM:
3573 return 2;
3574
3575 case ARM::LDRSB_PRE:
3576 case ARM::LDRSH_PRE: {
3577 Register Rm = MI.getOperand(3).getReg();
3578 if (Rm == 0)
3579 return 3;
3580 Register Rt = MI.getOperand(0).getReg();
3581 if (Rt == Rm)
3582 return 4;
3583 unsigned ShOpVal = MI.getOperand(4).getImm();
3584 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3585 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3586 if (!isSub &&
3587 (ShImm == 0 ||
3588 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3589 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3590 return 3;
3591 return 4;
3592 }
3593
3594 case ARM::LDRD: {
3595 Register Rt = MI.getOperand(0).getReg();
3596 Register Rn = MI.getOperand(2).getReg();
3597 Register Rm = MI.getOperand(3).getReg();
3598 if (Rm)
3599 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3600 : 3;
3601 return (Rt == Rn) ? 3 : 2;
3602 }
3603
3604 case ARM::STRD: {
3605 Register Rm = MI.getOperand(3).getReg();
3606 if (Rm)
3607 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3608 : 3;
3609 return 2;
3610 }
3611
3612 case ARM::LDRD_POST:
3613 case ARM::t2LDRD_POST:
3614 return 3;
3615
3616 case ARM::STRD_POST:
3617 case ARM::t2STRD_POST:
3618 return 4;
3619
3620 case ARM::LDRD_PRE: {
3621 Register Rt = MI.getOperand(0).getReg();
3622 Register Rn = MI.getOperand(3).getReg();
3623 Register Rm = MI.getOperand(4).getReg();
3624 if (Rm)
3625 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3626 : 4;
3627 return (Rt == Rn) ? 4 : 3;
3628 }
3629
3630 case ARM::t2LDRD_PRE: {
3631 Register Rt = MI.getOperand(0).getReg();
3632 Register Rn = MI.getOperand(3).getReg();
3633 return (Rt == Rn) ? 4 : 3;
3634 }
3635
3636 case ARM::STRD_PRE: {
3637 Register Rm = MI.getOperand(4).getReg();
3638 if (Rm)
3639 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3640 : 4;
3641 return 3;
3642 }
3643
3644 case ARM::t2STRD_PRE:
3645 return 3;
3646
3647 case ARM::t2LDR_POST:
3648 case ARM::t2LDRB_POST:
3649 case ARM::t2LDRB_PRE:
3650 case ARM::t2LDRSBi12:
3651 case ARM::t2LDRSBi8:
3652 case ARM::t2LDRSBpci:
3653 case ARM::t2LDRSBs:
3654 case ARM::t2LDRH_POST:
3655 case ARM::t2LDRH_PRE:
3656 case ARM::t2LDRSBT:
3657 case ARM::t2LDRSB_POST:
3658 case ARM::t2LDRSB_PRE:
3659 case ARM::t2LDRSH_POST:
3660 case ARM::t2LDRSH_PRE:
3661 case ARM::t2LDRSHi12:
3662 case ARM::t2LDRSHi8:
3663 case ARM::t2LDRSHpci:
3664 case ARM::t2LDRSHs:
3665 return 2;
3666
3667 case ARM::t2LDRDi8: {
3668 Register Rt = MI.getOperand(0).getReg();
3669 Register Rn = MI.getOperand(2).getReg();
3670 return (Rt == Rn) ? 3 : 2;
3671 }
3672
3673 case ARM::t2STRB_POST:
3674 case ARM::t2STRB_PRE:
3675 case ARM::t2STRBs:
3676 case ARM::t2STRDi8:
3677 case ARM::t2STRH_POST:
3678 case ARM::t2STRH_PRE:
3679 case ARM::t2STRHs:
3680 case ARM::t2STR_POST:
3681 case ARM::t2STR_PRE:
3682 case ARM::t2STRs:
3683 return 2;
3684 }
3685 }
3686
3687 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
// can't be easily determined, return 0 (missing MachineMemOperand).
3689 //
3690 // FIXME: The current MachineInstr design does not support relying on machine
3691 // mem operands to determine the width of a memory access. Instead, we expect
3692 // the target to provide this information based on the instruction opcode and
3693 // operands. However, using MachineMemOperand is the best solution now for
3694 // two reasons:
3695 //
3696 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3697 // operands. This is much more dangerous than using the MachineMemOperand
3698 // sizes because CodeGen passes can insert/remove optional machine operands. In
3699 // fact, it's totally incorrect for preRA passes and appears to be wrong for
3700 // postRA passes as well.
3701 //
3702 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3703 // machine model that calls this should handle the unknown (zero size) case.
3704 //
3705 // Long term, we should require a target hook that verifies MachineMemOperand
3706 // sizes during MC lowering. That target hook should be local to MC lowering
3707 // because we can't ensure that it is aware of other MI forms. Doing this will
3708 // ensure that MachineMemOperands are correctly propagated through all passes.
unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3710 unsigned Size = 0;
3711 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3712 E = MI.memoperands_end();
3713 I != E; ++I) {
3714 Size += (*I)->getSize().getValue();
3715 }
3716 // FIXME: The scheduler currently can't handle values larger than 16. But
3717 // the values can actually go up to 32 for floating-point load/store
3718 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3719 // operations isn't right; we could end up with "extra" memory operands for
3720 // various reasons, like tail merge merging two memory operations.
3721 return std::min(Size / 4, 16U);
3722 }
3723
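// Micro-op count for a load/store multiple on cores that issue one register
// per cycle: one micro-op for the address computation plus one per register,
// with extra micro-ops for base-register writeback and for writing the PC on
// the _RET forms.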
static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3725 unsigned NumRegs) {
3726 unsigned UOps = 1 + NumRegs; // 1 for address computation.
3727 switch (Opc) {
3728 default:
3729 break;
3730 case ARM::VLDMDIA_UPD:
3731 case ARM::VLDMDDB_UPD:
3732 case ARM::VLDMSIA_UPD:
3733 case ARM::VLDMSDB_UPD:
3734 case ARM::VSTMDIA_UPD:
3735 case ARM::VSTMDDB_UPD:
3736 case ARM::VSTMSIA_UPD:
3737 case ARM::VSTMSDB_UPD:
3738 case ARM::LDMIA_UPD:
3739 case ARM::LDMDA_UPD:
3740 case ARM::LDMDB_UPD:
3741 case ARM::LDMIB_UPD:
3742 case ARM::STMIA_UPD:
3743 case ARM::STMDA_UPD:
3744 case ARM::STMDB_UPD:
3745 case ARM::STMIB_UPD:
3746 case ARM::tLDMIA_UPD:
3747 case ARM::tSTMIA_UPD:
3748 case ARM::t2LDMIA_UPD:
3749 case ARM::t2LDMDB_UPD:
3750 case ARM::t2STMIA_UPD:
3751 case ARM::t2STMDB_UPD:
3752 ++UOps; // One for base register writeback.
3753 break;
3754 case ARM::LDMIA_RET:
3755 case ARM::tPOP_RET:
3756 case ARM::t2LDMIA_RET:
3757 UOps += 2; // One for base reg wb, one for write to pc.
3758 break;
3759 }
3760 return UOps;
3761 }
3762
unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3764 const MachineInstr &MI) const {
3765 if (!ItinData || ItinData->isEmpty())
3766 return 1;
3767
3768 const MCInstrDesc &Desc = MI.getDesc();
3769 unsigned Class = Desc.getSchedClass();
3770 int ItinUOps = ItinData->getNumMicroOps(Class);
3771 if (ItinUOps >= 0) {
3772 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3773 return getNumMicroOpsSwiftLdSt(ItinData, MI);
3774
3775 return ItinUOps;
3776 }
3777
3778 unsigned Opc = MI.getOpcode();
3779 switch (Opc) {
3780 default:
3781 llvm_unreachable("Unexpected multi-uops instruction!");
3782 case ARM::VLDMQIA:
3783 case ARM::VSTMQIA:
3784 return 2;
3785
  // The number of uOps for load / store multiple is determined by the number
  // of registers.
3788 //
3789 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3790 // same cycle. The scheduling for the first load / store must be done
3791 // separately by assuming the address is not 64-bit aligned.
3792 //
3793 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3794 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3795 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3796 case ARM::VLDMDIA:
3797 case ARM::VLDMDIA_UPD:
3798 case ARM::VLDMDDB_UPD:
3799 case ARM::VLDMSIA:
3800 case ARM::VLDMSIA_UPD:
3801 case ARM::VLDMSDB_UPD:
3802 case ARM::VSTMDIA:
3803 case ARM::VSTMDIA_UPD:
3804 case ARM::VSTMDDB_UPD:
3805 case ARM::VSTMSIA:
3806 case ARM::VSTMSIA_UPD:
3807 case ARM::VSTMSDB_UPD: {
3808 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3809 return (NumRegs / 2) + (NumRegs % 2) + 1;
3810 }
3811
3812 case ARM::LDMIA_RET:
3813 case ARM::LDMIA:
3814 case ARM::LDMDA:
3815 case ARM::LDMDB:
3816 case ARM::LDMIB:
3817 case ARM::LDMIA_UPD:
3818 case ARM::LDMDA_UPD:
3819 case ARM::LDMDB_UPD:
3820 case ARM::LDMIB_UPD:
3821 case ARM::STMIA:
3822 case ARM::STMDA:
3823 case ARM::STMDB:
3824 case ARM::STMIB:
3825 case ARM::STMIA_UPD:
3826 case ARM::STMDA_UPD:
3827 case ARM::STMDB_UPD:
3828 case ARM::STMIB_UPD:
3829 case ARM::tLDMIA:
3830 case ARM::tLDMIA_UPD:
3831 case ARM::tSTMIA_UPD:
3832 case ARM::tPOP_RET:
3833 case ARM::tPOP:
3834 case ARM::tPUSH:
3835 case ARM::t2LDMIA_RET:
3836 case ARM::t2LDMIA:
3837 case ARM::t2LDMDB:
3838 case ARM::t2LDMIA_UPD:
3839 case ARM::t2LDMDB_UPD:
3840 case ARM::t2STMIA:
3841 case ARM::t2STMDB:
3842 case ARM::t2STMIA_UPD:
3843 case ARM::t2STMDB_UPD: {
3844 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3845 switch (Subtarget.getLdStMultipleTiming()) {
3846 case ARMSubtarget::SingleIssuePlusExtras:
3847 return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3848 case ARMSubtarget::SingleIssue:
3849 // Assume the worst.
3850 return NumRegs;
3851 case ARMSubtarget::DoubleIssue: {
3852 if (NumRegs < 4)
3853 return 2;
3854 // 4 registers would be issued: 2, 2.
3855 // 5 registers would be issued: 2, 2, 1.
3856 unsigned UOps = (NumRegs / 2);
3857 if (NumRegs % 2)
3858 ++UOps;
3859 return UOps;
3860 }
3861 case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3862 unsigned UOps = (NumRegs / 2);
      // If there is an odd number of registers or if it's not 64-bit aligned,
      // then it takes an extra AGU (Address Generation Unit) cycle.
3865 if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3866 (*MI.memoperands_begin())->getAlign() < Align(8))
3867 ++UOps;
3868 return UOps;
3869 }
3870 }
3871 }
3872 }
3873 llvm_unreachable("Didn't find the number of microops");
3874 }
3875
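/// Return the cycle in which operand DefIdx of a VLDM-style load becomes
/// available, based on the register's position in the register list and the
/// subtarget's load-multiple timing. Defs that correspond to the address
/// writeback fall back to the itinerary.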
3876 std::optional<unsigned>
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3878 const MCInstrDesc &DefMCID, unsigned DefClass,
3879 unsigned DefIdx, unsigned DefAlign) const {
3880 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3881 if (RegNo <= 0)
3882 // Def is the address writeback.
3883 return ItinData->getOperandCycle(DefClass, DefIdx);
3884
3885 unsigned DefCycle;
3886 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3887 // (regno / 2) + (regno % 2) + 1
3888 DefCycle = RegNo / 2 + 1;
3889 if (RegNo % 2)
3890 ++DefCycle;
3891 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3892 DefCycle = RegNo;
3893 bool isSLoad = false;
3894
3895 switch (DefMCID.getOpcode()) {
3896 default: break;
3897 case ARM::VLDMSIA:
3898 case ARM::VLDMSIA_UPD:
3899 case ARM::VLDMSDB_UPD:
3900 isSLoad = true;
3901 break;
3902 }
3903
    // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3905 // then it takes an extra cycle.
3906 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3907 ++DefCycle;
3908 } else {
3909 // Assume the worst.
3910 DefCycle = RegNo + 2;
3911 }
3912
3913 return DefCycle;
3914 }
3915
3916 std::optional<unsigned>
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3918 const MCInstrDesc &DefMCID, unsigned DefClass,
3919 unsigned DefIdx, unsigned DefAlign) const {
3920 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3921 if (RegNo <= 0)
3922 // Def is the address writeback.
3923 return ItinData->getOperandCycle(DefClass, DefIdx);
3924
3925 unsigned DefCycle;
3926 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3927 // 4 registers would be issued: 1, 2, 1.
3928 // 5 registers would be issued: 1, 2, 2.
3929 DefCycle = RegNo / 2;
3930 if (DefCycle < 1)
3931 DefCycle = 1;
3932 // Result latency is issue cycle + 2: E2.
3933 DefCycle += 2;
3934 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3935 DefCycle = (RegNo / 2);
    // If there is an odd number of registers or if it's not 64-bit aligned,
3937 // then it takes an extra AGU (Address Generation Unit) cycle.
3938 if ((RegNo % 2) || DefAlign < 8)
3939 ++DefCycle;
3940 // Result latency is AGU cycles + 2.
3941 DefCycle += 2;
3942 } else {
3943 // Assume the worst.
3944 DefCycle = RegNo + 2;
3945 }
3946
3947 return DefCycle;
3948 }
3949
3950 std::optional<unsigned>
ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3952 const MCInstrDesc &UseMCID, unsigned UseClass,
3953 unsigned UseIdx, unsigned UseAlign) const {
3954 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3955 if (RegNo <= 0)
3956 return ItinData->getOperandCycle(UseClass, UseIdx);
3957
3958 unsigned UseCycle;
3959 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3960 // (regno / 2) + (regno % 2) + 1
3961 UseCycle = RegNo / 2 + 1;
3962 if (RegNo % 2)
3963 ++UseCycle;
3964 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3965 UseCycle = RegNo;
3966 bool isSStore = false;
3967
3968 switch (UseMCID.getOpcode()) {
3969 default: break;
3970 case ARM::VSTMSIA:
3971 case ARM::VSTMSIA_UPD:
3972 case ARM::VSTMSDB_UPD:
3973 isSStore = true;
3974 break;
3975 }
3976
    // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3978 // then it takes an extra cycle.
3979 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3980 ++UseCycle;
3981 } else {
3982 // Assume the worst.
3983 UseCycle = RegNo + 2;
3984 }
3985
3986 return UseCycle;
3987 }
3988
3989 std::optional<unsigned>
ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3991 const MCInstrDesc &UseMCID, unsigned UseClass,
3992 unsigned UseIdx, unsigned UseAlign) const {
3993 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3994 if (RegNo <= 0)
3995 return ItinData->getOperandCycle(UseClass, UseIdx);
3996
3997 unsigned UseCycle;
3998 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3999 UseCycle = RegNo / 2;
4000 if (UseCycle < 2)
4001 UseCycle = 2;
4002 // Read in E3.
4003 UseCycle += 2;
4004 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
4005 UseCycle = (RegNo / 2);
    // If there is an odd number of registers or if it's not 64-bit aligned,
4007 // then it takes an extra AGU (Address Generation Unit) cycle.
4008 if ((RegNo % 2) || UseAlign < 8)
4009 ++UseCycle;
4010 } else {
4011 // Assume the worst.
4012 UseCycle = 1;
4013 }
4014 return UseCycle;
4015 }
4016
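// Operand latency computed from MCInstrDescs alone. This path handles
// variable_ops load/store multiple instructions, where the def/use cycle has
// to be derived from the operand's position in the register list rather than
// read directly from the itinerary.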
std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4018 const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
4019 unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
4020 unsigned UseIdx, unsigned UseAlign) const {
4021 unsigned DefClass = DefMCID.getSchedClass();
4022 unsigned UseClass = UseMCID.getSchedClass();
4023
4024 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
4025 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
4026
4027 // This may be a def / use of a variable_ops instruction, the operand
4028 // latency might be determinable dynamically. Let the target try to
4029 // figure it out.
4030 std::optional<unsigned> DefCycle;
4031 bool LdmBypass = false;
4032 switch (DefMCID.getOpcode()) {
4033 default:
4034 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4035 break;
4036
4037 case ARM::VLDMDIA:
4038 case ARM::VLDMDIA_UPD:
4039 case ARM::VLDMDDB_UPD:
4040 case ARM::VLDMSIA:
4041 case ARM::VLDMSIA_UPD:
4042 case ARM::VLDMSDB_UPD:
4043 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4044 break;
4045
4046 case ARM::LDMIA_RET:
4047 case ARM::LDMIA:
4048 case ARM::LDMDA:
4049 case ARM::LDMDB:
4050 case ARM::LDMIB:
4051 case ARM::LDMIA_UPD:
4052 case ARM::LDMDA_UPD:
4053 case ARM::LDMDB_UPD:
4054 case ARM::LDMIB_UPD:
4055 case ARM::tLDMIA:
4056 case ARM::tLDMIA_UPD:
4057 case ARM::tPUSH:
4058 case ARM::t2LDMIA_RET:
4059 case ARM::t2LDMIA:
4060 case ARM::t2LDMDB:
4061 case ARM::t2LDMIA_UPD:
4062 case ARM::t2LDMDB_UPD:
4063 LdmBypass = true;
4064 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
4065 break;
4066 }
4067
4068 if (!DefCycle)
4069 // We can't seem to determine the result latency of the def, assume it's 2.
4070 DefCycle = 2;
4071
4072 std::optional<unsigned> UseCycle;
4073 switch (UseMCID.getOpcode()) {
4074 default:
4075 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
4076 break;
4077
4078 case ARM::VSTMDIA:
4079 case ARM::VSTMDIA_UPD:
4080 case ARM::VSTMDDB_UPD:
4081 case ARM::VSTMSIA:
4082 case ARM::VSTMSIA_UPD:
4083 case ARM::VSTMSDB_UPD:
4084 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4085 break;
4086
4087 case ARM::STMIA:
4088 case ARM::STMDA:
4089 case ARM::STMDB:
4090 case ARM::STMIB:
4091 case ARM::STMIA_UPD:
4092 case ARM::STMDA_UPD:
4093 case ARM::STMDB_UPD:
4094 case ARM::STMIB_UPD:
4095 case ARM::tSTMIA_UPD:
4096 case ARM::tPOP_RET:
4097 case ARM::tPOP:
4098 case ARM::t2STMIA:
4099 case ARM::t2STMDB:
4100 case ARM::t2STMIA_UPD:
4101 case ARM::t2STMDB_UPD:
4102 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
4103 break;
4104 }
4105
4106 if (!UseCycle)
4107 // Assume it's read in the first stage.
4108 UseCycle = 1;
4109
4110 if (UseCycle > *DefCycle + 1)
4111 return std::nullopt;
4112
4113 UseCycle = *DefCycle - *UseCycle + 1;
4114 if (UseCycle > 0u) {
4115 if (LdmBypass) {
4116 // It's a variable_ops instruction so we can't use DefIdx here. Just use
4117 // first def operand.
4118 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
4119 UseClass, UseIdx))
4120 UseCycle = *UseCycle - 1;
4121 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
4122 UseClass, UseIdx)) {
4123 UseCycle = *UseCycle - 1;
4124 }
4125 }
4126
4127 return UseCycle;
4128 }
4129
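// Walk backwards from the last instruction of the bundle headed by MI to find
// the bundled instruction that defines Reg. DefIdx receives the operand index
// of that def and Dist the number of bundled instructions skipped on the way.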
static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
4131 const MachineInstr *MI, unsigned Reg,
4132 unsigned &DefIdx, unsigned &Dist) {
4133 Dist = 0;
4134
4135 MachineBasicBlock::const_iterator I = MI; ++I;
4136 MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
4137 assert(II->isInsideBundle() && "Empty bundle?");
4138
4139 int Idx = -1;
4140 while (II->isInsideBundle()) {
4141 Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
4142 if (Idx != -1)
4143 break;
4144 --II;
4145 ++Dist;
4146 }
4147
4148 assert(Idx != -1 && "Cannot find bundled definition!");
4149 DefIdx = Idx;
4150 return &*II;
4151 }
4152
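// Walk forwards through the bundle headed by MI to find the first bundled
// instruction that uses Reg, setting UseIdx to the operand index of that use.
// t2IT instructions do not count towards Dist. Returns nullptr if the bundle
// contains no use of Reg.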
static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
4154 const MachineInstr &MI, unsigned Reg,
4155 unsigned &UseIdx, unsigned &Dist) {
4156 Dist = 0;
4157
4158 MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
4159 assert(II->isInsideBundle() && "Empty bundle?");
4160 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4161
4162 // FIXME: This doesn't properly handle multiple uses.
4163 int Idx = -1;
4164 while (II != E && II->isInsideBundle()) {
4165 Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
4166 if (Idx != -1)
4167 break;
4168 if (II->getOpcode() != ARM::t2IT)
4169 ++Dist;
4170 ++II;
4171 }
4172
4173 if (Idx == -1) {
4174 Dist = 0;
4175 return nullptr;
4176 }
4177
4178 UseIdx = Idx;
4179 return &*II;
4180 }
4181
4182 /// Return the number of cycles to add to (or subtract from) the static
4183 /// itinerary based on the def opcode and alignment. The caller will ensure that
4184 /// adjusted latency is at least one cycle.
static int adjustDefLatency(const ARMSubtarget &Subtarget,
4186 const MachineInstr &DefMI,
4187 const MCInstrDesc &DefMCID, unsigned DefAlign) {
4188 int Adjust = 0;
4189 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4190 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4191 // variants are one cycle cheaper.
4192 switch (DefMCID.getOpcode()) {
4193 default: break;
4194 case ARM::LDRrs:
4195 case ARM::LDRBrs: {
4196 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4197 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4198 if (ShImm == 0 ||
4199 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4200 --Adjust;
4201 break;
4202 }
4203 case ARM::t2LDRs:
4204 case ARM::t2LDRBs:
4205 case ARM::t2LDRHs:
4206 case ARM::t2LDRSHs: {
4207 // Thumb2 mode: lsl only.
4208 unsigned ShAmt = DefMI.getOperand(3).getImm();
4209 if (ShAmt == 0 || ShAmt == 2)
4210 --Adjust;
4211 break;
4212 }
4213 }
4214 } else if (Subtarget.isSwift()) {
4215 // FIXME: Properly handle all of the latency adjustments for address
4216 // writeback.
4217 switch (DefMCID.getOpcode()) {
4218 default: break;
4219 case ARM::LDRrs:
4220 case ARM::LDRBrs: {
4221 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4222 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4223 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4224 if (!isSub &&
4225 (ShImm == 0 ||
4226 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4227 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4228 Adjust -= 2;
4229 else if (!isSub &&
4230 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4231 --Adjust;
4232 break;
4233 }
4234 case ARM::t2LDRs:
4235 case ARM::t2LDRBs:
4236 case ARM::t2LDRHs:
4237 case ARM::t2LDRSHs: {
4238 // Thumb2 mode: lsl only.
4239 unsigned ShAmt = DefMI.getOperand(3).getImm();
4240 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4241 Adjust -= 2;
4242 break;
4243 }
4244 }
4245 }
4246
4247 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4248 switch (DefMCID.getOpcode()) {
4249 default: break;
4250 case ARM::VLD1q8:
4251 case ARM::VLD1q16:
4252 case ARM::VLD1q32:
4253 case ARM::VLD1q64:
4254 case ARM::VLD1q8wb_fixed:
4255 case ARM::VLD1q16wb_fixed:
4256 case ARM::VLD1q32wb_fixed:
4257 case ARM::VLD1q64wb_fixed:
4258 case ARM::VLD1q8wb_register:
4259 case ARM::VLD1q16wb_register:
4260 case ARM::VLD1q32wb_register:
4261 case ARM::VLD1q64wb_register:
4262 case ARM::VLD2d8:
4263 case ARM::VLD2d16:
4264 case ARM::VLD2d32:
4265 case ARM::VLD2q8:
4266 case ARM::VLD2q16:
4267 case ARM::VLD2q32:
4268 case ARM::VLD2d8wb_fixed:
4269 case ARM::VLD2d16wb_fixed:
4270 case ARM::VLD2d32wb_fixed:
4271 case ARM::VLD2q8wb_fixed:
4272 case ARM::VLD2q16wb_fixed:
4273 case ARM::VLD2q32wb_fixed:
4274 case ARM::VLD2d8wb_register:
4275 case ARM::VLD2d16wb_register:
4276 case ARM::VLD2d32wb_register:
4277 case ARM::VLD2q8wb_register:
4278 case ARM::VLD2q16wb_register:
4279 case ARM::VLD2q32wb_register:
4280 case ARM::VLD3d8:
4281 case ARM::VLD3d16:
4282 case ARM::VLD3d32:
4283 case ARM::VLD1d64T:
4284 case ARM::VLD3d8_UPD:
4285 case ARM::VLD3d16_UPD:
4286 case ARM::VLD3d32_UPD:
4287 case ARM::VLD1d64Twb_fixed:
4288 case ARM::VLD1d64Twb_register:
4289 case ARM::VLD3q8_UPD:
4290 case ARM::VLD3q16_UPD:
4291 case ARM::VLD3q32_UPD:
4292 case ARM::VLD4d8:
4293 case ARM::VLD4d16:
4294 case ARM::VLD4d32:
4295 case ARM::VLD1d64Q:
4296 case ARM::VLD4d8_UPD:
4297 case ARM::VLD4d16_UPD:
4298 case ARM::VLD4d32_UPD:
4299 case ARM::VLD1d64Qwb_fixed:
4300 case ARM::VLD1d64Qwb_register:
4301 case ARM::VLD4q8_UPD:
4302 case ARM::VLD4q16_UPD:
4303 case ARM::VLD4q32_UPD:
4304 case ARM::VLD1DUPq8:
4305 case ARM::VLD1DUPq16:
4306 case ARM::VLD1DUPq32:
4307 case ARM::VLD1DUPq8wb_fixed:
4308 case ARM::VLD1DUPq16wb_fixed:
4309 case ARM::VLD1DUPq32wb_fixed:
4310 case ARM::VLD1DUPq8wb_register:
4311 case ARM::VLD1DUPq16wb_register:
4312 case ARM::VLD1DUPq32wb_register:
4313 case ARM::VLD2DUPd8:
4314 case ARM::VLD2DUPd16:
4315 case ARM::VLD2DUPd32:
4316 case ARM::VLD2DUPd8wb_fixed:
4317 case ARM::VLD2DUPd16wb_fixed:
4318 case ARM::VLD2DUPd32wb_fixed:
4319 case ARM::VLD2DUPd8wb_register:
4320 case ARM::VLD2DUPd16wb_register:
4321 case ARM::VLD2DUPd32wb_register:
4322 case ARM::VLD4DUPd8:
4323 case ARM::VLD4DUPd16:
4324 case ARM::VLD4DUPd32:
4325 case ARM::VLD4DUPd8_UPD:
4326 case ARM::VLD4DUPd16_UPD:
4327 case ARM::VLD4DUPd32_UPD:
4328 case ARM::VLD1LNd8:
4329 case ARM::VLD1LNd16:
4330 case ARM::VLD1LNd32:
4331 case ARM::VLD1LNd8_UPD:
4332 case ARM::VLD1LNd16_UPD:
4333 case ARM::VLD1LNd32_UPD:
4334 case ARM::VLD2LNd8:
4335 case ARM::VLD2LNd16:
4336 case ARM::VLD2LNd32:
4337 case ARM::VLD2LNq16:
4338 case ARM::VLD2LNq32:
4339 case ARM::VLD2LNd8_UPD:
4340 case ARM::VLD2LNd16_UPD:
4341 case ARM::VLD2LNd32_UPD:
4342 case ARM::VLD2LNq16_UPD:
4343 case ARM::VLD2LNq32_UPD:
4344 case ARM::VLD4LNd8:
4345 case ARM::VLD4LNd16:
4346 case ARM::VLD4LNd32:
4347 case ARM::VLD4LNq16:
4348 case ARM::VLD4LNq32:
4349 case ARM::VLD4LNd8_UPD:
4350 case ARM::VLD4LNd16_UPD:
4351 case ARM::VLD4LNd32_UPD:
4352 case ARM::VLD4LNq16_UPD:
4353 case ARM::VLD4LNq32_UPD:
      // If the address is not 64-bit aligned, the latencies of these
      // instructions increase by one.
      ++Adjust;
4357 break;
4358 }
4359 }
4360 return Adjust;
4361 }
4362
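// Operand latency between two MachineInstrs. Bundles are first resolved to the
// bundled instruction that actually defines or uses Reg, and copy-like,
// subreg-insert, reg-sequence and implicit defs are given a latency of 1.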
std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4364 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4365 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4366 // No operand latency. The caller may fall back to getInstrLatency.
4367 if (!ItinData || ItinData->isEmpty())
4368 return std::nullopt;
4369
4370 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4371 Register Reg = DefMO.getReg();
4372
4373 const MachineInstr *ResolvedDefMI = &DefMI;
4374 unsigned DefAdj = 0;
4375 if (DefMI.isBundle())
4376 ResolvedDefMI =
4377 getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4378 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4379 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4380 return 1;
4381 }
4382
4383 const MachineInstr *ResolvedUseMI = &UseMI;
4384 unsigned UseAdj = 0;
4385 if (UseMI.isBundle()) {
4386 ResolvedUseMI =
4387 getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4388 if (!ResolvedUseMI)
4389 return std::nullopt;
4390 }
4391
4392 return getOperandLatencyImpl(
4393 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4394 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4395 }
4396
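// Shared implementation used once any bundles have been resolved. CPSR
// transfers are special-cased (FMSTAT stalls, a branch can pair with the CPSR
// set), after which the itinerary latency is adjusted by the bundle distances
// and by the def-side opcode adjustments from adjustDefLatency.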
std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4398 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4399 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4400 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4401 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4402 if (Reg == ARM::CPSR) {
4403 if (DefMI.getOpcode() == ARM::FMSTAT) {
4404 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4405 return Subtarget.isLikeA9() ? 1 : 20;
4406 }
4407
4408 // CPSR set and branch can be paired in the same cycle.
4409 if (UseMI.isBranch())
4410 return 0;
4411
4412 // Otherwise it takes the instruction latency (generally one).
4413 unsigned Latency = getInstrLatency(ItinData, DefMI);
4414
    // For Thumb2 and -Os, prefer scheduling the CPSR-setting instruction close
    // to its uses. Instructions which are otherwise scheduled between them may
4417 // incur a code size penalty (not able to use the CPSR setting 16-bit
4418 // instructions).
4419 if (Latency > 0 && Subtarget.isThumb2()) {
4420 const MachineFunction *MF = DefMI.getParent()->getParent();
4421 // FIXME: Use Function::hasOptSize().
4422 if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4423 --Latency;
4424 }
4425 return Latency;
4426 }
4427
4428 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4429 return std::nullopt;
4430
4431 unsigned DefAlign = DefMI.hasOneMemOperand()
4432 ? (*DefMI.memoperands_begin())->getAlign().value()
4433 : 0;
4434 unsigned UseAlign = UseMI.hasOneMemOperand()
4435 ? (*UseMI.memoperands_begin())->getAlign().value()
4436 : 0;
4437
4438 // Get the itinerary's latency if possible, and handle variable_ops.
4439 std::optional<unsigned> Latency = getOperandLatency(
4440 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4441 // Unable to find operand latency. The caller may resort to getInstrLatency.
4442 if (!Latency)
4443 return std::nullopt;
4444
4445 // Adjust for IT block position.
4446 int Adj = DefAdj + UseAdj;
4447
4448 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4449 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4450 if (Adj >= 0 || (int)*Latency > -Adj) {
4451 return *Latency + Adj;
4452 }
4453 // Return the itinerary latency, which may be zero but not less than zero.
4454 return Latency;
4455 }
4456
4457 std::optional<unsigned>
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4459 SDNode *DefNode, unsigned DefIdx,
4460 SDNode *UseNode, unsigned UseIdx) const {
4461 if (!DefNode->isMachineOpcode())
4462 return 1;
4463
4464 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4465
4466 if (isZeroCost(DefMCID.Opcode))
4467 return 0;
4468
4469 if (!ItinData || ItinData->isEmpty())
4470 return DefMCID.mayLoad() ? 3 : 1;
4471
4472 if (!UseNode->isMachineOpcode()) {
4473 std::optional<unsigned> Latency =
4474 ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4475 int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4476 int Threshold = 1 + Adj;
4477 return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4478 }
4479
4480 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4481 auto *DefMN = cast<MachineSDNode>(DefNode);
4482 unsigned DefAlign = !DefMN->memoperands_empty()
4483 ? (*DefMN->memoperands_begin())->getAlign().value()
4484 : 0;
4485 auto *UseMN = cast<MachineSDNode>(UseNode);
4486 unsigned UseAlign = !UseMN->memoperands_empty()
4487 ? (*UseMN->memoperands_begin())->getAlign().value()
4488 : 0;
4489 std::optional<unsigned> Latency = getOperandLatency(
4490 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4491 if (!Latency)
4492 return std::nullopt;
4493
4494 if (Latency > 1U &&
4495 (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4496 Subtarget.isCortexA7())) {
4497 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4498 // variants are one cycle cheaper.
4499 switch (DefMCID.getOpcode()) {
4500 default: break;
4501 case ARM::LDRrs:
4502 case ARM::LDRBrs: {
4503 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4504 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4505 if (ShImm == 0 ||
4506 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4507 Latency = *Latency - 1;
4508 break;
4509 }
4510 case ARM::t2LDRs:
4511 case ARM::t2LDRBs:
4512 case ARM::t2LDRHs:
4513 case ARM::t2LDRSHs: {
4514 // Thumb2 mode: lsl only.
4515 unsigned ShAmt = DefNode->getConstantOperandVal(2);
4516 if (ShAmt == 0 || ShAmt == 2)
4517 Latency = *Latency - 1;
4518 break;
4519 }
4520 }
4521 } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4522 // FIXME: Properly handle all of the latency adjustments for address
4523 // writeback.
4524 switch (DefMCID.getOpcode()) {
4525 default: break;
4526 case ARM::LDRrs:
4527 case ARM::LDRBrs: {
4528 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4529 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4530 if (ShImm == 0 ||
4531 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4532 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4533 Latency = *Latency - 2;
4534 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4535 Latency = *Latency - 1;
4536 break;
4537 }
4538 case ARM::t2LDRs:
4539 case ARM::t2LDRBs:
4540 case ARM::t2LDRHs:
4541 case ARM::t2LDRSHs:
4542 // Thumb2 mode: lsl 0-3 only.
4543 Latency = *Latency - 2;
4544 break;
4545 }
4546 }
4547
4548 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4549 switch (DefMCID.getOpcode()) {
4550 default: break;
4551 case ARM::VLD1q8:
4552 case ARM::VLD1q16:
4553 case ARM::VLD1q32:
4554 case ARM::VLD1q64:
4555 case ARM::VLD1q8wb_register:
4556 case ARM::VLD1q16wb_register:
4557 case ARM::VLD1q32wb_register:
4558 case ARM::VLD1q64wb_register:
4559 case ARM::VLD1q8wb_fixed:
4560 case ARM::VLD1q16wb_fixed:
4561 case ARM::VLD1q32wb_fixed:
4562 case ARM::VLD1q64wb_fixed:
4563 case ARM::VLD2d8:
4564 case ARM::VLD2d16:
4565 case ARM::VLD2d32:
4566 case ARM::VLD2q8Pseudo:
4567 case ARM::VLD2q16Pseudo:
4568 case ARM::VLD2q32Pseudo:
4569 case ARM::VLD2d8wb_fixed:
4570 case ARM::VLD2d16wb_fixed:
4571 case ARM::VLD2d32wb_fixed:
4572 case ARM::VLD2q8PseudoWB_fixed:
4573 case ARM::VLD2q16PseudoWB_fixed:
4574 case ARM::VLD2q32PseudoWB_fixed:
4575 case ARM::VLD2d8wb_register:
4576 case ARM::VLD2d16wb_register:
4577 case ARM::VLD2d32wb_register:
4578 case ARM::VLD2q8PseudoWB_register:
4579 case ARM::VLD2q16PseudoWB_register:
4580 case ARM::VLD2q32PseudoWB_register:
4581 case ARM::VLD3d8Pseudo:
4582 case ARM::VLD3d16Pseudo:
4583 case ARM::VLD3d32Pseudo:
4584 case ARM::VLD1d8TPseudo:
4585 case ARM::VLD1d16TPseudo:
4586 case ARM::VLD1d32TPseudo:
4587 case ARM::VLD1d64TPseudo:
4588 case ARM::VLD1d64TPseudoWB_fixed:
4589 case ARM::VLD1d64TPseudoWB_register:
4590 case ARM::VLD3d8Pseudo_UPD:
4591 case ARM::VLD3d16Pseudo_UPD:
4592 case ARM::VLD3d32Pseudo_UPD:
4593 case ARM::VLD3q8Pseudo_UPD:
4594 case ARM::VLD3q16Pseudo_UPD:
4595 case ARM::VLD3q32Pseudo_UPD:
4596 case ARM::VLD3q8oddPseudo:
4597 case ARM::VLD3q16oddPseudo:
4598 case ARM::VLD3q32oddPseudo:
4599 case ARM::VLD3q8oddPseudo_UPD:
4600 case ARM::VLD3q16oddPseudo_UPD:
4601 case ARM::VLD3q32oddPseudo_UPD:
4602 case ARM::VLD4d8Pseudo:
4603 case ARM::VLD4d16Pseudo:
4604 case ARM::VLD4d32Pseudo:
4605 case ARM::VLD1d8QPseudo:
4606 case ARM::VLD1d16QPseudo:
4607 case ARM::VLD1d32QPseudo:
4608 case ARM::VLD1d64QPseudo:
4609 case ARM::VLD1d64QPseudoWB_fixed:
4610 case ARM::VLD1d64QPseudoWB_register:
4611 case ARM::VLD1q8HighQPseudo:
4612 case ARM::VLD1q8LowQPseudo_UPD:
4613 case ARM::VLD1q8HighTPseudo:
4614 case ARM::VLD1q8LowTPseudo_UPD:
4615 case ARM::VLD1q16HighQPseudo:
4616 case ARM::VLD1q16LowQPseudo_UPD:
4617 case ARM::VLD1q16HighTPseudo:
4618 case ARM::VLD1q16LowTPseudo_UPD:
4619 case ARM::VLD1q32HighQPseudo:
4620 case ARM::VLD1q32LowQPseudo_UPD:
4621 case ARM::VLD1q32HighTPseudo:
4622 case ARM::VLD1q32LowTPseudo_UPD:
4623 case ARM::VLD1q64HighQPseudo:
4624 case ARM::VLD1q64LowQPseudo_UPD:
4625 case ARM::VLD1q64HighTPseudo:
4626 case ARM::VLD1q64LowTPseudo_UPD:
4627 case ARM::VLD4d8Pseudo_UPD:
4628 case ARM::VLD4d16Pseudo_UPD:
4629 case ARM::VLD4d32Pseudo_UPD:
4630 case ARM::VLD4q8Pseudo_UPD:
4631 case ARM::VLD4q16Pseudo_UPD:
4632 case ARM::VLD4q32Pseudo_UPD:
4633 case ARM::VLD4q8oddPseudo:
4634 case ARM::VLD4q16oddPseudo:
4635 case ARM::VLD4q32oddPseudo:
4636 case ARM::VLD4q8oddPseudo_UPD:
4637 case ARM::VLD4q16oddPseudo_UPD:
4638 case ARM::VLD4q32oddPseudo_UPD:
4639 case ARM::VLD1DUPq8:
4640 case ARM::VLD1DUPq16:
4641 case ARM::VLD1DUPq32:
4642 case ARM::VLD1DUPq8wb_fixed:
4643 case ARM::VLD1DUPq16wb_fixed:
4644 case ARM::VLD1DUPq32wb_fixed:
4645 case ARM::VLD1DUPq8wb_register:
4646 case ARM::VLD1DUPq16wb_register:
4647 case ARM::VLD1DUPq32wb_register:
4648 case ARM::VLD2DUPd8:
4649 case ARM::VLD2DUPd16:
4650 case ARM::VLD2DUPd32:
4651 case ARM::VLD2DUPd8wb_fixed:
4652 case ARM::VLD2DUPd16wb_fixed:
4653 case ARM::VLD2DUPd32wb_fixed:
4654 case ARM::VLD2DUPd8wb_register:
4655 case ARM::VLD2DUPd16wb_register:
4656 case ARM::VLD2DUPd32wb_register:
4657 case ARM::VLD2DUPq8EvenPseudo:
4658 case ARM::VLD2DUPq8OddPseudo:
4659 case ARM::VLD2DUPq16EvenPseudo:
4660 case ARM::VLD2DUPq16OddPseudo:
4661 case ARM::VLD2DUPq32EvenPseudo:
4662 case ARM::VLD2DUPq32OddPseudo:
4663 case ARM::VLD3DUPq8EvenPseudo:
4664 case ARM::VLD3DUPq8OddPseudo:
4665 case ARM::VLD3DUPq16EvenPseudo:
4666 case ARM::VLD3DUPq16OddPseudo:
4667 case ARM::VLD3DUPq32EvenPseudo:
4668 case ARM::VLD3DUPq32OddPseudo:
4669 case ARM::VLD4DUPd8Pseudo:
4670 case ARM::VLD4DUPd16Pseudo:
4671 case ARM::VLD4DUPd32Pseudo:
4672 case ARM::VLD4DUPd8Pseudo_UPD:
4673 case ARM::VLD4DUPd16Pseudo_UPD:
4674 case ARM::VLD4DUPd32Pseudo_UPD:
4675 case ARM::VLD4DUPq8EvenPseudo:
4676 case ARM::VLD4DUPq8OddPseudo:
4677 case ARM::VLD4DUPq16EvenPseudo:
4678 case ARM::VLD4DUPq16OddPseudo:
4679 case ARM::VLD4DUPq32EvenPseudo:
4680 case ARM::VLD4DUPq32OddPseudo:
4681 case ARM::VLD1LNq8Pseudo:
4682 case ARM::VLD1LNq16Pseudo:
4683 case ARM::VLD1LNq32Pseudo:
4684 case ARM::VLD1LNq8Pseudo_UPD:
4685 case ARM::VLD1LNq16Pseudo_UPD:
4686 case ARM::VLD1LNq32Pseudo_UPD:
4687 case ARM::VLD2LNd8Pseudo:
4688 case ARM::VLD2LNd16Pseudo:
4689 case ARM::VLD2LNd32Pseudo:
4690 case ARM::VLD2LNq16Pseudo:
4691 case ARM::VLD2LNq32Pseudo:
4692 case ARM::VLD2LNd8Pseudo_UPD:
4693 case ARM::VLD2LNd16Pseudo_UPD:
4694 case ARM::VLD2LNd32Pseudo_UPD:
4695 case ARM::VLD2LNq16Pseudo_UPD:
4696 case ARM::VLD2LNq32Pseudo_UPD:
4697 case ARM::VLD4LNd8Pseudo:
4698 case ARM::VLD4LNd16Pseudo:
4699 case ARM::VLD4LNd32Pseudo:
4700 case ARM::VLD4LNq16Pseudo:
4701 case ARM::VLD4LNq32Pseudo:
4702 case ARM::VLD4LNd8Pseudo_UPD:
4703 case ARM::VLD4LNd16Pseudo_UPD:
4704 case ARM::VLD4LNd32Pseudo_UPD:
4705 case ARM::VLD4LNq16Pseudo_UPD:
4706 case ARM::VLD4LNq32Pseudo_UPD:
4707 // If the address is not 64-bit aligned, the latencies of these
4708     // instructions increase by one.
4709 Latency = *Latency + 1;
4710 break;
4711 }
4712
4713 return Latency;
4714 }
4715
4716 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4717 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4718 MI.isImplicitDef())
4719 return 0;
4720
4721 if (MI.isBundle())
4722 return 0;
4723
4724 const MCInstrDesc &MCID = MI.getDesc();
4725
4726 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4727 !Subtarget.cheapPredicableCPSRDef())) {
4728     // When predicated, CPSR is an additional source operand for CPSR-updating
4729     // instructions; this apparently increases their latencies.
4730 return 1;
4731 }
4732 return 0;
4733 }
4734
4735 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4736 const MachineInstr &MI,
4737 unsigned *PredCost) const {
4738 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4739 MI.isImplicitDef())
4740 return 1;
4741
4742   // An instruction scheduler typically runs on unbundled instructions; however,
4743   // other passes may query the latency of a bundled instruction.
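  // Illustrative sketch (hypothetical bundle, not taken from a specific test):
  // for a Thumb2 bundle such as { t2IT; t2ADDrr; t2SUBrr }, the loop below
  // sums the ADD and SUB latencies and skips the t2IT marker itself.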
4744 if (MI.isBundle()) {
4745 unsigned Latency = 0;
4746 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4747 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4748 while (++I != E && I->isInsideBundle()) {
4749 if (I->getOpcode() != ARM::t2IT)
4750 Latency += getInstrLatency(ItinData, *I, PredCost);
4751 }
4752 return Latency;
4753 }
4754
4755 const MCInstrDesc &MCID = MI.getDesc();
4756 if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4757 !Subtarget.cheapPredicableCPSRDef()))) {
4758     // When predicated, CPSR is an additional source operand for CPSR-updating
4759     // instructions; this apparently increases their latencies.
4760 *PredCost = 1;
4761 }
4762 // Be sure to call getStageLatency for an empty itinerary in case it has a
4763 // valid MinLatency property.
4764 if (!ItinData)
4765 return MI.mayLoad() ? 3 : 1;
4766
4767 unsigned Class = MCID.getSchedClass();
4768
4769 // For instructions with variable uops, use uops as latency.
4770 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4771 return getNumMicroOps(ItinData, MI);
4772
4773 // For the common case, fall back on the itinerary's latency.
4774 unsigned Latency = ItinData->getStageLatency(Class);
4775
4776 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4777 unsigned DefAlign =
4778 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4779 int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4780 if (Adj >= 0 || (int)Latency > -Adj) {
4781 return Latency + Adj;
4782 }
4783 return Latency;
4784 }
4785
4786 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4787 SDNode *Node) const {
4788 if (!Node->isMachineOpcode())
4789 return 1;
4790
4791 if (!ItinData || ItinData->isEmpty())
4792 return 1;
4793
4794 unsigned Opcode = Node->getMachineOpcode();
4795 switch (Opcode) {
4796 default:
4797 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4798 case ARM::VLDMQIA:
4799 case ARM::VSTMQIA:
4800 return 2;
4801 }
4802 }
4803
4804 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4805 const MachineRegisterInfo *MRI,
4806 const MachineInstr &DefMI,
4807 unsigned DefIdx,
4808 const MachineInstr &UseMI,
4809 unsigned UseIdx) const {
4810 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4811 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4812 if (Subtarget.nonpipelinedVFP() &&
4813 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4814 return true;
4815
4816 // Hoist VFP / NEON instructions with 4 or higher latency.
4817 unsigned Latency =
4818 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4819 if (Latency <= 3)
4820 return false;
4821 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4822 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4823 }
4824
4825 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4826 const MachineInstr &DefMI,
4827 unsigned DefIdx) const {
4828 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4829 if (!ItinData || ItinData->isEmpty())
4830 return false;
4831
4832 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4833 if (DDomain == ARMII::DomainGeneral) {
4834 unsigned DefClass = DefMI.getDesc().getSchedClass();
4835 std::optional<unsigned> DefCycle =
4836 ItinData->getOperandCycle(DefClass, DefIdx);
4837 return DefCycle && DefCycle <= 2U;
4838 }
4839 return false;
4840 }
4841
4842 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4843 StringRef &ErrInfo) const {
4844 if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4845 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4846 return false;
4847 }
4848 if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4849 // Make sure we don't generate a lo-lo mov that isn't supported.
4850 if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4851 !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4852 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4853 return false;
4854 }
4855 }
4856 if (MI.getOpcode() == ARM::tPUSH ||
4857 MI.getOpcode() == ARM::tPOP ||
4858 MI.getOpcode() == ARM::tPOP_RET) {
4859 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4860 if (MO.isImplicit() || !MO.isReg())
4861 continue;
4862 Register Reg = MO.getReg();
4863 if (Reg < ARM::R0 || Reg > ARM::R7) {
4864 if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4865 !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4866 ErrInfo = "Unsupported register in Thumb1 push/pop";
4867 return false;
4868 }
4869 }
4870 }
4871 }
4872 if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4873 assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4874 if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4875 MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4876 ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4877 return false;
4878 }
4879 }
4880
4881   // Check the addressing mode by taking the first Imm operand and verifying
4882   // that it is legal for that mode.
4883 ARMII::AddrMode AddrMode =
4884 (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4885 switch (AddrMode) {
4886 default:
4887 break;
4888 case ARMII::AddrModeT2_i7:
4889 case ARMII::AddrModeT2_i7s2:
4890 case ARMII::AddrModeT2_i7s4:
4891 case ARMII::AddrModeT2_i8:
4892 case ARMII::AddrModeT2_i8pos:
4893 case ARMII::AddrModeT2_i8neg:
4894 case ARMII::AddrModeT2_i8s4:
4895 case ARMII::AddrModeT2_i12: {
4896 uint32_t Imm = 0;
4897 for (auto Op : MI.operands()) {
4898 if (Op.isImm()) {
4899 Imm = Op.getImm();
4900 break;
4901 }
4902 }
4903 if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4904 ErrInfo = "Incorrect AddrMode Imm for instruction";
4905 return false;
4906 }
4907 break;
4908 }
4909 }
4910 return true;
4911 }
4912
4913 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4914 unsigned LoadImmOpc,
4915 unsigned LoadOpc) const {
4916 assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4917 "ROPI/RWPI not currently supported with stack guard");
4918
4919 MachineBasicBlock &MBB = *MI->getParent();
4920 DebugLoc DL = MI->getDebugLoc();
4921 Register Reg = MI->getOperand(0).getReg();
4922 MachineInstrBuilder MIB;
4923 unsigned int Offset = 0;
4924
4925 if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4926 assert(!Subtarget.isReadTPSoft() &&
4927 "TLS stack protector requires hardware TLS register");
4928
4929 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4930 .addImm(15)
4931 .addImm(0)
4932 .addImm(13)
4933 .addImm(0)
4934 .addImm(3)
4935 .add(predOps(ARMCC::AL));
4936
4937 Module &M = *MBB.getParent()->getFunction().getParent();
4938 Offset = M.getStackProtectorGuardOffset();
4939 if (Offset & ~0xfffU) {
4940 // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4941 // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4942 // bits, resulting in a range of 0 to +1 MiB for the guard offset.
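      // Illustrative example (hypothetical offset): for a guard offset of
      // 0x1850, the ADD below materializes 0x1000 and the final LDR uses the
      // remaining 0x850.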
4943 unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4944 BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4945 .addReg(Reg, RegState::Kill)
4946 .addImm(Offset & ~0xfffU)
4947 .add(predOps(ARMCC::AL))
4948 .addReg(0);
4949 Offset &= 0xfffU;
4950 }
4951 } else {
4952 const GlobalValue *GV =
4953 cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4954 bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4955
4956 unsigned TargetFlags = ARMII::MO_NO_FLAG;
4957 if (Subtarget.isTargetMachO()) {
4958 TargetFlags |= ARMII::MO_NONLAZY;
4959 } else if (Subtarget.isTargetCOFF()) {
4960 if (GV->hasDLLImportStorageClass())
4961 TargetFlags |= ARMII::MO_DLLIMPORT;
4962 else if (IsIndirect)
4963 TargetFlags |= ARMII::MO_COFFSTUB;
4964 } else if (IsIndirect) {
4965 TargetFlags |= ARMII::MO_GOT;
4966 }
4967
4968 if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
4969 Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
4970 auto APSREncoding =
4971 ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
4972 BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
4973 .addImm(APSREncoding)
4974 .add(predOps(ARMCC::AL));
4975 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4976 .addGlobalAddress(GV, 0, TargetFlags);
4977 BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
4978 .addImm(APSREncoding)
4979 .addReg(CPSRSaveReg, RegState::Kill)
4980 .add(predOps(ARMCC::AL));
4981 } else {
4982 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4983 .addGlobalAddress(GV, 0, TargetFlags);
4984 }
4985
4986 if (IsIndirect) {
4987 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4988 MIB.addReg(Reg, RegState::Kill).addImm(0);
4989 auto Flags = MachineMemOperand::MOLoad |
4990 MachineMemOperand::MODereferenceable |
4991 MachineMemOperand::MOInvariant;
4992 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4993 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4994 MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4995 }
4996 }
4997
4998 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4999 MIB.addReg(Reg, RegState::Kill)
5000 .addImm(Offset)
5001 .cloneMemRefs(*MI)
5002 .add(predOps(ARMCC::AL));
5003 }
5004
5005 bool
5006 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
5007 unsigned &AddSubOpc,
5008 bool &NegAcc, bool &HasLane) const {
5009 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
5010 if (I == MLxEntryMap.end())
5011 return false;
5012
5013 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
5014 MulOpc = Entry.MulOpc;
5015 AddSubOpc = Entry.AddSubOpc;
5016 NegAcc = Entry.NegAcc;
5017 HasLane = Entry.HasLane;
5018 return true;
5019 }
5020
5021 //===----------------------------------------------------------------------===//
5022 // Execution domains.
5023 //===----------------------------------------------------------------------===//
5024 //
5025 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
5026 // and some can go down both. The vmov instructions go down the VFP pipeline,
5027 // but they can be changed to vorr equivalents that are executed by the NEON
5028 // pipeline.
5029 //
5030 // We use the following execution domain numbering:
5031 //
5032 enum ARMExeDomain {
5033 ExeGeneric = 0,
5034 ExeVFP = 1,
5035 ExeNEON = 2
5036 };
5037
5038 //
5039 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
5040 //
5041 std::pair<uint16_t, uint16_t>
5042 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
5043 // If we don't have access to NEON instructions then we won't be able
5044 // to swizzle anything to the NEON domain. Check to make sure.
5045 if (Subtarget.hasNEON()) {
5046 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
5047 // if they are not predicated.
5048 if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
5049 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5050
5051 // CortexA9 is particularly picky about mixing the two and wants these
5052 // converted.
5053 if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
5054 (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
5055 MI.getOpcode() == ARM::VMOVS))
5056 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5057 }
5058 // No other instructions can be swizzled, so just determine their domain.
5059 unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
5060
5061 if (Domain & ARMII::DomainNEON)
5062 return std::make_pair(ExeNEON, 0);
5063
5064 // Certain instructions can go either way on Cortex-A8.
5065 // Treat them as NEON instructions.
5066 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
5067 return std::make_pair(ExeNEON, 0);
5068
5069 if (Domain & ARMII::DomainVFP)
5070 return std::make_pair(ExeVFP, 0);
5071
5072 return std::make_pair(ExeGeneric, 0);
5073 }
5074
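// Illustrative mapping (this follows the standard S/D register aliasing): S0
// is D0 lane 0, S1 is D0 lane 1, S2 is D1 lane 0, and so on, since each
// D-register overlaps a consecutive pair of S-registers.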
5075 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
5076 unsigned SReg, unsigned &Lane) {
5077 unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
5078 Lane = 0;
5079
5080 if (DReg != ARM::NoRegister)
5081 return DReg;
5082
5083 Lane = 1;
5084 DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
5085
5086 assert(DReg && "S-register with no D super-register?");
5087 return DReg;
5088 }
5089
5090 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
5091 /// set ImplicitSReg to a register number that must be marked as implicit-use or
5092 /// zero if no register needs to be defined as implicit-use.
5093 ///
5094 /// If the function cannot determine if an SPR should be marked implicit use or
5095 /// not, it returns false.
5096 ///
5097 /// This function handles cases where an instruction is being modified from taking
5098 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
5099 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
5100 /// lane of the DPR).
5101 ///
5102 /// If the other SPR is defined, an implicit-use of it should be added. Else,
5103 /// (including the case where the DPR itself is defined), it should not.
5104 ///
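/// Illustrative example: when a VMOVSR writing S1 is rewritten as a
/// VSETLNi32 on D0[1], a live earlier def of S0 must be kept alive by adding
/// an implicit use of S0; if D0 itself is already defined or read by the
/// instruction, no extra operand is needed.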
5105 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
5106 MachineInstr &MI, unsigned DReg,
5107 unsigned Lane, unsigned &ImplicitSReg) {
5108 // If the DPR is defined or used already, the other SPR lane will be chained
5109 // correctly, so there is nothing to be done.
5110 if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
5111 ImplicitSReg = 0;
5112 return true;
5113 }
5114
5115 // Otherwise we need to go searching to see if the SPR is set explicitly.
5116 ImplicitSReg = TRI->getSubReg(DReg,
5117 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
5118 MachineBasicBlock::LivenessQueryResult LQR =
5119 MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
5120
5121 if (LQR == MachineBasicBlock::LQR_Live)
5122 return true;
5123 else if (LQR == MachineBasicBlock::LQR_Unknown)
5124 return false;
5125
5126 // If the register is known not to be live, there is no need to add an
5127 // implicit-use.
5128 ImplicitSReg = 0;
5129 return true;
5130 }
5131
5132 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
5133 unsigned Domain) const {
5134 unsigned DstReg, SrcReg, DReg;
5135 unsigned Lane;
5136 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
5137 const TargetRegisterInfo *TRI = &getRegisterInfo();
5138 switch (MI.getOpcode()) {
5139 default:
5140 llvm_unreachable("cannot handle opcode!");
5141 break;
5142 case ARM::VMOVD:
5143 if (Domain != ExeNEON)
5144 break;
5145
5146 // Zap the predicate operands.
5147 assert(!isPredicated(MI) && "Cannot predicate a VORRd");
5148
5149 // Make sure we've got NEON instructions.
5150 assert(Subtarget.hasNEON() && "VORRd requires NEON");
5151
5152 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
5153 DstReg = MI.getOperand(0).getReg();
5154 SrcReg = MI.getOperand(1).getReg();
5155
5156 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5157 MI.removeOperand(i - 1);
5158
5159 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
5160 MI.setDesc(get(ARM::VORRd));
5161 MIB.addReg(DstReg, RegState::Define)
5162 .addReg(SrcReg)
5163 .addReg(SrcReg)
5164 .add(predOps(ARMCC::AL));
5165 break;
5166 case ARM::VMOVRS:
5167 if (Domain != ExeNEON)
5168 break;
5169 assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
5170
5171 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
5172 DstReg = MI.getOperand(0).getReg();
5173 SrcReg = MI.getOperand(1).getReg();
5174
5175 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5176 MI.removeOperand(i - 1);
5177
5178 DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
5179
5180 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
5181 // Note that DSrc has been widened and the other lane may be undef, which
5182 // contaminates the entire register.
5183 MI.setDesc(get(ARM::VGETLNi32));
5184 MIB.addReg(DstReg, RegState::Define)
5185 .addReg(DReg, RegState::Undef)
5186 .addImm(Lane)
5187 .add(predOps(ARMCC::AL));
5188
5189 // The old source should be an implicit use, otherwise we might think it
5190 // was dead before here.
5191 MIB.addReg(SrcReg, RegState::Implicit);
5192 break;
5193 case ARM::VMOVSR: {
5194 if (Domain != ExeNEON)
5195 break;
5196 assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
5197
5198 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
5199 DstReg = MI.getOperand(0).getReg();
5200 SrcReg = MI.getOperand(1).getReg();
5201
5202 DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
5203
5204 unsigned ImplicitSReg;
5205 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
5206 break;
5207
5208 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5209 MI.removeOperand(i - 1);
5210
5211 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
5212 // Again DDst may be undefined at the beginning of this instruction.
5213 MI.setDesc(get(ARM::VSETLNi32));
5214 MIB.addReg(DReg, RegState::Define)
5215 .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
5216 .addReg(SrcReg)
5217 .addImm(Lane)
5218 .add(predOps(ARMCC::AL));
5219
5220 // The narrower destination must be marked as set to keep previous chains
5221 // in place.
5222 MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
5223 if (ImplicitSReg != 0)
5224 MIB.addReg(ImplicitSReg, RegState::Implicit);
5225 break;
5226 }
5227 case ARM::VMOVS: {
5228 if (Domain != ExeNEON)
5229 break;
5230
5231 // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
5232 DstReg = MI.getOperand(0).getReg();
5233 SrcReg = MI.getOperand(1).getReg();
5234
5235 unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
5236 DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
5237 DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
5238
5239 unsigned ImplicitSReg;
5240 if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
5241 break;
5242
5243 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5244 MI.removeOperand(i - 1);
5245
5246 if (DSrc == DDst) {
5247 // Destination can be:
5248 // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
5249 MI.setDesc(get(ARM::VDUPLN32d));
5250 MIB.addReg(DDst, RegState::Define)
5251 .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
5252 .addImm(SrcLane)
5253 .add(predOps(ARMCC::AL));
5254
5255       // Neither the source nor the destination is naturally represented any
5256       // more, so add them in manually.
5257 MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
5258 MIB.addReg(SrcReg, RegState::Implicit);
5259 if (ImplicitSReg != 0)
5260 MIB.addReg(ImplicitSReg, RegState::Implicit);
5261 break;
5262 }
5263
5264 // In general there's no single instruction that can perform an S <-> S
5265 // move in NEON space, but a pair of VEXT instructions *can* do the
5266 // job. It turns out that the VEXTs needed will only use DSrc once, with
5267 // the position based purely on the combination of lane-0 and lane-1
5268 // involved. For example
5269 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
5270 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
5271 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
5272 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
5273 //
5274 // Pattern of the MachineInstrs is:
5275 // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
5276 MachineInstrBuilder NewMIB;
5277 NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
5278 DDst);
5279
5280 // On the first instruction, both DSrc and DDst may be undef if present.
5281 // Specifically when the original instruction didn't have them as an
5282 // <imp-use>.
5283 unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
5284 bool CurUndef = !MI.readsRegister(CurReg, TRI);
5285 NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
5286
5287 CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
5288 CurUndef = !MI.readsRegister(CurReg, TRI);
5289 NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
5290 .addImm(1)
5291 .add(predOps(ARMCC::AL));
5292
5293 if (SrcLane == DstLane)
5294 NewMIB.addReg(SrcReg, RegState::Implicit);
5295
5296 MI.setDesc(get(ARM::VEXTd32));
5297 MIB.addReg(DDst, RegState::Define);
5298
5299 // On the second instruction, DDst has definitely been defined above, so
5300 // it is not undef. DSrc, if present, can be undef as above.
5301 CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
5302 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5303 MIB.addReg(CurReg, getUndefRegState(CurUndef));
5304
5305 CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
5306 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5307 MIB.addReg(CurReg, getUndefRegState(CurUndef))
5308 .addImm(1)
5309 .add(predOps(ARMCC::AL));
5310
5311 if (SrcLane != DstLane)
5312 MIB.addReg(SrcReg, RegState::Implicit);
5313
5314 // As before, the original destination is no longer represented, add it
5315 // implicitly.
5316 MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
5317 if (ImplicitSReg != 0)
5318 MIB.addReg(ImplicitSReg, RegState::Implicit);
5319 break;
5320 }
5321 }
5322 }
5323
5324 //===----------------------------------------------------------------------===//
5325 // Partial register updates
5326 //===----------------------------------------------------------------------===//
5327 //
5328 // Swift renames NEON registers with 64-bit granularity. That means any
5329 // instruction writing an S-reg implicitly reads the containing D-reg. The
5330 // problem is mostly avoided by translating f32 operations to v2f32 operations
5331 // on D-registers, but f32 loads are still a problem.
5332 //
5333 // These instructions can load an f32 into a NEON register:
5334 //
5335 // VLDRS - Only writes S, partial D update.
5336 // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5337 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5338 //
5339 // FCONSTD can be used as a dependency-breaking instruction.
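// Illustrative scenario (assuming a Swift-like core with a nonzero partial
// update clearance): a VLDRS that writes only S0 is treated as also reading
// D0, so a distant def of D0 can stall the load. Returning a clearance below
// asks the scheduler to keep such defs away, and breakPartialRegDependency
// can insert an FCONSTD to sever the dependency instead.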
5340 unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
5341 const MachineInstr &MI, unsigned OpNum,
5342 const TargetRegisterInfo *TRI) const {
5343 auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
5344 if (!PartialUpdateClearance)
5345 return 0;
5346
5347 assert(TRI && "Need TRI instance");
5348
5349 const MachineOperand &MO = MI.getOperand(OpNum);
5350 if (MO.readsReg())
5351 return 0;
5352 Register Reg = MO.getReg();
5353 int UseOp = -1;
5354
5355 switch (MI.getOpcode()) {
5356 // Normal instructions writing only an S-register.
5357 case ARM::VLDRS:
5358 case ARM::FCONSTS:
5359 case ARM::VMOVSR:
5360 case ARM::VMOVv8i8:
5361 case ARM::VMOVv4i16:
5362 case ARM::VMOVv2i32:
5363 case ARM::VMOVv2f32:
5364 case ARM::VMOVv1i64:
5365 UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
5366 break;
5367
5368 // Explicitly reads the dependency.
5369 case ARM::VLD1LNd32:
5370 UseOp = 3;
5371 break;
5372 default:
5373 return 0;
5374 }
5375
5376 // If this instruction actually reads a value from Reg, there is no unwanted
5377 // dependency.
5378 if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5379 return 0;
5380
5381 // We must be able to clobber the whole D-reg.
5382 if (Reg.isVirtual()) {
5383 // Virtual register must be a def undef foo:ssub_0 operand.
5384 if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
5385 return 0;
5386 } else if (ARM::SPRRegClass.contains(Reg)) {
5387 // Physical register: MI must define the full D-reg.
5388 unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
5389 &ARM::DPRRegClass);
5390 if (!DReg || !MI.definesRegister(DReg, TRI))
5391 return 0;
5392 }
5393
5394 // MI has an unwanted D-register dependency.
5395   // Avoid defs in the previous N instructions.
5396 return PartialUpdateClearance;
5397 }
5398
5399 // Break a partial register dependency after getPartialRegUpdateClearance
5400 // returned non-zero.
5401 void ARMBaseInstrInfo::breakPartialRegDependency(
5402 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5403 assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
5404 assert(TRI && "Need TRI instance");
5405
5406 const MachineOperand &MO = MI.getOperand(OpNum);
5407 Register Reg = MO.getReg();
5408 assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
5409 unsigned DReg = Reg;
5410
5411 // If MI defines an S-reg, find the corresponding D super-register.
5412 if (ARM::SPRRegClass.contains(Reg)) {
5413 DReg = ARM::D0 + (Reg - ARM::S0) / 2;
5414 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5415 }
5416
5417 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
5418 assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5419
5420 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
5421 // the full D-register by loading the same value to both lanes. The
5422 // instruction is micro-coded with 2 uops, so don't do this until we can
5423 // properly schedule micro-coded instructions. The dispatcher stalls cause
5424 // too big regressions.
5425
5426 // Insert the dependency-breaking FCONSTD before MI.
5427 // 96 is the encoding of 0.5, but the actual value doesn't matter here.
5428 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
5429 .addImm(96)
5430 .add(predOps(ARMCC::AL));
5431 MI.addRegisterKilled(DReg, TRI, true);
5432 }
5433
5434 bool ARMBaseInstrInfo::hasNOP() const {
5435 return Subtarget.hasFeature(ARM::HasV6KOps);
5436 }
5437
5438 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
5439 if (MI->getNumOperands() < 4)
5440 return true;
5441 unsigned ShOpVal = MI->getOperand(3).getImm();
5442 unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
5443 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
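  // E.g. (illustrative operands): a shifted operand of "lsl #2" or "lsr #1"
  // takes the fast path below, while "lsl #3" or "asr #1" does not.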
5444 if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
5445 ((ShImm == 1 || ShImm == 2) &&
5446 ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5447 return true;
5448
5449 return false;
5450 }
5451
5452 bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
5453 const MachineInstr &MI, unsigned DefIdx,
5454 SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5455 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5456 assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5457
5458 switch (MI.getOpcode()) {
5459 case ARM::VMOVDRR:
5460 // dX = VMOVDRR rY, rZ
5461 // is the same as:
5462 // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5463 // Populate the InputRegs accordingly.
5464 // rY
5465 const MachineOperand *MOReg = &MI.getOperand(1);
5466 if (!MOReg->isUndef())
5467 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5468 MOReg->getSubReg(), ARM::ssub_0));
5469 // rZ
5470 MOReg = &MI.getOperand(2);
5471 if (!MOReg->isUndef())
5472 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5473 MOReg->getSubReg(), ARM::ssub_1));
5474 return true;
5475 }
5476 llvm_unreachable("Target dependent opcode missing");
5477 }
5478
5479 bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
5480 const MachineInstr &MI, unsigned DefIdx,
5481 RegSubRegPairAndIdx &InputReg) const {
5482 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5483 assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5484
5485 switch (MI.getOpcode()) {
5486 case ARM::VMOVRRD:
5487 // rX, rY = VMOVRRD dZ
5488 // is the same as:
5489 // rX = EXTRACT_SUBREG dZ, ssub_0
5490 // rY = EXTRACT_SUBREG dZ, ssub_1
5491 const MachineOperand &MOReg = MI.getOperand(2);
5492 if (MOReg.isUndef())
5493 return false;
5494 InputReg.Reg = MOReg.getReg();
5495 InputReg.SubReg = MOReg.getSubReg();
5496 InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5497 return true;
5498 }
5499 llvm_unreachable("Target dependent opcode missing");
5500 }
5501
5502 bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
5503 const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5504 RegSubRegPairAndIdx &InsertedReg) const {
5505 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5506 assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5507
5508 switch (MI.getOpcode()) {
5509 case ARM::VSETLNi32:
5510 case ARM::MVE_VMOV_to_lane_32:
5511 // dX = VSETLNi32 dY, rZ, imm
5512 // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
5513 const MachineOperand &MOBaseReg = MI.getOperand(1);
5514 const MachineOperand &MOInsertedReg = MI.getOperand(2);
5515 if (MOInsertedReg.isUndef())
5516 return false;
5517 const MachineOperand &MOIndex = MI.getOperand(3);
5518 BaseReg.Reg = MOBaseReg.getReg();
5519 BaseReg.SubReg = MOBaseReg.getSubReg();
5520
5521 InsertedReg.Reg = MOInsertedReg.getReg();
5522 InsertedReg.SubReg = MOInsertedReg.getSubReg();
5523 InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
5524 return true;
5525 }
5526 llvm_unreachable("Target dependent opcode missing");
5527 }
5528
5529 std::pair<unsigned, unsigned>
5530 ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5531 const unsigned Mask = ARMII::MO_OPTION_MASK;
5532 return std::make_pair(TF & Mask, TF & ~Mask);
5533 }
5534
5535 ArrayRef<std::pair<unsigned, const char *>>
5536 ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5537 using namespace ARMII;
5538
5539 static const std::pair<unsigned, const char *> TargetFlags[] = {
5540 {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
5541 {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
5542 {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5543 };
5544 return ArrayRef(TargetFlags);
5545 }
5546
5547 ArrayRef<std::pair<unsigned, const char *>>
5548 ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5549 using namespace ARMII;
5550
5551 static const std::pair<unsigned, const char *> TargetFlags[] = {
5552 {MO_COFFSTUB, "arm-coffstub"},
5553 {MO_GOT, "arm-got"},
5554 {MO_SBREL, "arm-sbrel"},
5555 {MO_DLLIMPORT, "arm-dllimport"},
5556 {MO_SECREL, "arm-secrel"},
5557 {MO_NONLAZY, "arm-nonlazy"}};
5558 return ArrayRef(TargetFlags);
5559 }
5560
5561 std::optional<RegImmPair>
5562 ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
5563 int Sign = 1;
5564 unsigned Opcode = MI.getOpcode();
5565 int64_t Offset = 0;
5566
5567 // TODO: Handle cases where Reg is a super- or sub-register of the
5568 // destination register.
5569 const MachineOperand &Op0 = MI.getOperand(0);
5570 if (!Op0.isReg() || Reg != Op0.getReg())
5571 return std::nullopt;
5572
5573 // We describe SUBri or ADDri instructions.
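  // Illustrative example (hypothetical operands): for "%r0 = SUBri %r1, 4, ...",
  // a query with Reg == %r0 yields {Reg: %r1, Offset: -4}.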
5574 if (Opcode == ARM::SUBri)
5575 Sign = -1;
5576 else if (Opcode != ARM::ADDri)
5577 return std::nullopt;
5578
5579 // TODO: Third operand can be global address (usually some string). Since
5580 // strings can be relocated we cannot calculate their offsets for
5581 // now.
5582 if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
5583 return std::nullopt;
5584
5585 Offset = MI.getOperand(2).getImm() * Sign;
5586 return RegImmPair{MI.getOperand(1).getReg(), Offset};
5587 }
5588
5589 bool llvm::registerDefinedBetween(unsigned Reg,
5590 MachineBasicBlock::iterator From,
5591 MachineBasicBlock::iterator To,
5592 const TargetRegisterInfo *TRI) {
5593 for (auto I = From; I != To; ++I)
5594 if (I->modifiesRegister(Reg, TRI))
5595 return true;
5596 return false;
5597 }
5598
5599 MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
5600 const TargetRegisterInfo *TRI) {
5601   // Search backwards to the instruction that defines CPSR. This may or may not
5602   // be a CMP; we check that after this loop. If we find another instruction
5603   // that reads CPSR, we return nullptr.
5604 MachineBasicBlock::iterator CmpMI = Br;
5605 while (CmpMI != Br->getParent()->begin()) {
5606 --CmpMI;
5607 if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
5608 break;
5609 if (CmpMI->readsRegister(ARM::CPSR, TRI))
5610 break;
5611 }
5612
5613 // Check that this inst is a CMP r[0-7], #0 and that the register
5614 // is not redefined between the cmp and the br.
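  // E.g. (illustrative): a "tCMPi8 r3, #0" feeding the branch, with neither r3
  // nor CPSR touched in between, is a candidate for folding into CBZ/CBNZ r3.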
5615 if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
5616 return nullptr;
5617 Register Reg = CmpMI->getOperand(0).getReg();
5618 Register PredReg;
5619 ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
5620 if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
5621 return nullptr;
5622 if (!isARMLowRegister(Reg))
5623 return nullptr;
5624 if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
5625 return nullptr;
5626
5627 return &*CmpMI;
5628 }
5629
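// Illustrative costs derived from the logic below (assuming MOVW/MOVT are
// available): on Thumb2, Val = 0x1234 is a single MOVW (4 bytes, 1
// instruction), while Val = 0x12345678 needs MOVW+MOVT (8 bytes, 2
// instructions).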
5630 unsigned llvm::ConstantMaterializationCost(unsigned Val,
5631 const ARMSubtarget *Subtarget,
5632 bool ForCodesize) {
5633 if (Subtarget->isThumb()) {
5634 if (Val <= 255) // MOV
5635 return ForCodesize ? 2 : 1;
5636 if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV
5637 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
5638 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
5639 return ForCodesize ? 4 : 1;
5640 if (Val <= 510) // MOV + ADDi8
5641 return ForCodesize ? 4 : 2;
5642 if (~Val <= 255) // MOV + MVN
5643 return ForCodesize ? 4 : 2;
5644 if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
5645 return ForCodesize ? 4 : 2;
5646 } else {
5647 if (ARM_AM::getSOImmVal(Val) != -1) // MOV
5648 return ForCodesize ? 4 : 1;
5649 if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
5650 return ForCodesize ? 4 : 1;
5651 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
5652 return ForCodesize ? 4 : 1;
5653 if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
5654 return ForCodesize ? 8 : 2;
5655 if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
5656 return ForCodesize ? 8 : 2;
5657 }
5658 if (Subtarget->useMovt()) // MOVW + MOVT
5659 return ForCodesize ? 8 : 2;
5660 return ForCodesize ? 8 : 3; // Literal pool load
5661 }
5662
5663 bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
5664 const ARMSubtarget *Subtarget,
5665 bool ForCodesize) {
5666 // Check with ForCodesize
5667 unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
5668 unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
5669 if (Cost1 < Cost2)
5670 return true;
5671 if (Cost1 > Cost2)
5672 return false;
5673
5674 // If they are equal, try with !ForCodesize
5675 return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
5676 ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
5677 }
5678
5679 /// Constants defining how certain sequences should be outlined.
5680 /// This encompasses how an outlined function should be called, and what kind of
5681 /// frame should be emitted for that outlined function.
5682 ///
5683 /// \p MachineOutlinerTailCall implies that the function is being created from
5684 /// a sequence of instructions ending in a return.
5685 ///
5686 /// That is,
5687 ///
5688 /// I1 OUTLINED_FUNCTION:
5689 /// I2 --> B OUTLINED_FUNCTION I1
5690 /// BX LR I2
5691 /// BX LR
5692 ///
5693 /// +-------------------------+--------+-----+
5694 /// | | Thumb2 | ARM |
5695 /// +-------------------------+--------+-----+
5696 /// | Call overhead in Bytes | 4 | 4 |
5697 /// | Frame overhead in Bytes | 0 | 0 |
5698 /// | Stack fixup required | No | No |
5699 /// +-------------------------+--------+-----+
5700 ///
5701 /// \p MachineOutlinerThunk implies that the function is being created from
5702 /// a sequence of instructions ending in a call. The outlined function is
5703 /// called with a BL instruction, and the outlined function tail-calls the
5704 /// original call destination.
5705 ///
5706 /// That is,
5707 ///
5708 /// I1 OUTLINED_FUNCTION:
5709 /// I2 --> BL OUTLINED_FUNCTION I1
5710 /// BL f I2
5711 /// B f
5712 ///
5713 /// +-------------------------+--------+-----+
5714 /// | | Thumb2 | ARM |
5715 /// +-------------------------+--------+-----+
5716 /// | Call overhead in Bytes | 4 | 4 |
5717 /// | Frame overhead in Bytes | 0 | 0 |
5718 /// | Stack fixup required | No | No |
5719 /// +-------------------------+--------+-----+
5720 ///
5721 /// \p MachineOutlinerNoLRSave implies that the function should be called using
5722 /// a BL instruction, but doesn't require LR to be saved and restored. This
5723 /// happens when LR is known to be dead.
5724 ///
5725 /// That is,
5726 ///
5727 /// I1 OUTLINED_FUNCTION:
5728 /// I2 --> BL OUTLINED_FUNCTION I1
5729 /// I3 I2
5730 /// I3
5731 /// BX LR
5732 ///
5733 /// +-------------------------+--------+-----+
5734 /// | | Thumb2 | ARM |
5735 /// +-------------------------+--------+-----+
5736 /// | Call overhead in Bytes | 4 | 4 |
5737 /// | Frame overhead in Bytes | 2 | 4 |
5738 /// | Stack fixup required | No | No |
5739 /// +-------------------------+--------+-----+
5740 ///
5741 /// \p MachineOutlinerRegSave implies that the function should be called with a
5742 /// save and restore of LR to an available register. This allows us to avoid
5743 /// stack fixups. Note that this outlining variant is compatible with the
5744 /// NoLRSave case.
5745 ///
5746 /// That is,
5747 ///
5748 /// I1 Save LR OUTLINED_FUNCTION:
5749 /// I2 --> BL OUTLINED_FUNCTION I1
5750 /// I3 Restore LR I2
5751 /// I3
5752 /// BX LR
5753 ///
5754 /// +-------------------------+--------+-----+
5755 /// | | Thumb2 | ARM |
5756 /// +-------------------------+--------+-----+
5757 /// | Call overhead in Bytes | 8 | 12 |
5758 /// | Frame overhead in Bytes | 2 | 4 |
5759 /// | Stack fixup required | No | No |
5760 /// +-------------------------+--------+-----+
5761 ///
5762 /// \p MachineOutlinerDefault implies that the function should be called with
5763 /// a save and restore of LR to the stack.
5764 ///
5765 /// That is,
5766 ///
5767 /// I1 Save LR OUTLINED_FUNCTION:
5768 /// I2 --> BL OUTLINED_FUNCTION I1
5769 /// I3 Restore LR I2
5770 /// I3
5771 /// BX LR
5772 ///
5773 /// +-------------------------+--------+-----+
5774 /// | | Thumb2 | ARM |
5775 /// +-------------------------+--------+-----+
5776 /// | Call overhead in Bytes | 8 | 12 |
5777 /// | Frame overhead in Bytes | 2 | 4 |
5778 /// | Stack fixup required | Yes | Yes |
5779 /// +-------------------------+--------+-----+
5780
5781 enum MachineOutlinerClass {
5782 MachineOutlinerTailCall,
5783 MachineOutlinerThunk,
5784 MachineOutlinerNoLRSave,
5785 MachineOutlinerRegSave,
5786 MachineOutlinerDefault
5787 };
5788
5789 enum MachineOutlinerMBBFlags {
5790 LRUnavailableSomewhere = 0x2,
5791 HasCalls = 0x4,
5792 UnsafeRegsDead = 0x8
5793 };
5794
5795 struct OutlinerCosts {
5796 int CallTailCall;
5797 int FrameTailCall;
5798 int CallThunk;
5799 int FrameThunk;
5800 int CallNoLRSave;
5801 int FrameNoLRSave;
5802 int CallRegSave;
5803 int FrameRegSave;
5804 int CallDefault;
5805 int FrameDefault;
5806 int SaveRestoreLROnStack;
5807
5808   OutlinerCosts(const ARMSubtarget &target)
5809 : CallTailCall(target.isThumb() ? 4 : 4),
5810 FrameTailCall(target.isThumb() ? 0 : 0),
5811 CallThunk(target.isThumb() ? 4 : 4),
5812 FrameThunk(target.isThumb() ? 0 : 0),
5813 CallNoLRSave(target.isThumb() ? 4 : 4),
5814 FrameNoLRSave(target.isThumb() ? 2 : 4),
5815 CallRegSave(target.isThumb() ? 8 : 12),
5816 FrameRegSave(target.isThumb() ? 2 : 4),
5817 CallDefault(target.isThumb() ? 8 : 12),
5818 FrameDefault(target.isThumb() ? 2 : 4),
5819 SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
5820 };
5821
5822 Register
5823 ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
5824 MachineFunction *MF = C.getMF();
5825 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
5826 const ARMBaseRegisterInfo *ARI =
5827 static_cast<const ARMBaseRegisterInfo *>(&TRI);
5828
5829 BitVector regsReserved = ARI->getReservedRegs(*MF);
5830 // Check if there is an available register across the sequence that we can
5831 // use.
5832 for (Register Reg : ARM::rGPRRegClass) {
5833 if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
5834 Reg != ARM::LR && // LR is not reserved, but don't use it.
5835 Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
5836 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
5837 C.isAvailableInsideSeq(Reg, TRI))
5838 return Reg;
5839 }
5840 return Register();
5841 }
5842
5843 // Compute liveness of LR at the point after the interval [I, E), which
5844 // denotes a *backward* iteration through instructions. Used only for return
5845 // basic blocks, which do not end with a tail call.
5846 static bool isLRAvailable(const TargetRegisterInfo &TRI,
5847 MachineBasicBlock::reverse_iterator I,
5848 MachineBasicBlock::reverse_iterator E) {
5849   // At the end of the function, LR is dead.
5850 bool Live = false;
5851 for (; I != E; ++I) {
5852 const MachineInstr &MI = *I;
5853
5854 // Check defs of LR.
5855 if (MI.modifiesRegister(ARM::LR, &TRI))
5856 Live = false;
5857
5858 // Check uses of LR.
5859 unsigned Opcode = MI.getOpcode();
5860 if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
5861 Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
5862 Opcode == ARM::tBXNS_RET) {
5863 // These instructions use LR, but it's not an (explicit or implicit)
5864 // operand.
5865 Live = true;
5866 continue;
5867 }
5868 if (MI.readsRegister(ARM::LR, &TRI))
5869 Live = true;
5870 }
5871 return !Live;
5872 }
5873
5874 std::optional<outliner::OutlinedFunction>
5875 ARMBaseInstrInfo::getOutliningCandidateInfo(
5876 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
5877 unsigned SequenceSize = 0;
5878 for (auto &MI : RepeatedSequenceLocs[0])
5879 SequenceSize += getInstSizeInBytes(MI);
5880
5881 // Properties about candidate MBBs that hold for all of them.
5882 unsigned FlagsSetInAll = 0xF;
5883
5884 // Compute liveness information for each candidate, and set FlagsSetInAll.
5885 const TargetRegisterInfo &TRI = getRegisterInfo();
5886 for (outliner::Candidate &C : RepeatedSequenceLocs)
5887 FlagsSetInAll &= C.Flags;
5888
5889 // According to the ARM Procedure Call Standard, the following are
5890 // undefined on entry/exit from a function call:
5891 //
5892 // * Register R12(IP),
5893 // * Condition codes (and thus the CPSR register)
5894 //
5895 // Since we control the instructions which are part of the outlined regions
5896 // we don't need to be fully compliant with the AAPCS, but we have to
5897 // guarantee that if a veneer is inserted at link time the code is still
5898 // correct. Because of this, we can't outline any sequence of instructions
5899 // where one of these registers is live into/across it. Thus, we need to
5900 // delete those candidates.
5901 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5902 // If the unsafe registers in this block are all dead, then we don't need
5903 // to compute liveness here.
5904 if (C.Flags & UnsafeRegsDead)
5905 return false;
5906 return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
5907 };
5908
5909 // Are there any candidates where those registers are live?
5910 if (!(FlagsSetInAll & UnsafeRegsDead)) {
5911 // Erase every candidate that violates the restrictions above. (It could be
5912 // true that we have viable candidates, so it's not worth bailing out in
5913     // the case that, say, 1 out of 20 candidates violate the restrictions.)
5914 llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
5915
5916 // If the sequence doesn't have enough candidates left, then we're done.
5917 if (RepeatedSequenceLocs.size() < 2)
5918 return std::nullopt;
5919 }
5920
5921 // We expect the majority of the outlining candidates to be in consensus with
5922 // regard to return address sign and authentication, and branch target
5923 // enforcement, in other words, partitioning according to all the four
5924 // possible combinations of PAC-RET and BTI is going to yield one big subset
5925 // and three small (likely empty) subsets. That allows us to cull incompatible
5926 // candidates separately for PAC-RET and BTI.
5927
5928   // Partition the candidates into two sets: one with BTI enabled and one with
5929   // BTI disabled. Remove the candidates from the smaller set. If both sets are
5930   // the same size, prefer the non-BTI ones for outlining, since they have less
5931   // overhead.
5932 auto NoBTI =
5933 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5934 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5935 return AFI.branchTargetEnforcement();
5936 });
5937 if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
5938 std::distance(NoBTI, RepeatedSequenceLocs.end()))
5939 RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
5940 else
5941 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
5942
5943 if (RepeatedSequenceLocs.size() < 2)
5944 return std::nullopt;
5945
5946 // Likewise, partition the candidates according to PAC-RET enablement.
5947 auto NoPAC =
5948 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5949 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5950 // If the function happens to not spill the LR, do not disqualify it
5951 // from the outlining.
5952 return AFI.shouldSignReturnAddress(true);
5953 });
5954 if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
5955 std::distance(NoPAC, RepeatedSequenceLocs.end()))
5956 RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
5957 else
5958 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
5959
5960 if (RepeatedSequenceLocs.size() < 2)
5961 return std::nullopt;
5962
5963 // At this point, we have only "safe" candidates to outline. Figure out
5964 // frame + call instruction information.
5965
5966 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
5967
5968 // Helper lambda which sets call information for every candidate.
5969 auto SetCandidateCallInfo =
5970 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5971 for (outliner::Candidate &C : RepeatedSequenceLocs)
5972 C.setCallInfo(CallID, NumBytesForCall);
5973 };
5974
5975 OutlinerCosts Costs(Subtarget);
5976
5977 const auto &SomeMFI =
5978 *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
5979 // Adjust costs to account for the BTI instructions.
5980 if (SomeMFI.branchTargetEnforcement()) {
5981 Costs.FrameDefault += 4;
5982 Costs.FrameNoLRSave += 4;
5983 Costs.FrameRegSave += 4;
5984 Costs.FrameTailCall += 4;
5985 Costs.FrameThunk += 4;
5986 }
5987
5988 // Adjust costs to account for sign and authentication instructions.
5989 if (SomeMFI.shouldSignReturnAddress(true)) {
5990 Costs.CallDefault += 8; // +PAC instr, +AUT instr
5991 Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
5992 }
5993
5994 unsigned FrameID = MachineOutlinerDefault;
5995 unsigned NumBytesToCreateFrame = Costs.FrameDefault;
5996
5997 // If the last instruction in any candidate is a terminator, then we should
5998 // tail call all of the candidates.
5999 if (RepeatedSequenceLocs[0].back().isTerminator()) {
6000 FrameID = MachineOutlinerTailCall;
6001 NumBytesToCreateFrame = Costs.FrameTailCall;
6002 SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
6003 } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
6004 LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
6005 LastInstrOpcode == ARM::tBLXr ||
6006 LastInstrOpcode == ARM::tBLXr_noip ||
6007 LastInstrOpcode == ARM::tBLXi) {
6008 FrameID = MachineOutlinerThunk;
6009 NumBytesToCreateFrame = Costs.FrameThunk;
6010 SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
6011 } else {
6012 // We need to decide how to emit calls + frames. We can always emit the same
6013 // frame if we don't need to save to the stack. If we have to save to the
6014 // stack, then we need a different frame.
6015 unsigned NumBytesNoStackCalls = 0;
6016 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
6017
6018 for (outliner::Candidate &C : RepeatedSequenceLocs) {
6019 // LR liveness is overestimated in return blocks, unless they end with a
6020 // tail call.
6021 const auto Last = C.getMBB()->rbegin();
6022 const bool LRIsAvailable =
6023 C.getMBB()->isReturnBlock() && !Last->isCall()
6024 ? isLRAvailable(TRI, Last,
6025 (MachineBasicBlock::reverse_iterator)C.begin())
6026 : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
6027 if (LRIsAvailable) {
6028 FrameID = MachineOutlinerNoLRSave;
6029 NumBytesNoStackCalls += Costs.CallNoLRSave;
6030 C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
6031 CandidatesWithoutStackFixups.push_back(C);
6032 }
6033
6034 // Is an unused register available? If so, we won't modify the stack, so
6035 // we can outline with the same frame type as those that don't save LR.
6036 else if (findRegisterToSaveLRTo(C)) {
6037 FrameID = MachineOutlinerRegSave;
6038 NumBytesNoStackCalls += Costs.CallRegSave;
6039 C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
6040 CandidatesWithoutStackFixups.push_back(C);
6041 }
6042
6043 // Is SP used in the sequence at all? If not, we don't have to modify
6044 // the stack, so we are guaranteed to get the same frame.
6045 else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
6046 NumBytesNoStackCalls += Costs.CallDefault;
6047 C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
6048 CandidatesWithoutStackFixups.push_back(C);
6049 }
6050
6051 // If we outline this, we need to modify the stack. Pretend we don't
6052 // outline this by saving all of its bytes.
6053 else
6054 NumBytesNoStackCalls += SequenceSize;
6055 }
6056
6057 // If there are no places where we have to save LR, then note that we don't
6058 // have to update the stack. Otherwise, give every candidate the default
6059 // call type
6060 if (NumBytesNoStackCalls <=
6061 RepeatedSequenceLocs.size() * Costs.CallDefault) {
6062 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
6063 FrameID = MachineOutlinerNoLRSave;
6064 if (RepeatedSequenceLocs.size() < 2)
6065 return std::nullopt;
6066 } else
6067 SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
6068 }
6069
6070 // Does every candidate's MBB contain a call? If so, then we might have a
6071 // call in the range.
6072 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
6073     // Check if the range contains a call. These require a save + restore of
6074 // the link register.
6075 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
6076 if (std::any_of(FirstCand.begin(), std::prev(FirstCand.end()),
6077 [](const MachineInstr &MI) { return MI.isCall(); }))
6078 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
6079
6080     // Handle the last instruction separately. If it is a tail call, then the
6081     // last instruction is a call and we don't want to save + restore in this
6082     // case. However, it is possible that the last instruction is a call
6083     // without it being valid to tail call this sequence; we should consider
6084     // this as well.
6085 else if (FrameID != MachineOutlinerThunk &&
6086 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
6087 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
6088 }
6089
6090 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
6091 NumBytesToCreateFrame, FrameID);
6092 }
6093
6094 bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
6095 int64_t Fixup,
6096 bool Updt) const {
6097 int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
6098 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
6099 if (SPIdx < 0)
6100 // No SP operand
6101 return true;
6102 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
6103 // If SP is not the base register we can't do much
6104 return false;
6105
6106 // Stack might be involved but addressing mode doesn't handle any offset.
6107   // Note: AddrModeT1_[1|2|4] don't operate on SP.
6108 if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
6109 AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
6110 AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
6111 AddrMode == ARMII::AddrModeT2_so || // SP can't be used as the base register
6112 AddrMode == ARMII::AddrModeT2_pc || // PCrel access
6113 AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
6114 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
6115 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
6116 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
6117 AddrMode == ARMII::AddrModeNone ||
6118 AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
6119 AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
6120 return false;
6121
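// For the addressing modes handled below, the offset immediate is assumed to
// be the third operand from the end, sitting just in front of the two
// predicate operands (condition code and CPSR register).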
6122 unsigned NumOps = MI->getDesc().getNumOperands();
6123 unsigned ImmIdx = NumOps - 3;
6124
6125 const MachineOperand &Offset = MI->getOperand(ImmIdx);
6126 assert(Offset.isImm() && "Is not an immediate");
6127 int64_t OffVal = Offset.getImm();
6128
6129 if (OffVal < 0)
6130 // Don't override data if they are below SP.
6131 return false;
6132
6133 unsigned NumBits = 0;
6134 unsigned Scale = 1;
6135
6136 switch (AddrMode) {
6137 case ARMII::AddrMode3:
6138 if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
6139 return false;
6140 OffVal = ARM_AM::getAM3Offset(OffVal);
6141 NumBits = 8;
6142 break;
6143 case ARMII::AddrMode5:
6144 if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
6145 return false;
6146 OffVal = ARM_AM::getAM5Offset(OffVal);
6147 NumBits = 8;
6148 Scale = 4;
6149 break;
6150 case ARMII::AddrMode5FP16:
6151 if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
6152 return false;
6153 OffVal = ARM_AM::getAM5FP16Offset(OffVal);
6154 NumBits = 8;
6155 Scale = 2;
6156 break;
6157 case ARMII::AddrModeT2_i8pos:
6158 NumBits = 8;
6159 break;
6160 case ARMII::AddrModeT2_i8s4:
6161 // FIXME: Values are already scaled in this addressing mode.
6162 assert((Fixup & 3) == 0 && "Can't encode this offset!");
6163 NumBits = 10;
6164 break;
6165 case ARMII::AddrModeT2_ldrex:
6166 NumBits = 8;
6167 Scale = 4;
6168 break;
6169 case ARMII::AddrModeT2_i12:
6170 case ARMII::AddrMode_i12:
6171 NumBits = 12;
6172 break;
6173 case ARMII::AddrModeT1_s: // SP-relative LD/ST
6174 NumBits = 8;
6175 Scale = 4;
6176 break;
6177 default:
6178 llvm_unreachable("Unsupported addressing mode!");
6179 }
6180 // Make sure the offset is encodable for instructions that scale the
6181 // immediate.
6182 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
6183 "Can't encode this offset!");
6184 OffVal += Fixup / Scale;
6185
6186 unsigned Mask = (1 << NumBits) - 1;
6187
6188 if (OffVal <= Mask) {
6189 if (Updt)
6190 MI->getOperand(ImmIdx).setImm(OffVal);
6191 return true;
6192 }
6193
6194 return false;
6195 }
6196
6197 void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(
6198 Function &F, std::vector<outliner::Candidate> &Candidates) const {
6199 outliner::Candidate &C = Candidates.front();
6200 // branch-target-enforcement is guaranteed to be consistent between all
6201 // candidates, so we only need to look at one.
6202 const Function &CFn = C.getMF()->getFunction();
6203 if (CFn.hasFnAttribute("branch-target-enforcement"))
6204 F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
6205
6206 ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
6207 }
6208
6209 bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
6210 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6211 const Function &F = MF.getFunction();
6212
6213 // Can F be deduplicated by the linker? If it can, don't outline from it.
6214 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6215 return false;
6216
6217 // Don't outline from functions with section markings; the program could
6218 // expect that all the code is in the named section.
6219 // FIXME: Allow outlining from multiple functions with the same section
6220 // marking.
6221 if (F.hasSection())
6222 return false;
6223
6224 // FIXME: Thumb1 outlining is not handled
6225 if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction())
6226 return false;
6227
6228 // It's safe to outline from MF.
6229 return true;
6230 }
6231
6232 bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
6233 unsigned &Flags) const {
6234 // Check if LR is available through all of the MBB. If it's not, then set
6235 // a flag.
6236 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6237 "Suitable Machine Function for outlining must track liveness");
6238
6239 LiveRegUnits LRU(getRegisterInfo());
6240
6241 for (MachineInstr &MI : llvm::reverse(MBB))
6242 LRU.accumulate(MI);
6243
6244 // Check whether each of the unsafe registers is available...
6245 bool R12AvailableInBlock = LRU.available(ARM::R12);
6246 bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
6247
6248 // If all of these are dead (and not live out), we know we don't have to check
6249 // them later.
6250 if (R12AvailableInBlock && CPSRAvailableInBlock)
6251 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6252
6253 // Now, add the live outs to the set.
6254 LRU.addLiveOuts(MBB);
6255
6256 // If any of these registers is available in the MBB, but also a live out of
6257 // the block, then we know outlining is unsafe.
6258 if (R12AvailableInBlock && !LRU.available(ARM::R12))
6259 return false;
6260 if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
6261 return false;
6262
6263 // Check if there's a call inside this MachineBasicBlock. If there is, then
6264 // set a flag.
6265 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6266 Flags |= MachineOutlinerMBBFlags::HasCalls;
6267
6268 // LR liveness is overestimated in return blocks.
6269
6270 bool LRIsAvailable =
6271 MBB.isReturnBlock() && !MBB.back().isCall()
6272 ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
6273 : LRU.available(ARM::LR);
6274 if (!LRIsAvailable)
6275 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6276
6277 return true;
6278 }
6279
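// Classify a single instruction for the outliner: fully legal, legal only as
// the final (terminator) instruction of a candidate, or illegal to outline.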
6280 outliner::InstrType
6281 ARMBaseInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
6282 unsigned Flags) const {
6283 MachineInstr &MI = *MIT;
6284 const TargetRegisterInfo *TRI = &getRegisterInfo();
6285
6286 // PIC instructions contain labels, outlining them would break offset
6287 // computing.
6288 unsigned Opc = MI.getOpcode();
6289 if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
6290 Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
6291 Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
6292 Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
6293 Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
6294 Opc == ARM::t2MOV_ga_pcrel)
6295 return outliner::InstrType::Illegal;
6296
6297 // Be conservative with ARMv8.1 MVE instructions.
6298 if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
6299 Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
6300 Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
6301 Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
6302 Opc == ARM::t2LoopEndDec)
6303 return outliner::InstrType::Illegal;
6304
6305 const MCInstrDesc &MCID = MI.getDesc();
6306 uint64_t MIFlags = MCID.TSFlags;
6307 if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
6308 return outliner::InstrType::Illegal;
6309
6310 // Is this a terminator for a basic block?
6311 if (MI.isTerminator())
6312 // TargetInstrInfo::getOutliningType has already filtered out anything
6313 // that would break this, so we can allow it here.
6314 return outliner::InstrType::Legal;
6315
6316 // Don't outline if link register or program counter value are used.
6317 if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
6318 return outliner::InstrType::Illegal;
6319
6320 if (MI.isCall()) {
6321 // Get the function associated with the call. Look at each operand and find
6322 // the one that represents the callee and get its name.
6323 const Function *Callee = nullptr;
6324 for (const MachineOperand &MOP : MI.operands()) {
6325 if (MOP.isGlobal()) {
6326 Callee = dyn_cast<Function>(MOP.getGlobal());
6327 break;
6328 }
6329 }
6330
6331 // Don't outline calls to "mcount"-like functions; in particular, Linux
6332 // kernel function tracing relies on them.
6333 if (Callee &&
6334 (Callee->getName() == "\01__gnu_mcount_nc" ||
6335 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
6336 return outliner::InstrType::Illegal;
6337
6338 // If we don't know anything about the callee, assume it depends on the
6339 // stack layout of the caller. In that case, it's only legal to outline
6340 // as a tail-call. Explicitly list the call instructions we know about so
6341 // we don't get unexpected results with call pseudo-instructions.
6342 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6343 if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
6344 Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
6345 Opc == ARM::tBLXi)
6346 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6347
6348 if (!Callee)
6349 return UnknownCallOutlineType;
6350
6351 // We have a function we have information about. Check if it's something we
6352 // can safely outline.
6353 MachineFunction *MF = MI.getParent()->getParent();
6354 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
6355
6356 // We don't know what's going on with the callee at all. Don't touch it.
6357 if (!CalleeMF)
6358 return UnknownCallOutlineType;
6359
6360 // Check if we know anything about the callee saves on the function. If we
6361 // don't, then don't touch it, since that implies that we haven't computed
6362 // anything about its stack frame yet.
6363 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6364 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6365 MFI.getNumObjects() > 0)
6366 return UnknownCallOutlineType;
6367
6368 // At this point, we can say that CalleeMF ought to not pass anything on the
6369 // stack. Therefore, we can outline it.
6370 return outliner::InstrType::Legal;
6371 }
6372
6373 // Since calls are handled, don't touch LR or PC
6374 if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
6375 return outliner::InstrType::Illegal;
6376
6377 // Does this use the stack?
6378 if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
6379 // True if there is no chance that any outlined candidate from this range
6380 // could require stack fixups. That is, both
6381 // * LR is available in the range (No save/restore around call)
6382 // * The range doesn't include calls (No save/restore in outlined frame)
6383 // are true.
6384 // These conditions also ensure correctness of the return address
6385 // authentication - we insert sign and authentication instructions only if
6386 // we save/restore LR on stack, but then this condition ensures that the
6387 // outlined range does not modify the SP, therefore the SP value used for
6388 // signing is the same as the one used for authentication.
6389 // FIXME: This is very restrictive; the flags check the whole block,
6390 // not just the bit we will try to outline.
6391 bool MightNeedStackFixUp =
6392 (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
6393 MachineOutlinerMBBFlags::HasCalls));
6394
6395 if (!MightNeedStackFixUp)
6396 return outliner::InstrType::Legal;
6397
6398 // Any modification of SP will break our code to save/restore LR.
6399 // FIXME: We could handle some instructions which add a constant offset to
6400 // SP, with a bit more work.
6401 if (MI.modifiesRegister(ARM::SP, TRI))
6402 return outliner::InstrType::Illegal;
6403
6404 // At this point, we have a stack instruction that we might need to fix
6405 // up. We'll handle it if it's a load or store.
6406 if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
6407 false))
6408 return outliner::InstrType::Legal;
6409
6410 // We can't fix it up, so don't outline it.
6411 return outliner::InstrType::Illegal;
6412 }
6413
6414 // Be conservative with IT blocks.
6415 if (MI.readsRegister(ARM::ITSTATE, TRI) ||
6416 MI.modifiesRegister(ARM::ITSTATE, TRI))
6417 return outliner::InstrType::Illegal;
6418
6419 // Don't outline CFI instructions.
6420 if (MI.isCFIInstruction())
6421 return outliner::InstrType::Illegal;
6422
6423 return outliner::InstrType::Legal;
6424 }
6425
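// Walk the outlined block and rewrite every SP-relative offset so it accounts
// for the extra stack-alignment-sized slot used to spill LR.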
6426 void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6427 for (MachineInstr &MI : MBB) {
6428 checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
6429 }
6430 }
6431
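// Spill LR (and, when Auth is set, the PAC computed into R12) to the stack
// before It, emitting the corresponding CFI directives when CFI is requested.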
6432 void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
6433 MachineBasicBlock::iterator It, bool CFI,
6434 bool Auth) const {
6435 int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
6436 unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
6437 assert(Align >= 8 && Align <= 256);
6438 if (Auth) {
6439 assert(Subtarget.isThumb2());
6440 // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
6441 // sequence.
6442 BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
6443 BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
6444 .addReg(ARM::R12, RegState::Kill)
6445 .addReg(ARM::LR, RegState::Kill)
6446 .addReg(ARM::SP)
6447 .addImm(-Align)
6448 .add(predOps(ARMCC::AL))
6449 .setMIFlags(MIFlags);
6450 } else {
6451 unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
6452 BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
6453 .addReg(ARM::LR, RegState::Kill)
6454 .addReg(ARM::SP)
6455 .addImm(-Align)
6456 .add(predOps(ARMCC::AL))
6457 .setMIFlags(MIFlags);
6458 }
6459
6460 if (!CFI)
6461 return;
6462
6463 MachineFunction &MF = *MBB.getParent();
6464
6465 // Add a CFI, saying CFA is offset by Align bytes from SP.
6466 int64_t StackPosEntry =
6467 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align));
6468 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6469 .addCFIIndex(StackPosEntry)
6470 .setMIFlags(MachineInstr::FrameSetup);
6471
6472 // Add a CFI saying that the LR that we want to find is now higher than
6473 // before.
6474 int LROffset = Auth ? Align - 4 : Align;
6475 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6476 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6477 int64_t LRPosEntry = MF.addFrameInst(
6478 MCCFIInstruction::createOffset(nullptr, DwarfLR, -LROffset));
6479 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6480 .addCFIIndex(LRPosEntry)
6481 .setMIFlags(MachineInstr::FrameSetup);
6482 if (Auth) {
6483 // Add a CFI for the location of the return address PAC.
6484 unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
6485 int64_t RACPosEntry = MF.addFrameInst(
6486 MCCFIInstruction::createOffset(nullptr, DwarfRAC, -Align));
6487 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6488 .addCFIIndex(RACPosEntry)
6489 .setMIFlags(MachineInstr::FrameSetup);
6490 }
6491 }
6492
6493 void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
6494 MachineBasicBlock::iterator It,
6495 Register Reg) const {
6496 MachineFunction &MF = *MBB.getParent();
6497 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6498 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6499 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
6500
6501 int64_t LRPosEntry = MF.addFrameInst(
6502 MCCFIInstruction::createRegister(nullptr, DwarfLR, DwarfReg));
6503 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6504 .addCFIIndex(LRPosEntry)
6505 .setMIFlags(MachineInstr::FrameSetup);
6506 }
6507
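// Reload LR (and the PAC, under Auth) from the stack at It, emit the matching
// CFI directives when requested, and finally authenticate the return address
// with AUT when Auth is set.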
6508 void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
6509 MachineBasicBlock::iterator It,
6510 bool CFI, bool Auth) const {
6511 int Align = Subtarget.getStackAlignment().value();
6512 unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
6513 if (Auth) {
6514 assert(Subtarget.isThumb2());
6515 // Restore return address PAC and LR.
6516 BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
6517 .addReg(ARM::R12, RegState::Define)
6518 .addReg(ARM::LR, RegState::Define)
6519 .addReg(ARM::SP, RegState::Define)
6520 .addReg(ARM::SP)
6521 .addImm(Align)
6522 .add(predOps(ARMCC::AL))
6523 .setMIFlags(MIFlags);
6524 // LR authentication is after the CFI instructions, below.
6525 } else {
6526 unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
6527 MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
6528 .addReg(ARM::SP, RegState::Define)
6529 .addReg(ARM::SP);
6530 if (!Subtarget.isThumb())
6531 MIB.addReg(0);
6532 MIB.addImm(Subtarget.getStackAlignment().value())
6533 .add(predOps(ARMCC::AL))
6534 .setMIFlags(MIFlags);
6535 }
6536
6537 if (CFI) {
6538 // Now stack has moved back up...
6539 MachineFunction &MF = *MBB.getParent();
6540 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6541 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6542 int64_t StackPosEntry =
6543 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
6544 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6545 .addCFIIndex(StackPosEntry)
6546 .setMIFlags(MachineInstr::FrameDestroy);
6547
6548 // ... and we have restored LR.
6549 int64_t LRPosEntry =
6550 MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
6551 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6552 .addCFIIndex(LRPosEntry)
6553 .setMIFlags(MachineInstr::FrameDestroy);
6554
6555 if (Auth) {
6556 unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
6557 int64_t Entry =
6558 MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, DwarfRAC));
6559 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6560 .addCFIIndex(Entry)
6561 .setMIFlags(MachineInstr::FrameDestroy);
6562 }
6563 }
6564
6565 if (Auth)
6566 BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
6567 }
6568
6569 void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg(
6570 MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
6571 MachineFunction &MF = *MBB.getParent();
6572 const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
6573 unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
6574
6575 int64_t LRPosEntry =
6576 MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
6577 BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
6578 .addCFIIndex(LRPosEntry)
6579 .setMIFlags(MachineInstr::FrameDestroy);
6580 }
6581
6582 void ARMBaseInstrInfo::buildOutlinedFrame(
6583 MachineBasicBlock &MBB, MachineFunction &MF,
6584 const outliner::OutlinedFunction &OF) const {
6585 // For thunk outlining, rewrite the last instruction from a call to a
6586 // tail-call.
6587 if (OF.FrameConstructionID == MachineOutlinerThunk) {
6588 MachineInstr *Call = &*--MBB.instr_end();
6589 bool isThumb = Subtarget.isThumb();
6590 unsigned FuncOp = isThumb ? 2 : 0;
6591 unsigned Opc = Call->getOperand(FuncOp).isReg()
6592 ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
6593 : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
6594 : ARM::tTAILJMPdND
6595 : ARM::TAILJMPd;
6596 MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
6597 .add(Call->getOperand(FuncOp));
6598 if (isThumb && !Call->getOperand(FuncOp).isReg())
6599 MIB.add(predOps(ARMCC::AL));
6600 Call->eraseFromParent();
6601 }
6602
6603 // Is there a call in the outlined range?
6604 auto IsNonTailCall = [](MachineInstr &MI) {
6605 return MI.isCall() && !MI.isReturn();
6606 };
6607 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
6608 MachineBasicBlock::iterator It = MBB.begin();
6609 MachineBasicBlock::iterator Et = MBB.end();
6610
6611 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6612 OF.FrameConstructionID == MachineOutlinerThunk)
6613 Et = std::prev(MBB.end());
6614
6615 // We have to save and restore LR, so we need to add it to the liveins if
6616 // it is not already part of the set. This is sufficient since outlined
6617 // functions only have one block.
6618 if (!MBB.isLiveIn(ARM::LR))
6619 MBB.addLiveIn(ARM::LR);
6620
6621 // Insert a save before the outlined region
6622 bool Auth = OF.Candidates.front()
6623 .getMF()
6624 ->getInfo<ARMFunctionInfo>()
6625 ->shouldSignReturnAddress(true);
6626 saveLROnStack(MBB, It, true, Auth);
6627
6628 // Fix up the instructions in the range, since we're going to modify the
6629 // stack.
6630 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6631 "Can only fix up stack references once");
6632 fixupPostOutline(MBB);
6633
6634 // Insert a restore before the terminator for the function. Restore LR.
6635 restoreLRFromStack(MBB, Et, true, Auth);
6636 }
6637
6638 // If this is a tail call outlined function, then there's already a return.
6639 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6640 OF.FrameConstructionID == MachineOutlinerThunk)
6641 return;
6642
6643 // Here we have to insert the return ourselves. Get the correct opcode from
6644 // the current feature set.
6645 BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
6646 .add(predOps(ARMCC::AL));
6647
6648 // Did we have to modify the stack by saving the link register?
6649 if (OF.FrameConstructionID != MachineOutlinerDefault &&
6650 OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
6651 return;
6652
6653 // We modified the stack.
6654 // Walk over the basic block and fix up all the stack accesses.
6655 fixupPostOutline(MBB);
6656 }
6657
6658 MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
6659 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6660 MachineFunction &MF, outliner::Candidate &C) const {
6661 MachineInstrBuilder MIB;
6662 MachineBasicBlock::iterator CallPt;
6663 unsigned Opc;
6664 bool isThumb = Subtarget.isThumb();
6665
6666 // Are we tail calling?
6667 if (C.CallConstructionID == MachineOutlinerTailCall) {
6668 // If yes, then we can just branch to the label.
6669 Opc = isThumb
6670 ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
6671 : ARM::TAILJMPd;
6672 MIB = BuildMI(MF, DebugLoc(), get(Opc))
6673 .addGlobalAddress(M.getNamedValue(MF.getName()));
6674 if (isThumb)
6675 MIB.add(predOps(ARMCC::AL));
6676 It = MBB.insert(It, MIB);
6677 return It;
6678 }
6679
6680 // Create the call instruction.
6681 Opc = isThumb ? ARM::tBL : ARM::BL;
6682 MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
6683 if (isThumb)
6684 CallMIB.add(predOps(ARMCC::AL));
6685 CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
6686
6687 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6688 C.CallConstructionID == MachineOutlinerThunk) {
6689 // No save of LR is needed here, so just insert the call.
6690 It = MBB.insert(It, CallMIB);
6691 return It;
6692 }
6693
6694 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
6695 // Can we save to a register?
6696 if (C.CallConstructionID == MachineOutlinerRegSave) {
6697 Register Reg = findRegisterToSaveLRTo(C);
6698 assert(Reg != 0 && "No callee-saved register available?");
6699
6700 // Save and restore LR from that register.
6701 copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
6702 if (!AFI.isLRSpilled())
6703 emitCFIForLRSaveToReg(MBB, It, Reg);
6704 CallPt = MBB.insert(It, CallMIB);
6705 copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
6706 if (!AFI.isLRSpilled())
6707 emitCFIForLRRestoreFromReg(MBB, It);
6708 It--;
6709 return CallPt;
6710 }
6711 // We have the default case. Save and restore from SP.
6712 if (!MBB.isLiveIn(ARM::LR))
6713 MBB.addLiveIn(ARM::LR);
6714 bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
6715 saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
6716 CallPt = MBB.insert(It, CallMIB);
6717 restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
6718 It--;
6719 return CallPt;
6720 }
6721
6722 bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
6723 MachineFunction &MF) const {
6724 return Subtarget.isMClass() && MF.getFunction().hasMinSize();
6725 }
6726
6727 bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(
6728 const MachineInstr &MI) const {
6729 // Try hard to rematerialize any VCTPs because if we spill P0, it will block
6730 // the tail predication conversion. This means that the element count
6731 // register has to be live for longer, but that has to be better than
6732 // spill/restore and VPT predication.
6733 return (isVCTP(&MI) && !isPredicated(MI)) ||
6734 TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
6735 }
6736
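// Helpers returning the BLX-style call opcodes to use, selecting the "_noip"
// variants when straight-line-speculation (SLS) hardening of indirect calls
// is enabled for this function's subtarget.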
6737 unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
6738 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
6739 : ARM::BLX;
6740 }
6741
6742 unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) {
6743 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
6744 : ARM::tBLXr;
6745 }
6746
6747 unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
6748 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
6749 : ARM::BLX_pred;
6750 }
6751
6752 namespace {
6753 class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
6754 MachineInstr *EndLoop, *LoopCount;
6755 MachineFunction *MF;
6756 const TargetInstrInfo *TII;
6757
6758 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
6759 // [LAST_IS_USE] : last reference to register in schedule is a use
6760 // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
6761 static int constexpr MAX_STAGES = 30;
6762 static int constexpr LAST_IS_USE = MAX_STAGES;
6763 static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
6764 typedef std::bitset<MAX_STAGES + 2> IterNeed;
6765 typedef std::map<unsigned, IterNeed> IterNeeds;
6766
6767 void bumpCrossIterationPressure(RegPressureTracker &RPT,
6768 const IterNeeds &CIN);
6769 bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
6770
6771 // Meaning of EndLoop / LoopCount for each loop type:
6772 // t2Bcc:
6773 // EndLoop = branch at end of original BB that will become a kernel
6774 // LoopCount = CC setter live into branch
6775 // t2LoopEnd:
6776 // EndLoop = branch at end of original BB
6777 // LoopCount = t2LoopDec
6778 public:
6779 ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
6780 : EndLoop(EndLoop), LoopCount(LoopCount),
6781 MF(EndLoop->getParent()->getParent()),
6782 TII(MF->getSubtarget().getInstrInfo()) {}
6783
6784 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
6785 // Only ignore the terminator.
6786 return MI == EndLoop || MI == LoopCount;
6787 }
6788
6789 bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
6790 if (tooMuchRegisterPressure(SSD, SMS))
6791 return false;
6792
6793 return true;
6794 }
6795
6796 std::optional<bool> createTripCountGreaterCondition(
6797 int TC, MachineBasicBlock &MBB,
6798 SmallVectorImpl<MachineOperand> &Cond) override {
6799
6800 if (isCondBranchOpcode(EndLoop->getOpcode())) {
6801 Cond.push_back(EndLoop->getOperand(1));
6802 Cond.push_back(EndLoop->getOperand(2));
6803 if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
6804 TII->reverseBranchCondition(Cond);
6805 }
6806 return {};
6807 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
6808 // General case just lets the unrolled t2LoopDec do the subtraction and
6809 // therefore just needs to check if zero has been reached.
6810 MachineInstr *LoopDec = nullptr;
6811 for (auto &I : MBB.instrs())
6812 if (I.getOpcode() == ARM::t2LoopDec)
6813 LoopDec = &I;
6814 assert(LoopDec && "Unable to find copied LoopDec");
6815 // Check if we're done with the loop.
6816 BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
6817 .addReg(LoopDec->getOperand(0).getReg())
6818 .addImm(0)
6819 .addImm(ARMCC::AL)
6820 .addReg(ARM::NoRegister);
6821 Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
6822 Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
6823 return {};
6824 } else
6825 llvm_unreachable("Unknown EndLoop");
6826 }
6827
6828 void setPreheader(MachineBasicBlock *NewPreheader) override {}
6829
6830 void adjustTripCount(int TripCountAdjust) override {}
6831
6832 void disposed() override {}
6833 };
6834
6835 void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
6836 const IterNeeds &CIN) {
6837 // Increase pressure by the amounts in CrossIterationNeeds
6838 for (const auto &N : CIN) {
6839 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6840 for (int I = 0; I < Cnt; ++I)
6841 RPT.increaseRegPressure(Register(N.first), LaneBitmask::getNone(),
6842 LaneBitmask::getAll());
6843 }
6844 // Decrease pressure by the amounts in CrossIterationNeeds
6845 for (const auto &N : CIN) {
6846 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6847 for (int I = 0; I < Cnt; ++I)
6848 RPT.decreaseRegPressure(Register(N.first), LaneBitmask::getAll(),
6849 LaneBitmask::getNone());
6850 }
6851 }
6852
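// Estimate whether the proposed software-pipelined schedule would exceed the
// target's register pressure limits once values that are live across loop
// iterations are accounted for.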
6853 bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
6854 SMSchedule &SMS) {
6855 IterNeeds CrossIterationNeeds;
6856
6857 // Determine which values will be loop-carried after the schedule is
6858 // applied
6859
6860 for (auto &SU : SSD.SUnits) {
6861 const MachineInstr *MI = SU.getInstr();
6862 int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
6863 for (auto &S : SU.Succs)
6864 if (MI->isPHI() && S.getKind() == SDep::Anti) {
6865 Register Reg = S.getReg();
6866 if (Reg.isVirtual())
6867 CrossIterationNeeds.insert(std::make_pair(Reg.id(), IterNeed()))
6868 .first->second.set(0);
6869 } else if (S.isAssignedRegDep()) {
6870 int OStg = SMS.stageScheduled(S.getSUnit());
6871 if (OStg >= 0 && OStg != Stg) {
6872 Register Reg = S.getReg();
6873 if (Reg.isVirtual())
6874 CrossIterationNeeds.insert(std::make_pair(Reg.id(), IterNeed()))
6875 .first->second |= ((1 << (OStg - Stg)) - 1);
6876 }
6877 }
6878 }
6879
6880 // Determine more-or-less what the proposed schedule (reversed) is going to
6881 // be; it might not be quite the same because the within-cycle ordering
6882 // created by SMSchedule depends upon changes to help with address offsets and
6883 // the like.
6884 std::vector<SUnit *> ProposedSchedule;
6885 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
6886 for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
6887 ++Stage) {
6888 std::deque<SUnit *> Instrs =
6889 SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
6890 std::sort(Instrs.begin(), Instrs.end(),
6891 [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6892 for (SUnit *SU : Instrs)
6893 ProposedSchedule.push_back(SU);
6894 }
6895
6896 // Learn whether the last use/def of each cross-iteration register is a use or
6897 // def. If it is a def, RegisterPressure will implicitly increase max pressure
6898 // and we do not have to add the pressure.
6899 for (auto *SU : ProposedSchedule)
6900 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
6901 ++OperI) {
6902 auto MO = *OperI;
6903 if (!MO.isReg() || !MO.getReg())
6904 continue;
6905 Register Reg = MO.getReg();
6906 auto CIter = CrossIterationNeeds.find(Reg.id());
6907 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
6908 CIter->second[SEEN_AS_LIVE])
6909 continue;
6910 if (MO.isDef() && !MO.isDead())
6911 CIter->second.set(SEEN_AS_LIVE);
6912 else if (MO.isUse())
6913 CIter->second.set(LAST_IS_USE);
6914 }
6915 for (auto &CI : CrossIterationNeeds)
6916 CI.second.reset(LAST_IS_USE);
6917
6918 RegionPressure RecRegPressure;
6919 RegPressureTracker RPTracker(RecRegPressure);
6920 RegisterClassInfo RegClassInfo;
6921 RegClassInfo.runOnMachineFunction(*MF);
6922 RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
6923 EndLoop->getParent()->end(), false, false);
6924 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
6925
6926 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6927
6928 for (auto *SU : ProposedSchedule) {
6929 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
6930 RPTracker.setPos(std::next(CurInstI));
6931 RPTracker.recede();
6932
6933 // Track what cross-iteration registers would be seen as live
6934 for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
6935 auto MO = *OperI;
6936 if (!MO.isReg() || !MO.getReg())
6937 continue;
6938 Register Reg = MO.getReg();
6939 if (MO.isDef() && !MO.isDead()) {
6940 auto CIter = CrossIterationNeeds.find(Reg.id());
6941 if (CIter != CrossIterationNeeds.end()) {
6942 CIter->second.reset(0);
6943 CIter->second.reset(SEEN_AS_LIVE);
6944 }
6945 }
6946 }
6947 for (auto &S : SU->Preds) {
6948 auto Stg = SMS.stageScheduled(SU);
6949 if (S.isAssignedRegDep()) {
6950 Register Reg = S.getReg();
6951 auto CIter = CrossIterationNeeds.find(Reg.id());
6952 if (CIter != CrossIterationNeeds.end()) {
6953 auto Stg2 = SMS.stageScheduled(const_cast<SUnit *>(S.getSUnit()));
6954 assert(Stg2 <= Stg && "Data dependence upon earlier stage");
6955 if (Stg - Stg2 < MAX_STAGES)
6956 CIter->second.set(Stg - Stg2);
6957 CIter->second.set(SEEN_AS_LIVE);
6958 }
6959 }
6960 }
6961
6962 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6963 }
6964
6965 auto &P = RPTracker.getPressure().MaxSetPressure;
6966 for (unsigned I = 0, E = P.size(); I < E; ++I)
6967 if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) {
6968 return true;
6969 }
6970 return false;
6971 }
6972
6973 } // namespace
6974
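// Recognize loops the MachinePipeliner can handle: either a t2Bcc-terminated
// loop whose CPSR-setting instruction can be kept in stage 0, or a
// low-overhead loop formed by t2DoLoopStart / t2LoopDec / t2LoopEnd.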
6975 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
6976 ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
6977 MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
6978 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
6979 if (Preheader == LoopBB)
6980 Preheader = *std::next(LoopBB->pred_begin());
6981
6982 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
6983 // If the branch is a Bcc, then the CPSR should be set somewhere within the
6984 // block. We need to determine the reaching definition of CPSR so that
6985 // it can be marked as non-pipelineable, allowing the pipeliner to force
6986 // it into stage 0 or give up if it cannot or will not do so.
6987 MachineInstr *CCSetter = nullptr;
6988 for (auto &L : LoopBB->instrs()) {
6989 if (L.isCall())
6990 return nullptr;
6991 if (isCPSRDefined(L))
6992 CCSetter = &L;
6993 }
6994 if (CCSetter)
6995 return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
6996 else
6997 return nullptr; // Unable to find the CC setter, so unable to guarantee
6998 // that pipelining will work
6999 }
7000
7001 // Recognize:
7002 // preheader:
7003 // %1 = t2DoLoopStart %0
7004 // loop:
7005 // %2 = phi %1, <not loop>, %..., %loop
7006 // %3 = t2LoopDec %2, <imm>
7007 // t2LoopEnd %3, %loop
7008
7009 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
7010 for (auto &L : LoopBB->instrs())
7011 if (L.isCall())
7012 return nullptr;
7013 else if (isVCTP(&L))
7014 return nullptr;
7015 Register LoopDecResult = I->getOperand(0).getReg();
7016 MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
7017 MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
7018 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
7019 return nullptr;
7020 MachineInstr *LoopStart = nullptr;
7021 for (auto &J : Preheader->instrs())
7022 if (J.getOpcode() == ARM::t2DoLoopStart)
7023 LoopStart = &J;
7024 if (!LoopStart)
7025 return nullptr;
7026 return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
7027 }
7028 return nullptr;
7029 }
7030