//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H

#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/TypeSize.h"
#include <optional>

#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"

namespace llvm {

class AArch64Subtarget;

static const MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;
static const MachineMemOperand::Flags MOStridedAccess =
    MachineMemOperand::MOTargetFlag2;

#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
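
// Illustrative sketch (not part of the API): a pass can query or OR in these
// target-specific flags on a load/store's first memory operand, e.g.:
//
//   if (!MI.memoperands_empty()) {
//     MachineMemOperand *MMO = *MI.memoperands_begin();
//     bool Suppressed = MMO->getFlags() & MOSuppressPair; // query the hint
//     MMO->setFlags(MOStridedAccess);                     // OR in a hint
//   }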

// AArch64 MachineCombiner patterns
enum AArch64MachineCombinerPattern : unsigned {
  // These are patterns used to reduce the length of the dependence chain.
  SUBADD_OP1 = MachineCombinerPattern::TARGET_PATTERN_START,
  SUBADD_OP2,

  // These are multiply-add patterns matched by the AArch64 machine combiner.
  MULADDW_OP1,
  MULADDW_OP2,
  MULSUBW_OP1,
  MULSUBW_OP2,
  MULADDWI_OP1,
  MULSUBWI_OP1,
  MULADDX_OP1,
  MULADDX_OP2,
  MULSUBX_OP1,
  MULSUBX_OP2,
  MULADDXI_OP1,
  MULSUBXI_OP1,
  // NEON integer vectors
  MULADDv8i8_OP1,
  MULADDv8i8_OP2,
  MULADDv16i8_OP1,
  MULADDv16i8_OP2,
  MULADDv4i16_OP1,
  MULADDv4i16_OP2,
  MULADDv8i16_OP1,
  MULADDv8i16_OP2,
  MULADDv2i32_OP1,
  MULADDv2i32_OP2,
  MULADDv4i32_OP1,
  MULADDv4i32_OP2,

  MULSUBv8i8_OP1,
  MULSUBv8i8_OP2,
  MULSUBv16i8_OP1,
  MULSUBv16i8_OP2,
  MULSUBv4i16_OP1,
  MULSUBv4i16_OP2,
  MULSUBv8i16_OP1,
  MULSUBv8i16_OP2,
  MULSUBv2i32_OP1,
  MULSUBv2i32_OP2,
  MULSUBv4i32_OP1,
  MULSUBv4i32_OP2,

  MULADDv4i16_indexed_OP1,
  MULADDv4i16_indexed_OP2,
  MULADDv8i16_indexed_OP1,
  MULADDv8i16_indexed_OP2,
  MULADDv2i32_indexed_OP1,
  MULADDv2i32_indexed_OP2,
  MULADDv4i32_indexed_OP1,
  MULADDv4i32_indexed_OP2,

  MULSUBv4i16_indexed_OP1,
  MULSUBv4i16_indexed_OP2,
  MULSUBv8i16_indexed_OP1,
  MULSUBv8i16_indexed_OP2,
  MULSUBv2i32_indexed_OP1,
  MULSUBv2i32_indexed_OP2,
  MULSUBv4i32_indexed_OP1,
  MULSUBv4i32_indexed_OP2,

  // Floating Point
  FMULADDH_OP1,
  FMULADDH_OP2,
  FMULSUBH_OP1,
  FMULSUBH_OP2,
  FMULADDS_OP1,
  FMULADDS_OP2,
  FMULSUBS_OP1,
  FMULSUBS_OP2,
  FMULADDD_OP1,
  FMULADDD_OP2,
  FMULSUBD_OP1,
  FMULSUBD_OP2,
  FNMULSUBH_OP1,
  FNMULSUBS_OP1,
  FNMULSUBD_OP1,
  FMLAv1i32_indexed_OP1,
  FMLAv1i32_indexed_OP2,
  FMLAv1i64_indexed_OP1,
  FMLAv1i64_indexed_OP2,
  FMLAv4f16_OP1,
  FMLAv4f16_OP2,
  FMLAv8f16_OP1,
  FMLAv8f16_OP2,
  FMLAv2f32_OP2,
  FMLAv2f32_OP1,
  FMLAv2f64_OP1,
  FMLAv2f64_OP2,
  FMLAv4i16_indexed_OP1,
  FMLAv4i16_indexed_OP2,
  FMLAv8i16_indexed_OP1,
  FMLAv8i16_indexed_OP2,
  FMLAv2i32_indexed_OP1,
  FMLAv2i32_indexed_OP2,
  FMLAv2i64_indexed_OP1,
  FMLAv2i64_indexed_OP2,
  FMLAv4f32_OP1,
  FMLAv4f32_OP2,
  FMLAv4i32_indexed_OP1,
  FMLAv4i32_indexed_OP2,
  FMLSv1i32_indexed_OP2,
  FMLSv1i64_indexed_OP2,
  FMLSv4f16_OP1,
  FMLSv4f16_OP2,
  FMLSv8f16_OP1,
  FMLSv8f16_OP2,
  FMLSv2f32_OP1,
  FMLSv2f32_OP2,
  FMLSv2f64_OP1,
  FMLSv2f64_OP2,
  FMLSv4i16_indexed_OP1,
  FMLSv4i16_indexed_OP2,
  FMLSv8i16_indexed_OP1,
  FMLSv8i16_indexed_OP2,
  FMLSv2i32_indexed_OP1,
  FMLSv2i32_indexed_OP2,
  FMLSv2i64_indexed_OP1,
  FMLSv2i64_indexed_OP2,
  FMLSv4f32_OP1,
  FMLSv4f32_OP2,
  FMLSv4i32_indexed_OP1,
  FMLSv4i32_indexed_OP2,

  FMULv2i32_indexed_OP1,
  FMULv2i32_indexed_OP2,
  FMULv2i64_indexed_OP1,
  FMULv2i64_indexed_OP2,
  FMULv4i16_indexed_OP1,
  FMULv4i16_indexed_OP2,
  FMULv4i32_indexed_OP1,
  FMULv4i32_indexed_OP2,
  FMULv8i16_indexed_OP1,
  FMULv8i16_indexed_OP2,

  FNMADD,
};

class AArch64InstrInfo final : public AArch64GenInstrInfo {
  const AArch64RegisterInfo RI;
  const AArch64Subtarget &Subtarget;

public:
  explicit AArch64InstrInfo(const AArch64Subtarget &STI);

  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
  /// such, whenever a client has an instance of instruction info, it should
  /// always be able to get register info as well (through this method).
  const AArch64RegisterInfo &getRegisterInfo() const { return RI; }

  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool isAsCheapAsAMove(const MachineInstr &MI) const override;

  bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg,
                             Register &DstReg, unsigned &SubIdx) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  Register isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  Register isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  /// Does this instruction set its full destination register to zero?
  static bool isGPRZero(const MachineInstr &MI);

  /// Does this instruction rename a GPR without modifying bits?
  static bool isGPRCopy(const MachineInstr &MI);

  /// Does this instruction rename an FPR without modifying bits?
  static bool isFPRCopy(const MachineInstr &MI);

  /// Return true if pairing the given load or store is hinted to be
  /// unprofitable.
  static bool isLdStPairSuppressed(const MachineInstr &MI);

  /// Return true if the given load or store is a strided memory access.
  static bool isStridedAccess(const MachineInstr &MI);

  /// Return true if it has an unscaled load/store offset.
  static bool hasUnscaledLdStOffset(unsigned Opc);
  static bool hasUnscaledLdStOffset(MachineInstr &MI) {
    return hasUnscaledLdStOffset(MI.getOpcode());
  }

  /// Returns the unscaled load/store for the scaled load/store opcode,
  /// if there is a corresponding unscaled variant available.
  static std::optional<unsigned> getUnscaledLdSt(unsigned Opc);

  /// Scaling factor for (scaled or unscaled) load or store.
  static int getMemScale(unsigned Opc);
  static int getMemScale(const MachineInstr &MI) {
    return getMemScale(MI.getOpcode());
  }

  /// Returns whether the instruction is a pre-indexed load.
  static bool isPreLd(const MachineInstr &MI);

  /// Returns whether the instruction is a pre-indexed store.
  static bool isPreSt(const MachineInstr &MI);

  /// Returns whether the instruction is a pre-indexed load/store.
  static bool isPreLdSt(const MachineInstr &MI);

  /// Returns whether the instruction is a paired load/store.
  static bool isPairedLdSt(const MachineInstr &MI);

  /// Returns the base register operand of a load/store.
  static const MachineOperand &getLdStBaseOp(const MachineInstr &MI);

  /// Returns the immediate offset operand of a load/store.
  static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);

  /// Returns whether the physical register is FP or NEON.
  static bool isFpOrNEON(Register Reg);

  /// Returns whether the instruction is FP or NEON.
  static bool isFpOrNEON(const MachineInstr &MI);

  /// Returns whether the instruction is in H form (16 bit operands).
  static bool isHForm(const MachineInstr &MI);

  /// Returns whether the instruction is in Q form (128 bit operands).
  static bool isQForm(const MachineInstr &MI);

  /// Returns whether the instruction can be compatible with non-zero BTYPE.
  static bool hasBTISemantics(const MachineInstr &MI);

  /// Returns the index for the immediate for a given instruction.
  static unsigned getLoadStoreImmIdx(unsigned Opc);

  /// Return true if the given load or store may be paired with another.
  static bool isPairableLdStInst(const MachineInstr &MI);

  /// Returns true if MI is one of the TCRETURN* instructions.
  static bool isTailCallReturnInst(const MachineInstr &MI);

  /// Return the opcode that sets flags when possible (e.g. ADDWrr ->
  /// ADDSWrr). The caller is responsible for ensuring the opcode has a
  /// flag-setting equivalent.
  static unsigned convertToFlagSettingOpc(unsigned Opc);

  /// Return true if this is a load/store that can be potentially paired/merged.
  bool isCandidateToMergeOrPair(const MachineInstr &MI) const;

  /// Hint that pairing the given load or store is unprofitable.
  static void suppressLdStPair(MachineInstr &MI);

  std::optional<ExtAddrMode>
  getAddrModeFromMemoryOp(const MachineInstr &MemI,
                          const TargetRegisterInfo *TRI) const override;

  bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
                           const MachineInstr &AddrI,
                           ExtAddrMode &AM) const override;

  MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
                                 const ExtAddrMode &AM) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
      int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const override;

  /// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`.
  /// This is true for some SVE instructions like ldr/str that have a
  /// 'reg + imm' addressing mode where the immediate is an index to the
  /// scalable vector located at 'reg + imm * vscale x #bytes'.
  bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
                                    const MachineOperand *&BaseOp,
                                    int64_t &Offset, bool &OffsetIsScalable,
                                    TypeSize &Width,
                                    const TargetRegisterInfo *TRI) const;
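
  // Illustrative call pattern (a sketch; the variable names are assumptions):
  //
  //   const MachineOperand *BaseOp;
  //   int64_t Offset;
  //   bool OffsetIsScalable;
  //   TypeSize Width = TypeSize::getFixed(0);
  //   if (TII->getMemOperandWithOffsetWidth(MI, BaseOp, Offset,
  //                                         OffsetIsScalable, Width, TRI) &&
  //       OffsetIsScalable)
  //     ; // effective address is BaseOp's reg + Offset * (vscale x #bytes)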

  /// Return the immediate offset of the base register in a load/store \p LdSt.
  MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;

  /// Returns true if opcode \p Opc is a memory operation. If it is, set
  /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
  ///
  /// For unscaled instructions, \p Scale is set to 1.
  static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
                           int64_t &MinOffset, int64_t &MaxOffset);
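
  // Illustrative use (a sketch; it assumes MinOffset/MaxOffset are expressed
  // in scaled units): check whether a byte offset fits the encoding.
  //
  //   TypeSize Scale = TypeSize::getFixed(0), Width = TypeSize::getFixed(0);
  //   int64_t MinOff, MaxOff;
  //   if (AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOff, MaxOff)) {
  //     int64_t Unit = Scale.getKnownMinValue();
  //     bool Encodable = Offset % Unit == 0 && Offset / Unit >= MinOff &&
  //                      Offset / Unit <= MaxOff;
  //   }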

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, MCRegister DestReg,
                        MCRegister SrcReg, bool KillSrc, unsigned Opcode,
                        llvm::ArrayRef<unsigned> Indices) const;
  void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                       DebugLoc DL, unsigned DestReg, unsigned SrcReg,
                       bool KillSrc, unsigned Opcode, unsigned ZeroReg,
                       llvm::ArrayRef<unsigned> Indices) const;
  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, Register SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI,
                           Register VReg) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI,
                            Register VReg) const override;

  // This tells target independent code that it is okay to pass instructions
  // with subreg operands to foldMemoryOperandImpl.
  bool isSubregFoldable() const override { return true; }

  using TargetInstrInfo::foldMemoryOperandImpl;
  MachineInstr *
  foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                        ArrayRef<unsigned> Ops,
                        MachineBasicBlock::iterator InsertPt, int FrameIndex,
                        LiveIntervals *LIS = nullptr,
                        VirtRegMap *VRM = nullptr) const override;

  /// \returns true if a branch from an instruction with opcode \p BranchOpc
  /// is capable of jumping to a position \p BrOffset bytes away.
  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;
  bool analyzeBranchPredicate(MachineBasicBlock &MBB,
                              MachineBranchPredicate &MBP,
                              bool AllowModify) const override;
  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;
  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
  analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;

  bool
  reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
  bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
                       Register, Register, Register, int &, int &,
                       int &) const override;
  void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    const DebugLoc &DL, Register DstReg,
                    ArrayRef<MachineOperand> Cond, Register TrueReg,
                    Register FalseReg) const override;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  MCInst getNop() const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  /// analyzeCompare - For a comparison instruction, return the source registers
  /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
  /// Return true if the comparison instruction can be analyzed.
  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;
  /// optimizeCompareInstr - Convert the instruction supplying the argument to
  /// the comparison into one that sets the zero bit in the flags register.
  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;
  bool optimizeCondBranch(MachineInstr &MI) const override;

  CombinerObjective getCombinerObjective(unsigned Pattern) const override;
  /// Return true when a code sequence can improve throughput. It
  /// should be called only for instructions in loops.
  /// \param Pattern - combiner pattern
  bool isThroughputPattern(unsigned Pattern) const override;
  /// Return true when there is potentially a faster code sequence
  /// for an instruction chain ending in ``Root``. All potential patterns are
  /// listed in the ``Patterns`` array.
  bool getMachineCombinerPatterns(MachineInstr &Root,
                                  SmallVectorImpl<unsigned> &Patterns,
                                  bool DoRegPressureReduce) const override;
  /// Return true when Inst is associative and commutative so that it can be
  /// reassociated. If Invert is true, then the inverse of Inst operation must
  /// be checked.
  bool isAssociativeAndCommutative(const MachineInstr &Inst,
                                   bool Invert) const override;
  /// When getMachineCombinerPatterns() finds patterns, this function generates
  /// the instructions that could replace the original code sequence.
  void genAlternativeCodeSequence(
      MachineInstr &Root, unsigned Pattern,
      SmallVectorImpl<MachineInstr *> &InsInstrs,
      SmallVectorImpl<MachineInstr *> &DelInstrs,
      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
  /// AArch64 supports MachineCombiner.
  bool useMachineCombiner() const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;
  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableBitmaskMachineOperandTargetFlags() const override;
  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
                                   bool OutlineFromLinkOnceODRs) const override;
  std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
      std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
  void mergeOutliningCandidateAttributes(
      Function &F, std::vector<outliner::Candidate> &Candidates) const override;
  outliner::InstrType
  getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
                       unsigned Flags) const override;
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
  getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override;
  void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
                          const outliner::OutlinedFunction &OF) const override;
  MachineBasicBlock::iterator
  insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator &It, MachineFunction &MF,
                     outliner::Candidate &C) const override;
  bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;

  void buildClearRegister(Register Reg, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator Iter, DebugLoc &DL,
                          bool AllowSideEffects = true) const override;
  /// Returns the vector element size (B, H, S or D) of an SVE opcode.
  uint64_t getElementSizeForOpcode(unsigned Opc) const;
  /// Returns true if the opcode is for an SVE instruction that sets the
  /// condition codes as if its results had been fed to a PTEST instruction
  /// along with the same general predicate.
  bool isPTestLikeOpcode(unsigned Opc) const;
  /// Returns true if the opcode is for an SVE WHILE## instruction.
  bool isWhileOpcode(unsigned Opc) const;
  /// Returns true if the instruction has a shift by immediate that can be
  /// executed in one cycle less.
  static bool isFalkorShiftExtFast(const MachineInstr &MI);
  /// Return true if the instruction is an SEH instruction used for unwinding
  /// on Windows.
  static bool isSEHInstruction(const MachineInstr &MI);

  std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
                                           Register Reg) const override;

  bool isFunctionSafeToSplit(const MachineFunction &MF) const override;

  bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override;

  std::optional<ParamLoadedValue>
  describeLoadedValue(const MachineInstr &MI, Register Reg) const override;

  unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;

  bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
                                MachineRegisterInfo &MRI) const override;

  static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
                                                  int64_t &NumBytes,
                                                  int64_t &NumPredicateVectors,
                                                  int64_t &NumDataVectors);
  static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
                                                  int64_t &ByteSized,
                                                  int64_t &VGSized);
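
  // For example (a conceptual sketch; it assumes one SVE data vector is 16
  // scalable bytes and one predicate vector is 2 scalable bytes):
  //
  //   StackOffset Off = StackOffset::get(/*Fixed=*/16, /*Scalable=*/32);
  //   int64_t Bytes, PredVecs, DataVecs;
  //   AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(Off, Bytes,
  //                                                         PredVecs,
  //                                                         DataVecs);
  //   // Expected: Bytes == 16, PredVecs == 0, DataVecs == 2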

  // Return true if an address of the form BaseReg + Scale * ScaledReg + Offset
  // can be used for a load/store of NumBytes. BaseReg is always present and
  // implicit.
  bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
                             unsigned Scale) const;
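
  // For example (a sketch): a 4-byte load of the form
  // [BaseReg, ScaledReg, lsl #2] with no immediate would be queried as
  //
  //   bool Legal = TII->isLegalAddressingMode(/*NumBytes=*/4, /*Offset=*/0,
  //                                           /*Scale=*/4);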

  // Decrement the SP, issuing probes along the way. `TargetReg` is the new top
  // of the stack. `FrameSetup` is passed as true if the allocation is part of
  // constructing the activation frame of a function.
  MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI,
                                               Register TargetReg,
                                               bool FrameSetup) const;

#define GET_INSTRINFO_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// a value from one register to another, return the destination and source
  /// registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;
  std::optional<DestSourcePair>
  isCopyLikeInstrImpl(const MachineInstr &MI) const override;

private:
  unsigned getInstBundleLength(const MachineInstr &MI) const;

  /// Sets the offsets on outlined instructions in \p MBB which use SP
  /// so that they will be valid post-outlining.
  ///
  /// \param MBB A \p MachineBasicBlock in an outlined function.
  void fixupPostOutline(MachineBasicBlock &MBB) const;

  void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
                             MachineBasicBlock *TBB,
                             ArrayRef<MachineOperand> Cond) const;
  bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
                           const MachineRegisterInfo &MRI) const;
  bool removeCmpToZeroOrOne(MachineInstr &CmpInstr, unsigned SrcReg,
                            int CmpValue, const MachineRegisterInfo &MRI) const;

  /// Returns an unused general-purpose register which can be used for
  /// constructing an outlined call if one exists. Returns 0 otherwise.
  Register findRegisterToSaveLRTo(outliner::Candidate &C) const;
  /// Remove a ptest of a predicate-generating operation that already sets, or
  /// can be made to set, the condition codes in an identical manner.
  bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
                          unsigned PredReg,
                          const MachineRegisterInfo *MRI) const;
  std::optional<unsigned>
  canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
                      MachineInstr *Pred, const MachineRegisterInfo *MRI) const;
};

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};
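
// Illustrative accumulation (a sketch; suppose a conditional branch after the
// compare reads the Z and C flags):
//
//   UsedNZCV Used;
//   UsedNZCV FromBcc;
//   FromBcc.Z = FromBcc.C = true;
//   Used |= FromBcc; // Used.Z and Used.C are now set; N and V remain false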

/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
/// \returns std::nullopt otherwise.
///
/// Collects instructions using those flags in \p CCUseInstrs if provided.
std::optional<UsedNZCV>
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
                 const TargetRegisterInfo &TRI,
                 SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr);

/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
/// which either reads or clobbers NZCV.
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
                                     const MachineInstr &UseMI,
                                     const TargetRegisterInfo *TRI);

MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg,
                              unsigned Reg, const StackOffset &Offset,
                              bool LastAdjustmentWasScalable = true);
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg,
                                 const StackOffset &OffsetFromDefCFA);

/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
/// plus Offset. This is intended to be used from within the prolog/epilog
/// insertion (PEI) pass, where a virtual scratch register may be allocated
/// if necessary, to be replaced by the scavenger at the end of PEI.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                     StackOffset Offset, const TargetInstrInfo *TII,
                     MachineInstr::MIFlag = MachineInstr::NoFlags,
                     bool SetNZCV = false, bool NeedsWinCFI = false,
                     bool *HasWinCFI = nullptr, bool EmitCFAOffset = false,
                     StackOffset InitialOffset = {},
                     unsigned FrameReg = AArch64::SP);
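
// Illustrative prologue-style call (a sketch): allocate 64 bytes of stack by
// materializing SP := SP - 64, tagging the emitted instructions as frame
// setup.
//
//   emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
//                   StackOffset::getFixed(-64), TII,
//                   MachineInstr::FrameSetup);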

/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
/// FP. Return false if the offset could not be handled directly in MI, and
/// return the left-over portion by reference.
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                              unsigned FrameReg, StackOffset &Offset,
                              const AArch64InstrInfo *TII);

/// Used to report the frame offset status in isAArch64FrameOffsetLegal.
enum AArch64FrameOffsetStatus {
  AArch64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
  AArch64FrameOffsetIsLegal = 0x1,      ///< Offset is legal.
  AArch64FrameOffsetCanUpdate = 0x2     ///< Offset can apply, at least partly.
};

/// Check if the @p Offset is a valid frame offset for @p MI.
/// The returned value reports the validity of the frame offset for @p MI.
/// It uses the values defined by AArch64FrameOffsetStatus for that.
/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to
/// use an offset.
/// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be
/// rewritten in @p MI.
/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the
/// amount that is off the limit of the legal offset.
/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
/// turned into an unscaled operator, whose opcode is in @p OutUnscaledOp.
/// If set, @p EmittableOffset contains the amount that can be set in @p MI
/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
/// is a legal offset.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset,
                              bool *OutUseUnscaledOp = nullptr,
                              unsigned *OutUnscaledOp = nullptr,
                              int64_t *EmittableOffset = nullptr);
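
// Illustrative decoding of the status bits (a sketch):
//
//   StackOffset Off = StackOffset::getFixed(SomeOffset);
//   int Status = isAArch64FrameOffsetLegal(MI, Off);
//   if (Status & AArch64FrameOffsetCanUpdate) {
//     // Part (or all) of the offset can be folded into MI; after the call,
//     // 'Off' holds the left-over amount.
//     if (Status & AArch64FrameOffsetIsLegal)
//       ; // the entire offset is encodable in MI
//   }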

static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }

static inline bool isCondBranchOpcode(int Opc) {
  switch (Opc) {
  case AArch64::Bcc:
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    return true;
  default:
    return false;
  }
}

static inline bool isIndirectBranchOpcode(int Opc) {
  switch (Opc) {
  case AArch64::BR:
  case AArch64::BRAA:
  case AArch64::BRAB:
  case AArch64::BRAAZ:
  case AArch64::BRABZ:
    return true;
  }
  return false;
}

static inline bool isPTrueOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::PTRUE_B:
  case AArch64::PTRUE_H:
  case AArch64::PTRUE_S:
  case AArch64::PTRUE_D:
    return true;
  default:
    return false;
  }
}

/// Return opcode to be used for indirect calls.
unsigned getBLRCallOpcode(const MachineFunction &MF);

/// Return XPAC opcode to be used for a ptrauth strip using the given key.
static inline unsigned getXPACOpcodeForKey(AArch64PACKey::ID K) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: case IB: return AArch64::XPACI;
  case DA: case DB: return AArch64::XPACD;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

/// Return AUT opcode to be used for a ptrauth auth using the given key, or its
/// AUT*Z variant that doesn't take a discriminator operand, using zero instead.
static inline unsigned getAUTOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: return Zero ? AArch64::AUTIZA : AArch64::AUTIA;
  case IB: return Zero ? AArch64::AUTIZB : AArch64::AUTIB;
  case DA: return Zero ? AArch64::AUTDZA : AArch64::AUTDA;
  case DB: return Zero ? AArch64::AUTDZB : AArch64::AUTDB;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

/// Return PAC opcode to be used for a ptrauth sign using the given key, or its
/// PAC*Z variant that doesn't take a discriminator operand, using zero instead.
static inline unsigned getPACOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: return Zero ? AArch64::PACIZA : AArch64::PACIA;
  case IB: return Zero ? AArch64::PACIZB : AArch64::PACIB;
  case DA: return Zero ? AArch64::PACDZA : AArch64::PACDA;
  case DB: return Zero ? AArch64::PACDZB : AArch64::PACDB;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X)      (X)        // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3)  // 4-bits
#define TSFLAG_FALSE_LANE_TYPE(X)       ((X) << 7)  // 2-bits
#define TSFLAG_INSTR_FLAGS(X)           ((X) << 9)  // 2-bits
#define TSFLAG_SME_MATRIX_TYPE(X)       ((X) << 11) // 3-bits
// }

namespace AArch64 {

enum ElementSizeType {
  ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7),
  ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0),
  ElementSizeB = TSFLAG_ELEMENT_SIZE_TYPE(0x1),
  ElementSizeH = TSFLAG_ELEMENT_SIZE_TYPE(0x2),
  ElementSizeS = TSFLAG_ELEMENT_SIZE_TYPE(0x3),
  ElementSizeD = TSFLAG_ELEMENT_SIZE_TYPE(0x4),
};
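
// Illustrative extraction (a sketch mirroring how getElementSizeForOpcode is
// expected to mask an opcode's TSFlags):
//
//   uint64_t TSFlags = TII->get(Opc).TSFlags;
//   auto ESize =
//       static_cast<ElementSizeType>(TSFlags & AArch64::ElementSizeMask);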

enum DestructiveInstType {
  DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0xf),
  NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
  DestructiveOther = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
  DestructiveUnary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x2),
  DestructiveBinaryImm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x3),
  DestructiveBinaryShImmUnpred = TSFLAG_DESTRUCTIVE_INST_TYPE(0x4),
  DestructiveBinary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x5),
  DestructiveBinaryComm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
  DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
  DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
  DestructiveUnaryPassthru = TSFLAG_DESTRUCTIVE_INST_TYPE(0x9),
};

enum FalseLaneType {
  FalseLanesMask = TSFLAG_FALSE_LANE_TYPE(0x3),
  FalseLanesZero = TSFLAG_FALSE_LANE_TYPE(0x1),
  FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};

// NOTE: This is a bit field.
static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1);
static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2);

enum SMEMatrixType {
  SMEMatrixTypeMask = TSFLAG_SME_MATRIX_TYPE(0x7),
  SMEMatrixNone = TSFLAG_SME_MATRIX_TYPE(0x0),
  SMEMatrixTileB = TSFLAG_SME_MATRIX_TYPE(0x1),
  SMEMatrixTileH = TSFLAG_SME_MATRIX_TYPE(0x2),
  SMEMatrixTileS = TSFLAG_SME_MATRIX_TYPE(0x3),
  SMEMatrixTileD = TSFLAG_SME_MATRIX_TYPE(0x4),
  SMEMatrixTileQ = TSFLAG_SME_MATRIX_TYPE(0x5),
  SMEMatrixArray = TSFLAG_SME_MATRIX_TYPE(0x6),
};

#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
#undef TSFLAG_FALSE_LANE_TYPE
#undef TSFLAG_INSTR_FLAGS
#undef TSFLAG_SME_MATRIX_TYPE

int getSVEPseudoMap(uint16_t Opcode);
int getSVERevInstr(uint16_t Opcode);
int getSVENonRevInstr(uint16_t Opcode);

int getSMEPseudoMap(uint16_t Opcode);

} // end namespace AArch64

} // end namespace llvm

#endif