xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
14 #define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
15 
16 #include "AArch64.h"
17 #include "AArch64RegisterInfo.h"
18 #include "llvm/CodeGen/TargetInstrInfo.h"
19 #include "llvm/Support/TypeSize.h"
20 #include <optional>
21 
22 #define GET_INSTRINFO_HEADER
23 #include "AArch64GenInstrInfo.inc"
24 
25 namespace llvm {
26 
class AArch64Subtarget;

// Target-specific MachineMemOperand flags used by AArch64.
//
// MOSuppressPair: hint that pairing this load/store with a neighbor is
// unprofitable (queried via isLdStPairSuppressed / set via suppressLdStPair).
static const MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;
// MOStridedAccess: marks a strided memory access (queried via
// isStridedAccess).
static const MachineMemOperand::Flags MOStridedAccess =
    MachineMemOperand::MOTargetFlag2;

// Metadata string used to tag strided accesses for the Falkor
// prefetcher-friendly handling (see isFalkorShiftExtFast below).
#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
35 
// AArch64 MachineCombiner patterns.
//
// NOTE: enumerator order is significant — the values start at
// MachineCombinerPattern::TARGET_PATTERN_START and are matched by value in
// the machine combiner, so do not reorder existing entries.
enum AArch64MachineCombinerPattern : unsigned {
  // These are patterns used to reduce the length of dependence chain.
  SUBADD_OP1 = MachineCombinerPattern::TARGET_PATTERN_START,
  SUBADD_OP2,

  // These are multiply-add patterns matched by the AArch64 machine combiner.
  // The _OP1/_OP2 suffix identifies which operand of the add/sub is the
  // multiply result being fused.
  MULADDW_OP1,
  MULADDW_OP2,
  MULSUBW_OP1,
  MULSUBW_OP2,
  MULADDWI_OP1,
  MULSUBWI_OP1,
  MULADDX_OP1,
  MULADDX_OP2,
  MULSUBX_OP1,
  MULSUBX_OP2,
  MULADDXI_OP1,
  MULSUBXI_OP1,
  // NEON integer vectors.
  MULADDv8i8_OP1,
  MULADDv8i8_OP2,
  MULADDv16i8_OP1,
  MULADDv16i8_OP2,
  MULADDv4i16_OP1,
  MULADDv4i16_OP2,
  MULADDv8i16_OP1,
  MULADDv8i16_OP2,
  MULADDv2i32_OP1,
  MULADDv2i32_OP2,
  MULADDv4i32_OP1,
  MULADDv4i32_OP2,

  MULSUBv8i8_OP1,
  MULSUBv8i8_OP2,
  MULSUBv16i8_OP1,
  MULSUBv16i8_OP2,
  MULSUBv4i16_OP1,
  MULSUBv4i16_OP2,
  MULSUBv8i16_OP1,
  MULSUBv8i16_OP2,
  MULSUBv2i32_OP1,
  MULSUBv2i32_OP2,
  MULSUBv4i32_OP1,
  MULSUBv4i32_OP2,

  MULADDv4i16_indexed_OP1,
  MULADDv4i16_indexed_OP2,
  MULADDv8i16_indexed_OP1,
  MULADDv8i16_indexed_OP2,
  MULADDv2i32_indexed_OP1,
  MULADDv2i32_indexed_OP2,
  MULADDv4i32_indexed_OP1,
  MULADDv4i32_indexed_OP2,

  MULSUBv4i16_indexed_OP1,
  MULSUBv4i16_indexed_OP2,
  MULSUBv8i16_indexed_OP1,
  MULSUBv8i16_indexed_OP2,
  MULSUBv2i32_indexed_OP1,
  MULSUBv2i32_indexed_OP2,
  MULSUBv4i32_indexed_OP1,
  MULSUBv4i32_indexed_OP2,

  // Floating point fused multiply-add/sub patterns.
  FMULADDH_OP1,
  FMULADDH_OP2,
  FMULSUBH_OP1,
  FMULSUBH_OP2,
  FMULADDS_OP1,
  FMULADDS_OP2,
  FMULSUBS_OP1,
  FMULSUBS_OP2,
  FMULADDD_OP1,
  FMULADDD_OP2,
  FMULSUBD_OP1,
  FMULSUBD_OP2,
  FNMULSUBH_OP1,
  FNMULSUBS_OP1,
  FNMULSUBD_OP1,
  FMLAv1i32_indexed_OP1,
  FMLAv1i32_indexed_OP2,
  FMLAv1i64_indexed_OP1,
  FMLAv1i64_indexed_OP2,
  FMLAv4f16_OP1,
  FMLAv4f16_OP2,
  FMLAv8f16_OP1,
  FMLAv8f16_OP2,
  // Note: the OP2/OP1 order of the next two entries is historical; keep it
  // to preserve enumerator values.
  FMLAv2f32_OP2,
  FMLAv2f32_OP1,
  FMLAv2f64_OP1,
  FMLAv2f64_OP2,
  FMLAv4i16_indexed_OP1,
  FMLAv4i16_indexed_OP2,
  FMLAv8i16_indexed_OP1,
  FMLAv8i16_indexed_OP2,
  FMLAv2i32_indexed_OP1,
  FMLAv2i32_indexed_OP2,
  FMLAv2i64_indexed_OP1,
  FMLAv2i64_indexed_OP2,
  FMLAv4f32_OP1,
  FMLAv4f32_OP2,
  FMLAv4i32_indexed_OP1,
  FMLAv4i32_indexed_OP2,
  FMLSv1i32_indexed_OP2,
  FMLSv1i64_indexed_OP2,
  FMLSv4f16_OP1,
  FMLSv4f16_OP2,
  FMLSv8f16_OP1,
  FMLSv8f16_OP2,
  FMLSv2f32_OP1,
  FMLSv2f32_OP2,
  FMLSv2f64_OP1,
  FMLSv2f64_OP2,
  FMLSv4i16_indexed_OP1,
  FMLSv4i16_indexed_OP2,
  FMLSv8i16_indexed_OP1,
  FMLSv8i16_indexed_OP2,
  FMLSv2i32_indexed_OP1,
  FMLSv2i32_indexed_OP2,
  FMLSv2i64_indexed_OP1,
  FMLSv2i64_indexed_OP2,
  FMLSv4f32_OP1,
  FMLSv4f32_OP2,
  FMLSv4i32_indexed_OP1,
  FMLSv4i32_indexed_OP2,

  FMULv2i32_indexed_OP1,
  FMULv2i32_indexed_OP2,
  FMULv2i64_indexed_OP1,
  FMULv2i64_indexed_OP2,
  FMULv4i16_indexed_OP1,
  FMULv4i16_indexed_OP2,
  FMULv4i32_indexed_OP1,
  FMULv4i32_indexed_OP2,
  FMULv8i16_indexed_OP1,
  FMULv8i16_indexed_OP2,

  FNMADD,
};
/// AArch64 implementation of TargetInstrInfo: instruction queries and
/// transformations used throughout codegen. Most member functions override
/// TargetInstrInfo hooks; see llvm/CodeGen/TargetInstrInfo.h for the full
/// contracts of overrides not documented here.
class AArch64InstrInfo final : public AArch64GenInstrInfo {
  const AArch64RegisterInfo RI;
  const AArch64Subtarget &Subtarget;

public:
  explicit AArch64InstrInfo(const AArch64Subtarget &STI);

  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
  /// such, whenever a client has an instance of instruction info, it should
  /// always be able to get register info as well (through this method).
  const AArch64RegisterInfo &getRegisterInfo() const { return RI; }

  /// Returns the size in bytes of the specified MachineInstr.
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool isAsCheapAsAMove(const MachineInstr &MI) const override;

  bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg,
                             Register &DstReg, unsigned &SubIdx) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  Register isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  Register isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  /// Does this instruction set its full destination register to zero?
  static bool isGPRZero(const MachineInstr &MI);

  /// Does this instruction rename a GPR without modifying bits?
  static bool isGPRCopy(const MachineInstr &MI);

  /// Does this instruction rename an FPR without modifying bits?
  static bool isFPRCopy(const MachineInstr &MI);

  /// Return true if pairing the given load or store is hinted to be
  /// unprofitable.
  static bool isLdStPairSuppressed(const MachineInstr &MI);

  /// Return true if the given load or store is a strided memory access.
  static bool isStridedAccess(const MachineInstr &MI);

  /// Return true if it has an unscaled load/store offset.
  static bool hasUnscaledLdStOffset(unsigned Opc);
  /// Convenience overload dispatching on the instruction's opcode.
  static bool hasUnscaledLdStOffset(MachineInstr &MI) {
    return hasUnscaledLdStOffset(MI.getOpcode());
  }

  /// Returns the unscaled load/store for the scaled load/store opcode,
  /// if there is a corresponding unscaled variant available.
  static std::optional<unsigned> getUnscaledLdSt(unsigned Opc);

  /// Scaling factor for (scaled or unscaled) load or store.
  static int getMemScale(unsigned Opc);
  /// Convenience overload dispatching on the instruction's opcode.
  static int getMemScale(const MachineInstr &MI) {
    return getMemScale(MI.getOpcode());
  }

  /// Returns whether the instruction is a pre-indexed load.
  static bool isPreLd(const MachineInstr &MI);

  /// Returns whether the instruction is a pre-indexed store.
  static bool isPreSt(const MachineInstr &MI);

  /// Returns whether the instruction is a pre-indexed load/store.
  static bool isPreLdSt(const MachineInstr &MI);

  /// Returns whether the instruction is a paired load/store.
  static bool isPairedLdSt(const MachineInstr &MI);

  /// Returns the base register operator of a load/store.
  static const MachineOperand &getLdStBaseOp(const MachineInstr &MI);

  /// Returns the immediate offset operator of a load/store.
  static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);

  /// Returns whether the physical register is FP or NEON.
  static bool isFpOrNEON(Register Reg);

  /// Returns the shift amount operator of a load/store.
  static const MachineOperand &getLdStAmountOp(const MachineInstr &MI);

  /// Returns whether the instruction is FP or NEON.
  static bool isFpOrNEON(const MachineInstr &MI);

  /// Returns whether the instruction is in H form (16 bit operands)
  static bool isHForm(const MachineInstr &MI);

  /// Returns whether the instruction is in Q form (128 bit operands)
  static bool isQForm(const MachineInstr &MI);

  /// Returns whether the instruction can be compatible with non-zero BTYPE.
  static bool hasBTISemantics(const MachineInstr &MI);

  /// Returns the index for the immediate for a given instruction.
  static unsigned getLoadStoreImmIdx(unsigned Opc);

  /// Return true if pairing the given load or store may be paired with another.
  static bool isPairableLdStInst(const MachineInstr &MI);

  /// Returns true if MI is one of the TCRETURN* instructions.
  static bool isTailCallReturnInst(const MachineInstr &MI);

  /// Return the opcode that set flags when possible.  The caller is
  /// responsible for ensuring the opc has a flag setting equivalent.
  static unsigned convertToFlagSettingOpc(unsigned Opc);

  /// Return true if this is a load/store that can be potentially paired/merged.
  bool isCandidateToMergeOrPair(const MachineInstr &MI) const;

  /// Hint that pairing the given load or store is unprofitable.
  static void suppressLdStPair(MachineInstr &MI);

  std::optional<ExtAddrMode>
  getAddrModeFromMemoryOp(const MachineInstr &MemI,
                          const TargetRegisterInfo *TRI) const override;

  bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
                           const MachineInstr &AddrI,
                           ExtAddrMode &AM) const override;

  MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
                                 const ExtAddrMode &AM) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
      int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const override;

  /// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`.
  /// This is true for some SVE instructions like ldr/str that have a
  /// 'reg + imm' addressing mode where the immediate is an index to the
  /// scalable vector located at 'reg + imm * vscale x #bytes'.
  bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
                                    const MachineOperand *&BaseOp,
                                    int64_t &Offset, bool &OffsetIsScalable,
                                    TypeSize &Width,
                                    const TargetRegisterInfo *TRI) const;

  /// Return the immediate offset of the base register in a load/store \p LdSt.
  MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;

  /// Returns true if opcode \p Opc is a memory operation. If it is, set
  /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
  ///
  /// For unscaled instructions, \p Scale is set to 1. All values are in bytes.
  /// MinOffset/MaxOffset are the un-scaled limits of the immediate in the
  /// instruction, the actual offset limit is [MinOffset*Scale,
  /// MaxOffset*Scale].
  static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
                           int64_t &MinOffset, int64_t &MaxOffset);

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  /// Copy a register tuple (e.g. vector register sequence) by emitting
  /// \p Opcode once per sub-register index in \p Indices.
  void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, MCRegister DestReg,
                        MCRegister SrcReg, bool KillSrc, unsigned Opcode,
                        llvm::ArrayRef<unsigned> Indices);
  /// Like copyPhysRegTuple but for GPR tuples, using \p ZeroReg as the
  /// zero-register operand of \p Opcode.
  void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                       const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                       bool KillSrc, unsigned Opcode, unsigned ZeroReg,
                       llvm::ArrayRef<unsigned> Indices) const;
  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                   const DebugLoc &DL, Register DestReg, Register SrcReg,
                   bool KillSrc, bool RenamableDest = false,
                   bool RenamableSrc = false) const override;

  void storeRegToStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
      const TargetRegisterInfo *TRI, Register VReg,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  void loadRegFromStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
      const TargetRegisterInfo *TRI, Register VReg,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  // This tells target independent code that it is okay to pass instructions
  // with subreg operands to foldMemoryOperandImpl.
  bool isSubregFoldable() const override { return true; }

  using TargetInstrInfo::foldMemoryOperandImpl;
  MachineInstr *
  foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                        ArrayRef<unsigned> Ops,
                        MachineBasicBlock::iterator InsertPt, int FrameIndex,
                        LiveIntervals *LIS = nullptr,
                        VirtRegMap *VRM = nullptr) const override;

  /// \returns true if a branch from an instruction with opcode \p BranchOpc
  ///  bytes is capable of jumping to a position \p BrOffset bytes away.
  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;
  bool analyzeBranchPredicate(MachineBasicBlock &MBB,
                              MachineBranchPredicate &MBP,
                              bool AllowModify) const override;
  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;
  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
  analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;

  bool
  reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
  bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
                       Register, Register, Register, int &, int &,
                       int &) const override;
  void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    const DebugLoc &DL, Register DstReg,
                    ArrayRef<MachineOperand> Cond, Register TrueReg,
                    Register FalseReg) const override;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  MCInst getNop() const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  /// analyzeCompare - For a comparison instruction, return the source registers
  /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
  /// Return true if the comparison instruction can be analyzed.
  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;
  /// optimizeCompareInstr - Convert the instruction supplying the argument to
  /// the comparison into one that sets the zero bit in the flags register.
  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;
  bool optimizeCondBranch(MachineInstr &MI) const override;

  CombinerObjective getCombinerObjective(unsigned Pattern) const override;
  /// Return true when a code sequence can improve throughput. It
  /// should be called only for instructions in loops.
  /// \param Pattern - combiner pattern
  bool isThroughputPattern(unsigned Pattern) const override;
  /// Return true when there is potentially a faster code sequence
  /// for an instruction chain ending in ``Root``. All potential patterns are
  /// listed in the ``Patterns`` array.
  bool getMachineCombinerPatterns(MachineInstr &Root,
                                  SmallVectorImpl<unsigned> &Patterns,
                                  bool DoRegPressureReduce) const override;
  /// Return true when Inst is associative and commutative so that it can be
  /// reassociated. If Invert is true, then the inverse of Inst operation must
  /// be checked.
  bool isAssociativeAndCommutative(const MachineInstr &Inst,
                                   bool Invert) const override;

  /// Returns true if \P Opcode is an instruction which performs accumulation
  /// into a destination register.
  bool isAccumulationOpcode(unsigned Opcode) const override;

  /// Returns an opcode which defines the accumulator used by \P Opcode.
  unsigned getAccumulationStartOpcode(unsigned Opcode) const override;

  unsigned
  getReduceOpcodeForAccumulator(unsigned int AccumulatorOpCode) const override;

  /// When getMachineCombinerPatterns() finds patterns, this function
  /// generates the instructions that could replace the original code
  /// sequence
  void genAlternativeCodeSequence(
      MachineInstr &Root, unsigned Pattern,
      SmallVectorImpl<MachineInstr *> &InsInstrs,
      SmallVectorImpl<MachineInstr *> &DelInstrs,
      DenseMap<Register, unsigned> &InstrIdxForVirtReg) const override;
  /// AArch64 supports MachineCombiner.
  bool useMachineCombiner() const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;
  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableBitmaskMachineOperandTargetFlags() const override;
  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  // Machine outliner hooks; see the MachineOutliner pass for how these are
  // driven.
  bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
                                   bool OutlineFromLinkOnceODRs) const override;
  std::optional<std::unique_ptr<outliner::OutlinedFunction>>
  getOutliningCandidateInfo(
      const MachineModuleInfo &MMI,
      std::vector<outliner::Candidate> &RepeatedSequenceLocs,
      unsigned MinRepeats) const override;
  void mergeOutliningCandidateAttributes(
      Function &F, std::vector<outliner::Candidate> &Candidates) const override;
  outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI,
                                           MachineBasicBlock::iterator &MIT,
                                           unsigned Flags) const override;
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
  getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override;
  void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
                          const outliner::OutlinedFunction &OF) const override;
  MachineBasicBlock::iterator
  insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator &It, MachineFunction &MF,
                     outliner::Candidate &C) const override;
  bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;

  void buildClearRegister(Register Reg, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator Iter, DebugLoc &DL,
                          bool AllowSideEffects = true) const override;

  /// Returns the vector element size (B, H, S or D) of an SVE opcode.
  uint64_t getElementSizeForOpcode(unsigned Opc) const;
  /// Returns true if the opcode is for an SVE instruction that sets the
  /// condition codes as if it's results had been fed to a PTEST instruction
  /// along with the same general predicate.
  bool isPTestLikeOpcode(unsigned Opc) const;
  /// Returns true if the opcode is for an SVE WHILE## instruction.
  bool isWhileOpcode(unsigned Opc) const;
  /// Returns true if the instruction has a shift by immediate that can be
  /// executed in one cycle less.
  static bool isFalkorShiftExtFast(const MachineInstr &MI);
  /// Return true if the instructions is a SEH instruction used for unwinding
  /// on Windows.
  static bool isSEHInstruction(const MachineInstr &MI);

  std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
                                           Register Reg) const override;

  bool isFunctionSafeToSplit(const MachineFunction &MF) const override;

  bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override;

  std::optional<ParamLoadedValue>
  describeLoadedValue(const MachineInstr &MI, Register Reg) const override;

  unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;

  bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
                                MachineRegisterInfo &MRI) const override;

  /// Split \p Offset into a byte part, a predicate-vector part and a
  /// data-vector part (the latter two in units of scalable vectors).
  static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
                                                  int64_t &NumBytes,
                                                  int64_t &NumPredicateVectors,
                                                  int64_t &NumDataVectors);
  /// Split \p Offset into a byte-sized part and a VG-scaled part for DWARF
  /// expression emission.
  static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
                                                  int64_t &ByteSized,
                                                  int64_t &VGSized);

  // Return true if address of the form BaseReg + Scale * ScaledReg + Offset can
  // be used for a load/store of NumBytes. BaseReg is always present and
  // implicit.
  bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
                             unsigned Scale) const;

  // Decrement the SP, issuing probes along the way. `TargetReg` is the new top
  // of the stack. `FrameSetup` is passed as true, if the allocation is a part
  // of constructing the activation frame of a function.
  MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI,
                                               Register TargetReg,
                                               bool FrameSetup) const;

#define GET_INSTRINFO_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// value from one register to another register return destination and source
  /// registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;
  std::optional<DestSourcePair>
  isCopyLikeInstrImpl(const MachineInstr &MI) const override;

private:
  unsigned getInstBundleLength(const MachineInstr &MI) const;

  /// Sets the offsets on outlined instructions in \p MBB which use SP
  /// so that they will be valid post-outlining.
  ///
  /// \param MBB A \p MachineBasicBlock in an outlined function.
  void fixupPostOutline(MachineBasicBlock &MBB) const;

  void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
                             MachineBasicBlock *TBB,
                             ArrayRef<MachineOperand> Cond) const;
  bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
                           const MachineRegisterInfo &MRI) const;
  bool removeCmpToZeroOrOne(MachineInstr &CmpInstr, unsigned SrcReg,
                            int CmpValue, const MachineRegisterInfo &MRI) const;

  /// Returns an unused general-purpose register which can be used for
  /// constructing an outlined call if one exists. Returns 0 otherwise.
  Register findRegisterToSaveLRTo(outliner::Candidate &C) const;

  /// Remove a ptest of a predicate-generating operation that already sets, or
  /// can be made to set, the condition codes in an identical manner
  bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
                          unsigned PredReg,
                          const MachineRegisterInfo *MRI) const;
  std::optional<unsigned>
  canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
                      MachineInstr *Pred, const MachineRegisterInfo *MRI) const;

  /// verifyInstruction - Perform target specific instruction verification.
  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;
};
608 
/// Tracks which of the four condition flags (N, Z, C, V) are used.
/// All flags start out unused; sets are combined with operator|=.
struct UsedNZCV {
  bool N = false; // Negative flag used.
  bool Z = false; // Zero flag used.
  bool C = false; // Carry flag used.
  bool V = false; // Overflow flag used.

  UsedNZCV() = default;

  /// Merge another usage set into this one; a flag is used if it is used
  /// in either operand. Returns *this to allow chaining.
  UsedNZCV &operator|=(const UsedNZCV &Other) {
    N = N || Other.N;
    Z = Z || Other.Z;
    C = C || Other.C;
    V = V || Other.V;
    return *this;
  }
};
625 
/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
/// \returns std::nullopt otherwise.
///
/// Collect instructions using that flags in \p CCUseInstrs if provided.
std::optional<UsedNZCV>
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
                 const TargetRegisterInfo &TRI,
                 SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr);

/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
/// which either reads or clobbers NZCV.
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
                                     const MachineInstr &UseMI,
                                     const TargetRegisterInfo *TRI);

// CFI-construction helpers used by AArch64 frame lowering. Exact directive
// shapes are determined by the implementations in AArch64InstrInfo.cpp.
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg,
                              unsigned Reg, const StackOffset &Offset,
                              bool LastAdjustmentWasScalable = true);
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg,
                                 const StackOffset &OffsetFromDefCFA);

/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
/// plus Offset.  This is intended to be used from within the prolog/epilog
/// insertion (PEI) pass, where a virtual scratch register may be allocated
/// if necessary, to be replaced by the scavenger at the end of PEI.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                     StackOffset Offset, const TargetInstrInfo *TII,
                     MachineInstr::MIFlag = MachineInstr::NoFlags,
                     bool SetNZCV = false, bool NeedsWinCFI = false,
                     bool *HasWinCFI = nullptr, bool EmitCFAOffset = false,
                     StackOffset InitialOffset = {},
                     unsigned FrameReg = AArch64::SP);

/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
/// FP. Return false if the offset could not be handled directly in MI, and
/// return the left-over portion by reference.
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                              unsigned FrameReg, StackOffset &Offset,
                              const AArch64InstrInfo *TII);
667 
/// Use to report the frame offset status in isAArch64FrameOffsetLegal.
/// The values are bit flags: IsLegal and CanUpdate may be combined.
enum AArch64FrameOffsetStatus {
  AArch64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
  AArch64FrameOffsetIsLegal = 0x1,      ///< Offset is legal.
  AArch64FrameOffsetCanUpdate = 0x2     ///< Offset can apply, at least partly.
};

/// Check if the @p Offset is a valid frame offset for @p MI.
/// The returned value reports the validity of the frame offset for @p MI.
/// It uses the values defined by AArch64FrameOffsetStatus for that.
/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to
/// use an offset.
/// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be
/// rewritten in @p MI.
/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the
/// amount that is off the limit of the legal offset.
/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
/// turned into an unscaled operator, which opcode is in @p OutUnscaledOp.
/// If set, @p EmittableOffset contains the amount that can be set in @p MI
/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
/// is a legal offset.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset,
                              bool *OutUseUnscaledOp = nullptr,
                              unsigned *OutUnscaledOp = nullptr,
                              int64_t *EmittableOffset = nullptr);
693 
isUncondBranchOpcode(int Opc)694 static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }
695 
isCondBranchOpcode(int Opc)696 static inline bool isCondBranchOpcode(int Opc) {
697   switch (Opc) {
698   case AArch64::Bcc:
699   case AArch64::CBZW:
700   case AArch64::CBZX:
701   case AArch64::CBNZW:
702   case AArch64::CBNZX:
703   case AArch64::TBZW:
704   case AArch64::TBZX:
705   case AArch64::TBNZW:
706   case AArch64::TBNZX:
707   case AArch64::CBWPri:
708   case AArch64::CBXPri:
709   case AArch64::CBWPrr:
710   case AArch64::CBXPrr:
711     return true;
712   default:
713     return false;
714   }
715 }
716 
isIndirectBranchOpcode(int Opc)717 static inline bool isIndirectBranchOpcode(int Opc) {
718   switch (Opc) {
719   case AArch64::BR:
720   case AArch64::BRAA:
721   case AArch64::BRAB:
722   case AArch64::BRAAZ:
723   case AArch64::BRABZ:
724     return true;
725   }
726   return false;
727 }
728 
isIndirectCallOpcode(unsigned Opc)729 static inline bool isIndirectCallOpcode(unsigned Opc) {
730   switch (Opc) {
731   case AArch64::BLR:
732   case AArch64::BLRAA:
733   case AArch64::BLRAB:
734   case AArch64::BLRAAZ:
735   case AArch64::BLRABZ:
736     return true;
737   default:
738     return false;
739   }
740 }
741 
isPTrueOpcode(unsigned Opc)742 static inline bool isPTrueOpcode(unsigned Opc) {
743   switch (Opc) {
744   case AArch64::PTRUE_B:
745   case AArch64::PTRUE_H:
746   case AArch64::PTRUE_S:
747   case AArch64::PTRUE_D:
748     return true;
749   default:
750     return false;
751   }
752 }
753 
/// Return opcode to be used for indirect calls.
/// NOTE(review): presumably the choice depends on per-function codegen
/// attributes of \p MF (e.g. BLR vs. an authenticated/thunked variant) —
/// confirm against the definition.
unsigned getBLRCallOpcode(const MachineFunction &MF);
756 
757 /// Return XPAC opcode to be used for a ptrauth strip using the given key.
getXPACOpcodeForKey(AArch64PACKey::ID K)758 static inline unsigned getXPACOpcodeForKey(AArch64PACKey::ID K) {
759   using namespace AArch64PACKey;
760   switch (K) {
761   case IA: case IB: return AArch64::XPACI;
762   case DA: case DB: return AArch64::XPACD;
763   }
764   llvm_unreachable("Unhandled AArch64PACKey::ID enum");
765 }
766 
767 /// Return AUT opcode to be used for a ptrauth auth using the given key, or its
768 /// AUT*Z variant that doesn't take a discriminator operand, using zero instead.
getAUTOpcodeForKey(AArch64PACKey::ID K,bool Zero)769 static inline unsigned getAUTOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
770   using namespace AArch64PACKey;
771   switch (K) {
772   case IA: return Zero ? AArch64::AUTIZA : AArch64::AUTIA;
773   case IB: return Zero ? AArch64::AUTIZB : AArch64::AUTIB;
774   case DA: return Zero ? AArch64::AUTDZA : AArch64::AUTDA;
775   case DB: return Zero ? AArch64::AUTDZB : AArch64::AUTDB;
776   }
777   llvm_unreachable("Unhandled AArch64PACKey::ID enum");
778 }
779 
780 /// Return PAC opcode to be used for a ptrauth sign using the given key, or its
781 /// PAC*Z variant that doesn't take a discriminator operand, using zero instead.
getPACOpcodeForKey(AArch64PACKey::ID K,bool Zero)782 static inline unsigned getPACOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
783   using namespace AArch64PACKey;
784   switch (K) {
785   case IA: return Zero ? AArch64::PACIZA : AArch64::PACIA;
786   case IB: return Zero ? AArch64::PACIZB : AArch64::PACIB;
787   case DA: return Zero ? AArch64::PACDZA : AArch64::PACDA;
788   case DB: return Zero ? AArch64::PACDZB : AArch64::PACDB;
789   }
790   llvm_unreachable("Unhandled AArch64PACKey::ID enum");
791 }
792 
// Bit layout of the per-instruction TSFlags value (decoded by the enums in
// the AArch64 namespace below):
//   bits [0,2]   element size type
//   bits [3,6]   destructive instruction type
//   bits [7,8]   false lane type
//   bits [9,10]  instruction flags (bit field)
//   bits [11,13] SME matrix type
// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X)      (X)        // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3)  // 4-bits
#define TSFLAG_FALSE_LANE_TYPE(X)       ((X) << 7)  // 2-bits
#define TSFLAG_INSTR_FLAGS(X)           ((X) << 9)  // 2-bits
#define TSFLAG_SME_MATRIX_TYPE(X)       ((X) << 11) // 3-bits
// }
800 
namespace AArch64 {

/// Element size field of TSFlags, bits [0,2].
/// ElementSizeMask extracts the field; the remaining values are its
/// encodings (None, and B/H/S/D element sizes).
enum ElementSizeType {
  ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7),
  ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0),
  ElementSizeB    = TSFLAG_ELEMENT_SIZE_TYPE(0x1),
  ElementSizeH    = TSFLAG_ELEMENT_SIZE_TYPE(0x2),
  ElementSizeS    = TSFLAG_ELEMENT_SIZE_TYPE(0x3),
  ElementSizeD    = TSFLAG_ELEMENT_SIZE_TYPE(0x4),
};

/// Destructive-instruction classification field of TSFlags, bits [3,6].
/// DestructiveInstTypeMask extracts the field.
enum DestructiveInstType {
  DestructiveInstTypeMask       = TSFLAG_DESTRUCTIVE_INST_TYPE(0xf),
  NotDestructive                = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
  DestructiveOther              = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
  DestructiveUnary              = TSFLAG_DESTRUCTIVE_INST_TYPE(0x2),
  DestructiveBinaryImm          = TSFLAG_DESTRUCTIVE_INST_TYPE(0x3),
  DestructiveBinaryShImmUnpred  = TSFLAG_DESTRUCTIVE_INST_TYPE(0x4),
  DestructiveBinary             = TSFLAG_DESTRUCTIVE_INST_TYPE(0x5),
  DestructiveBinaryComm         = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
  DestructiveBinaryCommWithRev  = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
  DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
  DestructiveUnaryPassthru      = TSFLAG_DESTRUCTIVE_INST_TYPE(0x9),
};

/// False-lane handling field of TSFlags, bits [7,8].
/// FalseLanesMask extracts the field.
enum FalseLaneType {
  FalseLanesMask  = TSFLAG_FALSE_LANE_TYPE(0x3),
  FalseLanesZero  = TSFLAG_FALSE_LANE_TYPE(0x1),
  FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};

// Instruction flag field of TSFlags, bits [9,10].
// NOTE: This is a bit field.
static const uint64_t InstrFlagIsWhile     = TSFLAG_INSTR_FLAGS(0x1);
static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2);

/// SME matrix operand classification field of TSFlags, bits [11,13].
/// SMEMatrixTypeMask extracts the field.
enum SMEMatrixType {
  SMEMatrixTypeMask = TSFLAG_SME_MATRIX_TYPE(0x7),
  SMEMatrixNone     = TSFLAG_SME_MATRIX_TYPE(0x0),
  SMEMatrixTileB    = TSFLAG_SME_MATRIX_TYPE(0x1),
  SMEMatrixTileH    = TSFLAG_SME_MATRIX_TYPE(0x2),
  SMEMatrixTileS    = TSFLAG_SME_MATRIX_TYPE(0x3),
  SMEMatrixTileD    = TSFLAG_SME_MATRIX_TYPE(0x4),
  SMEMatrixTileQ    = TSFLAG_SME_MATRIX_TYPE(0x5),
  SMEMatrixArray    = TSFLAG_SME_MATRIX_TYPE(0x6),
};

#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
#undef TSFLAG_FALSE_LANE_TYPE
#undef TSFLAG_INSTR_FLAGS
#undef TSFLAG_SME_MATRIX_TYPE

// Opcode mapping tables — presumably TableGen-generated (see the
// AArch64GenInstrInfo.inc include above); confirm against the .inc file.
int getSVEPseudoMap(uint16_t Opcode);
int getSVERevInstr(uint16_t Opcode);
int getSVENonRevInstr(uint16_t Opcode);

int getSMEPseudoMap(uint16_t Opcode);
} // end namespace AArch64
859 
860 } // end namespace llvm
861 
862 #endif
863